Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into add-fp-filter
Browse files Browse the repository at this point in the history
  • Loading branch information
gferraro committed Jan 22, 2025
2 parents 1d6f50a + 91a18dc commit 373d992
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 4 deletions.
7 changes: 7 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,19 @@ def main():
Processor.api = API(conf.api_url, conf.user, conf.password, logger)

processors = Processors()
processors.add(
"audio",
["FINISHED"],
audio_analysis.track_analyse,
conf.audio_analysis_workers,
)
processors.add(
"audio",
["analyse", "reprocess"],
audio_analysis.process,
conf.audio_analysis_workers,
)

if conf.ir_tracking_workers > 0:
processors.add(
"irRaw",
Expand Down
86 changes: 83 additions & 3 deletions processing/audio_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,86 @@
MAX_FRQUENCY = 48000 / 2



def track_analyse(recording, jwtKey, conf):
    """Classify the tracks of an audio recording that have no automatic tag.

    Downloads the raw file, writes the recording (with its "Tracks" entry
    restricted to tracks that have no automatic tag) as JSON next to the
    download, runs the analysis command with track analysis enabled, and
    uploads one tag per (confidence, species) pair of every prediction.

    Args:
        recording: The recording to process. Its "Tracks" entry is
            overwritten with the tracks that still need an automatic tag.
        jwtKey: The JWT key to use for the API.
        conf: The configuration object.
    Returns:
        None. The recording is reported as done through the API, both on
        success and when the mimetype is unsupported.
    """

    # this used to work by default then just stopped, so will explicitly add it
    mimetypes.add_type("audio/mp4", ".m4a")

    logger = logs.worker_logger("audio.analysis", recording["id"])
    api = API(conf.api_url, conf.user, conf.password, logger)

    input_extension = mimetypes.guess_extension(recording["rawMimeType"])
    if not input_extension:
        # Unsupported mimetype. If needed more mimetypes can be added above.
        logger.error(
            "unsupported mimetype. Not processing %s", recording["rawMimeType"]
        )
        api.report_done(recording, recording["rawFileKey"], recording["rawMimeType"])
        return

    new_metadata = {"additionalMetadata": {}}
    with tempfile.TemporaryDirectory() as temp:
        input_filename = Path(temp) / ("recording" + input_extension)
        logger.debug("downloading recording to %s", input_filename)
        api.download_file(jwtKey, str(input_filename))

        # A response without a "tracks" entry is treated as "no tracks"
        # instead of failing while iterating over None.
        tracks = api.get_track_info(recording["id"]).get("tracks") or []
        recording["Tracks"] = [
            track
            for track in tracks
            if not any(tag["automatic"] for tag in track["tags"])
        ]

        # The recording description is stored beside the audio file, in the
        # folder that is handed to the analysis command.
        with input_filename.with_suffix(".txt").open("w") as f:
            json.dump(recording, f)

        analysis = analyse(input_filename, conf, analyse_tracks=True)

        # A missing or empty "species_identify" entry means nothing to tag.
        for analysis_result in analysis.get("species_identify") or []:
            model_name = analysis_result.get("model", "Unnamed")
            for i, prediction in enumerate(analysis_result["predictions"]):
                species = prediction.pop("species")
                confidences = prediction["likelihood"]
                raw_tag = None
                if not confidences and "raw_tag" in prediction:
                    # No species cleared the threshold: report the raw guess
                    # as unidentified, keeping the raw tag alongside it.
                    raw_tag = prediction["raw_tag"]
                    species = [UNIDENTIFIED]
                    confidences = [prediction["raw_confidence"]]

                for confidence, tag in zip(confidences, species):
                    track_id = analysis_result["track_id"]
                    prediction["confidence"] = confidence
                    prediction["tag"] = tag

                    master_data = {"name": "Master"}
                    if raw_tag is not None:
                        master_data["raw_tag"] = raw_tag

                    if i == 0:
                        # just add master tag for first prediction
                        api.add_track_tag(recording, track_id, prediction, master_data)

                    # Separate dict for the per-model tag so the data already
                    # sent for the master tag is never mutated. Falls back to
                    # the result-level model name when the prediction itself
                    # does not carry one.
                    model_data = {
                        **master_data,
                        "name": prediction.get("model", model_name),
                    }
                    api.add_track_tag(recording, track_id, prediction, model_data)

        api.report_done(recording, metadata=new_metadata)
        logger.info("Completed classifying for file: %s", recording["id"])



def process(recording, jwtKey, conf):
"""Process the audio file.
Expand Down Expand Up @@ -145,10 +225,10 @@ def process(recording, jwtKey, conf):
logger.info("Completed processing for file: %s", recording["id"])


def analyse(filename, conf, analyse_tracks=False):
    """Run the configured audio analysis command on a file and parse its output.

    Args:
        filename: Path of the audio file; its parent directory and file name
            are substituted into the command template.
        conf: The configuration object providing ``audio_analysis_cmd`` (a
            ``str.format`` template) and ``audio_analysis_tag``.
        analyse_tracks: Value substituted for ``{analyse_tracks}`` in the
            command template. Templates without that placeholder are
            unaffected, because ``str.format`` ignores unused keyword
            arguments.
    Returns:
        The command's standard output decoded as UTF-8 and parsed as JSON.
    """
    command = conf.audio_analysis_cmd.format(
        folder=filename.parent,
        basename=filename.name,
        tag=conf.audio_analysis_tag,
        analyse_tracks=analyse_tracks,
    )
    # NOTE(review): the command is a shell string built from the config
    # template; this relies on the file path being generated locally rather
    # than taken from untrusted input.
    with HandleCalledProcessError():
        output = subprocess.check_output(command, shell=True, stderr=subprocess.PIPE)
    return json.loads(output.decode("utf-8"))
2 changes: 1 addition & 1 deletion processing_TEMPLATE.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,6 @@ audio:
analysis_workers: 2

# The command that will be called to perform analysis on audio recordings (e.g. Cacophony Index, speech detection) using AI models
analysis_command: 'docker run --rm -v {folder}:/io cacophonyproject/audio-analysis:{tag} /io/"{basename}"'
analysis_command: 'docker run --rm -v {folder}:/io cacophonyproject/audio-analysis:{tag} /io/"{basename}" --morepork-model none --analyse-tracks {analyse_tracks}'

analysis_tag: latest

0 comments on commit 373d992

Please sign in to comment.