Merge remote-tracking branch 'origin/master' into add-fp-filter

TheCacophonyProject · Jan 22, 2025 · 373d992 · 373d992
2 parents 1d6f50a + 91a18dc
commit 373d992
Show file tree

Hide file tree

Showing 3 changed files with 91 additions and 4 deletions.
diff --git a/main.py b/main.py
@@ -106,12 +106,19 @@ def main():
     Processor.api = API(conf.api_url, conf.user, conf.password, logger)
 
     processors = Processors()
+    processors.add(
+        "audio",
+        ["FINISHED"],
+        audio_analysis.track_analyse,
+        conf.audio_analysis_workers,
+    )
     processors.add(
         "audio",
         ["analyse", "reprocess"],
         audio_analysis.process,
         conf.audio_analysis_workers,
     )
+
     if conf.ir_tracking_workers > 0:
         processors.add(
             "irRaw",

diff --git a/processing/audio_analysis.py b/processing/audio_analysis.py
@@ -31,6 +31,86 @@
 MAX_FRQUENCY = 48000 / 2
 
 
+
+def track_analyse(recording, jwtKey, conf):
+    """Classify the tracks of an audio recording that have no automatic tag.
+
+    Downloads the recording, writes the recording metadata (with the
+    remaining untagged tracks stored under "Tracks") to a .txt file next to
+    it, runs the analysis command with analyse_tracks=True and uploads the
+    returned predictions as track tags.
+
+    Args:
+        recording: The recording to process. "id", "rawMimeType" and
+            "rawFileKey" are read here, and "Tracks" is written to it.
+        jwtKey: The JWT key passed to api.download_file to fetch the file.
+        conf: The configuration object (api_url, user and password are read
+            here; it is also passed on to analyse()).
+
+    Returns:
+        None. The recording is reported as done through the API on both the
+        unsupported-mimetype path and the normal path.
+    """
+
+    # This mapping used to be registered by default and then stopped working,
+    # so add it explicitly.
+    mimetypes.add_type("audio/mp4", ".m4a")
+
+    logger = logs.worker_logger("audio.analysis", recording["id"])
+
+    api = API(conf.api_url, conf.user, conf.password, logger)
+
+    # guess_extension returns None when the mimetype is not known.
+    input_extension = mimetypes.guess_extension(recording["rawMimeType"])
+
+    if not input_extension:
+        # Unsupported mimetype. If needed more mimetypes can be added above.
+        logger.error(
+            "unsupported mimetype. Not processing %s", recording["rawMimeType"]
+        )
+        # Still report the recording as done so it is not left pending,
+        # handing back the raw file key and mimetype unchanged.
+        api.report_done(recording, recording["rawFileKey"], recording["rawMimeType"])
+        return
+    # NOTE(review): nothing below adds to this dict, so report_done always
+    # receives an empty "additionalMetadata" - confirm this is intended.
+    new_metadata = {"additionalMetadata": {}}
+    # The download and the metadata file live in a temporary directory that
+    # is removed when the with-block exits.
+    with tempfile.TemporaryDirectory() as temp:
+        temp_path = Path(temp)
+        input_filename = temp_path / ("recording" + input_extension)
+        logger.debug("downloading recording to %s", input_filename)
+
+        api.download_file(jwtKey, str(input_filename))
+        # NOTE(review): .get("tracks") gives None if the key is absent, which
+        # would make the comprehension below raise - verify the API contract.
+        track_info = api.get_track_info(recording["id"]).get("tracks")
+        # Keep only the tracks that have no tag flagged as "automatic".
+        track_info = [ t for t in track_info if not any(tag for tag in t["tags"] if tag["automatic"])]
+        recording["Tracks"] = track_info
+        # Write the recording metadata as JSON beside the audio file (same
+        # name, .txt suffix); presumably the analysis command reads it from
+        # the same folder - TODO confirm.
+        filename = input_filename.with_suffix(".txt")
+        with filename.open("w") as f:
+            json.dump(recording, f)
+
+
+        analysis = analyse(input_filename, conf,analyse_tracks=True)
+        # NOTE(review): direct indexing raises KeyError if the analysis
+        # output has no "species_identify" entry.
+        if analysis["species_identify"]:
+            species_identify = analysis.pop("species_identify")
+            for analysis_result in species_identify:
+                # NOTE(review): model_name is not used anywhere below.
+                model_name = analysis_result.get("model", "Unnamed")
+                predictions = analysis_result["predictions"]
+                for i, prediction in enumerate(predictions):
+                    species = prediction["species"]
+                    confidences = prediction["likelihood"]
+                    # The prediction dict is reused as the tag payload, so
+                    # the species list is removed from it before upload.
+                    del prediction["species"]
+                    raw_tag = None
+                    # No likelihoods but a raw tag is available: fall back to
+                    # a single UNIDENTIFIED tag carrying the raw confidence.
+                    if len(confidences) == 0 and "raw_tag" in prediction:
+                        raw_tag = prediction["raw_tag"]
+                        species = [UNIDENTIFIED]
+                        confidences = [prediction["raw_confidence"]]
+
+                    # One tag is uploaded per (confidence, species) pair; the
+                    # same prediction dict is updated in place on each pass.
+                    for confidence, s in zip(confidences, species):
+                        prediction["confidence"] = confidence
+                        prediction["tag"] = s
+                        data = {"name": "Master"}
+                        if raw_tag is not None:
+                            data["raw_tag"] = raw_tag
+
+                        if i == 0:
+                            # just add master tag for first prediction
+                            api.add_track_tag(recording, analysis_result["track_id"], prediction, data)
+                        # Every prediction is also uploaded under the name
+                        # stored in prediction["model"], so the first one is
+                        # sent twice ("Master" and the model name).
+                        data["name"] = prediction["model"]
+                        api.add_track_tag(recording, analysis_result["track_id"], prediction, data)
+
+    # Reached only on the normal path, after the temporary directory has been
+    # cleaned up.
+    api.report_done(recording, metadata=new_metadata)
+    logger.info("Completed classifying for file: %s", recording["id"])
+
+
+
 def process(recording, jwtKey, conf):
     """Process the audio file.
 
@@ -145,10 +225,10 @@ def process(recording, jwtKey, conf):
     logger.info("Completed processing for file: %s", recording["id"])
 
 
-def analyse(filename, conf):
+def analyse(filename, conf,analyse_tracks=False):
     command = conf.audio_analysis_cmd.format(
-        folder=filename.parent, basename=filename.name, tag=conf.audio_analysis_tag
+        folder=filename.parent, basename=filename.name, tag=conf.audio_analysis_tag,analyse_tracks=analyse_tracks
     )
     with HandleCalledProcessError():
         output = subprocess.check_output(command, shell=True, stderr=subprocess.PIPE)
-    return json.loads(output.decode("utf-8"))
+    return json.loads(output.decode("utf-8"))
diff --git a/processing_TEMPLATE.yaml b/processing_TEMPLATE.yaml
@@ -66,6 +66,6 @@ audio:
     analysis_workers: 2
 
     # The command will be called to perform analysis on audio recordings (e.g. Cacophony Index, speech detection) using AI models
-    analysis_command: 'docker run --rm -v {folder}:/io cacophonyproject/audio-analysis:{tag} /io/"{basename}"'
+    analysis_command: 'docker run --rm -v {folder}:/io cacophonyproject/audio-analysis:{tag} /io/"{basename}"  --morepork-model none --analyse-tracks {analyse_tracks}'
 
     analysis_tag: latest