From 0386df5658f280b88a50933510775c72d8944ae1 Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Mon, 17 Jun 2024 22:09:45 +0100 Subject: [PATCH] refactor/alt_classifier (#502) * refactor/alt_classifier put the media type classifier behind a config flag during experimental phase, fallback to a keyword based tagging mechanism a compromise between old OCP media type handling and the new pipeline TODO - decide default value for 0.0.8 stable * split into "experimental_binary_classifier" + "experimental_media_classifier" * add tests * timeout * timeout --- .github/workflows/unit_tests.yml | 2 +- .../locale/en-us/ADKeyword.voc | 2 + .../locale/en-us/ASMRKeyword.voc | 2 + .../locale/en-us/AdultKeyword.voc | 3 + .../locale/en-us/AnimeKeyword.voc | 1 + .../locale/en-us/AudioBookKeyword.voc | 6 + .../locale/en-us/AudioDramaKeyword.voc | 2 + .../locale/en-us/AudioKeyword.voc | 2 + .../locale/en-us/BWKeyword.voc | 2 + .../locale/en-us/CartoonKeyword.voc | 2 + .../locale/en-us/ComicBookKeyword.voc | 3 + .../locale/en-us/DocumentaryKeyword.voc | 2 + .../locale/en-us/GameKeyword.voc | 1 + .../locale/en-us/HentaiKeyword.voc | 1 + .../locale/en-us/MovieKeyword.voc | 2 + .../locale/en-us/MusicKeyword.voc | 4 + .../locale/en-us/NewsKeyword.voc | 1 + .../locale/en-us/PodcastKeyword.voc | 2 + .../locale/en-us/RadioKeyword.voc | 2 + .../locale/en-us/SeriesKeyword.voc | 4 + .../locale/en-us/ShortKeyword.voc | 1 + .../locale/en-us/SilentKeyword.voc | 1 + .../locale/en-us/TVKeyword.voc | 3 + .../locale/en-us/TrailerKeyword.voc | 1 + .../locale/en-us/VideoKeyword.voc | 1 + ovos_core/intent_services/ocp_service.py | 449 ++++++++++-------- test/unittests/skills/test_ocp.py | 87 +++- translations/en-us/vocabs.json | 99 ++++ 28 files changed, 490 insertions(+), 198 deletions(-) create mode 100644 ovos_core/intent_services/locale/en-us/ADKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/ASMRKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/AdultKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/AnimeKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/AudioBookKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/AudioDramaKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/AudioKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/BWKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/CartoonKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/ComicBookKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/DocumentaryKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/GameKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/HentaiKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/MovieKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/MusicKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/NewsKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/PodcastKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/RadioKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/SeriesKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/ShortKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/SilentKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/TVKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/TrailerKeyword.voc create mode 100644 ovos_core/intent_services/locale/en-us/VideoKeyword.voc diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 9f687c7e3931..4fc749fdc1a1 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -36,7 +36,7 @@ jobs: matrix: python-version: [3.9] runs-on: ubuntu-latest - timeout-minutes: 15 + timeout-minutes: 35 steps: - uses: actions/checkout@v2 - name: Set up python ${{ matrix.python-version }} diff --git a/ovos_core/intent_services/locale/en-us/ADKeyword.voc b/ovos_core/intent_services/locale/en-us/ADKeyword.voc new file mode 100644 index 000000000000..f6cfd046cc55 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/ADKeyword.voc @@ -0,0 +1,2 @@ +audio description +movie description \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/ASMRKeyword.voc b/ovos_core/intent_services/locale/en-us/ASMRKeyword.voc new file mode 100644 index 000000000000..5115b319a683 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/ASMRKeyword.voc @@ -0,0 +1,2 @@ +ASMR +autonomous sensory meridian response \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/AdultKeyword.voc b/ovos_core/intent_services/locale/en-us/AdultKeyword.voc new file mode 100644 index 000000000000..51078337fb82 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/AdultKeyword.voc @@ -0,0 +1,3 @@ +porn +18 plus +pornography \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/AnimeKeyword.voc b/ovos_core/intent_services/locale/en-us/AnimeKeyword.voc new file mode 100644 index 000000000000..3db76df244a7 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/AnimeKeyword.voc @@ -0,0 +1 @@ +anime \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/AudioBookKeyword.voc b/ovos_core/intent_services/locale/en-us/AudioBookKeyword.voc new file mode 100644 index 000000000000..0912b34193e7 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/AudioBookKeyword.voc @@ -0,0 +1,6 @@ +audiobook +audiobooks +book +books +narrate +read \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/AudioDramaKeyword.voc b/ovos_core/intent_services/locale/en-us/AudioDramaKeyword.voc new file mode 100644 index 000000000000..ee59ead35751 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/AudioDramaKeyword.voc @@ -0,0 +1,2 @@ +drama +theatre \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/AudioKeyword.voc b/ovos_core/intent_services/locale/en-us/AudioKeyword.voc new file mode 100644 index 000000000000..158585778d32 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/AudioKeyword.voc @@ -0,0 +1,2 @@ +audio +sound \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/BWKeyword.voc b/ovos_core/intent_services/locale/en-us/BWKeyword.voc new file mode 100644 index 000000000000..1e05ea19de3b --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/BWKeyword.voc @@ -0,0 +1,2 @@ +black and white +monochrome \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/CartoonKeyword.voc b/ovos_core/intent_services/locale/en-us/CartoonKeyword.voc new file mode 100644 index 000000000000..75f627b3b6f2 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/CartoonKeyword.voc @@ -0,0 +1,2 @@ +cartoon +cartoons \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/ComicBookKeyword.voc b/ovos_core/intent_services/locale/en-us/ComicBookKeyword.voc new file mode 100644 index 000000000000..0fd3e8186af0 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/ComicBookKeyword.voc @@ -0,0 +1,3 @@ +animated comic +comic +visual story \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/DocumentaryKeyword.voc b/ovos_core/intent_services/locale/en-us/DocumentaryKeyword.voc new file mode 100644 index 000000000000..b205cc4c8fb7 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/DocumentaryKeyword.voc @@ -0,0 +1,2 @@ +documentary +documentaries \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/GameKeyword.voc b/ovos_core/intent_services/locale/en-us/GameKeyword.voc new file mode 100644 index 000000000000..2928a3cb5444 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/GameKeyword.voc @@ -0,0 +1 @@ +game \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/HentaiKeyword.voc b/ovos_core/intent_services/locale/en-us/HentaiKeyword.voc new file mode 100644 index 000000000000..a65b8097d886 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/HentaiKeyword.voc @@ -0,0 +1 @@ +hentai \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/MovieKeyword.voc b/ovos_core/intent_services/locale/en-us/MovieKeyword.voc new file mode 100644 index 000000000000..1fd81367f1ca --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/MovieKeyword.voc @@ -0,0 +1,2 @@ +movie +film \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/MusicKeyword.voc b/ovos_core/intent_services/locale/en-us/MusicKeyword.voc new file mode 100644 index 000000000000..9b95471e40e9 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/MusicKeyword.voc @@ -0,0 +1,4 @@ +music +song +soundtrack +sound track \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/NewsKeyword.voc b/ovos_core/intent_services/locale/en-us/NewsKeyword.voc new file mode 100644 index 000000000000..7ee44d966fe4 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/NewsKeyword.voc @@ -0,0 +1 @@ +news \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/PodcastKeyword.voc b/ovos_core/intent_services/locale/en-us/PodcastKeyword.voc new file mode 100644 index 000000000000..f0da502d5994 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/PodcastKeyword.voc @@ -0,0 +1,2 @@ +podcast +podcasts \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/RadioKeyword.voc b/ovos_core/intent_services/locale/en-us/RadioKeyword.voc new file mode 100644 index 000000000000..565d7b941b23 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/RadioKeyword.voc @@ -0,0 +1,2 @@ +radio +internet radio \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/SeriesKeyword.voc b/ovos_core/intent_services/locale/en-us/SeriesKeyword.voc new file mode 100644 index 000000000000..545ecca73a37 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/SeriesKeyword.voc @@ -0,0 +1,4 @@ +series +tv show +episode +episodes \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/ShortKeyword.voc b/ovos_core/intent_services/locale/en-us/ShortKeyword.voc new file mode 100644 index 000000000000..eba61a783ded --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/ShortKeyword.voc @@ -0,0 +1 @@ +short \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/SilentKeyword.voc b/ovos_core/intent_services/locale/en-us/SilentKeyword.voc new file mode 100644 index 000000000000..280a1bc2dbc2 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/SilentKeyword.voc @@ -0,0 +1 @@ +silent \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/TVKeyword.voc b/ovos_core/intent_services/locale/en-us/TVKeyword.voc new file mode 100644 index 000000000000..bfa12c8c8fee --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/TVKeyword.voc @@ -0,0 +1,3 @@ +TV +television +channel \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/TrailerKeyword.voc b/ovos_core/intent_services/locale/en-us/TrailerKeyword.voc new file mode 100644 index 000000000000..823ae2890ed6 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/TrailerKeyword.voc @@ -0,0 +1 @@ +trailer \ No newline at end of file diff --git a/ovos_core/intent_services/locale/en-us/VideoKeyword.voc b/ovos_core/intent_services/locale/en-us/VideoKeyword.voc new file mode 100644 index 000000000000..a227d2912b06 --- /dev/null +++ b/ovos_core/intent_services/locale/en-us/VideoKeyword.voc @@ -0,0 +1 @@ +video \ No newline at end of file diff --git a/ovos_core/intent_services/ocp_service.py b/ovos_core/intent_services/ocp_service.py index 7daf0b8a1c17..e5afcc865e23 100644 --- a/ovos_core/intent_services/ocp_service.py +++ b/ovos_core/intent_services/ocp_service.py @@ -468,81 +468,6 @@ class OCPPlayerProxy: media_type: MediaType = MediaType.GENERIC -class OCPFeaturizer: - # ignore_list accounts for "noise" keywords in the csv file - ocp_keywords = KeywordFeaturesVectorizer(ignore_list=["play", "stop"]) - # defined at training time - _clf_labels = ['ad_keyword', 'album_name', 'anime_genre', 'anime_name', 'anime_streaming_service', - 'artist_name', 'asmr_keyword', 'asmr_trigger', 'audio_genre', 'audiobook_narrator', - 'audiobook_streaming_service', 'book_author', 'book_genre', 'book_name', - 'bw_movie_name', 'cartoon_genre', 'cartoon_name', 'cartoon_streaming_service', - 'comic_name', 'comic_streaming_service', 'comics_genre', 'country_name', - 'documentary_genre', 'documentary_name', 'documentary_streaming_service', - 'film_genre', 'film_studio', 'game_genre', 'game_name', 'gaming_console_name', - 'generic_streaming_service', 'hentai_name', 'hentai_streaming_service', - 'media_type_adult', 'media_type_adult_audio', 'media_type_anime', 'media_type_audio', - 'media_type_audiobook', 'media_type_bts', 'media_type_bw_movie', 'media_type_cartoon', - 'media_type_documentary', 'media_type_game', 'media_type_hentai', 'media_type_movie', - 'media_type_music', 'media_type_news', 'media_type_podcast', 'media_type_radio', - 'media_type_radio_theatre', 'media_type_short_film', 'media_type_silent_movie', - 'media_type_sound', 'media_type_trailer', 'media_type_tv', 'media_type_video', - 'media_type_video_episodes', 'media_type_visual_story', 'movie_actor', - 'movie_director', 'movie_name', 'movie_streaming_service', 'music_genre', - 'music_streaming_service', 'news_provider', 'news_streaming_service', - 'play_verb_audio', 'play_verb_video', 'playback_device', 'playlist_name', - 'podcast_genre', 'podcast_name', 'podcast_streaming_service', 'podcaster', - 'porn_film_name', 'porn_genre', 'porn_streaming_service', 'pornstar_name', - 'radio_drama_actor', 'radio_drama_genre', 'radio_drama_name', 'radio_program', - 'radio_program_name', 'radio_streaming_service', 'radio_theatre_company', - 'radio_theatre_streaming_service', 'record_label', 'series_name', - 'short_film_name', 'shorts_streaming_service', 'silent_movie_name', - 'song_name', 'sound_name', 'soundtrack_keyword', 'tv_channel', 'tv_genre', - 'tv_streaming_service', 'video_genre', 'video_streaming_service', 'youtube_channel'] - - def __init__(self, base_clf=None): - self.clf_feats = None - if base_clf: - if isinstance(base_clf, str): - clf_path = f"{dirname(__file__)}/models/{base_clf}.clf" - assert os.path.isfile(clf_path) - base_clf = SklearnOVOSClassifier.from_file(clf_path) - self.clf_feats = ClassifierProbaVectorizer(base_clf) - for l in self._clf_labels: # no samples, just to ensure featurizer has right number of feats - self.ocp_keywords.register_entity(l, []) - - @classmethod - def load_csv(cls, entity_csvs: list): - for csv in entity_csvs or []: - if not os.path.isfile(csv): - # check for bundled files - if os.path.isfile(f"{dirname(__file__)}/models/{csv}"): - csv = f"{dirname(__file__)}/models/{csv}" - else: - LOG.error(f"Requested OCP entities file does not exist? {csv}") - continue - OCPFeaturizer.ocp_keywords.load_entities(csv) - LOG.info(f"Loaded OCP keywords: {csv}") - - @classproperty - def labels(cls): - """ - in V0 classifier using synth dataset - this is tied to the classifier model""" - return cls._clf_labels - - def transform(self, X): - if self.clf_feats: - vec = FeatureUnion([ - ("kw", self.ocp_keywords), - ("clf", self.clf_feats) - ]) - return vec.transform(X) - return self.ocp_keywords.transform(X) - - @classmethod - def extract_entities(cls, utterance) -> dict: - return cls.ocp_keywords._transformer.wordlist.extract(utterance) - - class OCPPipelineMatcher(OVOSAbstractApplication): intents = ["play.intent", "open.intent", "media_stop.intent", "next.intent", "prev.intent", "pause.intent", "play_favorites.intent", @@ -573,25 +498,30 @@ def __init__(self, bus=None, config=None): self.bus.emit(Message("ovos.common_play.SEI.get")) def load_classifiers(self): - # warm up the featurizer so intent matches faster (lazy loaded) if self.entity_csvs: OCPFeaturizer.load_csv(self.entity_csvs) OCPFeaturizer.extract_entities("UNLEASH THE AUTOMATONS") - b = f"{dirname(__file__)}/models" - # lang agnostic classifiers - c = SklearnOVOSClassifier.from_file(f"{b}/media_ocp_kw_small.clf") - self._media_clf = (c, OCPFeaturizer()) - c = SklearnOVOSClassifier.from_file(f"{b}/binary_ocp_kw_small.clf") - self._binary_clf = (c, OCPFeaturizer()) - - # lang specific classifiers - # (english only for now) - c = SklearnOVOSClassifier.from_file(f"{b}/media_ocp_cv2_kw_medium.clf") - self._media_en_clf = (c, OCPFeaturizer("media_ocp_cv2_medium")) - c = SklearnOVOSClassifier.from_file(f"{b}/binary_ocp_cv2_kw_medium.clf") - self._binary_en_clf = (c, OCPFeaturizer("binary_ocp_cv2_small")) + if self.config.get("experimental_binary_classifier", True): # ocp_medium + LOG.info("Using experimental OCP binary classifier") + # TODO - train a single multilingual model instead of this + b = f"{dirname(__file__)}/models" + c = SklearnOVOSClassifier.from_file(f"{b}/binary_ocp_kw_small.clf") + self._binary_clf = (c, OCPFeaturizer()) + # lang specific classifiers (english only for now) + c = SklearnOVOSClassifier.from_file(f"{b}/binary_ocp_cv2_kw_medium.clf") + self._binary_en_clf = (c, OCPFeaturizer("binary_ocp_cv2_small")) + + if self.config.get("experimental_media_classifier", True): + LOG.info("Using experimental OCP media type classifier") + # TODO - train a single multilingual model instead of this + b = f"{dirname(__file__)}/models" + c = SklearnOVOSClassifier.from_file(f"{b}/media_ocp_kw_small.clf") + self._media_clf = (c, OCPFeaturizer()) + # lang specific classifiers (english only for now) + c = SklearnOVOSClassifier.from_file(f"{b}/media_ocp_cv2_kw_medium.clf") + self._media_en_clf = (c, OCPFeaturizer("media_ocp_cv2_medium")) def load_resource_files(self): intents = {} @@ -1078,110 +1008,115 @@ def handle_search_error_intent(self, message: Message): self.ocp_api.stop() # NLP - @staticmethod - def label2media(label: str) -> MediaType: - if isinstance(label, MediaType): - return label - if label == "ad": - mt = MediaType.AUDIO_DESCRIPTION - elif label == "adult": - mt = MediaType.ADULT - elif label == "adult_asmr": - mt = MediaType.ADULT_AUDIO - elif label == "anime": - mt = MediaType.ANIME - elif label == "audio": - mt = MediaType.AUDIO - elif label == "asmr": - mt = MediaType.ASMR - elif label == "audiobook": - mt = MediaType.AUDIOBOOK - elif label == "bts": - mt = MediaType.BEHIND_THE_SCENES - elif label == "bw_movie": - mt = MediaType.BLACK_WHITE_MOVIE - elif label == "cartoon": - mt = MediaType.CARTOON - elif label == "comic": - mt = MediaType.VISUAL_STORY - elif label == "documentary": - mt = MediaType.DOCUMENTARY - elif label == "game": - mt = MediaType.GAME - elif label == "hentai": - mt = MediaType.HENTAI - elif label == "movie": - mt = MediaType.MOVIE - elif label == "music": - mt = MediaType.MUSIC - elif label == "news": - mt = MediaType.NEWS - elif label == "podcast": - mt = MediaType.PODCAST - elif label == "radio": - mt = MediaType.RADIO - elif label == "radio_drama": - mt = MediaType.RADIO_THEATRE - elif label == "series": - mt = MediaType.VIDEO_EPISODES - elif label == "short_film": - mt = MediaType.SHORT_FILM - elif label == "silent_movie": - mt = MediaType.SILENT_MOVIE - elif label == "trailer": - mt = MediaType.TRAILER - elif label == "tv_channel": - mt = MediaType.TV - elif label == "video": - mt = MediaType.VIDEO - else: - LOG.error(f"bad label {label}") - mt = MediaType.GENERIC - return mt + def voc_match_media(self, query: str, lang: str) -> Tuple[MediaType, float]: + # simplistic approach via voc_match, works anywhere + # and it's easy to localize, but isn't very accurate + if self.voc_match(query, "MusicKeyword", lang=lang): + # NOTE - before movie to handle "{movie_name} soundtrack" + return MediaType.MUSIC, 0.6 + elif self.voc_match(query, "MovieKeyword", lang=lang): + if self.voc_match(query, "ShortKeyword", lang=lang): + return MediaType.SHORT_FILM, 0.7 + elif self.voc_match(query, "SilentKeyword", lang=lang): + return MediaType.SILENT_MOVIE, 0.7 + elif self.voc_match(query, "BWKeyword", lang=lang): + return MediaType.BLACK_WHITE_MOVIE, 0.7 + return MediaType.MOVIE, 0.6 + elif self.voc_match(query, "DocumentaryKeyword", lang=lang): + return MediaType.DOCUMENTARY, 0.6 + elif self.voc_match(query, "AudioBookKeyword", lang=lang): + return MediaType.AUDIOBOOK, 0.6 + elif self.voc_match(query, "NewsKeyword", lang=lang): + return MediaType.NEWS, 0.6 + elif self.voc_match(query, "AnimeKeyword", lang=lang): + return MediaType.ANIME, 0.6 + elif self.voc_match(query, "CartoonKeyword", lang=lang): + return MediaType.CARTOON, 0.6 + elif self.voc_match(query, "PodcastKeyword", lang=lang): + return MediaType.PODCAST, 0.6 + elif self.voc_match(query, "TVKeyword", lang=lang): + return MediaType.TV, 0.6 + elif self.voc_match(query, "SeriesKeyword", lang=lang): + return MediaType.VIDEO_EPISODES, 0.6 + elif self.voc_match(query, "AudioDramaKeyword", lang=lang): + # NOTE - before "radio" to allow "radio theatre" + return MediaType.RADIO_THEATRE, 0.6 + elif self.voc_match(query, "RadioKeyword", lang=lang): + return MediaType.RADIO, 0.6 + elif self.voc_match(query, "ComicBookKeyword", lang=lang): + return MediaType.VISUAL_STORY, 0.4 + elif self.voc_match(query, "GameKeyword", lang=lang): + return MediaType.GAME, 0.4 + elif self.voc_match(query, "ADKeyword", lang=lang): + return MediaType.AUDIO_DESCRIPTION, 0.4 + elif self.voc_match(query, "ASMRKeyword", lang=lang): + return MediaType.ASMR, 0.4 + elif self.voc_match(query, "AdultKeyword", lang=lang): + if self.voc_match(query, "CartoonKeyword", lang=lang) or \ + self.voc_match(query, "AnimeKeyword", lang=lang) or \ + self.voc_match(query, "HentaiKeyword", lang=lang): + return MediaType.HENTAI, 0.4 + elif self.voc_match(query, "AudioKeyword", lang=lang) or \ + self.voc_match(query, "ASMRKeyword", lang=lang): + return MediaType.ADULT_AUDIO, 0.4 + return MediaType.ADULT, 0.4 + elif self.voc_match(query, "HentaiKeyword", lang=lang): + return MediaType.HENTAI, 0.4 + elif self.voc_match(query, "VideoKeyword", lang=lang): + return MediaType.VIDEO, 0.4 + elif self.voc_match(query, "AudioKeyword", lang=lang): + return MediaType.AUDIO, 0.4 + return MediaType.GENERIC, 0.0 def classify_media(self, query: str, lang: str) -> Tuple[MediaType, float]: """ determine what media type is being requested """ + # using a trained classifier (Experimental) + if self.config.get("experimental_media_classifier", True): + try: + if lang.startswith("en"): + clf: SklearnOVOSClassifier = self._media_en_clf[0] + featurizer: OCPFeaturizer = self._media_en_clf[1] + else: + clf: SklearnOVOSClassifier = self._media_clf[0] + featurizer: OCPFeaturizer = self._media_clf[1] + X = featurizer.transform([query]) + preds = clf.predict_labels(X)[0] + label = max(preds, key=preds.get) + prob = float(round(preds[label], 3)) + LOG.info(f"OVOSCommonPlay MediaType prediction: {label} confidence: {prob}") + LOG.debug(f" utterance: {query}") + if prob < self.config.get("classifier_threshold", 0.4): + LOG.info("ignoring MediaType classifier, low confidence prediction") + return MediaType.GENERIC, prob + else: + return OCPFeaturizer.label2media(label), prob + except: + LOG.exception(f"OCP classifier exception: {query}") + return self.voc_match_media(query, lang) - if lang.startswith("en"): - clf: SklearnOVOSClassifier = self._media_en_clf[0] - featurizer: OCPFeaturizer = self._media_en_clf[1] - else: - clf: SklearnOVOSClassifier = self._media_clf[0] - featurizer: OCPFeaturizer = self._media_clf[1] - - try: - X = featurizer.transform([query]) - preds = clf.predict_labels(X)[0] - label = max(preds, key=preds.get) - prob = float(round(preds[label], 3)) - LOG.info(f"OVOSCommonPlay MediaType prediction: {label} confidence: {prob}") - LOG.debug(f" utterance: {query}") - if prob < self.config.get("classifier_threshold", 0.4): - LOG.info("ignoring MediaType classifier, low confidence prediction") - return MediaType.GENERIC, prob - else: - return self.label2media(label), prob - except: - LOG.exception(f"OCP classifier exception: {query}") - return MediaType.GENERIC, 0.0 - - def is_ocp_query(self, query: str, lang: str) -> Tuple[MediaType, float]: + def is_ocp_query(self, query: str, lang: str) -> Tuple[bool, float]: """ determine if a playback question is being asked""" - - if lang.startswith("en"): - clf: SklearnOVOSClassifier = self._binary_en_clf[0] - featurizer: OCPFeaturizer = self._binary_en_clf[1] - else: - clf: SklearnOVOSClassifier = self._binary_clf[0] - featurizer: OCPFeaturizer = self._binary_clf[1] - - X = featurizer.transform([query]) - preds = clf.predict_labels(X)[0] - label = max(preds, key=preds.get) - prob = round(preds[label], 3) - LOG.info(f"OVOSCommonPlay prediction: {label} confidence: {prob}") - LOG.debug(f" utterance: {query}") - return label == "OCP", float(prob) + if self.config.get("experimental_binary_classifier", True): + try: + # TODO - train a single multilingual classifier + if lang.startswith("en"): + clf: SklearnOVOSClassifier = self._binary_en_clf[0] + featurizer: OCPFeaturizer = self._binary_en_clf[1] + else: + clf: SklearnOVOSClassifier = self._binary_clf[0] + featurizer: OCPFeaturizer = self._binary_clf[1] + + X = featurizer.transform([query]) + preds = clf.predict_labels(X)[0] + label = max(preds, key=preds.get) + prob = round(preds[label], 3) + LOG.info(f"OVOSCommonPlay prediction: {label} confidence: {prob}") + LOG.debug(f" utterance: {query}") + return label == "OCP", float(prob) + except: + LOG.exception("OCP binary classifier failure") + m, p = self.voc_match_media(query, lang) + return m != MediaType.GENERIC, p def _should_resume(self, phrase: str, lang: str, message: Optional[Message] = None) -> bool: """ @@ -1600,3 +1535,141 @@ def cps2media(res: dict, media_type=MediaType.GENERIC) -> Tuple[MediaEntry, dict match_confidence=res["conf"] * 100, skill_id=res["skill_id"]) return entry, res['callback_data'] + + +class OCPFeaturizer: + """used by the experimental media type classifier, + API should be considered unstable""" + # ignore_list accounts for "noise" keywords in the csv file + ocp_keywords = KeywordFeaturesVectorizer(ignore_list=["play", "stop"]) + # defined at training time + _clf_labels = ['ad_keyword', 'album_name', 'anime_genre', 'anime_name', 'anime_streaming_service', + 'artist_name', 'asmr_keyword', 'asmr_trigger', 'audio_genre', 'audiobook_narrator', + 'audiobook_streaming_service', 'book_author', 'book_genre', 'book_name', + 'bw_movie_name', 'cartoon_genre', 'cartoon_name', 'cartoon_streaming_service', + 'comic_name', 'comic_streaming_service', 'comics_genre', 'country_name', + 'documentary_genre', 'documentary_name', 'documentary_streaming_service', + 'film_genre', 'film_studio', 'game_genre', 'game_name', 'gaming_console_name', + 'generic_streaming_service', 'hentai_name', 'hentai_streaming_service', + 'media_type_adult', 'media_type_adult_audio', 'media_type_anime', 'media_type_audio', + 'media_type_audiobook', 'media_type_bts', 'media_type_bw_movie', 'media_type_cartoon', + 'media_type_documentary', 'media_type_game', 'media_type_hentai', 'media_type_movie', + 'media_type_music', 'media_type_news', 'media_type_podcast', 'media_type_radio', + 'media_type_radio_theatre', 'media_type_short_film', 'media_type_silent_movie', + 'media_type_sound', 'media_type_trailer', 'media_type_tv', 'media_type_video', + 'media_type_video_episodes', 'media_type_visual_story', 'movie_actor', + 'movie_director', 'movie_name', 'movie_streaming_service', 'music_genre', + 'music_streaming_service', 'news_provider', 'news_streaming_service', + 'play_verb_audio', 'play_verb_video', 'playback_device', 'playlist_name', + 'podcast_genre', 'podcast_name', 'podcast_streaming_service', 'podcaster', + 'porn_film_name', 'porn_genre', 'porn_streaming_service', 'pornstar_name', + 'radio_drama_actor', 'radio_drama_genre', 'radio_drama_name', 'radio_program', + 'radio_program_name', 'radio_streaming_service', 'radio_theatre_company', + 'radio_theatre_streaming_service', 'record_label', 'series_name', + 'short_film_name', 'shorts_streaming_service', 'silent_movie_name', + 'song_name', 'sound_name', 'soundtrack_keyword', 'tv_channel', 'tv_genre', + 'tv_streaming_service', 'video_genre', 'video_streaming_service', 'youtube_channel'] + + def __init__(self, base_clf=None): + self.clf_feats = None + if base_clf: + if isinstance(base_clf, str): + clf_path = f"{dirname(__file__)}/models/{base_clf}.clf" + assert os.path.isfile(clf_path) + base_clf = SklearnOVOSClassifier.from_file(clf_path) + self.clf_feats = ClassifierProbaVectorizer(base_clf) + for l in self._clf_labels: # no samples, just to ensure featurizer has right number of feats + self.ocp_keywords.register_entity(l, []) + + @classmethod + def load_csv(cls, entity_csvs: list): + for csv in entity_csvs or []: + if not os.path.isfile(csv): + # check for bundled files + if os.path.isfile(f"{dirname(__file__)}/models/{csv}"): + csv = f"{dirname(__file__)}/models/{csv}" + else: + LOG.error(f"Requested OCP entities file does not exist? {csv}") + continue + OCPFeaturizer.ocp_keywords.load_entities(csv) + LOG.info(f"Loaded OCP keywords: {csv}") + + @classproperty + def labels(cls): + """ + in V0 classifier using synth dataset - this is tied to the classifier model""" + return cls._clf_labels + + @staticmethod + def label2media(label: str) -> MediaType: + if isinstance(label, MediaType): + return label + if label == "ad": + mt = MediaType.AUDIO_DESCRIPTION + elif label == "adult": + mt = MediaType.ADULT + elif label == "adult_asmr": + mt = MediaType.ADULT_AUDIO + elif label == "anime": + mt = MediaType.ANIME + elif label == "audio": + mt = MediaType.AUDIO + elif label == "asmr": + mt = MediaType.ASMR + elif label == "audiobook": + mt = MediaType.AUDIOBOOK + elif label == "bts": + mt = MediaType.BEHIND_THE_SCENES + elif label == "bw_movie": + mt = MediaType.BLACK_WHITE_MOVIE + elif label == "cartoon": + mt = MediaType.CARTOON + elif label == "comic": + mt = MediaType.VISUAL_STORY + elif label == "documentary": + mt = MediaType.DOCUMENTARY + elif label == "game": + mt = MediaType.GAME + elif label == "hentai": + mt = MediaType.HENTAI + elif label == "movie": + mt = MediaType.MOVIE + elif label == "music": + mt = MediaType.MUSIC + elif label == "news": + mt = MediaType.NEWS + elif label == "podcast": + mt = MediaType.PODCAST + elif label == "radio": + mt = MediaType.RADIO + elif label == "radio_drama": + mt = MediaType.RADIO_THEATRE + elif label == "series": + mt = MediaType.VIDEO_EPISODES + elif label == "short_film": + mt = MediaType.SHORT_FILM + elif label == "silent_movie": + mt = MediaType.SILENT_MOVIE + elif label == "trailer": + mt = MediaType.TRAILER + elif label == "tv_channel": + mt = MediaType.TV + elif label == "video": + mt = MediaType.VIDEO + else: + LOG.error(f"bad label {label}") + mt = MediaType.GENERIC + return mt + + def transform(self, X): + if self.clf_feats: + vec = FeatureUnion([ + ("kw", self.ocp_keywords), + ("clf", self.clf_feats) + ]) + return vec.transform(X) + return self.ocp_keywords.transform(X) + + @classmethod + def extract_entities(cls, utterance) -> dict: + return cls.ocp_keywords._transformer.wordlist.extract(utterance) diff --git a/test/unittests/skills/test_ocp.py b/test/unittests/skills/test_ocp.py index 364b71a21496..bff04560e201 100644 --- a/test/unittests/skills/test_ocp.py +++ b/test/unittests/skills/test_ocp.py @@ -1,14 +1,15 @@ import os.path import unittest from unittest.mock import patch, Mock -import ovos_core.intent_services.ocp_service -from ovos_core.intent_services.ocp_service import OCPFeaturizer, OCPPipelineMatcher -from ovos_classifiers.skovos.classifier import SklearnOVOSClassifier -from ovos_classifiers.skovos.features import ClassifierProbaVectorizer, KeywordFeaturesVectorizer + +from ovos_classifiers.skovos.features import ClassifierProbaVectorizer from sklearn.pipeline import FeatureUnion -from ovos_utils.log import LOG + +import ovos_core.intent_services.ocp_service from ovos_bus_client.message import Message from ovos_core.intent_services.ocp_service import MediaType +from ovos_core.intent_services.ocp_service import OCPFeaturizer, OCPPipelineMatcher +from ovos_utils.log import LOG class TestOCPFeaturizer(unittest.TestCase): @@ -40,13 +41,82 @@ def test_transform(self, mock_transform): self.assertEqual(result, 'mock_transform_result') +class TestOCPPipelineNoClassifierMatcher(unittest.TestCase): + + def setUp(self): + config = { + "experimental_media_classifier": False, + "experimental_binary_classifier": False, + "entity_csvs": [ + os.path.dirname(ovos_core.intent_services.ocp_service.__file__) + "/models/ocp_entities_v0.csv" + ]} + self.ocp = OCPPipelineMatcher(config=config) + + def test_match_high(self): + result = self.ocp.match_high(["play metallica"], "en-us") + self.assertIsNotNone(result) + self.assertEqual(result.intent_service, 'OCP_intents') + self.assertEqual(result.intent_type, 'ocp:play') + + def test_match_high_with_invalid_input(self): + result = self.ocp.match_high(["put on some music"], "en-us") + self.assertIsNone(result) + + def test_match_medium(self): + result = self.ocp.match_medium(["put on some movie"], "en-us") + self.assertIsNotNone(result) + self.assertEqual(result.intent_service, 'OCP_media') + self.assertEqual(result.intent_type, 'ocp:play') + + def test_match_medium_with_invalid_input(self): + result = self.ocp.match_medium(["i wanna hear metallica"], "en-us") + self.assertIsNone(result) + + def test_match_fallback(self): + result = self.ocp.match_fallback(["i want music"], "en-us") + self.assertIsNotNone(result) + self.assertEqual(result.intent_service, 'OCP_fallback') + self.assertEqual(result.intent_type, 'ocp:play') + + def test_match_fallback_with_invalid_input(self): + result = self.ocp.match_fallback(["do the thing"], "en-us") + self.assertIsNone(result) + + def test_predict(self): + self.assertTrue(self.ocp.is_ocp_query("play a song", "en-us")[0]) + self.assertTrue(self.ocp.is_ocp_query("play a movie", "en-us")[0]) + self.assertTrue(self.ocp.is_ocp_query("play a podcast", "en-us")[0]) + self.assertFalse(self.ocp.is_ocp_query("tell me a joke", "en-us")[0]) + self.assertFalse(self.ocp.is_ocp_query("who are you", "en-us")[0]) + self.assertFalse(self.ocp.is_ocp_query("you suck", "en-us")[0]) + + def test_predict_prob(self): + noise = "hglisjerhksrtjhdgsf" + self.assertEqual(self.ocp.classify_media(f"play {noise} music", "en-us")[0], MediaType.MUSIC) + self.assertIsInstance(self.ocp.classify_media(f"play music {noise}", "en-us")[1], float) + self.assertEqual(self.ocp.classify_media(f"play {noise} movie soundtrack", "en-us")[0], MediaType.MUSIC) + self.assertEqual(self.ocp.classify_media(f"play movie {noise}", "en-us")[0], MediaType.MOVIE) + self.assertEqual(self.ocp.classify_media(f"play silent {noise} movie", "en-us")[0], MediaType.SILENT_MOVIE) + self.assertEqual(self.ocp.classify_media(f"play {noise} black and white movie", "en-us")[0], + MediaType.BLACK_WHITE_MOVIE) + self.assertEqual(self.ocp.classify_media(f"play short {noise} film", "en-us")[0], MediaType.SHORT_FILM) + self.assertEqual(self.ocp.classify_media(f"play cartoons {noise}", "en-us")[0], MediaType.CARTOON) + self.assertEqual(self.ocp.classify_media(f"play {noise} episode", "en-us")[0], MediaType.VIDEO_EPISODES) + self.assertEqual(self.ocp.classify_media(f"play {noise} podcast", "en-us")[0], MediaType.PODCAST) + self.assertEqual(self.ocp.classify_media(f"play {noise} book", "en-us")[0], MediaType.AUDIOBOOK) + self.assertEqual(self.ocp.classify_media(f"play radio {noise} FM", "en-us")[0], MediaType.RADIO) + self.assertEqual(self.ocp.classify_media(f"read {noise}", "en-us")[0], MediaType.AUDIOBOOK) + class TestOCPPipelineMatcher(unittest.TestCase): def setUp(self): - config = {"entity_csvs": [ - os.path.dirname(ovos_core.intent_services.ocp_service.__file__) + "/models/ocp_entities_v0.csv" - ]} + config = { + "experimental_media_classifier": True, + "experimental_binary_classifier": True, + "entity_csvs": [ + os.path.dirname(ovos_core.intent_services.ocp_service.__file__) + "/models/ocp_entities_v0.csv" + ]} self.ocp = OCPPipelineMatcher(config=config) def test_match_high(self): @@ -116,6 +186,5 @@ def test_predict_prob_with_unknown_entity(self): self.assertEqual(self.ocp.classify_media("play klownevilus", "en-us")[0], MediaType.MOVIE) - if __name__ == '__main__': unittest.main() diff --git a/translations/en-us/vocabs.json b/translations/en-us/vocabs.json index 4601444e57ed..7e12c595266e 100644 --- a/translations/en-us/vocabs.json +++ b/translations/en-us/vocabs.json @@ -6,6 +6,17 @@ "only video", "video only" ], + "AudioKeyword.voc": [ + "audio", + "sound" + ], + "HentaiKeyword.voc": [ + "hentai" + ], + "PodcastKeyword.voc": [ + "podcast", + "podcasts" + ], "Play.voc": [ "search", "read", @@ -19,10 +30,84 @@ "only sound", "sound only" ], + "NewsKeyword.voc": [ + "news" + ], + "GameKeyword.voc": [ + "game" + ], + "ADKeyword.voc": [ + "audio description", + "movie description" + ], + "TrailerKeyword.voc": [ + "trailer" + ], + "MovieKeyword.voc": [ + "movie", + "film" + ], + "CartoonKeyword.voc": [ + "cartoon", + "cartoons" + ], + "VideoKeyword.voc": [ + "video" + ], + "ASMRKeyword.voc": [ + "ASMR", + "autonomous sensory meridian response" + ], + "DocumentaryKeyword.voc": [ + "documentary", + "documentaries" + ], + "SeriesKeyword.voc": [ + "series", + "tv show", + "episode", + "episodes" + ], + "BWKeyword.voc": [ + "black and white", + "monochrome" + ], + "AdultKeyword.voc": [ + "porn", + "18 plus", + "pornography" + ], + "RadioKeyword.voc": [ + "radio", + "internet radio" + ], + "AudioDramaKeyword.voc": [ + "drama", + "theatre" + ], "Resume.voc": [ "resume", "unpause" ], + "ComicBookKeyword.voc": [ + "animated comic", + "comic", + "visual story" + ], + "TVKeyword.voc": [ + "TV", + "television", + "channel" + ], + "AnimeKeyword.voc": [ + "anime" + ], + "MusicKeyword.voc": [ + "music", + "song", + "soundtrack", + "sound track" + ], "QuestionWord.voc": [ "who", "what", @@ -31,5 +116,19 @@ "when", "question", "tell me about" + ], + "AudioBookKeyword.voc": [ + "book", + "books", + "read", + "audiobook", + "audiobooks", + "narrate" + ], + "SilentKeyword.voc": [ + "silent" + ], + "ShortKeyword.voc": [ + "short" ] }