From ecec13edaf89a971a8b496ae3ae659a219148edb Mon Sep 17 00:00:00 2001
From: sekarpdkt
Date: Fri, 30 Nov 2018 16:10:14 +0530
Subject: [PATCH 1/2] Created two functions to decouple decoder configuration
 from decoding

I split `def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, grammar=None, show_all=False):` into two functions: the first returns a configured decoder, and the second performs recognition using the supplied decoder. Because the decoder is built once and then reused, decoding is faster. Tested with `threaded_workersV2.py`.

Note: I have not modified the existing function. I only added two new functions:
1. get_sphinx_decoder
2. recognize_sphinx_byDecoder
---
 speech_recognition/__init__.py | 101 +++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index c104390a..a672d428 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -743,6 +743,107 @@ def stopper(wait_for_stop=True):
         listener_thread.start()
         return stopper
 
+
+    def get_sphinx_decoder(self, language="en-US", keyword_entries=None, grammar=None):
+        """
+        Creates and returns a CMU Sphinx ``pocketsphinx.pocketsphinx.Decoder``, configured for the given ``language``, ``keyword_entries``, and ``grammar``, for use with ``recognize_sphinx_byDecoder``.
+
+        The recognition language is determined by ``language``, an RFC5646 language tag like ``"en-US"`` or ``"en-GB"``, defaulting to US English. Out of the box, only ``en-US`` is supported. See `Notes on using PocketSphinx <https://github.com/Uberi/speech_recognition/blob/master/reference/pocketsphinx.rst>`__ for information about installing other languages. This document is also included under ``reference/pocketsphinx.rst``. The ``language`` parameter can also be a tuple of filesystem paths, of the form ``(acoustic_parameters_directory, language_model_file, phoneme_dictionary_file)`` - this allows you to load arbitrary Sphinx models.
+
+        If specified, the keywords to search for are determined by ``keyword_entries``, an iterable of tuples of the form ``(keyword, sensitivity)``, where ``keyword`` is a phrase, and ``sensitivity`` is how sensitive to this phrase the recognizer should be, on a scale of 0 (very insensitive, more false negatives) to 1 (very sensitive, more false positives) inclusive. If not specified or ``None``, no keywords are used and Sphinx will simply transcribe whatever words it recognizes. Specifying ``keyword_entries`` is more accurate than just looking for those same keywords in non-keyword-based transcriptions, because Sphinx knows specifically what sounds to look for.
+
+        Sphinx can also handle FSG or JSGF grammars. The parameter ``grammar`` expects a path to the grammar file. Note that if a JSGF grammar is passed, an FSG grammar will be created at the same location to speed up execution in the next run. If ``keyword_entries`` are passed, the content of ``grammar`` will be ignored.
+
+        Returns the configured ``pocketsphinx.pocketsphinx.Decoder`` object.
+
+        Raises a ``speech_recognition.RequestError`` exception if there are any issues with the Sphinx installation.
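+
+        Example (a minimal sketch; ``r`` is assumed to be a ``Recognizer`` instance and ``audio`` an ``AudioData`` instance)::
+
+            decoder = r.get_sphinx_decoder(language="en-US")
+            text = r.recognize_sphinx_byDecoder(decoder, audio)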
+ """ + assert isinstance(language, str) or (isinstance(language, tuple) and len(language) == 3), "``language`` must be a string or 3-tuple of Sphinx data file paths of the form ``(acoustic_parameters, language_model, phoneme_dictionary)``" + assert keyword_entries is None or all(isinstance(keyword, (type(""), type(u""))) and 0 <= sensitivity <= 1 for keyword, sensitivity in keyword_entries), "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers between 0 and 1" + + # import the PocketSphinx speech recognition module + try: + from pocketsphinx import pocketsphinx, Jsgf, FsgModel + + except ImportError: + raise RequestError("missing PocketSphinx module: ensure that PocketSphinx is set up correctly.") + except ValueError: + raise RequestError("bad PocketSphinx installation; try reinstalling PocketSphinx version 0.0.9 or better.") + if not hasattr(pocketsphinx, "Decoder") or not hasattr(pocketsphinx.Decoder, "default_config"): + raise RequestError("outdated PocketSphinx installation; ensure you have PocketSphinx version 0.0.9 or better.") + + if isinstance(language, str): # directory containing language data + language_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pocketsphinx-data", language) + if not os.path.isdir(language_directory): + raise RequestError("missing PocketSphinx language data directory: \"{}\"".format(language_directory)) + acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model") + language_model_file = os.path.join(language_directory, "language-model.lm.bin") + phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict") + else: # 3-tuple of Sphinx data file paths + acoustic_parameters_directory, language_model_file, phoneme_dictionary_file = language + if not os.path.isdir(acoustic_parameters_directory): + raise RequestError("missing PocketSphinx language model parameters directory: \"{}\"".format(acoustic_parameters_directory)) + if not os.path.isfile(language_model_file): + raise RequestError("missing PocketSphinx language model file: \"{}\"".format(language_model_file)) + if not os.path.isfile(phoneme_dictionary_file): + raise RequestError("missing PocketSphinx phoneme dictionary file: \"{}\"".format(phoneme_dictionary_file)) + + # create decoder object + config = pocketsphinx.Decoder.default_config() + config.set_string("-hmm", acoustic_parameters_directory) # set the path of the hidden Markov model (HMM) parameter files + config.set_string("-lm", language_model_file) + config.set_string("-dict", phoneme_dictionary_file) + config.set_string("-logfn", os.devnull) # disable logging (logging causes unwanted output in terminal) + decoder = pocketsphinx.Decoder(config) + + # obtain recognition results + if keyword_entries is not None: # explicitly specified set of keywords + with PortableNamedTemporaryFile("w") as f: + # generate a keywords file - Sphinx documentation recommendeds sensitivities between 1e-50 and 1e-5 + f.writelines("{} /1e{}/\n".format(keyword, 100 * sensitivity - 110) for keyword, sensitivity in keyword_entries) + f.flush() + + # perform the speech recognition with the keywords file (this is inside the context manager so the file isn;t deleted until we're done) + decoder.set_kws("keywords", f.name) + decoder.set_search("keywords") + + elif grammar is not None: # a path to a FSG or JSGF grammar + if not os.path.exists(grammar): + raise ValueError("Grammar '{0}' does not exist.".format(grammar)) + grammar_path = 
+            grammar_name = os.path.splitext(os.path.basename(grammar))[0]
+            fsg_path = "{0}/{1}.fsg".format(grammar_path, grammar_name)
+            if not os.path.exists(fsg_path):  # create FSG grammar if not available
+                jsgf = Jsgf(grammar)
+                rule = jsgf.get_rule("{0}.{0}".format(grammar_name))
+                fsg = jsgf.build_fsg(rule, decoder.get_logmath(), 7.5)
+                fsg.writefile(fsg_path)
+            else:
+                fsg = FsgModel(fsg_path, decoder.get_logmath(), 7.5)
+            decoder.set_fsg(grammar_name, fsg)
+            decoder.set_search(grammar_name)
+
+        return decoder
+
+    def recognize_sphinx_byDecoder(self, decoder, audio_data, show_all=False):
+        """
+        Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using a ``decoder`` previously created by ``get_sphinx_decoder``.
+
+        Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the Sphinx ``pocketsphinx.pocketsphinx.Decoder`` object resulting from the recognition.
+
+        Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible.
+        """
+        assert isinstance(audio_data, AudioData), "``audio_data`` must be audio data"
+
+        # obtain audio data
+        raw_data = audio_data.get_raw_data(convert_rate=16000, convert_width=2)  # the included language models require audio to be 16-bit mono 16 kHz in little-endian format
+        decoder.start_utt()  # begin utterance processing
+        decoder.process_raw(raw_data, False, True)  # process audio data with recognition enabled (no_search = False), as a full utterance (full_utt = True)
+        decoder.end_utt()  # stop utterance processing
+
+        if show_all: return decoder
+
+        # return results
+        hypothesis = decoder.hyp()
+        if hypothesis is not None: return hypothesis.hypstr
+        raise UnknownValueError()  # no transcriptions available
+
+
     def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, grammar=None, show_all=False):
         """
         Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using CMU Sphinx.

From b30b6295c4592550f54d3fa3da65e5238d88b2ea Mon Sep 17 00:00:00 2001
From: sekarpdkt
Date: Fri, 30 Nov 2018 16:13:55 +0530
Subject: [PATCH 2/2] Faster decoding for sphinx by decoupling decoder creation
 from decoding

This is a POC for the two new functions.
---
 speech_recognition/threaded_workersV2.py | 89 ++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 speech_recognition/threaded_workersV2.py

diff --git a/speech_recognition/threaded_workersV2.py b/speech_recognition/threaded_workersV2.py
new file mode 100644
index 00000000..11abf4f2
--- /dev/null
+++ b/speech_recognition/threaded_workersV2.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+
+# NOTE: this example requires PyAudio because it uses the Microphone class
+
+from threading import Thread
+try:
+    from queue import Queue  # Python 3 import
+except ImportError:
+    from Queue import Queue  # Python 2 import
+
+import speech_recognition as sr
+
+
+r = sr.Recognizer()
+audio_queue = Queue()
+
+
+r.energy_threshold = 300  # minimum audio energy to consider for recording
+r.dynamic_energy_threshold = True
+r.dynamic_energy_adjustment_damping = 0.5
+r.dynamic_energy_ratio = 1.5
+r.pause_threshold = 0.05  # seconds of non-speaking audio before a phrase is considered complete
+r.operation_timeout = None  # seconds after an internal operation (e.g., an API request) starts before it times out, or ``None`` for no timeout
+
+r.phrase_threshold = 0.1  # minimum seconds of speaking audio before we consider the speaking audio a phrase - values below this are ignored (for filtering out clicks and pops)
+r.non_speaking_duration = 0.025  # seconds of non-speaking audio to keep on both sides of the recording
+
+
+myDecoder = r.get_sphinx_decoder(grammar='counting.gram')  # build the decoder once, up front
+
+j = 0
+def recognize_worker():
+    # this runs in a background thread
+    global j
+    while True:
+        audio = audio_queue.get()  # retrieve the next audio processing job from the main thread
+        if audio is None: break  # stop processing if the main thread is done
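+        # the triple-quoted string below keeps the original Google recognition example around, but disabled, for this POC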
+        """
+        # received audio data, now we'll recognize it using Google Speech Recognition
+        try:
+            # for testing purposes, we're just using the default API key
+            # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
+            # instead of `r.recognize_google(audio)`
+            print("Calling Google API")
+            print("Google Speech Recognition thinks you said " + r.recognize_google(audio))
+        except sr.UnknownValueError:
+            print("Google Speech Recognition could not understand audio")
+        except sr.RequestError as e:
+            print("Could not request results from Google Speech Recognition service; {0}".format(e))
+        """
+        try:
+            print("Sphinx recognition for \"one two three\" for counting grammar:")
+            # print(r.recognize_sphinx(audio, grammar='counting.gram'))  # old path: reconfigures a decoder on every call
+            print(r.recognize_sphinx_byDecoder(myDecoder, audio))
+        except sr.UnknownValueError:
+            print("Sphinx could not understand audio")
+        except sr.RequestError as e:
+            print("Sphinx error; {0}".format(e))
+
+        with open("microphone-results{0}.wav".format(j), "wb") as f:
+            j += 1
+            # f.write(audio.get_wav_data())  # uncomment to save each phrase to disk
+
+        audio_queue.task_done()  # mark the audio processing job as completed in the queue
+
+
+# start a new thread to recognize audio, while this thread focuses on listening
+recognize_thread = Thread(target=recognize_worker)
+recognize_thread.daemon = True
+recognize_thread.start()
+with sr.Microphone() as source:
+    try:
+        while True:  # repeatedly listen for phrases and put the resulting audio on the audio processing job queue
+            audio_queue.put(r.listen(source))
+    except KeyboardInterrupt:  # allow Ctrl + C to shut down the program
+        pass
+
+audio_queue.join()  # block until all current audio processing jobs are done
+audio_queue.put(None)  # tell the recognize_thread to stop
+recognize_thread.join()  # wait for the recognize_thread to actually stop
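
Usage sketch: a minimal example of the two-step API from PATCH 1/2, assuming the same `counting.gram` grammar file used by the POC script above:

    import speech_recognition as sr

    r = sr.Recognizer()
    decoder = r.get_sphinx_decoder(grammar='counting.gram')  # configure Sphinx once

    with sr.Microphone() as source:
        audio = r.listen(source)  # capture a single phrase

    try:
        # reuse the same decoder for every phrase; only the audio data changes
        print(r.recognize_sphinx_byDecoder(decoder, audio))
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))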