From c88f772a81a99629f767229e0198a938cde2f9a5 Mon Sep 17 00:00:00 2001 From: Mathieu Lecarme Date: Thu, 30 Jan 2020 21:21:58 +0100 Subject: [PATCH 1/5] Split all the things. --- sonic/client.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/sonic/client.py b/sonic/client.py index bc6a64a..16b27ee 100644 --- a/sonic/client.py +++ b/sonic/client.py @@ -487,6 +487,21 @@ def _execute_command_async(self, cmd, *args): self.pool.release(active) return resp + def _split(self, text, bloat): + size = self.bufsize - bloat + 20 + print(self.bufsize) + if len(text) <= size: + yield text + return + while len(text) > 0: + # Split words with violence + yield text[:size] + if len(text) > size: + text = text[size:] + else: + text = '' + + class CommonCommandsMixin: """Mixin of the commands used by all sonic channels.""" @@ -532,8 +547,10 @@ def push(self, collection: str, bucket: str, object: str, text: str, lang: str=N """ lang = "LANG({})".format(lang) if lang else '' - text = quote_text(text) - return self._execute_command("PUSH", collection, bucket, object, text, lang) + for group in self._split(text, len("".join(["PUSH", collection, bucket]))): + text = quote_text(text) + resp = self._execute_command("PUSH", collection, bucket, object, text, lang) + return resp def pop(self, collection: str, bucket: str, object: str, text: str): """Pop search data from the index From dfb53fc28a2bdc11f927fea813de8b9b5c208024 Mon Sep 17 00:00:00 2001 From: Mathieu Lecarme Date: Thu, 30 Jan 2020 22:10:00 +0100 Subject: [PATCH 2/5] range is better. --- sonic/client.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/sonic/client.py b/sonic/client.py index 16b27ee..4dfecc1 100644 --- a/sonic/client.py +++ b/sonic/client.py @@ -489,17 +489,8 @@ def _execute_command_async(self, cmd, *args): def _split(self, text, bloat): size = self.bufsize - bloat + 20 - print(self.bufsize) - if len(text) <= size: - yield text - return - while len(text) > 0: - # Split words with violence - yield text[:size] - if len(text) > size: - text = text[size:] - else: - text = '' + for i in range(0, len(text), size): + yield text[i:i + size] class CommonCommandsMixin: From d078cfc71fc1f3e224c4d3469b9f6ffbd67329ab Mon Sep 17 00:00:00 2001 From: Mathieu Lecarme Date: Thu, 30 Jan 2020 23:03:17 +0100 Subject: [PATCH 3/5] Fix: use buffsize, from the server. --- sonic/client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sonic/client.py b/sonic/client.py index 4dfecc1..bccc4e2 100644 --- a/sonic/client.py +++ b/sonic/client.py @@ -118,7 +118,7 @@ def _parse_protocol_version(text): return matches[0] -def _parse_buffer_size(text): +def _parse_buffer_size(text: str) -> int: """Extracts buffering from response message Arguments: @@ -128,13 +128,13 @@ def _parse_buffer_size(text): ValueError -- Raised when s doesn't have buffering information Returns: - str -- buffering. + int -- buffer size. """ - matches = re.findall("buffer\((\w+)\)", text) + matches = re.findall("buffer\(([0-9]+)\)", text) if not matches: raise ValueError("{} doesn't contain buffer(NUMBER)".format(text)) - return matches[0] + return int(matches[0]) def _get_async_response_id(text): @@ -463,6 +463,7 @@ def _execute_command(self, cmd, *args): str|object -- result of execution """ active = self.get_active_connection() + self.bufsize = active.bufsize try: res = active._execute_command(cmd, *args) finally: From ca7cb6e52c88c4c736d8eeb64ee56d9a3790ae92 Mon Sep 17 00:00:00 2001 From: Mathieu Lecarme Date: Sat, 1 Feb 2020 19:46:46 +0100 Subject: [PATCH 4/5] Split on spaces or punctuation. --- sonic/client.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/sonic/client.py b/sonic/client.py index bccc4e2..d93a714 100644 --- a/sonic/client.py +++ b/sonic/client.py @@ -488,10 +488,31 @@ def _execute_command_async(self, cmd, *args): self.pool.release(active) return resp - def _split(self, text, bloat): - size = self.bufsize - bloat + 20 - for i in range(0, len(text), size): - yield text[i:i + size] + +def split(text: str, size: int, sep:str=" "): + if sep != " ": + text = text.translate(str.maketrans(dict((a, " ") for a in sep))) + poz = 0 + while poz < len(text): + chunk = text[poz:poz+size] + if len(chunk) < size: # this is the end + yield chunk + return + if (text[poz+size-1] not in sep) and (text[poz+size] not in sep): + x = chunk.rfind(" ") + if x != -1: # Can split on space + poz += x + yield chunk[:x] + continue + poz += size + yield chunk + + +def test_split(): + txt = "The lazy dog jump over the wizard, that's all." + groups = list(split(txt, 10, " ,;.:")) + print(groups) + assert len(groups) == 6 class CommonCommandsMixin: @@ -539,7 +560,9 @@ def push(self, collection: str, bucket: str, object: str, text: str, lang: str=N """ lang = "LANG({})".format(lang) if lang else '' - for group in self._split(text, len("".join(["PUSH", collection, bucket]))): + for group in split(text, + self.bufsize - len("".join(["PUSH", collection, bucket])), + ";.:,\n\r\t"): text = quote_text(text) resp = self._execute_command("PUSH", collection, bucket, object, text, lang) return resp @@ -740,6 +763,7 @@ def test_control(): if __name__ == "__main__": + test_split() test_ingest() test_search() test_control() From 61fecf49c4d38379a26f596d2ca23ab25711e185 Mon Sep 17 00:00:00 2001 From: Mathieu Lecarme Date: Sat, 1 Feb 2020 20:46:30 +0100 Subject: [PATCH 5/5] Fix: already translated. --- sonic/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic/client.py b/sonic/client.py index d93a714..3db3b8c 100644 --- a/sonic/client.py +++ b/sonic/client.py @@ -498,7 +498,7 @@ def split(text: str, size: int, sep:str=" "): if len(chunk) < size: # this is the end yield chunk return - if (text[poz+size-1] not in sep) and (text[poz+size] not in sep): + if (text[poz+size-1] != " ") and (text[poz+size] != " "): x = chunk.rfind(" ") if x != -1: # Can split on space poz += x