From fd04fefbc61527de1c28804d98aff9cb87cbf66b Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 3 Jul 2024 17:39:36 -0400 Subject: [PATCH] First attempt on normalization --- .../MLSentenceTransformertPostProcessor.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Plugin/StrawberryRunnersPostProcessor/MLSentenceTransformertPostProcessor.php b/src/Plugin/StrawberryRunnersPostProcessor/MLSentenceTransformertPostProcessor.php index d6775a3..9086582 100644 --- a/src/Plugin/StrawberryRunnersPostProcessor/MLSentenceTransformertPostProcessor.php +++ b/src/Plugin/StrawberryRunnersPostProcessor/MLSentenceTransformertPostProcessor.php @@ -113,17 +113,22 @@ protected function runTextMLfromJSON($io, NlpClient $nlpClient): \stdClass setlocale(LC_CTYPE, 'en_US.UTF-8'); if (isset($io->input->{$input_property})) { - $page_text = $io->input->{$input_property}->plaintext ?? NULL; + // depending on the sources of $io->input->{$input_property}. + // If generated/enqueued directly by a parent or recycled from pre-generated data found at the SBflavor storage + // this might be either an object or an array. + // So we are going to normalize here + $input_normalized = (object) $io->input->{$input_property}; + $page_text = $input_normalized->plaintext ?? NULL; if ($page_text) { $labels = []; $output->plugin = NULL; $labels = []; $ML = $this->callTextML($page_text, false); $output->searchapi['vector_384'] = isset($ML['sentence_transformer']['vector']) && is_array($ML['sentence_transformer']['vector']) && count($ML['sentence_transformer']['vector']) == 384 ? $ML['sentence_transformer']['vector'] : NULL; - $output->searchapi['metadata'] = $io->input->{$input_property}->metadata ?? []; + $output->searchapi['metadata'] = $input_normalized->metadata ?? []; $output->searchapi['service_md5'] = isset($ML['mobilenet']['modelinfo']) ? md5(json_encode($ML['mobilenet']['modelinfo'])) : NULL; $output->searchapi['plaintext'] = $page_text ?? ''; - $output->searchapi['fulltext'] = $io->input->{$input_property}->fulltext ?? []; + $output->searchapi['fulltext'] = $input_normalized->fulltext ?? []; $output->searchapi['processlang'] = $file_languages; $output->searchapi['ts'] = date("c"); $output->searchapi['label'] = $this->t("Sentence Transformer ML Text Embeddings & Vectors") . ' ' . $sequence_number;