From 1b3aa19fc8c94183d89be0ac14e4438698d8fc11 Mon Sep 17 00:00:00 2001
From: isc-lperezra
Date: Fri, 20 Dec 2024 23:09:03 +0100
Subject: [PATCH] Major change, added LLM with Ollama

---
 docker-compose.yml                          | 15 +++++-
 .../src/app/analyzer/analyzer.component.ts  |  6 +--
 iris/Dockerfile                             |  2 +-
 iris/shared/hl7/messagesa01_es.hl7          |  2 +-
 ollama/Dockerfile                           |  6 +++
 ollama/entrypoint.sh                        | 14 ++++++
 requirements.txt                            |  2 +-
 src/ENCODER/BP/AnalyzeTextProcess.cls       | 46 ++++++-------------
 src/ENCODER/Message/ConfigureRequest.cls    |  4 ++
 9 files changed, 57 insertions(+), 40 deletions(-)
 create mode 100644 ollama/Dockerfile
 create mode 100644 ollama/entrypoint.sh
 create mode 100644 src/ENCODER/Message/ConfigureRequest.cls

diff --git a/docker-compose.yml b/docker-compose.yml
index 4adf865..c5f594c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -17,6 +17,8 @@ services:
     command: --check-caps false --ISCAgent false
     mem_limit: 30G
     memswap_limit: 32G
+    depends_on:
+      - ollama

   # web gateway container
   webgateway:
@@ -49,4 +51,15 @@ services:
       - 80:80
       - 443:443
     depends_on:
-      - iris
\ No newline at end of file
+      - iris
+
+  ## llm locally installed
+  ollama:
+    build:
+      context: .
+      dockerfile: ollama/Dockerfile
+    container_name: ollama
+    volumes:
+      - ./ollama/shared:/ollama-shared
+    ports:
+      - "11434:11434"
\ No newline at end of file
diff --git a/encoder-ui/src/app/analyzer/analyzer.component.ts b/encoder-ui/src/app/analyzer/analyzer.component.ts
index bf635ab..ba53af6 100644
--- a/encoder-ui/src/app/analyzer/analyzer.component.ts
+++ b/encoder-ui/src/app/analyzer/analyzer.component.ts
@@ -53,10 +53,8 @@ export class AnalyzerComponent {
     var textHTML = this.textToAnalyze?.value;
     var textOriginal = textHTML;
     var textToProcess = this.textToAnalyze?.value.split(".").filter(Boolean);
-    var piecedTextToProcess: any[] = [];
-    for (var index in textToProcess){
-      piecedTextToProcess = piecedTextToProcess.concat(textToProcess[index].split(","))
-    }
+    var piecedTextToProcess: any[] = [textOriginal];
+
     var forReading = 100/(piecedTextToProcess.length);
     this.totalReceived = 0;
     this.error = false;
diff --git a/iris/Dockerfile b/iris/Dockerfile
index caf5b19..41561fd 100644
--- a/iris/Dockerfile
+++ b/iris/Dockerfile
@@ -6,7 +6,7 @@ USER root
 WORKDIR /opt/irisapp
 RUN chown -R irisowner:irisowner /opt/irisapp

-RUN apt-get update && apt-get install -y python3
+RUN apt-get update && apt-get install -y python3 && apt-get -y install curl

 # install required packages
 COPY --chown=$ISC_PACKAGE_MGRUSER:$ISC_PACKAGE_IRISGROUP /requirements.txt /
diff --git a/iris/shared/hl7/messagesa01_es.hl7 b/iris/shared/hl7/messagesa01_es.hl7
index a0e90ff..086d056 100644
--- a/iris/shared/hl7/messagesa01_es.hl7
+++ b/iris/shared/hl7/messagesa01_es.hl7
@@ -2,7 +2,7 @@ MSH|^~\&|HIS|HULP|EMPI||||ADT^A01|592956|P|2.5.1
 EVN|A01|
 PID|||1556655212^^^SERMAS^SN~922210^^^HULP^PI||GARCÍA PÉREZ^JUAN^^^||20150403|M|||PASEO PEDRO ÁLVAREZ 195 1 CENTRO^^LEGANÉS^MADRID^28379^SPAIN||555283055^PRN^^JUAN.GARCIA@YAHOO.COM|||||||||||||||||N|
 PV1||N
-DG1|1|||Hipertensión gestacional||A||
+DG1|1|||Tendinopatía del supraespinoso||A||


 MSH|^~\&|HIS|HULP|EMPI||||ADT^A01|628547|P|2.5.1
diff --git a/ollama/Dockerfile b/ollama/Dockerfile
new file mode 100644
index 0000000..b3da95b
--- /dev/null
+++ b/ollama/Dockerfile
@@ -0,0 +1,6 @@
+FROM ollama/ollama:latest
+
+COPY /ollama/entrypoint.sh /
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
\ No newline at end of file
diff --git a/ollama/entrypoint.sh b/ollama/entrypoint.sh
new file mode 100644
index 0000000..c6caa9b
--- /dev/null
+++ b/ollama/entrypoint.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+echo "Starting Ollama server..."
+ollama serve &
+SERVE_PID=$!
+
+echo "Waiting for Ollama server to be active..."
+while ! ollama list | grep -q 'NAME'; do
+    sleep 1
+done
+
+ollama pull llama3.2
+
+wait $SERVE_PID
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index a6e93de..c3c099a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 sentence-transformers
 numpy
 pandas
-spacy
\ No newline at end of file
+spacy
diff --git a/src/ENCODER/BP/AnalyzeTextProcess.cls b/src/ENCODER/BP/AnalyzeTextProcess.cls
index 3ca6ecd..bf8e07c 100644
--- a/src/ENCODER/BP/AnalyzeTextProcess.cls
+++ b/src/ENCODER/BP/AnalyzeTextProcess.cls
@@ -15,46 +15,28 @@ Method AnalyzeText(text As %String, analysisId As %String, language As %String)
     import sentence_transformers
     import iris
     import spacy
+    import requests

     try:
-        lematize = ''
-        if language == 'es':
-            lematize = 'es_dep_news_trf'
-        else:
-            lematize = 'en_core_web_md'
+        url = "http://ollama:11434/api/generate"
+        data = {
+            "model": "llama3.2",
+            "prompt": "Extrae en formato CSV el texto literal de los diagnósticos encontrados en el paciente del siguiente texto sin mostrar diagnósticos que no se observen ni para los que no hay hallazgos patológicos, devuelve únicamente el CSV, evita fórmulas de cortesía: "+text,
+            "stream": False
+        }
+        response = requests.post(url, json=data)
+        analyzedText = response.json()

-        iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", "Loading lematizer")
-        nlp = spacy.load(lematize)
         model = sentence_transformers.SentenceTransformer('/iris-shared/model/')
-        phrases = text.split(",")
+        phrases = analyzedText['response'].replace('"',"").split(",")
         sqlsentence = ""

         iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", "Starting process")
         for phraseToAnalyze in phrases :
             if phraseToAnalyze != "":
-                doc = nlp(phraseToAnalyze)
-                phrase = ""
-                phrases = []
-                for token in doc:
-                    if (token.pos_ == "NOUN" or token.pos_ == "ADJ" or token.pos_ == "DET"):
-                        if phrase == "":
-                            phrase = token.text
-                        else :
-                            phrase += " "+token.text
-                    else :
-                        if phrase != "" and len(phrase.split(" ")) > 1:
-                            phrases.append(phrase)
-                        phrase = ""
-                embeddings = model.encode(phrases, normalize_embeddings=True)
-                embeddingList = embeddings.tolist()
-                i = 0
-                for text in phrases:
-                    iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", text)
-                    sqlsentence = "INSERT INTO ENCODER_Object.TextMatches (CodeId, Description, Similarity, AnalysisId, RawText) SELECT TOP 50 * FROM (SELECT CodeId, Description, VECTOR_DOT_PRODUCT(VectorDescription, TO_VECTOR('"+str(embeddingList[i])+"', DECIMAL)) AS Similarity, '"+analysisId+"', '"+text+"' FROM ENCODER_Object.Codes) WHERE Similarity > 0.6 ORDER BY Similarity DESC"
-                    iris.cls("ENCODER.Utils.Manager").ExecuteInsertQuery(sqlsentence)
-                    iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", "Sentence finished")
-
-                    i = i + 1
+                embedding = model.encode(phraseToAnalyze, normalize_embeddings=True).tolist()
+                sqlsentence = "INSERT INTO ENCODER_Object.TextMatches (CodeId, Description, Similarity, AnalysisId, RawText) SELECT TOP 50 * FROM (SELECT CodeId, Description, VECTOR_DOT_PRODUCT(VectorDescription, TO_VECTOR('"+str(embedding)+"', DECIMAL)) AS Similarity, '"+analysisId+"', '"+phraseToAnalyze+"' FROM ENCODER_Object.Codes) ORDER BY Similarity DESC"
iris.cls("ENCODER.Utils.Manager").ExecuteInsertQuery(sqlsentence) + iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", sqlsentence) except Exception as err: iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", repr(err)) return repr(err) diff --git a/src/ENCODER/Message/ConfigureRequest.cls b/src/ENCODER/Message/ConfigureRequest.cls new file mode 100644 index 0000000..9e7a812 --- /dev/null +++ b/src/ENCODER/Message/ConfigureRequest.cls @@ -0,0 +1,4 @@ +Class ENCODER.Message.ConfigureRequest Extends Ens.Request +{ + Property Model As %String; +} \ No newline at end of file