From 1b3aa19fc8c94183d89be0ac14e4438698d8fc11 Mon Sep 17 00:00:00 2001
From: isc-lperezra
Date: Fri, 20 Dec 2024 23:09:03 +0100
Subject: [PATCH] Major change, added LLM with Ollama

---
 docker-compose.yml                          | 15 +++++-
 .../src/app/analyzer/analyzer.component.ts  |  6 +--
 iris/Dockerfile                             |  2 +-
 iris/shared/hl7/messagesa01_es.hl7          |  2 +-
 ollama/Dockerfile                           |  6 +++
 ollama/entrypoint.sh                        | 14 ++++++
 requirements.txt                            |  2 +-
 src/ENCODER/BP/AnalyzeTextProcess.cls       | 46 ++++++-------------
 src/ENCODER/Message/ConfigureRequest.cls    |  4 ++
 9 files changed, 57 insertions(+), 40 deletions(-)
 create mode 100644 ollama/Dockerfile
 create mode 100644 ollama/entrypoint.sh
 create mode 100644 src/ENCODER/Message/ConfigureRequest.cls

diff --git a/docker-compose.yml b/docker-compose.yml
index 4adf865..c5f594c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -17,6 +17,8 @@ services:
     command: --check-caps false --ISCAgent false
     mem_limit: 30G
     memswap_limit: 32G
+    depends_on:
+      - ollama

   # web gateway container
   webgateway:
@@ -49,4 +51,15 @@ services:
       - 80:80
       - 443:443
     depends_on:
-      - iris
\ No newline at end of file
+      - iris
+
+  ## llm locally installed
+  ollama:
+    build:
+      context: .
+      dockerfile: ollama/Dockerfile
+    container_name: ollama
+    volumes:
+      - ./ollama/shared:/ollama-shared
+    ports:
+      - "11434:11434"
\ No newline at end of file
diff --git a/encoder-ui/src/app/analyzer/analyzer.component.ts b/encoder-ui/src/app/analyzer/analyzer.component.ts
index bf635ab..ba53af6 100644
--- a/encoder-ui/src/app/analyzer/analyzer.component.ts
+++ b/encoder-ui/src/app/analyzer/analyzer.component.ts
@@ -53,10 +53,8 @@ export class AnalyzerComponent {
     var textHTML = this.textToAnalyze?.value;
     var textOriginal = textHTML;
     var textToProcess = this.textToAnalyze?.value.split(".").filter(Boolean);
-    var piecedTextToProcess: any[] = [];
-    for (var index in textToProcess){
-      piecedTextToProcess = piecedTextToProcess.concat(textToProcess[index].split(","))
-    }
+    var piecedTextToProcess: any[] = [textOriginal];
+
     var forReading = 100/(piecedTextToProcess.length);
     this.totalReceived = 0;
     this.error = false;
diff --git a/iris/Dockerfile b/iris/Dockerfile
index caf5b19..41561fd 100644
--- a/iris/Dockerfile
+++ b/iris/Dockerfile
@@ -6,7 +6,7 @@ USER root
 WORKDIR /opt/irisapp
 RUN chown -R irisowner:irisowner /opt/irisapp

-RUN apt-get update && apt-get install -y python3
+RUN apt-get update && apt-get install -y python3 && apt-get -y install curl

 # install required packages
 COPY --chown=$ISC_PACKAGE_MGRUSER:$ISC_PACKAGE_IRISGROUP /requirements.txt /
diff --git a/iris/shared/hl7/messagesa01_es.hl7 b/iris/shared/hl7/messagesa01_es.hl7
index a0e90ff..086d056 100644
--- a/iris/shared/hl7/messagesa01_es.hl7
+++ b/iris/shared/hl7/messagesa01_es.hl7
@@ -2,7 +2,7 @@ MSH|^~\&|HIS|HULP|EMPI||||ADT^A01|592956|P|2.5.1
 EVN|A01|
 PID|||1556655212^^^SERMAS^SN~922210^^^HULP^PI||GARCÍA PÉREZ^JUAN^^^||20150403|M|||PASEO PEDRO ÁLVAREZ 195 1 CENTRO^^LEGANÉS^MADRID^28379^SPAIN||555283055^PRN^^JUAN.GARCIA@YAHOO.COM|||||||||||||||||N|
 PV1||N
-DG1|1|||Hipertensión gestacional||A||
+DG1|1|||Tendinopatía del supraespinoso||A||


 MSH|^~\&|HIS|HULP|EMPI||||ADT^A01|628547|P|2.5.1
diff --git a/ollama/Dockerfile b/ollama/Dockerfile
new file mode 100644
index 0000000..b3da95b
--- /dev/null
+++ b/ollama/Dockerfile
@@ -0,0 +1,6 @@
+FROM ollama/ollama:latest
+
+COPY /ollama/entrypoint.sh /
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
\ No newline at end of file
diff --git a/ollama/entrypoint.sh b/ollama/entrypoint.sh
new file mode 100644
index 0000000..c6caa9b
--- /dev/null
+++ b/ollama/entrypoint.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+echo "Starting Ollama server..."
+ollama serve &
+SERVE_PID=$!
+
+echo "Waiting for Ollama server to be active..."
+while ! ollama list | grep -q 'NAME'; do
+    sleep 1
+done
+
+ollama pull llama3.2
+
+wait $SERVE_PID
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index a6e93de..c3c099a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 sentence-transformers
 numpy
 pandas
-spacy
\ No newline at end of file
+spacy
diff --git a/src/ENCODER/BP/AnalyzeTextProcess.cls b/src/ENCODER/BP/AnalyzeTextProcess.cls
index 3ca6ecd..bf8e07c 100644
--- a/src/ENCODER/BP/AnalyzeTextProcess.cls
+++ b/src/ENCODER/BP/AnalyzeTextProcess.cls
@@ -15,46 +15,28 @@ Method AnalyzeText(text As %String, analysisId As %String, language As %String)
     import sentence_transformers
     import iris
     import spacy
+    import requests

     try:
-        lematize = ''
-        if language == 'es':
-            lematize = 'es_dep_news_trf'
-        else:
-            lematize = 'en_core_web_md'
+        url = "http://ollama:11434/api/generate"
+        data = {
+            "model": "llama3.2",
+            "prompt": "Extrae en formato CSV el texto literal de los diagnósticos encontrados en el paciente del siguiente texto sin mostrar diagnósticos que no se observen ni para los que no hay hallazgos patológicos, devuelve únicamente el CSV, evita fórmulas de cortesía: "+text,
+            "stream": False
+        }
+        response = requests.post(url, json=data)
+        analyzedText = response.json()

-        iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", "Loading lematizer")
-        nlp = spacy.load(lematize)
         model = sentence_transformers.SentenceTransformer('/iris-shared/model/')
-        phrases = text.split(",")
+        phrases = analyzedText['response'].replace('"',"").split(",")
         sqlsentence = ""

         iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", "Starting process")
         for phraseToAnalyze in phrases :
             if phraseToAnalyze != "":
-                doc = nlp(phraseToAnalyze)
-                phrase = ""
-                phrases = []
-                for token in doc:
-                    if (token.pos_ == "NOUN" or token.pos_ == "ADJ" or token.pos_ == "DET"):
-                        if phrase == "":
-                            phrase = token.text
-                        else :
-                            phrase += " "+token.text
-                    else :
-                        if phrase != "" and len(phrase.split(" ")) > 1:
-                            phrases.append(phrase)
-                        phrase = ""
-                embeddings = model.encode(phrases, normalize_embeddings=True)
-                embeddingList = embeddings.tolist()
-                i = 0
-                for text in phrases:
-                    iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", text)
-                    sqlsentence = "INSERT INTO ENCODER_Object.TextMatches (CodeId, Description, Similarity, AnalysisId, RawText) SELECT TOP 50 * FROM (SELECT CodeId, Description, VECTOR_DOT_PRODUCT(VectorDescription, TO_VECTOR('"+str(embeddingList[i])+"', DECIMAL)) AS Similarity, '"+analysisId+"', '"+text+"' FROM ENCODER_Object.Codes) WHERE Similarity > 0.6 ORDER BY Similarity DESC"
-                    iris.cls("ENCODER.Utils.Manager").ExecuteInsertQuery(sqlsentence)
-                    iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", "Sentence finished")
-
-                    i = i + 1
+                embedding = model.encode(phraseToAnalyze, normalize_embeddings=True).tolist()
+                sqlsentence = "INSERT INTO ENCODER_Object.TextMatches (CodeId, Description, Similarity, AnalysisId, RawText) SELECT TOP 50 * FROM (SELECT CodeId, Description, VECTOR_DOT_PRODUCT(VectorDescription, TO_VECTOR('"+str(embedding)+"', DECIMAL)) AS Similarity, '"+analysisId+"', '"+phraseToAnalyze+"' FROM ENCODER_Object.Codes) ORDER BY Similarity DESC"
iris.cls("ENCODER.Utils.Manager").ExecuteInsertQuery(sqlsentence) + iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", sqlsentence) except Exception as err: iris.cls("Ens.Util.Log").LogInfo("ENCODER.BP.AnalyzeTextProcess", "AnalyzeText", repr(err)) return repr(err) diff --git a/src/ENCODER/Message/ConfigureRequest.cls b/src/ENCODER/Message/ConfigureRequest.cls new file mode 100644 index 0000000..9e7a812 --- /dev/null +++ b/src/ENCODER/Message/ConfigureRequest.cls @@ -0,0 +1,4 @@ +Class ENCODER.Message.ConfigureRequest Extends Ens.Request +{ + Property Model As %String; +} \ No newline at end of file