From e7dcd835b161aa8c9bce3f83f5bdab58e996de28 Mon Sep 17 00:00:00 2001 From: tas09009 Date: Thu, 25 Feb 2021 10:11:00 -0500 Subject: [PATCH] changed json output --- .gitignore | 16 +++++++++++++++- books/semanticSearch.py | 5 ++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 0042fed..b6d2ef3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,20 @@ +# Source: https://djangowaves.com/tips-tricks/gitignore-for-a-django-project/ + + +# Django # +*.log +*.pot +*.pyc +db.sqlite3 +media + __pycache__ **/*.pyc *.pyc __pycache__/ *.py[cod] -*.sqlite3 \ No newline at end of file +*.sqlite3 + + +# Visual Studio Code # +**/.vscode/ \ No newline at end of file diff --git a/books/semanticSearch.py b/books/semanticSearch.py index 675a0d6..2cb1886 100644 --- a/books/semanticSearch.py +++ b/books/semanticSearch.py @@ -6,6 +6,7 @@ import pickle import time import hnswlib +import json # bi-encoder model=SentenceTransformer('msmarco-distilbert-base-v2') @@ -66,7 +67,9 @@ def handleQuestion(question): # results = results.sort_values("cross_score", ascending=True) end = time.time() results.duration = end-start - return pd.merge(results,db,"inner",left_on="corpus_id", right_index=True).to_json(orient="record") + result_merge = pd.merge(results,db,"inner",left_on="corpus_id", right_index=True).to_json(orient="records") + parsed = json.loads(result_merge) + return json.dumps(parsed, indent=4) # corpus_ids, distances = index.knn_query(q_embedding, k=top_k) # hits = [{'corpus_id': id, 'score': 1-score} for id, score in zip(corpus_ids[0], distances[0])] # hits = pd.DataFrame(sorted(hits, key=lambda x: x['score'], reverse=True))