Skip to content

Commit

Permalink
Moved database entities into same file
Browse files Browse the repository at this point in the history
  • Loading branch information
AntounMichael committed Nov 11, 2023
1 parent fb26688 commit f8b7b6c
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 43 deletions.
37 changes: 36 additions & 1 deletion VectorDatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,4 +302,39 @@ def get_embeddings_for_pub(self, id):
texts.append(fragment.content)
embeddings.append(fragment.vector)
text_embeddings = list(zip(texts, embeddings))
return text_embeddings
return text_embeddings

# Class to represent a publication with attributes id, title, pmc, pubmed, and doi
class Publication:
    """Plain record describing a publication.

    Attributes:
        id: (DOI) unique identifier for the publication.
        title: Title of the publication.
        pmc: PubMed Central (PMC) link.
        pubmed: PubMed link.
        doi: Digital Object Identifier (DOI) link for the publication.
    """

    # Class-level defaults. Every instance overrides these in __init__; they
    # are kept so attribute access on the class itself keeps working.
    id = ""
    title = ""
    pmc = ""
    pubmed = ""
    doi = ""

    def __init__(self, id, title, pmc, pubmed, doi):
        """Store the given values on the instance, unchanged."""
        self.id = id  # (DOI) Unique identifier for the publication
        self.title = title  # Title of the publication
        self.pmc = pmc  # PubMed Central (PMC) Link
        self.pubmed = pubmed  # PubMed Link
        self.doi = doi  # Digital Object Identifier (DOI) Link for the publication

    def __repr__(self):
        """Return a constructor-like representation for logs and debugging."""
        return (
            f"{type(self).__name__}("
            f"id={self.id!r}, title={self.title!r}, pmc={self.pmc!r}, "
            f"pubmed={self.pubmed!r}, doi={self.doi!r})"
        )

# Class to represent a fragment of a publication with attributes id, header, content, and vector
class Fragment:
    """Plain record describing one fragment of a publication.

    Attributes:
        id: (DOI) unique identifier for the fragment.
        header: Header or title of the fragment.
        content: Content or text of the fragment.
        vector: Vector representation of the fragment.
    """

    # Class-level defaults. Every instance overrides these in __init__; they
    # are kept so attribute access on the class itself keeps working.
    id = ""
    header = ""
    content = ""
    vector = ""

    def __init__(self, id, header, content, vector):
        """Store the given values on the instance, unchanged."""
        self.id = id  # (DOI) Unique identifier for the fragment
        self.header = header  # Header or title of the fragment
        self.content = content  # Content or text of the fragment
        self.vector = vector  # Vector representation of the fragment

    def __repr__(self):
        """Return a compact representation for logs and debugging.

        Only the length of ``vector`` is shown, since printing every
        component would drown out the other fields.
        """
        try:
            dims = len(self.vector)
        except TypeError:
            # vector has no length (e.g. None); still produce a usable repr.
            dims = "?"
        return (
            f"{type(self).__name__}("
            f"id={self.id!r}, header={self.header!r}, "
            f"content={self.content!r}, vector=<{dims} dims>)"
        )
3 changes: 1 addition & 2 deletions analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@

from VectorDatabase import Lantern
from database_entities import Publication, Fragment
from VectorDatabase import Lantern, Publication, Fragment
from google_sheets import SheetsApiClient

from langchain.text_splitter import RecursiveCharacterTextSplitter
Expand Down
34 changes: 0 additions & 34 deletions database_entities.py

This file was deleted.

4 changes: 1 addition & 3 deletions hackathon_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
from paperscraper.pdf import save_pdf
from paperscraper.get_dumps import biorxiv

from fragment import Fragment
from publication import Publication
from VectorDatabase import Lantern
from VectorDatabase import Lantern, Fragment, Publication
import openai
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
Expand Down
4 changes: 1 addition & 3 deletions tests/test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from fragment import Fragment
from publication import Publication
from VectorDatabase import Latern
from VectorDatabase import Lantern, Fragment, Publication
from tqdm.auto import tqdm
from sentence_transformers import SentenceTransformer
import torch
Expand Down

0 comments on commit f8b7b6c

Please sign in to comment.