Skip to content

Commit

Permalink
241225
Browse files Browse the repository at this point in the history
  • Loading branch information
ssocean committed Dec 25, 2024
1 parent a188465 commit 5a949df
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 84 deletions.
32 changes: 0 additions & 32 deletions database/update_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,38 +445,6 @@ def insert_idLiterature_into_CoP(session):



def gpt_process(session):
    """Fill in missing GPT keywords and review flags for every PaperMapping row.

    For each row returned by ``session.query(PaperMapping).all()``:

    * When ``gpt_keywords`` is ``None``, keywords are requested from
      ``get_chatgpt_keyword``; ``.``, ``'`` and ``"`` are removed from each
      one, the first five are kept and stored comma-joined. Otherwise the
      same three characters are removed from the stored value.
    * When ``is_review`` is ``None``, ``check_PAMIreview`` is asked and the
      answer is stored as 1 for "Y" or 0 for "N". The comparison ignores
      case and surrounding whitespace; any other answer leaves the field
      untouched.

    Changes are committed row by row, a summary line is printed for each
    row, and the session is closed once all rows have been processed.

    Args:
        session: Database session exposing ``query``, ``commit`` and
            ``close`` (looks like a SQLAlchemy session -- confirm).
    """
    # One translation table replaces three chained .replace() calls.
    unwanted = str.maketrans('', '', '.\'"')
    review_flags = {'Y': 1, 'N': 0}

    for row in tqdm(session.query(PaperMapping).all()):
        if row.gpt_keywords is None:
            # NOTE(review): the result is iterated item by item, which assumes
            # a sequence of keyword strings rather than one str -- confirm
            # against get_chatgpt_keyword.
            keywords = get_chatgpt_keyword(row.title, row.abstract)
            cleaned = [keyword.translate(unwanted) for keyword in keywords]
            row.gpt_keywords = ','.join(cleaned[:5])
        else:
            row.gpt_keywords = row.gpt_keywords.translate(unwanted)
        # Commit per row; presumably so a long run keeps its progress.
        session.commit()

        if row.is_review is None:
            status = check_PAMIreview(row.title, row.abstract)
            # The former `status == ('N' or 'n')` only ever compared against
            # 'N' (and likewise 'Y'); normalise the answer instead.
            flag = review_flags.get((status or '').strip().upper())
            if flag is not None:
                row.is_review = flag
            session.commit()

        print(f'{row.title}||{row.is_review}||{row.gpt_keywords}')

    session.close()


# gpt_process(session)
# update_s2_ref(session)
Expand Down
69 changes: 17 additions & 52 deletions tools/gpt_util.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
# https://api.chatanywhere.cn
import os

import openai
from langchain.chains.llm import LLMChain
from langchain_community.chat_models.openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from openai import OpenAI
from retry import retry

from config.config import openai_key

# openai.api_base = "https://api.chatanywhere.com.cn"
# openai.api_key = openai_key
# API key and endpoint for the OpenAI-compatible service; "xxx" looks like a
# redacted placeholder that must be replaced with real values -- confirm.
API_SECRET_KEY = "xxx"
BASE_URL = "xxx"
# Export both as environment variables at import time (presumably so the
# OpenAI/LangChain client picks them up -- confirm).
os.environ["OPENAI_API_KEY"] = API_SECRET_KEY
os.environ["OPENAI_API_BASE"] = BASE_URL
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langchain_openai import ChatOpenAI

Expand All @@ -34,7 +24,7 @@ def _get_ref_list(text):
]

chat = ChatOpenAI(model="gpt-3.5-turbo")
chat.invoke(messages)


return chat.batch([messages])[0].content

Expand All @@ -52,34 +42,11 @@ def get_chatgpt_keyword(title, abstract):
]

chat = ChatOpenAI(model="gpt-3.5-turbo")
chat.invoke(messages)

return chat.batch([messages])[0].content

return chat.batch([messages])[0].content

@retry(delay=6,)
def check_PAMIreview(title, abstract):
    """Ask the chat model whether a paper is a survey/review paper.

    Builds a system prompt with positive and negative examples plus a user
    prompt containing the title and abstract, then sends them to the
    ``gpt-3.5-turbo`` chat model.

    The ``retry`` decorator re-runs the call after a 6-second delay when it
    raises (the package defaults presumably apply to the exception types and
    attempt limit -- confirm).

    Args:
        title: Paper title inserted into the prompt.
        abstract: Paper abstract inserted into the prompt.

    Returns:
        The text content of the model's reply. The prompt asks for ``Y`` or
        ``N``, but the reply is returned unvalidated.
    """
    messages = [
        SystemMessage(
            content="You are a profound researcher in the field of artificial intelligence who is good at "
                    "identifying whether a paper is a survey or review paper in the field of pattern analysis and "
                    "machine intelligence."
                    "Note that not all paper that its title contain survey or review is a review paper. "
                    "Here are some examples: 'transformers in medical image analysis: a review' is a survey paper. "
                    "'Creating a Scholarly Knowledge Graph from Survey Article Tables' is Not a survey. "
                    "'Providing Insights for Open-Response Surveys via End-to-End Context-Aware Clustering' is "
                    "Not a survey. 'sifn: a sentiment-aware interactive fusion network for review-based item "
                    "recommendation' is Not a review."
        ),
        HumanMessage(content=f'''Given title and abstract, identify whether the given paper is a review or survey paper (answer with Y or N)
Given Title: {title}
Given Abstract: {abstract}
Answer with the exact following format:Y||N'''),
    ]
    chat = ChatOpenAI(model="gpt-3.5-turbo")

    # Send the request once. A preceding chat.invoke(messages) whose result
    # was discarded has been dropped: it doubled the API cost per paper.
    return chat.batch([messages])[0].content


def get_unnum_sectitle(sectitle):
Expand All @@ -92,30 +59,26 @@ def get_unnum_sectitle(sectitle):
HumanMessage(content=f'This is the title of section, extract the title without chapter numbering(If chapter numbering exists). Answer with the following format: xxx. \n Section Title: {sectitle}'),
]
chat = ChatOpenAI(model="gpt-3.5-turbo")
chat.invoke(messages)


return chat.batch([messages])[0].content
@retry()
def get_chatgpt_field(title, abstract=None, sys_content=None, usr_prompt=None, extra_prompt=True):
def get_chatgpt_field(title, abstract=None, sys_content=None, usr_prompt=None, extra_prompt=True,model="gpt-3.5-turbo-0125",temperature=0):

if not sys_content:
sys_content = (
"You are a profound researcher who is good at identifying the topic keyword from paper's title and "
"abstract. The keyword will be used to retrieve related paper from online scholar search engines.")
"You are a profound researcher who is good at identifying the topic key phrase from paper's title and "
"abstract. Ensure that the topic key phrase precisely defines the research area within the article. For effective academic searching, such as on Google Scholar, the field should be specifically targeted rather than broadly categorized. For instance, use 'image classification' instead of the general 'computer vision' to enhance relevance and searchability of related literature.")
if not usr_prompt:
usr_prompt = (
"Identifying the topic of the paper based on the given title and abstract. So that I can use it as "
"keyword to search highly related papers from Google Scholar. Avoid using broad or overly general "
"terms like 'deep learning', 'computer vision', or 'artificial intelligence'. Instead, focus on keyword that are unique "
"and directly pertinent to the paper's subject.Answer with the word only in the"
"following format: xxx")
usr_prompt = ("Given the title and abstract below, determine the specific research field by focusing on the main application area and the key technology. You MUST respond with the keyword ONLY in this format: xxx")

messages = [SystemMessage(content=sys_content)]

extra_abs_content = '''
Given Title: A Survey of Self-Supervised and Few-Shot Object Detection
Given Abstract: Labeling data is often expensive and time-consuming, especially for tasks such as object detection and instance segmentation, which require dense labeling of the image. While few-shot object detection is about training a model on novel(unseen) object classeswith little data, it still requires prior training onmany labeled examples of base(seen) classes. On the other hand, self-supervisedmethods aimat learning representations fromunlabeled data which transfer well to downstream tasks such as object detection. Combining few-shot and self-supervised object detection is a promising research direction. In this survey, we reviewand characterize themost recent approaches on few-shot and self-supervised object detection. Then, we give our main takeaways and discuss future research directions. Project page: https://gabrielhuang.github.io/fsod-survey/''' if abstract else ''
Given Title: Large Selective Kernel Network for Remote Sensing Object Detection
Given Abstract: Recent research on remote sensing object detection has largely focused on improving the representation of oriented bounding boxes but has overlooked the unique prior knowledge presented in remote sensing scenarios. Such prior knowledge can be useful because tiny remote sensing objects may be mistakenly detected without referencing a sufficiently long-range context, which can vary for different objects. This paper considers these priors and proposes the lightweight Large Selective Kernel Network (LSKNet). LSKNet can dynamically adjust its large spatial receptive field to better model the ranging context of various objects in remote sensing scenarios. To our knowledge, large and selective kernel mechanisms have not been previously explored in remote sensing object detection. Without bells and whistles, our lightweight LSKNet sets new state-of-the-art scores on standard benchmarks, i.e., HRSC2016 (98.46% mAP), DOTA-v1.0 (81.85% mAP), and FAIR1M-v1.0 (47.87% mAP).''' if abstract else ''
if extra_prompt:
messages += [HumanMessage(content=f'''{usr_prompt}\n\n{extra_abs_content}'''), AIMessage(content='few-shot objection detection')]
messages += [HumanMessage(content=f'''{usr_prompt}\n\n{extra_abs_content}'''), AIMessage(content='remote sensing object detection')]

content = f'''{usr_prompt}
Given Title: {title}
Expand All @@ -124,12 +87,14 @@ def get_chatgpt_field(title, abstract=None, sys_content=None, usr_prompt=None, e
content += f'Given Abstract: {abstract}'
messages.append(HumanMessage(content=content))

chat = ChatOpenAI(model="gpt-3.5-turbo")
chat.invoke(messages)
chat = ChatOpenAI(model=model,temperature=temperature)



return chat.batch([messages])[0].content
@retry()
def get_chatgpt_fields(title, abstract, extra_prompt=True,sys_content=None,usr_prompt=None):

if not sys_content:
sys_content = ("You are a profound researcher who is good at conduct a literature review based on given title and abstract.")
if not usr_prompt:
Expand Down Expand Up @@ -168,7 +133,7 @@ def get_chatgpt_fields(title, abstract, extra_prompt=True,sys_content=None,usr_p
Given Abstract: {abstract}
''')]
chat = ChatOpenAI(model="gpt-3.5-turbo")
chat.invoke(messages)


return chat.batch([messages])[0].content

Expand Down Expand Up @@ -215,7 +180,7 @@ def extract_keywords_from_article_with_gpt(text):
The text of the first page:{text}''')
]
chat = ChatOpenAI(model="gpt-3.5-turbo")
chat.invoke(messages)


return chat.batch([messages])[0].content

0 comments on commit 5a949df

Please sign in to comment.