import requests
import pandas as pd
from typing import List, Optional, Tuple
class PubMedFetcher:
    """Fetch papers from PubMed via the NCBI E-utilities REST API.

    The search query is fixed at construction time. ``fetch_papers`` runs an
    ``esearch`` request to get matching PubMed IDs; ``fetch_details`` runs an
    ``esummary`` request for those IDs.
    """

    BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    def __init__(self, query: str):
        # Query string in PubMed search syntax.
        self.query = query

    def fetch_papers(self) -> List[dict]:
        """Run an esearch request and return matching PubMed IDs.

        Returns:
            A list of dicts of the form ``{"PubmedID": "<id>"}``.
        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        url = f"{self.BASE_URL}esearch.fcgi"
        params = {
            "db": "pubmed",
            "term": self.query,
            "retmode": "xml",
            "retmax": 100,
        }
        # timeout added: requests has no default timeout and would hang forever
        # on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        return self.parse_paper_ids(response.text)

    def parse_paper_ids(self, xml_data: str) -> List[dict]:
        """Parse an esearch XML response into ``[{"PubmedID": id}, ...]``.

        esearch responses carry the IDs as ``<IdList><Id>...</Id></IdList>``.
        """
        import xml.etree.ElementTree as ET  # stdlib; local to keep module imports unchanged

        root = ET.fromstring(xml_data)
        return [{"PubmedID": el.text or ""} for el in root.findall(".//IdList/Id")]

    def fetch_details(self, paper_ids: List[str]) -> List[dict]:
        """Fetch esummary details for the given PubMed IDs.

        Returns an empty list immediately for an empty ID list instead of
        issuing a request with an empty ``id=`` parameter.
        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        if not paper_ids:
            return []
        url = f"{self.BASE_URL}esummary.fcgi"
        params = {
            "db": "pubmed",
            "id": ",".join(paper_ids),
            "retmode": "xml",
        }
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        return self.parse_details(response.text)

    def parse_details(self, xml_data: str) -> List[dict]:
        """Parse an esummary XML response into per-paper detail dicts.

        esummary responses contain one ``<DocSum>`` per paper, with an ``<Id>``
        child and ``<Item Name="...">`` children for the metadata fields.
        """
        import xml.etree.ElementTree as ET

        root = ET.fromstring(xml_data)
        papers: List[dict] = []
        for docsum in root.findall(".//DocSum"):
            title = ""
            pub_date = ""
            authors: List[str] = []
            for item in docsum.findall("Item"):
                name = item.get("Name")
                if name == "Title":
                    title = item.text or ""
                elif name == "PubDate":
                    pub_date = item.text or ""
                elif name == "AuthorList":
                    authors = [a.text or "" for a in item.findall("Item")]
            papers.append({
                "PubmedID": docsum.findtext("Id", default=""),
                "Title": title,
                "PubDate": pub_date,
                "Authors": authors,
                # esummary does not expose author emails; kept for key
                # compatibility with existing callers.
                "CorrespondingAuthorEmail": "",
            })
        return papers

    def filter_non_academic_authors(self, authors: List[str]) -> Tuple[List[str], List[str]]:
        """Split author/affiliation strings into non-academic and company lists.

        Heuristic, case-insensitive keyword matching:
          * non-academic: string contains neither "university" nor "lab";
          * company: string contains "pharma" or "biotech".
        Returns:
            ``(non_academic_authors, company_affiliations)``
        """
        non_academic_authors: List[str] = []
        company_affiliations: List[str] = []
        for author in authors:
            lowered = author.lower()  # hoisted: avoid repeated .lower() calls
            if "university" not in lowered and "lab" not in lowered:
                non_academic_authors.append(author)
            if "pharma" in lowered or "biotech" in lowered:
                company_affiliations.append(author)
        return non_academic_authors, company_affiliations

    def save_to_csv(self, papers: List[dict], filename: str) -> None:
        """Write the paper dicts to *filename* as CSV (no index column)."""
        pd.DataFrame(papers).to_csv(filename, index=False)
# cli.py
import argparse import logging from pubmed_fetcher import PubMedFetcher
def main() -> None:
    """CLI entry point: fetch PubMed papers for a query, then save or print.

    NOTE(review): the original source was truncated mid-statement here; the
    flow below (fetch IDs -> fetch details -> save to --file or print) is
    reconstructed from the visible argparse setup — confirm against the
    project's intended CLI behavior.
    """
    parser = argparse.ArgumentParser(description="Fetch research papers from PubMed.")
    parser.add_argument("query", type=str, help="Search query for PubMed.")
    parser.add_argument("-f", "--file", type=str, help="Filename to save results.")
    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug output.")
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)

    fetcher = PubMedFetcher(args.query)
    try:
        papers = fetcher.fetch_papers()
        details = fetcher.fetch_details([p["PubmedID"] for p in papers])
        if args.file:
            fetcher.save_to_csv(details, args.file)
        else:
            # No output file requested: dump results to stdout.
            for paper in details:
                print(paper)
    except Exception as e:
        # Top-level boundary: report the failure instead of a raw traceback.
        logging.error(f"Failed to fetch papers: {e}")
from typing import List, Dict, Optional
import csv import logging from Bio import Entrez
# Module-wide logging setup: INFO and above.
logging.basicConfig(level=logging.INFO)
# Contact email attached to every Entrez/E-utilities request made below.
# NOTE(review): placeholder address — replace with a real contact email.
Entrez.email = "your-email@example.com"
def fetch_pubmed_papers(query: str, max_results: int = 10) -> List[Dict]:
    """
    Fetches research papers from PubMed based on the given query.

    :param query: Search query in PubMed format.
    :param max_results: Maximum number of results to fetch.
    :return: List of papers with metadata.
    """
    try:
        # Look up the matching PubMed IDs first.
        search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
        search_record = Entrez.read(search_handle)
        search_handle.close()

        collected = []
        # Fetch and parse each article record individually.
        for pmid in search_record.get("IdList", []):
            fetch_handle = Entrez.efetch(db="pubmed", id=pmid, rettype="xml", retmode="text")
            record = Entrez.read(fetch_handle)
            fetch_handle.close()

            extracted = extract_paper_details(record)
            if extracted:
                collected.append(extracted)
        return collected
    except Exception as e:
        logging.error(f"Error fetching PubMed data: {e}")
        return []
def extract_paper_details(details) -> Optional[Dict]:
    """Extracts relevant information from a PubMed efetch record.

    :param details: Parsed Entrez record (dict-like, ``PubmedArticle`` root).
    :return: Dict of paper fields, or None if the record cannot be parsed
             (the error is logged).
    """
    try:
        citation = details["PubmedArticle"][0]["MedlineCitation"]
        article = citation["Article"]
        authors = article.get("AuthorList", [])

        non_academic_authors = []
        company_affiliations = []
        corresponding_email = None

        for author in authors:
            if "AffiliationInfo" in author:
                affiliation = author["AffiliationInfo"][0].get("Affiliation", "")
                if is_non_academic(affiliation):
                    # Join only the name parts actually present, so a missing
                    # ForeName/LastName no longer yields a stray space.
                    name_parts = (author.get("ForeName", ""), author.get("LastName", ""))
                    non_academic_authors.append(" ".join(p for p in name_parts if p))
                    company_affiliations.append(affiliation)
            # NOTE(review): PubMed usually embeds the email inside the
            # Affiliation text; an "ElectronicAddress" key may never appear in
            # Entrez author records — confirm against real responses.
            if "ElectronicAddress" in author:
                corresponding_email = author["ElectronicAddress"]

        # Bug fix: ArticleDate is frequently present but EMPTY; the old
        # `article.get("ArticleDate", [{}])[0]` raised IndexError for those
        # records and the broad except silently dropped the whole paper.
        article_dates = article.get("ArticleDate") or [{}]

        return {
            "PubmedID": citation["PMID"],
            "Title": article.get("ArticleTitle", "Unknown"),
            "Publication Date": article_dates[0].get("Year", "Unknown"),
            "Non-academic Author(s)": ", ".join(non_academic_authors),
            "Company Affiliation(s)": ", ".join(company_affiliations),
            "Corresponding Author Email": corresponding_email or "N/A",
        }
    except Exception as e:
        # Boundary handler: a malformed record should not abort the batch.
        logging.error(f"Error parsing paper details: {e}")
        return None


def is_non_academic(affiliation: str) -> bool:
    """Determines if an affiliation is non-academic.

    Case-sensitive keyword screen (matches the original behavior): any listed
    keyword in the affiliation marks it academic.
    """
    academic_keywords = ["University", "Institute", "College", "School", "Hospital", "Lab"]
    return not any(word in affiliation for word in academic_keywords)
def save_to_csv(papers: List[Dict], filename: str) -> None:
    """Saves research paper data to a CSV file.

    :param papers: Rows as dicts keyed exactly by the column names below
                   (extra keys would make ``DictWriter`` raise ValueError).
    :param filename: Destination path; overwritten if it already exists.
    """
    fieldnames = [
        "PubmedID",
        "Title",
        "Publication Date",
        "Non-academic Author(s)",
        "Company Affiliation(s)",
        "Corresponding Author Email",
    ]
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(papers)
    # Bug fix: the old message logged the literal text "(unknown)" instead of
    # the actual destination path.
    logging.info(f"Saved results to {filename}")