Siddhi5826/README.md

pubmed_fetcher.py

```python
import xml.etree.ElementTree as ET
from typing import List, Tuple

import pandas as pd
import requests


class PubMedFetcher:
    """Fetch papers from PubMed via the NCBI E-utilities API."""

    BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    def __init__(self, query: str):
        self.query = query

    def fetch_papers(self) -> List[str]:
        """Fetch PubMed IDs matching the query via the esearch endpoint."""
        url = f"{self.BASE_URL}esearch.fcgi"
        params = {
            "db": "pubmed",
            "term": self.query,
            "retmode": "xml",
            "retmax": 100
        }
        response = requests.get(url, params=params)
        response.raise_for_status()
        return self.parse_paper_ids(response.text)

    def parse_paper_ids(self, xml_data: str) -> List[str]:
        """Parse the esearch XML response to extract PubMed IDs."""
        # esearch returns <eSearchResult><IdList><Id>...</Id>...</IdList></eSearchResult>
        root = ET.fromstring(xml_data)
        return [id_elem.text for id_elem in root.findall(".//IdList/Id")]

    def fetch_details(self, paper_ids: List[str]) -> List[dict]:
        """Fetch summary information for each paper via the esummary endpoint."""
        ids = ",".join(paper_ids)
        url = f"{self.BASE_URL}esummary.fcgi"
        params = {
            "db": "pubmed",
            "id": ids,
            "retmode": "xml"
        }
        response = requests.get(url, params=params)
        response.raise_for_status()
        return self.parse_details(response.text)

    def parse_details(self, xml_data: str) -> List[dict]:
        """Parse the esummary XML response to extract paper details."""
        root = ET.fromstring(xml_data)
        papers = []
        for docsum in root.findall("DocSum"):
            items = {item.get("Name"): item for item in docsum.findall("Item")}
            author_list = items.get("AuthorList")
            authors = [a.text for a in author_list.findall("Item")] if author_list is not None else []
            papers.append({
                "PubmedID": docsum.findtext("Id"),
                "Title": items["Title"].text if "Title" in items else "",
                "PubDate": items["PubDate"].text if "PubDate" in items else "",
                "Authors": authors,
                # esummary does not expose author emails; an efetch of the full record would be needed.
                "CorrespondingAuthorEmail": ""
            })
        return papers

    def filter_non_academic_authors(self, authors: List[str]) -> Tuple[List[str], List[str]]:
        """Filter authors to find non-academic affiliations."""
        non_academic_authors = []
        company_affiliations = []
        for author in authors:
            if "university" not in author.lower() and "lab" not in author.lower():
                non_academic_authors.append(author)
                # Identify likely companies by keywords in the affiliation string.
                if "pharma" in author.lower() or "biotech" in author.lower():
                    company_affiliations.append(author)
        return non_academic_authors, company_affiliations

    def save_to_csv(self, papers: List[dict], filename: str) -> None:
        """Save the papers to a CSV file."""
        df = pd.DataFrame(papers)
        df.to_csv(filename, index=False)
```
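
A minimal usage sketch of the class above; the query string and output filename are only examples, and the class is assumed to live in pubmed_fetcher.py:

```python
# Illustrative usage; assumes the PubMedFetcher class above is saved as pubmed_fetcher.py.
from pubmed_fetcher import PubMedFetcher

fetcher = PubMedFetcher("cancer immunotherapy AND 2023[pdat]")
paper_ids = fetcher.fetch_papers()          # esearch: query -> list of PubMed IDs
details = fetcher.fetch_details(paper_ids)  # esummary: IDs -> list of paper dicts
fetcher.save_to_csv(details, "papers.csv")
```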

cli.py

```python
import argparse
import logging

from pubmed_fetcher import PubMedFetcher

def main():
    parser = argparse.ArgumentParser(description="Fetch research papers from PubMed.")
    parser.add_argument("query", type=str, help="Search query for PubMed.")
    parser.add_argument("-f", "--file", type=str, help="Filename to save results.")
    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug output.")
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)

    fetcher = PubMedFetcher(args.query)
    try:
        paper_ids = fetcher.fetch_papers()
        details = fetcher.fetch_details(paper_ids)
        if args.file:
            fetcher.save_to_csv(details, args.file)
        else:
            print(details)
    except Exception as e:
        logging.error(f"Error while fetching papers: {e}")

if __name__ == "__main__":
    main()
```
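
For a quick check, the CLI above can be exercised from Python by faking sys.argv (the query and filename are placeholders); this mirrors running `python cli.py "cancer immunotherapy" -f results.csv -d` from a shell:

```python
# Hypothetical smoke test for the cli.py sketch above; arguments are placeholders.
import sys
from cli import main

sys.argv = ["cli.py", "cancer immunotherapy", "--file", "results.csv", "--debug"]
main()
```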
An alternative implementation built on Biopython's Entrez interface:

```python
import csv
import logging
from typing import List, Dict, Optional

from Bio import Entrez

# Configure logging
logging.basicConfig(level=logging.INFO)

# Set your email for the Entrez API (NCBI requires it)
Entrez.email = "your-email@example.com"

def fetch_pubmed_papers(query: str, max_results: int = 10) -> List[Dict]:
    """
    Fetches research papers from PubMed based on the given query.

    :param query: Search query in PubMed format.
    :param max_results: Maximum number of results to fetch.
    :return: List of papers with metadata.
    """
    try:
        handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
        record = Entrez.read(handle)
        handle.close()
        pubmed_ids = record.get("IdList", [])

        papers = []
        for pubmed_id in pubmed_ids:
            handle = Entrez.efetch(db="pubmed", id=pubmed_id, rettype="xml", retmode="text")
            details = Entrez.read(handle)
            handle.close()

            paper_info = extract_paper_details(details)
            if paper_info:
                papers.append(paper_info)

        return papers
    except Exception as e:
        logging.error(f"Error fetching PubMed data: {e}")
        return []

def extract_paper_details(details) -> Optional[Dict]:
    """Extracts relevant information from a PubMed entry."""
    try:
        article = details["PubmedArticle"][0]["MedlineCitation"]["Article"]
        authors = article.get("AuthorList", [])

        non_academic_authors = []
        company_affiliations = []
        corresponding_email = None

        for author in authors:
            if "AffiliationInfo" in author:
                affiliation = author["AffiliationInfo"][0].get("Affiliation", "")
                if is_non_academic(affiliation):
                    non_academic_authors.append(author.get("ForeName", "") + " " + author.get("LastName", ""))
                    company_affiliations.append(affiliation)
            if "ElectronicAddress" in author:
                corresponding_email = author["ElectronicAddress"]

        return {
            "PubmedID": details["PubmedArticle"][0]["MedlineCitation"]["PMID"],
            "Title": article.get("ArticleTitle", "Unknown"),
            "Publication Date": (article.get("ArticleDate") or [{}])[0].get("Year", "Unknown"),
            "Non-academic Author(s)": ", ".join(non_academic_authors),
            "Company Affiliation(s)": ", ".join(company_affiliations),
            "Corresponding Author Email": corresponding_email or "N/A"
        }
    except Exception as e:
        logging.error(f"Error parsing paper details: {e}")
        return None

def is_non_academic(affiliation: str) -> bool:
    """Determines if an affiliation is non-academic."""
    academic_keywords = ["University", "Institute", "College", "School", "Hospital", "Lab"]
    return not any(word in affiliation for word in academic_keywords)

def save_to_csv(papers: List[Dict], filename: str):
    """Saves research paper data to a CSV file."""
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=[
            "PubmedID", "Title", "Publication Date",
            "Non-academic Author(s)", "Company Affiliation(s)",
            "Corresponding Author Email"
        ])
        writer.writeheader()
        writer.writerows(papers)
    logging.info(f"Saved results to {filename}")
```
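
A short driver sketch tying the Entrez-based helpers together; the query, result count, and output filename are placeholders:

```python
# Illustrative driver; assumes fetch_pubmed_papers and save_to_csv above are in the same module.
if __name__ == "__main__":
    results = fetch_pubmed_papers("crispr gene editing", max_results=5)
    if results:
        save_to_csv(results, "pubmed_results.csv")
    else:
        logging.warning("No papers retrieved for the query.")
```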
