all the files are moved from fun007 repo

fam007e · Sep 25, 2024 · 2900186 · 2900186
1 parent 2211483
commit 2900186
Show file tree

Hide file tree

Showing 4 changed files with 274 additions and 2 deletions.
diff --git a/OandALvLQPSDL.py b/OandALvLQPSDL.py
@@ -0,0 +1,176 @@
+import os
+import re
+import shutil
+
+import requests
+from bs4 import BeautifulSoup
+
+# Root URL of the xtremepape.rs paper index. Listing URLs are built from it and
+# the relative hrefs scraped from listing pages are appended to it.
+BASE_URL = 'https://papers.xtremepape.rs/'
+
+
+def get_exam_board():
+    """Ask the user which examination board to use.
+
+    The menu is shown again until the user enters '1' or '2'.
+
+    Returns:
+        'CAIE' for choice 1, 'Edexcel' for choice 2.
+    """
+    boards = {'1': 'CAIE', '2': 'Edexcel'}
+    selected = None
+    while selected is None:
+        print("\nChoose the examination board:")
+        print("1. Cambridge (CAIE)")
+        print("2. Edexcel")
+        answer = input("Enter your choice (1 or 2): ").strip()
+        selected = boards.get(answer)
+        if selected is None:
+            print("Invalid choice. Please enter 1 or 2.")
+    return selected
+
+
+def get_exam_level(exam_board):
+    """Ask the user which examination level to use for the given board.
+
+    The menu is shown again until the user enters '1' or '2'.
+
+    Args:
+        exam_board: 'CAIE' selects the Cambridge levels; any other value is
+            treated as Edexcel.
+
+    Returns:
+        The level name with '+' in place of spaces: 'O+Level' or
+        'AS+and+A+Level' for CAIE, otherwise 'International+GCSE' or
+        'Advanced+Level'.
+    """
+    if exam_board == 'CAIE':
+        menu_lines = ("1. O Level", "2. AS and A Level")
+        levels = {'1': 'O+Level', '2': 'AS+and+A+Level'}
+    else:  # Edexcel
+        menu_lines = ("1. International GCSE", "2. Advanced Level")
+        levels = {'1': 'International+GCSE', '2': 'Advanced+Level'}
+
+    while True:
+        print("\nChoose the examination level:")
+        for menu_line in menu_lines:
+            print(menu_line)
+        answer = input("Enter your choice (1 or 2): ").strip()
+        if answer in levels:
+            return levels[answer]
+        print("Invalid choice. Please enter 1 or 2.")
+
+
+def get_subjects(exam_board, exam_level):
+    """Fetch the subject directories listed for an exam board and level.
+
+    Args:
+        exam_board: 'CAIE' selects the CAIE directory; any other value is
+            treated as Edexcel.
+        exam_level: Level directory name with '+' in place of spaces, as
+            returned by get_exam_level().
+
+    Returns:
+        Dict mapping each subject name (surrounding brackets stripped) to the
+        absolute URL of its listing page.
+
+    Raises:
+        requests.RequestException: If the request fails, times out, or the
+            server answers with an HTTP error status.
+    """
+    # Both boards use the same listing URL layout; only the directory differs.
+    board_dir = 'CAIE' if exam_board == 'CAIE' else 'Edexcel'
+    url = f'{BASE_URL}index.php?dirpath=./{board_dir}/{exam_level}/&order=0'
+
+    # The timeout keeps a stalled server from hanging the script, and the
+    # status check avoids parsing an error page as if it were a listing.
+    response = requests.get(url, timeout=30)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    subjects = {}
+    for link in soup.find_all('a', class_='directory'):
+        subject_name = link.text.strip('[]')
+        if subject_name != '..':  # Skip the parent directory link
+            subjects[subject_name] = BASE_URL + link['href']
+    return subjects
+
+
+def get_pdfs(subject_url, exam_board):
+    """Fetch PDF links for the selected subject.
+
+    Args:
+        subject_url: Absolute URL of the subject's listing page.
+        exam_board: 'Edexcel' walks the nested Edexcel folder layout; any
+            other value reads the PDF links directly from the subject page.
+
+    Returns:
+        Dict mapping each PDF link's text to its absolute URL.
+    """
+    # Dispatch before doing any network work: the Edexcel crawler fetches the
+    # subject page itself, so downloading it here first would be wasted.
+    if exam_board == 'Edexcel':
+        return get_edexcel_pdfs(subject_url)
+
+    # The subject page lists its PDFs directly, so the shared page scraper
+    # does exactly what is needed.
+    return get_pdfs_from_page(subject_url)
+
+
+def get_edexcel_pdfs(subject_url):
+    """Collect question-paper and mark-scheme PDF links for an Edexcel subject.
+
+    Every sub-directory linked from the subject page (treated as a year
+    folder) is opened, and PDFs are gathered from its '[Question-paper]' and
+    '[Mark-scheme]' sub-directories when those links are present.
+
+    Args:
+        subject_url: Absolute URL of the subject's listing page.
+
+    Returns:
+        Dict mapping each PDF link's text to its absolute URL. Entries found
+        later overwrite earlier ones that share the same link text.
+    """
+    pdfs = {}
+    response = requests.get(subject_url, timeout=30)
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    for year_link in soup.find_all('a', class_='directory'):
+        if year_link.text.strip('[]') == '..':
+            continue  # Skip the parent directory link
+
+        year_response = requests.get(BASE_URL + year_link['href'], timeout=30)
+        year_soup = BeautifulSoup(year_response.text, 'html.parser')
+
+        # Question papers are collected before mark schemes, so on a name
+        # clash the mark-scheme entry wins.
+        for label in ('[Question-paper]', '[Mark-scheme]'):
+            # `string=` is the current name of the filter that Beautiful Soup
+            # used to call `text=`.
+            section_link = year_soup.find('a', class_='directory', string=label)
+            if section_link:
+                pdfs.update(get_pdfs_from_page(BASE_URL + section_link['href']))
+
+    return pdfs
+
+
+def get_pdfs_from_page(url):
+    """Return the PDF links found on a single listing page.
+
+    Args:
+        url: Absolute URL of the page to scan.
+
+    Returns:
+        Dict mapping the stripped text of each 'file' link whose href ends in
+        '.pdf' to its absolute URL.
+    """
+    page = requests.get(url)
+    parsed = BeautifulSoup(page.text, 'html.parser')
+    pdf_href = re.compile(r'\.pdf$')
+
+    found = {}
+    for anchor in parsed.find_all('a', class_='file', href=pdf_href):
+        found[anchor.text.strip()] = BASE_URL + anchor['href']
+    return found
+
+
+def _pdf_subdir(filename, exam_board):
+    """Return the sub-folder name ('ms', 'qp' or 'misc') for a paper filename."""
+    if exam_board == 'CAIE':
+        if '_ms_' in filename:
+            return 'ms'
+        if '_qp_' in filename:
+            return 'qp'
+        return 'misc'
+
+    # Edexcel: match on words in the name, case-insensitively.
+    lowered = filename.lower()
+    if 'question' in lowered:
+        return 'qp'
+    if 'mark' in lowered or 'ms' in lowered:
+        return 'ms'
+    return 'misc'
+
+
+def download_pdf(url, filename, subject_dir, exam_board):
+    """Download a PDF and save it in the appropriate directory.
+
+    The file is written to ``<subject_dir>/<ms|qp|misc>/<filename>``; the
+    sub-folder is chosen from the filename and the exam board. A download that
+    fails is reported on stdout and skipped, so nothing is written for it.
+
+    Args:
+        url: Absolute URL of the PDF.
+        filename: Name to save the file under; also used for classification.
+        subject_dir: Directory of the subject the paper belongs to.
+        exam_board: 'CAIE' uses the '_ms_' / '_qp_' filename markers; any
+            other value uses the Edexcel keyword rules.
+    """
+    try:
+        response = requests.get(url, timeout=60)
+        # Without this check an HTTP error page would be saved as a ".pdf".
+        response.raise_for_status()
+    except requests.RequestException as exc:
+        # One bad link should not abort the rest of the batch.
+        print(f"Failed to download {filename}: {exc}")
+        return
+
+    dir_path = os.path.join(subject_dir, _pdf_subdir(filename, exam_board))
+    os.makedirs(dir_path, exist_ok=True)
+
+    file_path = os.path.join(dir_path, filename)
+    with open(file_path, 'wb') as f:
+        f.write(response.content)
+    print(f"Downloaded: {filename}")
+
+
+def print_subjects_in_columns(subjects):
+    """Print the available subjects, numbered from 1, in multiple columns.
+
+    As many equally wide columns as fit the terminal width are used, with at
+    least one column. Nothing is printed when ``subjects`` is empty.
+
+    Args:
+        subjects: Iterable of subject names (a dict iterates over its keys).
+    """
+    entries = [f"{i}. {subject}" for i, subject in enumerate(subjects, 1)]
+    if not entries:
+        return  # max() below would raise ValueError on an empty sequence
+
+    # shutil.get_terminal_size() falls back to 80 columns when the size cannot
+    # be queried (e.g. output is piped), where os.get_terminal_size() raises
+    # OSError.
+    terminal_width = shutil.get_terminal_size().columns
+    max_width = max(len(entry) for entry in entries)
+    num_columns = max(1, terminal_width // (max_width + 2))
+
+    for start in range(0, len(entries), num_columns):
+        row = entries[start:start + num_columns]
+        print("  ".join(entry.ljust(max_width) for entry in row))
+
+
+def _parse_selection(raw, count):
+    """Convert space-separated 1-based numbers into valid 0-based indices.
+
+    Tokens that are not integers in the range 1..count are reported on stdout
+    and skipped.
+    """
+    indices = []
+    for token in raw.split():
+        try:
+            number = int(token)
+        except ValueError:
+            print(f"Skipping invalid selection: {token!r}")
+            continue
+        # Reject 0 and negatives explicitly: used as list indices they would
+        # silently pick subjects from the end of the list.
+        if not 1 <= number <= count:
+            print(f"Skipping out-of-range selection: {number}")
+            continue
+        indices.append(number - 1)
+    return indices
+
+
+def main():
+    """Main function to run the script.
+
+    Prompts for the exam board and level, lists the available subjects, asks
+    which ones to download, and saves every PDF found for each chosen subject
+    under ``<board>/<level>/<subject>/``.
+    """
+    exam_board = get_exam_board()
+    exam_level = get_exam_level(exam_board)
+    subjects = get_subjects(exam_board, exam_level)
+    level_name = exam_level.replace('+', ' ')
+
+    if not subjects:
+        print(f"\nNo subjects found for {exam_board} {level_name}.")
+        return
+
+    print(f"\nAvailable subjects for {exam_board} {level_name}:")
+    print_subjects_in_columns(subjects)
+
+    choices = input("\nEnter the numbers of the subjects you want to download (space-separated): ")
+
+    subject_names = list(subjects)
+    for index in _parse_selection(choices, len(subject_names)):
+        subject = subject_names[index]
+        subject_url = subjects[subject]
+        print(f"\nProcessing {subject}...")
+
+        pdfs = get_pdfs(subject_url, exam_board)
+        # '/' would split the folder name into nested directories, so it is
+        # replaced along with '&'.
+        subject_dir = os.path.join(exam_board, level_name, subject.replace('/', '_').replace('&', 'and'))
+        os.makedirs(subject_dir, exist_ok=True)
+
+        for filename, pdf_url in pdfs.items():
+            download_pdf(pdf_url, filename, subject_dir, exam_board)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/README.md b/README.md
@@ -1,2 +1,63 @@
-# OandALvl-exam-paper-downloader
-This script automates the download of past exam papers and mark schemes from the xtremepapers website for CAIE and Edexcel boards. It allows users to select exam boards, levels, and subjects, and organizes the downloaded files into well-structured directories based on the exam type and content (question papers, mark schemes, etc.).
+# O and A Levels Xtremepapers Downloader
+
+This Python script downloads past papers and mark schemes from the Xtremepapers website for Cambridge (CAIE) O Level and AS/A Level examinations and for Edexcel International GCSE and Advanced Level examinations.
+
+## Features
+
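+- Choose the examination board (Cambridge CAIE or Edexcel) and then its level (O Level or AS/A Level for CAIE; International GCSE or Advanced Level for Edexcel)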
+- View available subjects for the chosen examination level
+- Select multiple subjects for download
+- Automatically organizes downloaded papers into subject and paper type folders
+
+## Requirements
+
+- Python 3.6 or higher
+- `requests` library
+- `beautifulsoup4` library
+
+## Installation
+
+1. Ensure you have Python 3.6+ installed on your system.
+
+2. Install the required libraries:
+
+```bash
+pip install requests beautifulsoup4
+```
+
+3. Download the script using curl:
+
+```bash
+curl -fsSL https://raw.githubusercontent.com/fam007e/fun007/master/OandALevelsXtremepapersDownloader/OandALvLQPSDL.py -o OandALvLQPSDL.py
+```
+
+## Usage
+
+1. Run the script:
+
+```bash
+python OandALvLQPSDL.py
+```
+
+2. Follow the on-screen prompts:
+   - Choose the examination board (Cambridge CAIE or Edexcel)
+   - Choose the examination level offered by that board
+   - Select the subjects you want to download papers for
+   - Wait for the download to complete
+
+3. The downloaded papers will be organized in folders by examination board, examination level, subject, and paper type (`ms` for mark schemes, `qp` for question papers, and `misc` for other types).
+
+## Note
+
+Please be respectful of the Xtremepapers website and avoid overloading their servers with too many requests in a short time.
+
+## License
+
+This project is open source and available under the [LICENSE](../LICENSE).
+
+## Contributing
+
+Contributions, issues, and feature requests are welcome. Feel free to check the [issues page](https://github.com/fam007e/fun007repo/issues) if you want to contribute.
+
+## Author
+
+[fam007e](https://github.com/fam007e)
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+requests>=2.25.1
+beautifulsoup4>=4.9.3
diff --git a/setup.py b/setup.py
@@ -0,0 +1,33 @@
+from setuptools import setup, find_packages
+
+# Read the README for the long description. The encoding is given explicitly
+# so the build does not depend on the platform's default encoding.
+with open('README.md', 'r', encoding='utf-8') as f:
+    long_description = f.read()
+
+setup(
+    name='exam_downloader',
+    version='0.1.0',
+    description='A script to download A-Level test papers and mark schemes from xtremepape.rs.',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://github.com/fam007e/fun007/',
+    author='Faisal Ahmed Moshiur',
+    author_email='faisalmoshiur+gitpy@gmail.com',
+    license='MIT',
+    packages=find_packages(),
+    # The downloader is a single top-level module, which find_packages() does
+    # not pick up, so it is listed explicitly.
+    py_modules=['OandALvLQPSDL'],
+    install_requires=[
+        'requests>=2.25.1',
+        'beautifulsoup4>=4.9.3',
+    ],
+    entry_points={
+        'console_scripts': [
+            # Format is "command=module:function"; main() is defined in
+            # OandALvLQPSDL.py.
+            'exam-downloader=OandALvLQPSDL:main',
+        ],
+    },
+    classifiers=[
+        'Programming Language :: Python :: 3',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+    ],
+    python_requires='>=3.6',
+)