Skip to content

Commit

Permalink
all the files are moved from fun007 repo
Browse files Browse the repository at this point in the history
  • Loading branch information
fam007e committed Sep 25, 2024
1 parent 2211483 commit 2900186
Show file tree
Hide file tree
Showing 4 changed files with 274 additions and 2 deletions.
176 changes: 176 additions & 0 deletions OandALvLQPSDL.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import os
import re
import shutil

import requests
from bs4 import BeautifulSoup

# Root URL of the paper index; listing-page URLs are built from it and the
# hrefs scraped from those pages are appended to it to form absolute URLs.
BASE_URL = 'https://papers.xtremepape.rs/'


def get_exam_board():
    """Ask the user to pick an examination board.

    The menu is shown repeatedly until the user enters '1' or '2'
    (surrounding whitespace is ignored).

    Returns:
        'CAIE' for choice 1, 'Edexcel' for choice 2.
    """
    boards = {'1': 'CAIE', '2': 'Edexcel'}
    selected = None
    while selected is None:
        print("\nChoose the examination board:")
        print("1. Cambridge (CAIE)")
        print("2. Edexcel")
        answer = input("Enter your choice (1 or 2): ").strip()
        selected = boards.get(answer)
        if selected is None:
            print("Invalid choice. Please enter 1 or 2.")
    return selected


def get_exam_level(exam_board):
    """Ask the user to pick an examination level for the given board.

    Args:
        exam_board: 'CAIE' selects the Cambridge menu; any other value
            selects the Edexcel menu.

    Returns:
        The '+'-encoded level name used in the site's directory paths:
        'O+Level' / 'AS+and+A+Level' for CAIE, or
        'International+GCSE' / 'Advanced+Level' for Edexcel.
    """
    if exam_board == 'CAIE':
        menu = ("1. O Level", "2. AS and A Level")
        levels = {'1': 'O+Level', '2': 'AS+and+A+Level'}
    else:  # Edexcel
        menu = ("1. International GCSE", "2. Advanced Level")
        levels = {'1': 'International+GCSE', '2': 'Advanced+Level'}

    while True:
        print("\nChoose the examination level:")
        for entry in menu:
            print(entry)
        answer = input("Enter your choice (1 or 2): ").strip()
        if answer in levels:
            return levels[answer]
        print("Invalid choice. Please enter 1 or 2.")


def get_subjects(exam_board, exam_level, timeout=30):
    """Fetch the subject directories listed for an exam board and level.

    Args:
        exam_board: 'CAIE' selects the CAIE directory; any other value
            selects the Edexcel directory.
        exam_level: '+'-encoded level directory name, e.g. 'O+Level'.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict mapping each subject name (link text with surrounding '[' / ']'
        stripped) to the absolute URL of that subject's listing page.

    Raises:
        requests.RequestException: On a timeout, connection failure or an
            HTTP error status.
    """
    board_dir = 'CAIE' if exam_board == 'CAIE' else 'Edexcel'
    url = f'{BASE_URL}index.php?dirpath=./{board_dir}/{exam_level}/&order=0'

    # Without a timeout requests can block forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as an empty listing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    subjects = {}
    for link in soup.find_all('a', class_='directory'):
        subject_name = link.text.strip('[]')
        if subject_name != '..':  # Skip the parent directory link
            subjects[subject_name] = BASE_URL + link['href']
    return subjects


def get_pdfs(subject_url, exam_board):
    """Fetch PDF links for the selected subject.

    Args:
        subject_url: Absolute URL of the subject's listing page.
        exam_board: 'Edexcel' walks the per-year sub-directories; any other
            value reads PDF links directly from the subject page.

    Returns:
        dict mapping PDF link text to the absolute PDF URL.
    """
    # Decide first: the Edexcel helper fetches the subject page itself, so
    # downloading and parsing it here beforehand was a wasted request.
    if exam_board == 'Edexcel':
        return get_edexcel_pdfs(subject_url)
    return get_pdfs_from_page(subject_url)


def get_edexcel_pdfs(subject_url, timeout=30):
    """Fetch PDF links for an Edexcel subject.

    The subject page links to one directory per entry (the original code
    treats these as years). Each of those may contain a '[Question-paper]'
    and a '[Mark-scheme]' directory, whose PDF links are collected.

    Args:
        subject_url: Absolute URL of the subject's listing page.
        timeout: Seconds to wait for each listing request.

    Returns:
        dict mapping PDF link text to the absolute PDF URL. Entries with the
        same link text overwrite earlier ones.

    Raises:
        requests.RequestException: On a timeout, connection failure or an
            HTTP error status.
    """
    pdfs = {}
    response = requests.get(subject_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    for year_link in soup.find_all('a', class_='directory'):
        if year_link.text.strip('[]') == '..':  # Skip the parent directory link
            continue

        year_response = requests.get(BASE_URL + year_link['href'], timeout=timeout)
        year_response.raise_for_status()
        year_soup = BeautifulSoup(year_response.text, 'html.parser')

        # Question papers first, then mark schemes, as before.
        for label in ('[Question-paper]', '[Mark-scheme]'):
            # `string=` replaces the deprecated `text=` keyword.
            folder_link = year_soup.find('a', class_='directory', string=label)
            if folder_link is not None:
                pdfs.update(get_pdfs_from_page(BASE_URL + folder_link['href']))

    return pdfs


def get_pdfs_from_page(url, timeout=30):
    """Fetch all PDF links from a specific listing page.

    Args:
        url: Absolute URL of the listing page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict mapping each link's text to its absolute URL, for every
        `<a class="file">` whose href ends in '.pdf'.

    Raises:
        requests.RequestException: On a timeout, connection failure or an
            HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # An error page would otherwise look like a listing with no PDFs.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    pdf_links = soup.find_all('a', class_='file', href=re.compile(r'\.pdf$'))
    return {link.text.strip(): BASE_URL + link['href'] for link in pdf_links}


def _paper_subdir(filename, exam_board):
    """Return the folder ('qp', 'ms' or 'misc') a paper is filed under, judged by its name."""
    if exam_board == 'CAIE':
        if '_ms_' in filename:
            return 'ms'
        if '_qp_' in filename:
            return 'qp'
        return 'misc'
    # Edexcel
    lowered = filename.lower()
    if 'question' in lowered:
        return 'qp'
    if 'mark' in lowered or 'ms' in lowered:
        return 'ms'
    return 'misc'


def download_pdf(url, filename, subject_dir, exam_board, timeout=60):
    """Download a PDF and save it in the appropriate directory.

    The file is written to `<subject_dir>/<qp|ms|misc>/<filename>`, with the
    sub-folder chosen from markers in the filename.

    Args:
        url: Absolute URL of the PDF.
        filename: Name to save the file under (scraped link text).
        subject_dir: Directory for this subject; sub-folders are created
            beneath it as needed.
        exam_board: 'CAIE' uses the '_ms_' / '_qp_' markers; any other value
            uses the Edexcel 'question' / 'mark' / 'ms' markers.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        ValueError: If `filename` has no usable file-name component.
        requests.RequestException: On a timeout, connection failure or an
            HTTP error status.
    """
    # The name is scraped from a web page, so drop any directory components
    # to keep the write inside subject_dir.
    safe_name = os.path.basename(filename.replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        raise ValueError(f"Unsafe filename: {filename!r}")

    response = requests.get(url, timeout=timeout)
    # Never save an HTTP error page to disk under a .pdf name.
    response.raise_for_status()

    dir_path = os.path.join(subject_dir, _paper_subdir(safe_name, exam_board))
    os.makedirs(dir_path, exist_ok=True)

    file_path = os.path.join(dir_path, safe_name)
    with open(file_path, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded: {filename}")


def print_subjects_in_columns(subjects):
    """Print the available subjects, numbered from 1, in as many columns as fit.

    Args:
        subjects: Iterable of subject names (e.g. the subjects dict, whose
            keys are iterated). Nothing is printed when it is empty.
    """
    labels = [f"{i}. {subject}" for i, subject in enumerate(subjects, 1)]
    if not labels:
        # max() below would raise ValueError on an empty sequence.
        return

    # shutil's variant falls back to a default size when stdout is not a
    # terminal (piped or redirected), where os.get_terminal_size() raises.
    terminal_width = shutil.get_terminal_size().columns
    max_width = max(len(label) for label in labels)
    num_columns = max(1, terminal_width // (max_width + 2))

    for start in range(0, len(labels), num_columns):
        row = labels[start:start + num_columns]
        print(" ".join(item.ljust(max_width) for item in row))


def main():
    """Run the interactive downloader.

    Prompts for board and level, lists the subjects, reads a space-separated
    list of subject numbers and downloads every PDF found for each selected
    subject into `<board>/<level>/<subject>/`.
    """
    exam_board = get_exam_board()
    exam_level = get_exam_level(exam_board)
    subjects = get_subjects(exam_board, exam_level)
    level_label = exam_level.replace('+', ' ')

    if not subjects:
        print(f"\nNo subjects found for {exam_board} {level_label}.")
        return

    print(f"\nAvailable subjects for {exam_board} {level_label}:")
    print_subjects_in_columns(subjects)

    choices = input("\nEnter the numbers of the subjects you want to download (space-separated): ")

    subject_names = list(subjects)
    selected_indices = []
    for token in choices.split():
        try:
            index = int(token)
        except ValueError:
            print(f"Skipping invalid selection: {token!r}")
            continue
        # 0 and negative numbers would otherwise index from the end of the
        # list and silently pick the wrong subject.
        if not 1 <= index <= len(subject_names):
            print(f"Skipping out-of-range selection: {index}")
            continue
        if index not in selected_indices:
            selected_indices.append(index)

    for index in selected_indices:
        subject = subject_names[index - 1]
        subject_url = subjects[subject]
        print(f"\nProcessing {subject}...")

        pdfs = get_pdfs(subject_url, exam_board)
        # '/' would create an unintended sub-directory; '&' is spelled out.
        subject_dir = os.path.join(
            exam_board,
            level_label,
            subject.replace('/', '_').replace('&', 'and'),
        )
        os.makedirs(subject_dir, exist_ok=True)

        for filename, pdf_url in pdfs.items():
            download_pdf(pdf_url, filename, subject_dir, exam_board)


# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
65 changes: 63 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,63 @@
# OandALvl-exam-paper-downloader
This script automates the download of past exam papers and mark schemes from the xtremepapers website for CAIE and Edexcel boards. It allows users to select exam boards, levels, and subjects, and organizes the downloaded files into well-structured directories based on the exam type and content (question papers, mark schemes, etc.).
# O and A Levels Xtremepapers Downloader

This Python script allows users to download past papers and mark schemes for Cambridge (CAIE) O Level and AS/A Level, and for Edexcel International GCSE and Advanced Level examinations, from the Xtremepapers website.

## Features

- Choose the examination board (Cambridge CAIE or Edexcel) and level (O Level or AS/A Level for CAIE; International GCSE or Advanced Level for Edexcel)
- View available subjects for the chosen examination level
- Select multiple subjects for download
- Automatically organizes downloaded papers into subject and paper type folders

## Requirements

- Python 3.6 or higher
- `requests` library
- `beautifulsoup4` library

## Installation

1. Ensure you have Python 3.6+ installed on your system.

2. Install the required libraries:

```bash
pip install requests beautifulsoup4
```

3. Download the script using curl:

```bash
curl -fsSL https://raw.githubusercontent.com/fam007e/fun007/master/OandALevelsXtremepapersDownloader/OandALvLQPSDL.py -o OandALvLQPSDL.py
```

## Usage

1. Run the script:

```bash
python OandALvLQPSDL.py
```

2. Follow the on-screen prompts:
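- Choose the examination board (Cambridge CAIE or Edexcel)
- Choose the examination level (O Level or AS/A Level for CAIE; International GCSE or Advanced Level for Edexcel)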
- Select the subjects you want to download papers for
- Wait for the download to complete

3. The downloaded papers will be organized in folders by examination board, examination level, subject, and paper type (`ms` for mark schemes, `qp` for question papers, and `misc` for other types).

## Note

Please be respectful of the Xtremepapers website and avoid overloading their servers with too many requests in a short time.

## License

This project is open source and available under the [LICENSE](../LICENSE).

## Contributing

Contributions, issues, and feature requests are welcome. Feel free to check the [issues page](https://github.com/fam007e/fun007repo/issues) if you want to contribute.

## Author

[fam007e](https://github.com/fam007e)
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
requests>=2.25.1
beautifulsoup4>=4.9.3
33 changes: 33 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Packaging script for the exam paper downloader."""
from setuptools import setup, find_packages

# Read the README for the long description. The encoding is explicit so the
# build does not depend on the platform's default locale.
with open('README.md', 'r', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='exam_downloader',
    version='0.1.0',
    description='A script to download A-Level test papers and mark schemes from xtremepape.rs.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/fam007e/fun007/',
    author='Faisal Ahmed Moshiur',
    author_email='faisalmoshiur+gitpy@gmail.com',
    license='MIT',
    packages=find_packages(),
    # The downloader is a single top-level module, not a package, so it has
    # to be listed explicitly for it to be installed.
    py_modules=['OandALvLQPSDL'],
    install_requires=[
        'requests>=2.25.1',
        'beautifulsoup4>=4.9.3',
    ],
    entry_points={
        'console_scripts': [
            # Point at main() in OandALvLQPSDL.py; the previous target,
            # DWNFDCleaner.main, is not a module shipped with this project.
            'exam-downloader=OandALvLQPSDL:main',
        ],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)

0 comments on commit 2900186

Please sign in to comment.