-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
all the files are moved from fun007 repo
- Loading branch information
Showing
4 changed files
with
274 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
import os
import re
import shutil

import requests
from bs4 import BeautifulSoup
|
||
BASE_URL = 'https://papers.xtremepape.rs/' | ||
|
||
|
||
def get_exam_board():
    """Ask the user which examination board to use.

    The menu is shown again after every answer that is not ``1`` or ``2``.

    Returns:
        ``'CAIE'`` when the user enters 1, ``'Edexcel'`` when the user enters 2.
    """
    boards = {'1': 'CAIE', '2': 'Edexcel'}
    while True:
        print("\nChoose the examination board:")
        print("1. Cambridge (CAIE)")
        print("2. Edexcel")
        answer = input("Enter your choice (1 or 2): ").strip()
        board = boards.get(answer)
        if board is not None:
            return board
        print("Invalid choice. Please enter 1 or 2.")
|
||
|
||
def get_exam_level(exam_board):
    """Ask the user which examination level to use for the chosen board.

    Args:
        exam_board: ``'CAIE'`` selects the Cambridge menu; any other value
            selects the Edexcel menu.

    Returns:
        The level as a ``+``-separated string: ``'O+Level'`` or
        ``'AS+and+A+Level'`` for CAIE, ``'International+GCSE'`` or
        ``'Advanced+Level'`` otherwise.
    """
    if exam_board == 'CAIE':
        menu = ("1. O Level", "2. AS and A Level")
        levels = {'1': 'O+Level', '2': 'AS+and+A+Level'}
    else:  # Edexcel
        menu = ("1. International GCSE", "2. Advanced Level")
        levels = {'1': 'International+GCSE', '2': 'Advanced+Level'}

    while True:
        print("\nChoose the examination level:")
        for entry in menu:
            print(entry)
        picked = input("Enter your choice (1 or 2): ").strip()
        if picked in levels:
            return levels[picked]
        print("Invalid choice. Please enter 1 or 2.")
|
||
|
||
def get_subjects(exam_board, exam_level):
    """Fetch the subjects listed for the selected exam board and level.

    Args:
        exam_board: ``'CAIE'`` for Cambridge; any other value is treated as
            Edexcel.
        exam_level: Level directory name as it appears in the listing URL,
            e.g. ``'O+Level'``.

    Returns:
        dict mapping each subject name (link text with surrounding square
        brackets stripped) to ``BASE_URL`` joined with the link's ``href``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Both boards use the same listing URL shape; only the directory differs.
    board_dir = 'CAIE' if exam_board == 'CAIE' else 'Edexcel'
    url = f'{BASE_URL}index.php?dirpath=./{board_dir}/{exam_level}/&order=0'

    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=30)
    # Fail loudly here instead of parsing an error page into an empty listing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    subjects = {}
    # href=True skips anchors that carry no target instead of raising KeyError.
    for link in soup.find_all('a', class_='directory', href=True):
        subject_name = link.text.strip('[]')
        if subject_name != '..':  # Skip the parent directory link
            subjects[subject_name] = BASE_URL + link['href']
    return subjects
|
||
|
||
def get_pdfs(subject_url, exam_board):
    """Fetch PDF links for the selected subject.

    Args:
        subject_url: URL of the subject's directory listing.
        exam_board: ``'Edexcel'`` triggers the nested per-year crawl; any
            other value reads the PDF links directly from the subject page.

    Returns:
        dict mapping PDF link text to its download URL.
    """
    # Decide before fetching: the Edexcel crawler requests the subject page
    # itself, so fetching it here first would only duplicate that request.
    if exam_board == 'Edexcel':
        return get_edexcel_pdfs(subject_url)
    return get_pdfs_from_page(subject_url)
|
||
|
||
def get_edexcel_pdfs(subject_url):
    """Fetch PDF links for an Edexcel subject.

    The subject page lists directory links; each one (the variable names
    suggest one per year) is opened and searched for ``[Question-paper]``
    and ``[Mark-scheme]`` directory links, whose pages are then scanned
    for PDFs.

    Args:
        subject_url: URL of the subject's directory listing.

    Returns:
        dict mapping PDF link text to its download URL. Entries with the
        same link text overwrite earlier ones.
    """
    pdfs = {}
    # A timeout keeps a stalled connection from hanging the whole crawl.
    response = requests.get(subject_url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    for year_link in soup.find_all('a', class_='directory'):
        if year_link.text.strip('[]') == '..':
            continue  # Skip the parent directory link

        year_response = requests.get(BASE_URL + year_link['href'], timeout=30)
        year_soup = BeautifulSoup(year_response.text, 'html.parser')

        # Question papers first, then mark schemes, so a name clash is
        # resolved the same way on every run.
        for folder_label in ('[Question-paper]', '[Mark-scheme]'):
            # `string=` is the current name of the filter formerly spelled
            # `text=`, which Beautiful Soup has deprecated.
            folder_link = year_soup.find(
                'a', class_='directory', string=folder_label
            )
            if folder_link:
                pdfs.update(get_pdfs_from_page(BASE_URL + folder_link['href']))

    return pdfs
|
||
|
||
def get_pdfs_from_page(url):
    """Fetch all PDF links from a specific page.

    Args:
        url: URL of a directory listing page.

    Returns:
        dict mapping the stripped text of each ``<a class="file">`` link whose
        ``href`` ends in ``.pdf`` to ``BASE_URL`` joined with that ``href``.
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    pdf_links = soup.find_all('a', class_='file', href=re.compile(r'\.pdf$'))
    return {link.text.strip(): BASE_URL + link['href'] for link in pdf_links}
|
||
|
||
def _paper_subdir(filename, exam_board):
    """Return the sub-folder ('qp', 'ms' or 'misc') a paper belongs in."""
    if exam_board == 'CAIE':
        if '_ms_' in filename:
            return 'ms'
        if '_qp_' in filename:
            return 'qp'
        return 'misc'

    # Edexcel names are matched case-insensitively.
    lowered = filename.lower()
    if 'question' in lowered:
        return 'qp'
    if 'mark' in lowered or 'ms' in lowered:
        return 'ms'
    return 'misc'


def download_pdf(url, filename, subject_dir, exam_board):
    """Download a PDF and save it in the appropriate directory.

    Args:
        url: Download URL of the PDF.
        filename: Name to save the file under (the link text from the listing).
        subject_dir: Directory of the subject; a ``qp``/``ms``/``misc``
            sub-folder is created inside it as needed.
        exam_board: ``'CAIE'`` selects the ``_qp_``/``_ms_`` naming rules; any
            other value selects the Edexcel keyword rules.

    Returns:
        None. Progress is reported on stdout.
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=60)
    if not response.ok:
        # Do not save an HTTP error body to disk under a .pdf name.
        print(f"Failed to download {filename}: HTTP {response.status_code}")
        return

    dir_path = os.path.join(subject_dir, _paper_subdir(filename, exam_board))
    os.makedirs(dir_path, exist_ok=True)

    # The name comes from a remote page; keep only its last path component so
    # it cannot point outside dir_path.
    file_path = os.path.join(dir_path, os.path.basename(filename))
    with open(file_path, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded: {filename}")
|
||
|
||
def print_subjects_in_columns(subjects):
    """Print the available subjects, numbered from 1, in multiple columns.

    Args:
        subjects: Iterable of subject names (a dict iterates its keys).
            Nothing is printed when it is empty.
    """
    labels = [f"{i}. {subject}" for i, subject in enumerate(subjects, 1)]
    if not labels:
        # max() below would raise ValueError on an empty sequence.
        return

    # shutil's variant falls back to a default size when stdout is not a
    # terminal (piped or redirected), where os.get_terminal_size() raises.
    terminal_width = shutil.get_terminal_size().columns
    max_width = max(len(label) for label in labels)
    num_columns = max(1, terminal_width // (max_width + 2))

    for start in range(0, len(labels), num_columns):
        row = labels[start:start + num_columns]
        print(" ".join(label.ljust(max_width) for label in row))
|
||
|
||
def main():
    """Run the interactive downloader.

    Prompts for board and level, lists the subjects, asks which ones to
    download, then saves every PDF of each selected subject under
    ``<board>/<level>/<subject>/``.
    """
    exam_board = get_exam_board()
    exam_level = get_exam_level(exam_board)
    subjects = get_subjects(exam_board, exam_level)
    level_name = exam_level.replace('+', ' ')

    if not subjects:
        print(f"\nNo subjects found for {exam_board} {level_name}.")
        return

    print(f"\nAvailable subjects for {exam_board} {level_name}:")
    print_subjects_in_columns(subjects)

    choices = input("\nEnter the numbers of the subjects you want to download (space-separated): ")
    subject_names = list(subjects)

    for token in choices.split():
        try:
            index = int(token)
        except ValueError:
            print(f"Skipping invalid selection: {token}")
            continue
        # Reject 0 and negatives explicitly: list indexing would otherwise
        # wrap around and silently pick a subject from the end of the list.
        if not 1 <= index <= len(subject_names):
            print(f"Skipping out-of-range selection: {token}")
            continue

        subject = subject_names[index - 1]
        subject_url = subjects[subject]
        print(f"\nProcessing {subject}...")

        pdfs = get_pdfs(subject_url, exam_board)
        # '/' would create nested folders; '&' is spelled out for the folder name.
        folder_name = subject.replace('/', '_').replace('&', 'and')
        subject_dir = os.path.join(exam_board, level_name, folder_name)
        os.makedirs(subject_dir, exist_ok=True)

        for filename, pdf_url in pdfs.items():
            download_pdf(pdf_url, filename, subject_dir, exam_board)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,63 @@ | ||
# OandALvl-exam-paper-downloader | ||
This script automates the download of past exam papers and mark schemes from the xtremepapers website for CAIE and Edexcel boards. It allows users to select exam boards, levels, and subjects, and organizes the downloaded files into well-structured directories based on the exam type and content (question papers, mark schemes, etc.). | ||
# O and A Levels Xtremepapers Downloader | ||
|
||
This Python script allows users to download past papers and mark schemes from the Xtremepapers website for Cambridge (CAIE) O Level and AS/A Level examinations and for Edexcel International GCSE and Advanced Level examinations.
|
||
## Features | ||
|
||
- Choose the examination board (Cambridge CAIE or Edexcel) and one of the examination levels offered for that board
- View available subjects for the chosen examination level | ||
- Select multiple subjects for download | ||
- Automatically organizes downloaded papers into subject and paper type folders | ||
|
||
## Requirements | ||
|
||
- Python 3.6 or higher | ||
- `requests` library | ||
- `beautifulsoup4` library | ||
|
||
## Installation | ||
|
||
1. Ensure you have Python 3.6+ installed on your system. | ||
|
||
2. Install the required libraries: | ||
|
||
```bash | ||
pip install requests beautifulsoup4 | ||
``` | ||
|
||
3. Download the script using curl: | ||
|
||
```bash | ||
curl -fsSL https://raw.githubusercontent.com/fam007e/fun007/master/OandALevelsXtremepapersDownloader/OandALvLQPSDL.py -o OandALvLQPSDL.py | ||
``` | ||
|
||
## Usage | ||
|
||
1. Run the script: | ||
|
||
```bash | ||
python OandALvLQPSDL.py | ||
``` | ||
|
||
2. Follow the on-screen prompts: | ||
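- Choose the examination board (Cambridge CAIE or Edexcel)
- Choose the examination level (O Level or AS/A Level for CAIE; International GCSE or Advanced Level for Edexcel)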
- Select the subjects you want to download papers for | ||
- Wait for the download to complete | ||
|
||
3. The downloaded papers will be organized in folders by examination board, examination level, subject, and paper type (`ms` for mark schemes, `qp` for question papers, and `misc` for other types).
|
||
## Note | ||
|
||
Please be respectful of the Xtremepapers website and avoid overloading their servers with too many requests in a short time. | ||
|
||
## License | ||
|
||
This project is open source and available under the [LICENSE](../LICENSE). | ||
|
||
## Contributing | ||
|
||
Contributions, issues, and feature requests are welcome. Feel free to check the [issues page](https://github.com/fam007e/fun007repo/issues) if you want to contribute.
|
||
## Author | ||
|
||
[fam007e](https://github.com/fam007e) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
requests>=2.25.1 | ||
beautifulsoup4>=4.9.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from setuptools import setup, find_packages | ||
|
||
# Read the README so it can be used as the package's long description.
# The encoding is explicit so the read does not depend on the platform's
# locale default, which is not UTF-8 everywhere.
with open('README.md', 'r', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='exam_downloader',
    version='0.1.0',
    description='A script to download A-Level test papers and mark schemes from xtremepape.rs.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/fam007e/fun007/',
    author='Faisal Ahmed Moshiur',
    author_email='faisalmoshiur+gitpy@gmail.com',
    license='MIT',
    packages=find_packages(),
    install_requires=[
        'requests>=2.25.1',
        'beautifulsoup4>=4.9.3',
    ],
    entry_points={
        'console_scripts': [
            # NOTE(review): 'DWNFDCleaner.main' looks copied from another
            # project. The README runs the script as OandALvLQPSDL.py, so
            # confirm the intended module path before publishing.
            'exam-downloader=DWNFDCleaner.main:main',
        ],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)