Skip to content

Commit

Permalink
update package database (plus generator script)
Browse files Browse the repository at this point in the history
  • Loading branch information
sunderme committed Dec 18, 2024
1 parent cd2d61d commit 3045625
Show file tree
Hide file tree
Showing 2 changed files with 429,651 additions and 440,555 deletions.
64 changes: 64 additions & 0 deletions utilities/generateDB.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/python3

import requests
import json

# CTAN JSON API endpoints, keyed by the collection each one returns
urls = dict(
    packages="https://ctan.org/json/2.0/packages",
    authors="https://ctan.org/json/2.0/authors",
    topics="https://ctan.org/json/2.0/topics",
    licenses="https://ctan.org/json/2.0/licenses",
)

# Function to fetch JSON data from a URL
def fetch_json(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address of the JSON resource to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``, so a ``(connect, read)``
            tuple is accepted as well. Without a timeout ``requests``
            waits indefinitely, and a single stalled connection would
            hang the whole run.

    Returns:
        The parsed JSON document, as produced by ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an exception for HTTP errors
    return response.json()

# Download every collection listed in `urls`, keeping the same keys
data = {}
for collection, endpoint in urls.items():
    data[collection] = fetch_json(endpoint)

# Lookup table: author 'key' -> full author record
authors_dict = dict((entry['key'], entry) for entry in data["authors"])

# Lookup table: topic 'key' -> full topic record
topics_dict = dict((entry['key'], entry) for entry in data["topics"])

# Lookup table: license 'key' -> full license record
license_dict = dict((entry['key'], entry) for entry in data["licenses"])

# Process the packages data: download each package's detail record and
# replace the author/topic/license keys it contains with the full records
# from the lookup tables built above.
packages = data["packages"]
package_content = []
for package in packages:
    package_key = package['key']
    package_url = f"https://www.ctan.org/json/2.0/pkg/{package_key}"
    package_data = fetch_json(package_url)

    # Every lookup below falls back to the raw value when the key is not in
    # the corresponding table. The original code did this only for a scalar
    # license; the other lookups raised KeyError, so one stale reference
    # aborted the whole crawl before anything was written to disk.
    if 'authors' in package_data:
        package_data['authors'] = [
            authors_dict.get(author['id'], author)
            for author in package_data['authors']
        ]

    if 'topics' in package_data:
        package_data['topics'] = [
            topics_dict.get(topic, topic)
            for topic in package_data['topics']
        ]

    if 'license' in package_data:
        licenses = package_data['license']
        # 'license' is handled both as a list of keys and as a single key.
        if isinstance(licenses, list):
            package_data['license'] = [
                license_dict.get(key, key) for key in licenses
            ]
        else:
            package_data['license'] = license_dict.get(licenses, licenses)

    # Progress output: one line per processed package.
    print(f"Package: {package_key}")

    package_content.append(package_data)

# Write the processed package list to disk as JSON indented by two spaces
with open("packageDatabase.json", mode="w") as db_file:
    json.dump(package_content, fp=db_file, indent=2)

Loading

0 comments on commit 3045625

Please sign in to comment.