Skip to content

Commit

Permalink
feat(getCountries): add threads to make it faster
Browse files Browse the repository at this point in the history
  • Loading branch information
ThiaudioTT committed Jul 25, 2024
1 parent ce2120c commit b1a4cd6
Showing 1 changed file with 52 additions and 26 deletions.
78 changes: 52 additions & 26 deletions tools/getVanillaCountries.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
from bs4 import BeautifulSoup
import requests
import os
import threading
from queue import Queue

# This script gets the vanilla countries from the wiki.
# It creates a file with the countries and their codes in Python dictionary format,
# and it also downloads the flags of the countries.
# To see which downloads failed, redirect the output to a log: python your_script.py > output.log

# TODO: this was written in a hurry. Refactor it!

# Hint: use threads to be faster

# Base URL of the wiki; page hrefs (minus their first character) are appended to it.
HOI_SOURCE = "https://hoi4.paradoxwikis.com/"
# Maximum number of threads started per batch in getCountries.
N_THREADS = 5

# Lock shared by all threads; it is held while writing the countries file
# (and the flag images) so that concurrent writes do not conflict.
lock = threading.Lock()

# Helper functions:
def getCountryDetails(row) -> tuple[str, str]:
Expand All @@ -20,9 +26,10 @@ def getCountryDetails(row) -> tuple[str, str]:

def appendCountryToFile(country_name: str, country_code: str, filename: str) -> None:
    """Append one country entry to ``{filename}.py``.

    The entry is written as a dictionary item, e.g. ``"ITA": ("Italy", "ita"),``.

    Args:
        country_name: Display name of the country.
        country_code: Country tag; it is written as given for the key and
            lower-cased for the second tuple element.
        filename: Output path without the ``.py`` extension.
    """
    # Several threads call this function, so the append is done while holding
    # the shared lock; the lock is released when the ``with`` block ends.
    with lock:
        # Python source files are UTF-8 by default, so write the entry as
        # UTF-8 regardless of the platform's default encoding.
        with open(f"{filename}.py", "a", encoding="utf-8") as countriesFile:
            countriesFile.write(f'"{country_code}": ("{country_name}", "{country_code.lower()}"),\n')


def getSoup(url: str, error_message: str) -> BeautifulSoup:
Expand All @@ -44,39 +51,58 @@ def downloadFlagImage(flagWebpage: BeautifulSoup, country_code: str) -> None:

flag_response = requests.get(flag_url)
if flag_response.status_code == 200:
with open(f"flags/{country_code}.png", "wb") as flag_file:
flag_file.write(flag_response.content)
with lock: # in this case the lock is not necessary, but it is good to keep it
with open(f"flags/{country_code}.png", "wb") as flag_file:
flag_file.write(flag_response.content)
print(f"Flag for {country_code} downloaded.")
else:
print(f"Failed to download flag for {country_code}")


def getCountry(row, filename) -> None:
    """Record one country in the output file and download its flag.

    Args:
        row: A table row whose first ``td`` cell contains a link to the
            country's wiki page.
        filename: Output path (without ``.py``) passed to appendCountryToFile.

    Any exception raised while writing the entry or fetching the flag is
    printed instead of propagated, so one failing country does not stop the
    others.
    """
    cName, cCode = getCountryDetails(row)
    try:
        # Write the country to the file
        appendCountryToFile(cName, cCode, filename)

        # The flag is on another webpage: the country's own page.
        # [1:] drops the href's first character (presumably a leading "/",
        # since HOI_SOURCE already ends with one).
        country_webpage_URL = HOI_SOURCE + row.find_all("td")[0].find("a")["href"][1:]
        country_webpage = getSoup(country_webpage_URL, f"Failed to get flag WEBPAGE for {cName}")

        # Enter yet another webpage to get the flag (that page has the flag in a different size)
        cFlag_webpage = HOI_SOURCE + country_webpage.select_one("div.mw-parser-output:nth-child(4) > div:nth-child(2) > a")["href"][1:]
        cFlag_webpage = getSoup(cFlag_webpage, f"Failed to get flag WEBPAGE for {cName}")

        downloadFlagImage(cFlag_webpage, cCode)
    except Exception as e:
        print(f"Failed to get flag for {cName} - {e}")
def getCountries(table: list[str], filename: str):
    """Process every country row of ``table`` using batches of threads.

    Each row that contains at least one ``td`` cell is handed to getCountry
    in its own thread. At most N_THREADS threads are started at a time, and
    the whole batch is joined before the next one begins.
    """
    # Collect the data rows first; rows without any <td> cell are skipped.
    pending = Queue()
    for tr in table.find_all("tr"):
        if tr.find_all("td"):
            pending.put(tr)

    while not pending.empty():
        batch = []
        for _ in range(N_THREADS):
            if pending.empty():
                # Fewer rows than N_THREADS remained for this last batch.
                break
            worker = threading.Thread(target=getCountry, args=(pending.get(), filename))
            batch.append(worker)
            worker.start()

        # Wait for the whole batch to finish before starting the next one.
        for worker in batch:
            worker.join()


def main():
Expand Down

0 comments on commit b1a4cd6

Please sign in to comment.