diff --git a/.gitignore b/.gitignore
index bd846b1..f16bc8d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@ venv/
 *.pyc
 llnl_scraper.egg-info/
 .vscode/
+.idea/
+scraper/unf.json
diff --git a/scraper/functions.py b/scraper/functions.py
new file mode 100644
index 0000000..a1e794c
--- /dev/null
+++ b/scraper/functions.py
@@ -0,0 +1,45 @@
+""" functions to use the code as a project package """
+from scraper import code_gov
+import logging
+import json
+
+logger = logging.getLogger(__name__)
+
+
+def scrape(configfile):
+    """
+    Run the scraper using the given configuration file.
+
+    Parameters
+    ----------
+    configfile : str or path-like
+        File path to the JSON configuration file, with the format
+        outlined in README.md.
+
+    Returns
+    -------
+    object
+        Whatever ``code_json.to_json()`` returns for the scraped
+        metadata (presumably a JSON string -- TODO confirm against
+        ``code_gov``).
+
+    Raises
+    ------
+    OSError
+        If the configuration file cannot be opened.
+    json.JSONDecodeError
+        If the configuration file is not valid JSON.
+    """
+
+    # Read the config inside a context manager so the file handle is
+    # always closed, even when parsing the JSON fails.
+    with open(configfile, encoding="utf-8") as f:
+        config_json = json.load(f)
+
+    # process
+    code_json = code_gov.process_config(config_json)
+    code_gov.force_attributes(code_json, config_json)
+    logger.info("Number of Projects: %s", len(code_json["releases"]))
+
+    # return
+    return code_json.to_json()