From b012bcca60734614b37e05846541eaeed313d5b6 Mon Sep 17 00:00:00 2001
From: Stuart Chalk
Date: Tue, 4 Oct 2022 14:09:50 -0400
Subject: [PATCH 1/3] addition of Python function call 'scrape' for processing config file from another Python script and returning the JSON output

---
 scraper/functions.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 scraper/functions.py

diff --git a/scraper/functions.py b/scraper/functions.py
new file mode 100644
index 0000000..a1e794c
--- /dev/null
+++ b/scraper/functions.py
@@ -0,0 +1,34 @@
+""" functions to use the code as a project package """
+from scraper import code_gov
+import logging
+import json
+
+logger = logging.getLogger(__name__)
+
+
+def scrape(configfile):
+    """
+    run the scraper using the given JSON configuration file
+
+    Parameters
+    ----------
+    configfile : path
+        file path to the configuration file with format as outlined in README.md
+
+    Returns
+    -------
+    json
+        the scraped metadata, as produced by ``code_json.to_json()``
+    """
+
+    # open the config file (closed automatically, even if parsing fails)
+    with open(configfile, encoding="utf-8") as f:
+        config_json = json.load(f)
+
+    # process
+    code_json = code_gov.process_config(config_json)
+    code_gov.force_attributes(code_json, config_json)
+    logger.info("Number of Projects: %s", len(code_json["releases"]))
+
+    # return
+    return code_json.to_json()

From 892b1764765fc203f28f6eb443f2e5ff3f78d780 Mon Sep 17 00:00:00 2001
From: Stuart Chalk
Date: Tue, 4 Oct 2022 15:19:50 -0400
Subject: [PATCH 2/3] updating the .gitignore for the PyCharm IDE .idea folder

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index bd846b1..75a3b7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ venv/
 *.pyc
 llnl_scraper.egg-info/
 .vscode/
+.idea/

From fb8c04100d531f05f46532805822e2af23ecafb8 Mon Sep 17 00:00:00 2001
From: Stuart Chalk
Date: Tue, 4 Oct 2022 15:20:47 -0400
Subject: [PATCH 3/3] updating the .gitignore for the unf.json config file

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 75a3b7c..f16bc8d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ venv/
 llnl_scraper.egg-info/
 .vscode/
 .idea/
+scraper/unf.json