Skip to content

Commit

Permalink
Runs cleanfile but no results output
Browse files Browse the repository at this point in the history
  • Loading branch information
StephenCarterLIIA authored Sep 6, 2024
1 parent 8199626 commit 9e51841
Show file tree
Hide file tree
Showing 15 changed files with 1,818 additions and 56 deletions.
2 changes: 2 additions & 0 deletions liiatools/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from liiatools.datasets.s903.s903_cli import s903
from liiatools.datasets.social_work_workforce.csww_cli import csww
from liiatools.datasets.s251.s251_cli import s251
from liiatools.datasets.sen2.sen2_cli import sen2


@click.group()
Expand All @@ -17,6 +18,7 @@ def cli():
cli.add_command(s903)
cli.add_command(csww)
cli.add_command(s251)
cli.add_command(sen2)

if __name__ == "__main__":
cli()
80 changes: 80 additions & 0 deletions liiatools/datasets/sen2/lds_sen2_clean/configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import datetime
import logging
import os
from pathlib import Path
import yaml
from string import Template

from liiatools.spec import common as common_asset_dir

log = logging.getLogger(__name__)

COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent


class Config(dict):
    """
    Dictionary of configuration values loaded from one or more YAML files.

    As well as the loaded values, every instance records "config_date" (the ISO
    timestamp at construction) and, when the operating system can report it,
    "username".
    """

    def __init__(self, *config_files):
        """
        Load each of the given configuration files in the order supplied.

        :param config_files: Paths of YAML files to load. The marker string
            "DEFAULT_DATA_CODES" stands for the "LA-codes.yml" file in the common
            spec directory. When no files are given, only that default is loaded.
        """
        super().__init__()

        if not config_files:
            config_files = [
                "DEFAULT_DATA_CODES",
            ]

        for file in config_files:
            if file == "DEFAULT_DATA_CODES":
                file = COMMON_CONFIG_DIR / "LA-codes.yml"
            self.load_config(file, conditional=False)

        self["config_date"] = datetime.datetime.now().isoformat()
        try:
            self["username"] = os.getlogin()
        except OSError:
            # This happens when tests are not run under a login shell, e.g. CI pipeline
            pass

    def load_config(self, filename, conditional=False, warn=False):
        """
        Load configuration from a yaml file and merge it into this configuration.
        Values from the file overwrite any values already held under the same key.

        Files can contain ${} placeholders following the Python string.Template format.
        The context will include any keys already existing in the configuration, any keys
        from the current file - however, if these include placeholders, the placeholders
        will not be replaced. Finally, environment variables can be referenced with
        `os_environ_VARIABLE_NAME`. Where the same name appears more than once,
        environment variables win over keys from the file, which win over existing keys.

        An empty file is treated as containing no values.

        :param filename: Filename to load from
        :param conditional: If True, ignore file if it doesn't exist. If False, fail. (default False)
        :param warn: If True, log a warning when an optional file is missing (default False)
        :return: None
        :raises KeyError: If the file references a placeholder that is not in the context
        :raises ValueError: If the file contains a malformed placeholder
        """
        if conditional and not os.path.isfile(filename):
            if warn:
                log.warning("Missing optional file %s", filename)

            return

        # Read the file once; the same text is parsed both before and after substitution
        with open(filename, "rt") as config_file:
            raw_text = config_file.read()

        # NOTE(review): yaml.FullLoader can build more than plain data types, so this
        # should only be used on trusted configuration files - consider yaml.safe_load.
        user_config = yaml.load(raw_text, Loader=yaml.FullLoader)
        if user_config is None:
            # An empty yaml document parses to None rather than an empty mapping
            user_config = {}

        log.info(
            "Loading %s configuration values from '%s'.", len(user_config), filename
        )

        environment_dict = {"os_environ_{}".format(k): v for k, v in os.environ.items()}

        # Later updates take priority, so the order here defines the precedence
        variables = dict(self)
        variables.update(user_config)
        variables.update(environment_dict)

        substituted_text = Template(raw_text).substitute(variables)

        # The first parse only supplied placeholder values; this one supplies the result
        resolved_config = yaml.load(substituted_text, Loader=yaml.FullLoader)
        if resolved_config is not None:
            self.update(resolved_config)
20 changes: 20 additions & 0 deletions liiatools/datasets/sen2/lds_sen2_clean/converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from sfdata_stream_parser.checks import type_check
from sfdata_stream_parser import events
from sfdata_stream_parser.filters.generic import streamfilter, pass_event


@streamfilter(check=type_check(events.TextNode), fail_function=pass_event)
def convert_true_false(event):
    """
    Rewrite the text of "yesnotype" events so that false becomes "0" and true becomes "1"

    Events without a schema, events of any other schema type, and events whose text is
    neither "true" nor "false" (compared case-insensitively) are returned unchanged.

    :param event: A single text event, optionally carrying a schema
    :return: The original event, or a copy of it with the converted text
    """
    if not hasattr(event, "schema") or event.schema.type.name != "yesnotype":
        return event

    converted_text = {"false": "0", "true": "1"}.get(event.text.lower())
    if converted_text is None:
        return event

    return event.from_event(event, text=converted_text)
87 changes: 87 additions & 0 deletions liiatools/datasets/sen2/lds_sen2_clean/file_creator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from pathlib import Path
import pandas as pd
import logging

from liiatools.datasets.shared_functions import converters, common

log = logging.getLogger(__name__)


def convert_to_dataframe(data):
    """
    Export the incoming dataset in its "df" format

    :param data: An object exposing an export(format) method
    :return: Whatever the dataset returns for the "df" export format
    """
    return data.export("df")


def get_year(data, year):
    """
    Store the given year against every row in a "YEAR" column

    The dataframe is modified in place; an existing "YEAR" column is overwritten.

    :param data: The dataframe to update
    :param year: The value to write into the "YEAR" column
    :return: The same dataframe, now carrying the "YEAR" column
    """
    data["YEAR"] = year
    return data


def convert_to_datetime(data):
    """
    Parse the "PersonBirthDate" column into datetime values

    The dataframe is modified in place.

    :param data: A dataframe containing a "PersonBirthDate" column
    :return: The same dataframe with "PersonBirthDate" converted by pandas.to_datetime
    """
    date_columns = ["PersonBirthDate"]
    # Converted column-wise through DataFrame.apply, so further columns could be listed above
    parsed_dates = data[date_columns].apply(pd.to_datetime)
    data[date_columns] = parsed_dates
    return data


def _get_person_school_year(datevalue):
    """
    Work out the school year that a date falls in, treating September as the first month

    :param datevalue: An object with numeric month and year attributes
    :return: The year for September to December dates, the previous year for January
        to August dates, or None when the month satisfies neither comparison
    """
    month = datevalue.month
    if month >= 9:
        return datevalue.year
    if month <= 8:
        return datevalue.year - 1
    # Reached only when both comparisons fail - presumably a missing date whose
    # month is NaN; confirm against the data being loaded
    return None


def add_school_year(data):
    """
    Add a "PersonSchoolYear" column derived from each row's "PersonBirthDate"

    The dataframe is modified in place.

    :param data: A dataframe whose "PersonBirthDate" column holds date values
    :return: The same dataframe with the "PersonSchoolYear" column added
    """
    birth_dates = data["PersonBirthDate"]
    data["PersonSchoolYear"] = birth_dates.apply(_get_person_school_year)
    return data


def add_la_name(data, la_name):
    """
    Store the local authority name against every row in an "LA" column

    The dataframe is modified in place; an existing "LA" column is overwritten.

    :param data: The dataframe to update
    :param la_name: The value to write into the "LA" column
    :return: The same dataframe, now carrying the "LA" column
    """
    data["LA"] = la_name
    return data


def la_prefix(data, la_code):
    """
    Append an underscore and the LA code to every value in the "Surname" column

    Despite the function name, the code is added after the existing value, not
    before it. The dataframe is modified in place.

    :param data: A dataframe containing a "Surname" column
    :param la_code: The LA code to append
    :return: The same dataframe with the updated "Surname" column
    """
    surname_with_separator = data["Surname"] + "_"
    data["Surname"] = surname_with_separator + la_code
    return data


def degrade_dob(data):
    """
    Replace every "PersonBirthDate" value with the result of converters.to_month_only_dob

    The dataframe is modified in place.

    :param data: A dataframe containing a "PersonBirthDate" column
    :return: The same dataframe with the converted "PersonBirthDate" column
    """
    birth_dates = data["PersonBirthDate"]
    # NOTE(review): a column lookup on a dataframe is not expected to return None, so
    # this guard looks like it can never trigger - confirm what it was meant to check
    if birth_dates is None:
        return data

    data["PersonBirthDate"] = birth_dates.apply(converters.to_month_only_dob)
    return data


def add_fields(input_year, data, la_name, la_code):
    """
    Convert the dataset to a dataframe, then add the derived fields and degrade the data

    The steps run in this order: export to a dataframe, add YEAR, parse
    PersonBirthDate, add PersonSchoolYear, add LA, append the LA code to Surname,
    and finally degrade PersonBirthDate.

    :param input_year: A string of the year of return for the current file
    :param data: The dataset to be converted and cleaned
    :param la_name: LA name
    :param la_code: LA code
    :return: Cleaned and degraded dataframe
    """
    frame = convert_to_dataframe(data)
    frame = get_year(frame, input_year)
    # Birth dates must be parsed before the school year can be derived from them
    frame = convert_to_datetime(frame)
    frame = add_school_year(frame)
    frame = add_la_name(frame, la_name)
    frame = la_prefix(frame, la_code)
    # Degrading comes last so the school year is derived from the parsed birth date
    frame = degrade_dob(frame)
    return frame


def export_file(input, output, data):
    """
    Write the dataframe to "<input file stem>_clean.csv" inside the output directory

    :param input: Path of the original input file; only its stem is used for naming
    :param output: Directory in which the csv file is written
    :param data: The dataframe to save, written without its index
    :return: None
    """
    clean_name = "{}_clean.csv".format(Path(input).stem)
    data.to_csv(Path(output) / clean_name, index=False)
Loading

0 comments on commit 9e51841

Please sign in to comment.