-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6bf48c1
commit 58b51e6
Showing
5 changed files
with
153 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
|
||
import typer | ||
|
||
from .cli.entry_point import entrypoint_cli | ||
|
||
|
||
|
||
|
||
def main():
    """Run the command-line interface.

    Calls ``entrypoint_cli``. If it raises an unexpected exception, the full
    traceback (including any chained cause) is written to stderr and the
    process exits with status 1.
    """
    # Imported locally so the block does not rely on a file-level import.
    import traceback

    try:
        entrypoint_cli()
    except Exception as exc:
        # The previous handler read ``exc.__cause__.__traceback__``, which
        # raises AttributeError whenever the exception has no explicit cause,
        # and printing a traceback object only shows its repr.
        # print_exception renders the exception together with its chain.
        traceback.print_exception(type(exc), exc, exc.__traceback__)
        # Signal failure to the calling shell instead of exiting with 0.
        sys.exit(1)


if __name__ == '__main__':
    main()
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#!/usr/bin/env python | ||
|
||
import sqlite3 | ||
import json | ||
import os | ||
import typing | ||
import datetime | ||
|
||
import typer | ||
from typer import Argument, Option | ||
import pandas as pd | ||
|
||
|
||
# examples for using SQL engines and dialects | ||
from graphreduce.node import SQLNode, DynamicNode | ||
from graphreduce.graph_reduce import GraphReduce | ||
from graphreduce.enum import SQLOpType, ComputeLayerEnum, PeriodUnit | ||
from graphreduce.models import sqlop | ||
from graphreduce.context import method_requires | ||
|
||
|
||
# Typer sub-application that groups the automated feature engineering commands.
auto_fe_cli = typer.Typer(
    name="auto_fe",
    help="Perform automated feature engineering",
    no_args_is_help=True,
)
|
||
|
||
|
||
|
||
@auto_fe_cli.command("autofefs")
def autofe_filesystem(
    # directory containing the data files
    data_path: str = Argument(help="Path to data"),
    # 'csv', 'parquet', etc.
    fmt: str = Argument(help="File format"),
    # {"fname": "prefix"}
    prefixes: str = Argument(
        help='json dict of filenames with prefixes (e.g., `{"test.csv": "test"}`)'),
    # {"fname": "ts"}
    date_keys: str = Argument(
        help='json dict of filenames with associated date key (e.g., `{"test.csv": "ts"}`)'),
    # [ {"from_node": "fname", "from_key": "key", "to_node": "fname", "to_key": "key", "reduce": true} ]
    relationships: str = Argument(
        help='json list of relationships (e.g., `[{"from_node": "fname", "from_key": "cust_id", '
             '"to_node": "tname", "to_key": "id"}]`)'),
    parent_node: str = Argument(
        help="parent/root node to which to aggregate all of the data"
    ),
    # ISO-8601 date/datetime string; the default is computed once at import time.
    cut_date: str = Argument(str(datetime.datetime.today())),
    # name of a ComputeLayerEnum member, e.g. 'pandas', 'dask', 'sql'
    compute_layer: str = Argument("pandas"),
    hops_front: int = Argument(1),
    hops_back: int = Argument(3),
    # The first positional argument of Option is the DEFAULT value, so the
    # flag names must come after it (previously '-op' was used as the default).
    output_path: typing.Optional[str] = Option(
        None, '--output-path', '-op', help='output path for the data'),
):
    """
    Main automated feature engineering function.
    """
    # NOTE: the docstring above is kept short on purpose -- Typer shows it as
    # the command's help text. Parameter details live in the comments above.

    prefix_by_file = json.loads(prefixes)
    date_key_by_file = json.loads(date_keys)
    edges = json.loads(relationships)

    # Honour the user-supplied cut date (it used to be parsed and then ignored
    # in favour of datetime.now()).
    try:
        cut_datetime = datetime.datetime.fromisoformat(cut_date)
    except ValueError as err:
        raise typer.BadParameter(
            f"cut_date must be an ISO-8601 date or datetime, got {cut_date!r}"
        ) from err

    try:
        layer = getattr(ComputeLayerEnum, compute_layer)
    except AttributeError as err:
        raise typer.BadParameter(
            f"unknown compute layer {compute_layer!r}"
        ) from err

    # One node per directory entry, keyed by file name.
    nodes = {}
    if fmt in ('csv', 'parquet', 'delta', 'iceberg'):
        for fname in os.listdir(data_path):
            print(f"adding file {fname}")
            # splitext copes with names that have no dot (falls back to the
            # requested format) or several dots (uses the last suffix).
            extension = os.path.splitext(fname)[1].lstrip('.')
            nodes[fname] = DynamicNode(
                fpath=f"{data_path}/{fname}",
                fmt=extension or fmt,
                prefix=prefix_by_file.get(fname),
                compute_layer=layer,
                date_key=date_key_by_file.get(fname, None)
            )

    def _node(name):
        # Turn a missing file name into a readable CLI error, not a KeyError.
        try:
            return nodes[name]
        except KeyError:
            raise typer.BadParameter(
                f"node {name!r} was not found in {data_path!r} "
                f"(available: {sorted(nodes)})"
            ) from None

    gr = GraphReduce(
        name='autofe',
        parent_node=_node(parent_node),
        fmt=fmt,
        cut_date=cut_datetime,
        compute_layer=layer,
        auto_features=True,
        auto_feature_hops_front=hops_front,
        auto_feature_hops_back=hops_back
    )

    for rel in edges:
        gr.add_entity_edge(
            parent_node=_node(rel['to_node']),
            parent_key=rel['to_key'],
            relation_node=_node(rel['from_node']),
            relation_key=rel['from_key'],
            reduce=rel.get('reduce', True)
        )

    gr.do_transformations()

    if not output_path:
        # Default location under the user's home directory; the extension
        # follows the requested format so the file name matches its contents.
        output_path = os.path.join(
            os.path.expanduser("~"),
            "graphreduce_outputs",
            f"test.{fmt}"
        )

    # Make sure the destination directory exists before writing.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Dispatches to the dataframe writer named after the format (e.g. to_csv).
    # NOTE(review): presumably not every supported fmt has a matching `to_<fmt>`
    # writer on the parent dataframe -- confirm for 'delta' / 'iceberg'.
    getattr(gr.parent_node.df, f"to_{fmt}")(output_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/env python | ||
|
||
import typing | ||
|
||
import typer | ||
|
||
|
||
from .auto_fe import auto_fe_cli | ||
|
||
|
||
# Root Typer application for the command-line interface.
entrypoint_cli_typer = typer.Typer(
    help="""
See examples at https://github.com/wesmadrigal/graphreduce
""",
    rich_markup_mode="markdown",
    add_completion=False,
    no_args_is_help=True,
)

# Automated feature engineering
entrypoint_cli_typer.add_typer(auto_fe_cli, rich_help_panel="autofe")

# Command object built from the Typer application.
entrypoint_cli = typer.main.get_command(entrypoint_cli_typer)
# NOTE(review): the return value is discarded -- presumably this call is not
# needed; confirm before removing.
entrypoint_cli.list_commands(None)


if __name__ == "__main__":
    entrypoint_cli()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters