Skip to content

Commit

Permalink
cli support
Browse files Browse the repository at this point in the history
  • Loading branch information
wesmadrigal committed Jul 6, 2024
1 parent 6bf48c1 commit 58b51e6
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 3 deletions.
26 changes: 26 additions & 0 deletions graphreduce/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env python

import sys

import typer

from .cli.entry_point import entrypoint_cli




def main():
    """Run the graphreduce command-line interface.

    Delegates to ``entrypoint_cli``. If it raises an ``Exception``, the full
    traceback (including any chained cause) is written to stderr and the
    process exits with status 1 so callers and shells can detect the failure.
    """
    # Imported locally: only needed on the failure path of this entry point.
    import traceback

    try:
        entrypoint_cli()
    except Exception as exc:
        # ``exc.__cause__`` is None unless the error was raised with
        # ``raise ... from ...``, so dereferencing it unconditionally would
        # raise AttributeError here. Print the exception itself instead;
        # chained causes are included in the output by default.
        traceback.print_exception(type(exc), exc, exc.__traceback__)
        sys.exit(1)


if __name__ == '__main__':
    main()
Empty file added graphreduce/cli/__init__.py
Empty file.
97 changes: 97 additions & 0 deletions graphreduce/cli/auto_fe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env python

import sqlite3
import json
import os
import typing
import datetime

import typer
from typer import Argument, Option
import pandas as pd


# examples for using SQL engines and dialects
from graphreduce.node import SQLNode, DynamicNode
from graphreduce.graph_reduce import GraphReduce
from graphreduce.enum import SQLOpType, ComputeLayerEnum, PeriodUnit
from graphreduce.models import sqlop
from graphreduce.context import method_requires


# Typer sub-application holding the automated feature engineering commands.
auto_fe_cli = typer.Typer(
    name="auto_fe",
    help="Perform automated feature engineering",
    no_args_is_help=True,
)




@auto_fe_cli.command("autofefs")
def autofe_filesystem(
    # directory or sqlite db
    data_path: str = Argument(help="Path to data"),
    # 'csv', 'parquet', etc.
    fmt: str = Argument(help="File format"),
    # {fname: 'prefix'}
    prefixes: str = Argument(help='json dict of filenames with prefixes (e.g., `{"test.csv": "test"}`)'),
    # {fname: 'ts'}
    date_keys: str = Argument(help='json dict of filenames with associated date key (e.g., `{"test.csv": "ts"}`)'),
    # [ {'from_node': 'fname', 'from_key': 'key', 'to_node': 'fname', 'to_key': 'key', 'reduce': True} ]
    relationships: str = Argument(
        help='json of relationships (e.g., `[{"from_node": "fname", "from_key": "cust_id", "to_node": "tname", "to_key": "id"}]`)'),
    parent_node: str = Argument(
        help="parent/root node to which to aggregate all of the data"
    ),
    # ISO-8601 date/datetime string; defaults to the time this module was imported
    cut_date: str = Argument(str(datetime.datetime.today())),
    # 'pandas', 'dask', 'sql'
    compute_layer: str = Argument("pandas"),
    hops_front: int = Argument(1),
    hops_back: int = Argument(3),
    # The first positional value of ``Option`` is the DEFAULT, not a flag name,
    # so the default must be given explicitly before the flag declarations.
    output_path: typing.Optional[str] = Option(None, '--output-path', '-op', help='output path for the data')
):
    """
    Main automated feature engineering function.
    """
    # NOTE: the docstring above is kept short because Typer shows it as the
    # command's help text.
    #
    # Flow: decode the JSON arguments, build one DynamicNode per file found in
    # ``data_path``, wire the nodes together in a GraphReduce instance using
    # ``relationships``, run the transformations, and write the parent node's
    # dataframe to ``output_path`` via the dataframe's ``to_<fmt>`` method.

    prefixes = json.loads(prefixes)
    date_keys = json.loads(date_keys)
    relationships = json.loads(relationships)

    # Honor the user-supplied cut date instead of silently using "now".
    try:
        cut_dt = datetime.datetime.fromisoformat(cut_date)
    except ValueError as err:
        raise typer.BadParameter(
            f"cut_date must be an ISO-8601 date/datetime, got {cut_date!r}"
        ) from err

    layer = getattr(ComputeLayerEnum, compute_layer)

    nodes = {}
    if fmt in ['csv', 'parquet', 'delta', 'iceberg']:
        for f in os.listdir(data_path):
            # splitext takes the LAST suffix, so 'a.b.csv' -> 'csv'; entries
            # without an extension are skipped rather than raising IndexError.
            ext = os.path.splitext(f)[1].lstrip('.')
            if not ext:
                continue
            print(f"adding file {f}")
            nodes[f] = DynamicNode(
                fpath=f"{data_path}/{f}",
                fmt=ext,
                prefix=prefixes.get(f),
                compute_layer=layer,
                date_key=date_keys.get(f, None)
            )

    # Fail with a readable CLI error instead of a bare KeyError when the
    # format is unsupported (no nodes were built) or the name is wrong.
    if parent_node not in nodes:
        raise typer.BadParameter(
            f"parent node {parent_node!r} not found among the files loaded "
            f"from {data_path!r} with format {fmt!r}"
        )

    gr = GraphReduce(
        name='autofe',
        parent_node=nodes[parent_node],
        fmt=fmt,
        cut_date=cut_dt,
        compute_layer=layer,
        auto_features=True,
        auto_feature_hops_front=hops_front,
        auto_feature_hops_back=hops_back
    )

    for rel in relationships:
        gr.add_entity_edge(
            parent_node=nodes[rel['to_node']],
            parent_key=rel['to_key'],
            relation_node=nodes[rel['from_node']],
            relation_key=rel['from_key'],
            reduce=rel.get('reduce', True)
        )

    gr.do_transformations()
    if not output_path:
        # NOTE(review): the default file name ends in .csv regardless of
        # ``fmt`` -- confirm whether it should follow the chosen format.
        output_path = os.path.join(
            os.path.expanduser("~"),
            "graphreduce_outputs/test.csv"
        )
        # The default directory may not exist yet on a fresh machine.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

    getattr(gr.parent_node.df, f"to_{fmt}")(output_path)
29 changes: 29 additions & 0 deletions graphreduce/cli/entry_point.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python

import typing

import typer


from .auto_fe import auto_fe_cli


# Root Typer application for the command-line interface; sub-applications
# are attached to it below.
entrypoint_cli_typer = typer.Typer(
    no_args_is_help=True,
    add_completion=False,
    rich_markup_mode="markdown",
    help="""
See examples at https://github.com/wesmadrigal/graphreduce
"""
)

# Automated feature engineering
entrypoint_cli_typer.add_typer(auto_fe_cli, rich_help_panel="autofe")


# Build the callable command object from the Typer app; this is the object
# invoked below when the module is run as a script.
entrypoint_cli = typer.main.get_command(entrypoint_cli_typer)
# NOTE(review): the return value is discarded and the context argument is
# None; the purpose of this call is not evident from this file -- confirm
# whether it is still needed.
entrypoint_cli.list_commands(None)


if __name__ == '__main__':
    entrypoint_cli()
4 changes: 1 addition & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,9 @@
author="Wes Madrigal",
author_email="wes@madconsulting.ai",
license="MIT",

description="Leveraging graph data structures for complex feature engineering pipelines.",
long_description = pathlib.Path("README.md").read_text(),
long_description_content_type = "text/markdown",

keywords = ", ".join(KEYWORDS),
classifiers = [
"Programming Language :: Python :: 3",
Expand All @@ -60,6 +58,6 @@
"Source" : "http://github.com/wesmadrigal/graphreduce",
"Issue Tracker" : "https://github.com/wesmadrigal/graphreduce/issues"
},

py_modules=["graphreduce"],
zip_safe=False,
)

0 comments on commit 58b51e6

Please sign in to comment.