From 58b51e687da152db10528f77578cb509a13cfbc8 Mon Sep 17 00:00:00 2001
From: Wes
Date: Sat, 6 Jul 2024 14:32:40 -0400
Subject: [PATCH] cli support

---
 graphreduce/__main__.py        | 26 +++++++++
 graphreduce/cli/__init__.py    |  0
 graphreduce/cli/auto_fe.py     | 97 ++++++++++++++++++++++++++++++++++
 graphreduce/cli/entry_point.py | 29 ++++++++++
 setup.py                       |  4 +-
 5 files changed, 153 insertions(+), 3 deletions(-)
 create mode 100644 graphreduce/__main__.py
 create mode 100644 graphreduce/cli/__init__.py
 create mode 100644 graphreduce/cli/auto_fe.py
 create mode 100644 graphreduce/cli/entry_point.py

diff --git a/graphreduce/__main__.py b/graphreduce/__main__.py
new file mode 100644
index 0000000..7cae889
--- /dev/null
+++ b/graphreduce/__main__.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+import sys
+import traceback
+
+from .cli.entry_point import entrypoint_cli
+
+
+def main():
+    # Placeholder for version-specific behavior; compare (major, minor)
+    # rather than a three-element slice so the check can actually match.
+    if sys.version_info[:2] == (3, 8):
+        pass
+
+    try:
+        entrypoint_cli()
+    except Exception:
+        # exc.__cause__ may be None, so print the traceback of the exception
+        # itself rather than reaching for exc.__cause__.__traceback__.
+        traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/graphreduce/cli/__init__.py b/graphreduce/cli/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/graphreduce/cli/auto_fe.py b/graphreduce/cli/auto_fe.py
new file mode 100644
index 0000000..ea233ff
--- /dev/null
+++ b/graphreduce/cli/auto_fe.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+import sqlite3
+import json
+import os
+import typing
+import datetime
+
+import typer
+from typer import Argument, Option
+import pandas as pd
+
+# imports for SQL engines and dialects (not used by the filesystem command yet)
+from graphreduce.node import SQLNode, DynamicNode
+from graphreduce.graph_reduce import GraphReduce
+from graphreduce.enum import SQLOpType, ComputeLayerEnum, PeriodUnit
+from graphreduce.models import sqlop
+from graphreduce.context import method_requires
+
+
+auto_fe_cli = typer.Typer(
+    name="auto_fe",
+    help="Perform automated feature engineering",
+    no_args_is_help=True,
+)
+
+
+@auto_fe_cli.command("autofefs")
+def autofe_filesystem(
+        # directory or sqlite db
+        data_path: str = Argument(..., help="Path to data"),
+        # 'csv', 'parquet', etc.
+        fmt: str = Argument(..., help="File format"),
+        # {fname: 'prefix'}
+        prefixes: str = Argument(
+            ..., help='JSON dict of filenames to prefixes (e.g., `{"test.csv": "test"}`)'),
+        # {fname: 'ts'}
+        date_keys: str = Argument(
+            ..., help='JSON dict of filenames to date keys (e.g., `{"test.csv": "ts"}`)'),
+        # [{'from_node': fname, 'from_key': key, 'to_node': fname, 'to_key': key, 'reduce': bool}]
+        relationships: str = Argument(
+            ..., help='JSON list of relationships (e.g., `[{"from_node": "fname", "from_key": "cust_id", "to_node": "tname", "to_key": "id", "reduce": true}]`)'),
+        parent_node: str = Argument(
+            ..., help="parent/root node to which to aggregate all of the data"),
+        cut_date: str = Argument(str(datetime.datetime.today())),
+        # 'pandas', 'dask', 'sql'
+        compute_layer: str = Argument("pandas"),
+        hops_front: int = Argument(1),
+        hops_back: int = Argument(3),
+        # the first positional of Option is the default, not a flag name, so it
+        # must be None for the output-path fallback below to ever trigger
+        output_path: typing.Optional[str] = Option(
+            None, '--output-path', help='output path for the data'),
+        ):
+    """
+    Main automated feature engineering function.
+ """ + + prefixes = json.loads(prefixes) + date_keys = json.loads(date_keys) + relationships = json.loads(relationships) + + nodes = {} + if fmt in ['csv', 'parquet', 'delta', 'iceberg']: + for f in os.listdir(data_path): + print(f"adding file {f}") + nodes[f] = DynamicNode( + fpath=f"{data_path}/{f}", + fmt=f.split('.')[1], + prefix=prefixes.get(f), + compute_layer=getattr(ComputeLayerEnum, compute_layer), + date_key=date_keys.get(f, None) + ) + + gr = GraphReduce( + name='autofe', + parent_node=nodes[parent_node], + fmt=fmt, + cut_date=datetime.datetime.now(), + compute_layer=getattr(ComputeLayerEnum, compute_layer), + auto_features=True, + auto_feature_hops_front=hops_front, + auto_feature_hops_back=hops_back + ) + + for rel in relationships: + gr.add_entity_edge( + parent_node=nodes[rel['to_node']], + parent_key=rel['to_key'], + relation_node=nodes[rel['from_node']], + relation_key=rel['from_key'], + reduce=rel.get('reduce', True) + ) + + gr.do_transformations() + if not output_path: + output_path = os.path.join( + os.path.expanduser("~"), + "graphreduce_outputs/test.csv" + ) + + getattr(gr.parent_node.df, f"to_{fmt}")(output_path) diff --git a/graphreduce/cli/entry_point.py b/graphreduce/cli/entry_point.py new file mode 100644 index 0000000..b180a93 --- /dev/null +++ b/graphreduce/cli/entry_point.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +import typing + +import typer + + +from .auto_fe import auto_fe_cli + + +entrypoint_cli_typer = typer.Typer( + no_args_is_help=True, + add_completion=False, + rich_markup_mode="markdown", + help=""" + See examples at https://github.com/wesmadrigal/graphreduce + """ +) + +# Automated feature engineering +entrypoint_cli_typer.add_typer(auto_fe_cli, rich_help_panel="autofe") + + +entrypoint_cli = typer.main.get_command(entrypoint_cli_typer) +entrypoint_cli.list_commands(None) + + +if __name__ == '__main__': + entrypoint_cli() diff --git a/setup.py b/setup.py index 027d622..dc53ea7 100644 --- a/setup.py +++ b/setup.py @@ -38,11 +38,9 @@ author="Wes Madrigal", author_email="wes@madconsulting.ai", license="MIT", - description="Leveraging graph data structures for complex feature engineering pipelines.", long_description = pathlib.Path("README.md").read_text(), long_description_content_type = "text/markdown", - keywords = ", ".join(KEYWORDS), classifiers = [ "Programming Language :: Python :: 3", @@ -60,6 +58,6 @@ "Source" : "http://github.com/wesmadrigal/graphreduce", "Issue Tracker" : "https://github.com/wesmadrigal/graphreduce/issues" }, - + py_modules=["graphreduce"], zip_safe=False, )