simonw · atiro · Aug 23, 2020
diff --git a/csvs_to_sqlite/cli.py b/csvs_to_sqlite/cli.py
@@ -76,6 +76,12 @@
     multiple=True,
     help=("One or more custom date format strings to try when parsing dates/datetimes"),
 )
+@click.option(
+    "--date-julian",
+    "-dj",
+    multiple=True,
+    help=("One or more columns to store as Julian Dates (reals)"),
+)
 @click.option(
     "--primary-key",
     "-pk",
@@ -134,6 +140,7 @@ def cli(
     date,
     datetime,
     datetime_format,
+    date_julian,
     primary_key,
     fts,
     index,
@@ -174,7 +181,11 @@ def cli(
                 df[filename_column] = name
                 if shape:
                     shape += ",{}".format(filename_column)
-            sql_type_overrides = apply_shape(df, shape)
+            if date_julian and not shape:  # an explicit --shape takes precedence
+                julian_cols = ",".join("{}(REAL)".format(c) for c in date_julian)
+                sql_type_overrides = apply_shape(df, julian_cols, modify=True)
+            else:  # always bind: apply_shape returns None when shape is empty
+                sql_type_overrides = apply_shape(df, shape)
             apply_dates_and_datetimes(df, date, datetime, datetime_format)
             dataframes.append(df)
         except LoadCsvError as e:

diff --git a/csvs_to_sqlite/utils.py b/csvs_to_sqlite/utils.py
@@ -466,17 +466,18 @@ def parse_shape(shape):
     return defns
 
 
-def apply_shape(df, shape):
+def apply_shape(df, shape, modify=False):
     # Shape is format 'county:Cty,votes:Vts(REAL)'
     # Applies changes in place, returns dtype= arg for to_sql
     if not shape:
         return None
     defns = parse_shape(shape)
-    # Drop any columns we don't want
-    cols_to_keep = [d["csv_name"] for d in defns]
-    cols_to_drop = [c for c in df.columns if c not in cols_to_keep]
-    if cols_to_drop:
-        df.drop(cols_to_drop, axis=1, inplace=True)
+    # If we are not modifying, drop any columns we don't want
+    if not modify:
+        cols_to_keep = [d["csv_name"] for d in defns]
+        cols_to_drop = [c for c in df.columns if c not in cols_to_keep]
+        if cols_to_drop:
+            df.drop(cols_to_drop, axis=1, inplace=True)
     # Apply column renames
     renames = {
         d["csv_name"]: d["db_name"] for d in defns if d["csv_name"] != d["db_name"]