From 7f8c38d0bc93cedf581f5e1d11b7261c895d0369 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 27 Nov 2019 09:21:18 -0500 Subject: [PATCH] Update README for 0.1.0 release --- README.md | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index ba2a7f8f..49bf54bf 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ ```bash # iterate through all source nodes, run drop + create + refresh (if partitioned) dbt run-operation stage_external_sources -# maybe someday: dbt source create-external ? + +# maybe someday: dbt source stage-external ``` The macros assume that you have already created an external stage (Snowflake) @@ -23,10 +24,10 @@ source: # NEW: "external" property of source node external: - location: # S3 file path or stage (Snowflake) - file_format: # Hive or Snowflake - row_format: # Hive - tbl_properties: # Hive + location: # S3 file path or Snowflake stage + file_format: # Hive specification or Snowflake named format / specification + row_format: # Hive specification + tbl_properties: # Hive specification # Specify a list of file-path partitions. @@ -37,43 +38,47 @@ source: expression: to_date(substr(metadata$filename, 8, 10), 'YYYY/MM/DD') # ------ REDSHIFT ------- - partitions: + partitions: - name: appId data_type: varchar(255) - vals: # array of values + vals: # list of values - dev - prod - # macro to convert partition value to file path specification - # takes keyword arguments 'name' + 'value' - path_macro: test_external_sources_redshift.year_month_day + # macro to convert partition value to file path specification. 
+ path_macro: dbt_external_tables.key_value + # this "helper" macro is defined in the package, but you can use a + # custom macro that takes keyword arguments 'name' + 'value' + # and returns the path as a string + # if there are multiple partitions, order matters when compiling the S3 path - name: collector_date data_type: date - vals: # macro w/ args to generate array of values - macro: dbt.dates_in_range + vals: # macro w/ keyword args to generate list of values + macro: dbt.dates_in_range args: start_date_str: '2019-08-01' end_date_str: '{{modules.datetime.date.today().strftime("%Y-%m-%d")}}' in_fmt: "%Y-%m-%d" out_fmt: "%Y-%m-%d" - path_macro: test_external_sources_redshift.year_month_day + path_macro: dbt_external_tables.year_month_day - # Specify ALL column names + datatypes + # Specify ALL column names + datatypes. + # Column order matters for CSVs. Other file formats require column + # names to exactly match. columns: - name: app_id data_type: varchar(255) description: "Application ID" + - name: platform + data_type: varchar(255) + description: "Platform" ... ``` -See `sample_sources` for full, valid YML config that establishes Snowplow events +See [`sample_sources`](sample_sources) for a full, valid YML config that establishes Snowplow events as a dbt source and stage-ready external table in Snowflake and Spectrum. -### Current dependencies - -* dbt@0.15.0 in [`dev/louisa-may-alcott`](https://github.com/fishtown-analytics/dbt/tree/dev/louisa-may-alcott) - ### Supported databases * Redshift (Spectrum)