diff --git a/data/transform/models/marts/telemetry/base/feature_usage_base.sql b/data/transform/models/marts/telemetry/base/feature_usage_base.sql index 3394b42b..7d70fb96 100644 --- a/data/transform/models/marts/telemetry/base/feature_usage_base.sql +++ b/data/transform/models/marts/telemetry/base/feature_usage_base.sql @@ -16,6 +16,7 @@ elt_state AS ( WHERE cli_command = 'elt' AND NULLIF( + -- WARNING: this is likely not accurate anymore TRIM(GET(COALESCE(GET(options_obj, 'elt'), { }), 'state')), 'null' ) IS NOT NULL diff --git a/data/transform/models/marts/telemetry/base/unstructured_parsing/unstruct_exec_flattened.sql b/data/transform/models/marts/telemetry/base/unstructured_parsing/unstruct_exec_flattened.sql index 6aafb480..6e8d779d 100644 --- a/data/transform/models/marts/telemetry/base/unstructured_parsing/unstruct_exec_flattened.sql +++ b/data/transform/models/marts/telemetry/base/unstructured_parsing/unstruct_exec_flattened.sql @@ -5,12 +5,42 @@ WITH base AS ( SELECT - *, - ROW_NUMBER() OVER ( - PARTITION BY - context_uuid - ORDER BY COALESCE(LEN(options_obj::string), 0) DESC - ) AS opt_obj_row_num + context_uuid, + event_created_at, + ip_address_hash, + project_uuid, + freedesktop_version_id, + meltano_version, + num_cpu_cores_available, + windows_edition, + command, + sub_command, + machine, + system_release, + project_uuid_source, + options_obj, + freedesktop_id, + freedesktop_id_like, + is_dev_build, + process_hierarchy, + python_version, + environment_name_hash, + client_uuid, + is_ci_environment, + notable_flag_env_vars, + notable_hashed_env_vars, + num_cpu_cores, + python_implementation, + system_name, + system_version, + exit_code, + exit_timestamp, + process_duration_microseconds, + exception, + event, + block_type, + event_name, + plugins_obj FROM {{ ref('unstruct_event_flattened') }} ), @@ -47,12 +77,14 @@ SELECT MAX(base.machine) AS machine, MAX(base.system_release) AS system_release, MAX(base.project_uuid_source) AS project_uuid_source, - 
GET( - ARRAY_AGG( - CASE WHEN base.opt_obj_row_num = 1 THEN base.options_obj END - ), - 0 - ) AS options_obj, + MAX(null) AS options_obj, + -- NOTE: This is too inefficient and we're not using it much + -- GET( + -- ARRAY_AGG( + -- CASE WHEN base.opt_obj_row_num = 1 THEN base.options_obj END + -- ), + -- 0 + -- ) AS options_obj, MAX(base.freedesktop_id) AS freedesktop_id, MAX(base.freedesktop_id_like) AS freedesktop_id_like, MAX(base.is_dev_build) AS is_dev_build, @@ -87,8 +119,8 @@ SELECT ) ) AS exception_cause, -- Tracing - -- TODO: event states are deduped here, maybe agg differently - ARRAY_AGG(base.event) AS event_states, + -- NOTE: maxing out the ARRAY_AGG 16MB limit and it's not used anywhere + -- ARRAY_AGG(base.event) AS event_states, ARRAY_AGG( DISTINCT base.block_type ) AS event_block_types, diff --git a/data/transform/models/marts/telemetry/schema.yml b/data/transform/models/marts/telemetry/schema.yml index cfd71566..41dfbe46 100644 --- a/data/transform/models/marts/telemetry/schema.yml +++ b/data/transform/models/marts/telemetry/schema.yml @@ -298,9 +298,6 @@ models: - name: exception_cause - - name: event_states - description: A list of events states that occured during the CLI execution (e.g. started, completed, failed, etc.) - - name: event_block_types description: A list of the type of block events that were received.