diff --git a/warehouse/oso_sqlmesh/models/intermediate/events/opendevdata/int_events_daily__opendevdata.sql b/warehouse/oso_sqlmesh/models/intermediate/events/opendevdata/int_events_daily__opendevdata.sql new file mode 100644 index 0000000000..1ef3bbcbb5 --- /dev/null +++ b/warehouse/oso_sqlmesh/models/intermediate/events/opendevdata/int_events_daily__opendevdata.sql @@ -0,0 +1,27 @@ +MODEL ( + name oso.int_events_daily__opendevdata, + description 'Daily aggregation of OpenDevData events', + kind FULL, + audits ( + has_at_least_n_rows(threshold := 0) + ) +); + +SELECT /* One output row per (day, developer, repository); amount is the summed num_commits for that group. */ + activity.day AS bucket_day, + COALESCE(users.artifact_id, CAST(activity.canonical_developer_id AS VARCHAR)) AS from_artifact_id, + repos.artifact_id AS to_artifact_id, + 'OPENDEVDATA' AS event_source, + 'COMMIT_CODE' AS event_type, + SUM(activity.num_commits) AS amount +FROM oso.stg_opendevdata__repo_developer_activities AS activity +JOIN oso.int_repositories__opendevdata AS repos + ON activity.repo_id = repos.repository_id +LEFT JOIN oso.int_github_users__opendevdata AS users + ON activity.canonical_developer_id = users.canonical_developer_id /* LEFT JOIN keeps activity rows that have no matching user row; for those, from_artifact_id falls back to canonical_developer_id cast to VARCHAR. */ +WHERE + activity.num_commits > 0 /* Only rows with a positive commit count contribute. */ +GROUP BY + activity.day, + COALESCE(users.artifact_id, CAST(activity.canonical_developer_id AS VARCHAR)), + repos.artifact_id diff --git a/warehouse/oso_sqlmesh/models/metrics_factories.py b/warehouse/oso_sqlmesh/models/metrics_factories.py index ef2e429d8d..7a06dc7ee5 100644 --- a/warehouse/oso_sqlmesh/models/metrics_factories.py +++ b/warehouse/oso_sqlmesh/models/metrics_factories.py @@ -101,6 +101,7 @@ def add_project_and_collection_entity_category_tags( "int_events_daily__l2_transactions", "int_events_daily__defillama", "int_events_daily__github", + "int_events_daily__opendevdata", "int_events_daily__github_with_lag", "int_events_daily__funding", "int_events_aux_prs", @@ -328,6 +329,35 @@ def add_project_and_collection_entity_category_tags( ), additional_tags=["data_category=code"], ), + 
"developer_active_days_by_opendevdata": MetricQueryDef( + ref="code/active_days_by_opendevdata.sql", + time_aggregations=[ + "monthly", + # "quarterly", + # "biannually", + "yearly", + ], + entity_types=["artifact", "project", "collection"], + is_intermediate=True, + additional_tags=["data_category=code", "source=opendevdata"], + ), + "developer_activity_classification_by_opendevdata": MetricQueryDef( + ref="code/developer_activity_classification_by_opendevdata.sql", + vars={ + "full_time_ratio": 10 / 30, + }, + time_aggregations=[ + "monthly", + # "quarterly", + # "biannually", + "yearly", + ], + metadata=MetricMetadata( + display_name="Developer Classifications (OpenDevData)", + description="Metrics related to developer activity classifications based on OpenDevData", + ), + additional_tags=["data_category=code", "source=opendevdata"], + ), "contributor_classifications": MetricQueryDef( ref="code/contributor_activity_classification.sql", vars={ diff --git a/warehouse/oso_sqlmesh/oso_metrics/code/active_days_by_opendevdata.sql b/warehouse/oso_sqlmesh/oso_metrics/code/active_days_by_opendevdata.sql new file mode 100644 index 0000000000..8e60b2424a --- /dev/null +++ b/warehouse/oso_sqlmesh/oso_metrics/code/active_days_by_opendevdata.sql @@ -0,0 +1,11 @@ +select /* amount = number of distinct bucket_day values per (sample date, event source, from artifact, to artifact) within the metrics start/end window. */ + @metrics_sample_date(events.bucket_day) as metrics_sample_date, + events.event_source, + events.to_artifact_id, + events.from_artifact_id as from_artifact_id, + @metric_name() as metric, + count(distinct events.bucket_day) as amount +from oso.int_events_daily__opendevdata as events +where + events.bucket_day between @metrics_start('DATE') and @metrics_end('DATE') +group by 1, metric, from_artifact_id, to_artifact_id, event_source \ No newline at end of file diff --git a/warehouse/oso_sqlmesh/oso_metrics/code/developer_activity_classification_by_opendevdata.sql b/warehouse/oso_sqlmesh/oso_metrics/code/developer_activity_classification_by_opendevdata.sql new file mode 100644 index 0000000000..e46e40b584 --- 
/dev/null +++ b/warehouse/oso_sqlmesh/oso_metrics/code/developer_activity_classification_by_opendevdata.sql @@ -0,0 +1,64 @@ +select /* Branch 1 of 3 (full_time_developers): distinct from_artifact_id values whose active-day amount divided by the sample interval length is at or above the full_time_ratio variable. NOTE(review): the interval length is presumably measured in days, per the 'day' argument; confirm against the macro. */ active.metrics_sample_date, + active.event_source, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), + '' as from_artifact_id, + @metric_name('full_time_developers') as metric, + COUNT(DISTINCT active.from_artifact_id) as amount +from @metrics_peer_ref( + developer_active_days_by_opendevdata, + time_aggregation := @time_aggregation, + ) as active +where active.amount / @metrics_sample_interval_length(active.metrics_sample_date, 'day') >= @full_time_ratio +group by metric, + from_artifact_id, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + ), + event_source, + metrics_sample_date +union all +select /* Branch 2 of 3 (part_time_developers): same count, but for rows whose ratio is strictly below the full_time_ratio variable. */ active.metrics_sample_date, + active.event_source, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), + '' as from_artifact_id, + @metric_name('part_time_developers') as metric, + COUNT(DISTINCT active.from_artifact_id) as amount +from @metrics_peer_ref( + developer_active_days_by_opendevdata, + time_aggregation := @time_aggregation, + ) as active +where active.amount / @metrics_sample_interval_length(active.metrics_sample_date, 'day') < @full_time_ratio +group by metric, + from_artifact_id, + @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + event_source, + metrics_sample_date +union all +select /* Branch 3 of 3 (active_developers): no ratio filter, so every from_artifact_id present in the peer metric is counted. */ active.metrics_sample_date, + active.event_source, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), + '' as from_artifact_id, + @metric_name('active_developers') as metric, + COUNT(DISTINCT active.from_artifact_id) as amount +from @metrics_peer_ref( + developer_active_days_by_opendevdata, + time_aggregation := @time_aggregation, + ) as active +group by metric, + from_artifact_id, + 
@metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + event_source, + metrics_sample_date