diff --git a/Dockerfile b/Dockerfile index 2f87a0a..13f7757 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ -FROM postgis/postgis:14-3.4 as pg +FROM postgis/postgis:16-3.4 as pg LABEL maintainer="OpenAQ" # defaults that may be overwritten by env ENV POSTGIS_MAJOR 3 -ENV POSTGRESQL_MAJOR 14 +ENV POSTGRESQL_MAJOR 16 ENV PGUSER postgres ENV PGDATABASE postgres ENV PGHOST localhost @@ -24,6 +24,7 @@ EXPOSE 5432 # Docker image will automatically run scripts in `/docker-entrypoint-initdb.d` RUN mkdir -p /docker-entrypoint-initdb.d \ && echo "#!/bin/bash" >/docker-entrypoint-initdb.d/001_initdb.sh \ - && echo "/openaqdb/init.sh" >> /docker-entrypoint-initdb.d/001_initdb.sh + && echo "/openaqdb/init.sh" >> /docker-entrypoint-initdb.d/001_initdb.sh \ + && echo "/openaqdb/mock.sh" >> /docker-entrypoint-initdb.d/001_initdb.sh WORKDIR /openaqdb \ No newline at end of file diff --git a/migra/README.md b/migra/README.md index b441421..075fba0 100644 --- a/migra/README.md +++ b/migra/README.md @@ -41,7 +41,7 @@ REMOTE_DATABASE_URL=postgresql://user:pwd@host:port/openaq ``` ```sh -ENV=docker python3 compare.py +DOTENV=.env.docker python3 compare.py ``` ## Notes diff --git a/migra/compare.py b/migra/compare.py index a4da228..e1dbb15 100644 --- a/migra/compare.py +++ b/migra/compare.py @@ -21,6 +21,6 @@ m.add_all_changes(privileges=False) sql = m.sql.encode('utf8') - with open(f'migra/patches/patch_{today}_chk.sql', 'w') as f: + with open(f'migra/patches/patch_{today}.sql', 'w') as f: for statement in m.statements: f.write(f"----------------\n{statement}\n") diff --git a/migra/patches/patch_20240819.sql b/migra/patches/patch_20240819.sql new file mode 100644 index 0000000..30f0233 --- /dev/null +++ b/migra/patches/patch_20240819.sql @@ -0,0 +1,978 @@ +---------------- +alter table "public"."rollups" drop constraint "rollups_groups_id_fkey"; +---------------- +alter table "public"."sensors_latest" drop constraint "sensors_latest_sensors_id_fkey"; +---------------- +drop index if exists "public"."locations_base_v2_country_idx"; +---------------- +drop index if exists "public"."locations_base_v2_geog_idx"; +---------------- +drop index if exists "public"."locations_base_v2_id_idx"; +---------------- +drop index if exists "public"."locations_base_v2_lastUpdated_idx"; +---------------- +drop index if exists "public"."locations_base_v2_name_idx"; +---------------- +drop index if exists "public"."locations_base_v2_parameters_idx"; +---------------- +drop index if exists "public"."locations_bounds_idx"; +---------------- +drop index if exists "public"."locations_geom_idx"; +---------------- +drop index if exists "public"."locations_last_datetime_idx"; +---------------- +drop index if exists "public"."locations_location_id_idx"; +---------------- +drop index if exists "public"."locations_measurands_id_idx"; +---------------- +drop index if exists "public"."sensor_nodes_json_geog_idx"; +---------------- +drop index if exists "public"."sensor_nodes_json_json_idx"; +---------------- +drop index if exists "public"."sensor_stats_sensors_id_idx"; +---------------- +drop index if exists "public"."sensors_first_last_sensors_id_idx"; +---------------- +drop function if exists "public"."calculate_daily_data_jobs"(n integer); +---------------- +drop function if exists "public"."calculate_rollup_daily_stats"(day date); +---------------- +drop materialized view if exists "public"."city_stats"; +---------------- +drop materialized view if exists "public"."country_stats"; +---------------- +drop function if exists 
"public"."get_closest_countries_id"(g geometry, w integer); +---------------- +drop function if exists "public"."initialize_daily_stats"(sd date, ed date); +---------------- +drop procedure if exists "public"."intialize_sensors_rollup"(); +---------------- +drop materialized view if exists "public"."locations"; +---------------- +drop materialized view if exists "public"."locations_base_v2"; +---------------- +drop function if exists "public"."measurements_per_hour"(dur interval); +---------------- +drop function if exists "public"."measurements_per_hour"(param text, dur interval); +---------------- +drop materialized view if exists "public"."sensor_nodes_json"; +---------------- +drop view if exists "public"."sensor_systems_json"; +---------------- +drop materialized view if exists "public"."sensors_first_last"; +---------------- +drop materialized view if exists "public"."sensors_full_flat"; +---------------- +drop view if exists "public"."sensors_full_json"; +---------------- +drop materialized view if exists "public"."locations_view_cached"; +---------------- +drop view if exists "public"."sensors_full"; +---------------- +drop view if exists "public"."locations_view"; +---------------- +drop materialized view if exists "public"."sensor_stats"; +---------------- +alter table "public"."rollups" drop constraint "rollups_pkey"; +---------------- +alter table "public"."sensors_latest" drop constraint "sensors_latest_pkey"; +---------------- +drop index if exists "public"."rollups_measurands_id_idx"; +---------------- +drop index if exists "public"."rollups_pkey"; +---------------- +drop index if exists "public"."rollups_rollup_idx"; +---------------- +drop index if exists "public"."rollups_sensors_id_idx"; +---------------- +drop index if exists "public"."rollups_st_idx"; +---------------- +drop index if exists "public"."sensors_latest_pkey"; +---------------- +drop table "public"."rollups"; +---------------- +drop table "public"."sensors_latest"; +---------------- +drop table "public"."sensors_rollup_patch"; +---------------- +create table "public"."annual_data" ( + "sensors_id" integer not null, + "datetime" date not null, + "datetime_first" timestamp with time zone not null, + "datetime_last" timestamp with time zone not null, + "value_count" integer not null, + "value_avg" double precision, + "value_sd" double precision, + "value_min" double precision, + "value_max" double precision, + "value_p02" double precision, + "value_p25" double precision, + "value_p50" double precision, + "value_p75" double precision, + "value_p98" double precision, + "value_raw_avg" double precision, + "value_raw_count" double precision, + "value_raw_min" double precision, + "value_raw_max" double precision, + "error_count" integer, + "error_raw_count" integer, + "updated_on" timestamp with time zone, + "calculated_on" timestamp with time zone, + "calculated_count" integer default 1 +); + +---------------- +create table "public"."annual_data_queue" ( + "datetime" date not null, + "tz_offset" integer not null, + "added_on" timestamp with time zone not null default now(), + "queued_on" timestamp with time zone, + "modified_on" timestamp with time zone, + "modified_count" integer not null default 0, + "calculated_on" timestamp with time zone, + "calculated_count" integer not null default 0, + "calculated_seconds" real, + "sensor_nodes_count" integer, + "sensors_count" integer, + "measurements_count" integer, + "measurements_raw_count" integer +); + +---------------- +create table "public"."annual_stats" ( + 
"datetime" date not null, + "added_on" timestamp with time zone not null default now(), + "modified_on" timestamp with time zone, + "calculated_count" integer not null default 0, + "updated_on" timestamp with time zone, + "calculated_on" timestamp with time zone, + "sensor_nodes_count" integer, + "measurements_count" integer, + "measurements_raw_count" integer, + "sensors_count" integer +); + +---------------- +create table "public"."daily_exported_stats" ( + "day" date not null, + "sensor_nodes_count" bigint not null, + "sensors_count" bigint not null, + "hours_count" bigint not null, + "measurements_count" bigint not null, + "export_path" text, + "calculated_on" timestamp without time zone, + "initiated_on" timestamp without time zone, + "exported_on" timestamp without time zone, + "metadata" jsonb +); + +---------------- +alter table "public"."analyses_summary" drop column "first_datetime"; +---------------- +alter table "public"."analyses_summary" drop column "last_datetime"; +---------------- +alter table "public"."analyses_summary" add column "datetime_first" timestamp with time zone; +---------------- +alter table "public"."analyses_summary" add column "datetime_last" timestamp with time zone; +---------------- +alter table "public"."daily_data" drop column "first_datetime"; +---------------- +alter table "public"."daily_data" drop column "last_datetime"; +---------------- +alter table "public"."daily_data" add column "datetime_first" timestamp with time zone not null; +---------------- +alter table "public"."daily_data" add column "datetime_last" timestamp with time zone not null; +---------------- +alter table "public"."daily_data_queue" add column "calculated_seconds" real; +---------------- +alter table "public"."hourly_data" drop column "first_datetime"; +---------------- +alter table "public"."hourly_data" drop column "last_datetime"; +---------------- +alter table "public"."hourly_data" add column "datetime_first" timestamp with time zone not null; +---------------- +alter table "public"."hourly_data" add column "datetime_last" timestamp with time zone not null; +---------------- +alter table "public"."licenses" alter column "attribution_required" set not null; +---------------- +alter table "public"."licenses" alter column "commercial_use_allowed" set not null; +---------------- +alter table "public"."licenses" alter column "modification_allowed" set not null; +---------------- +alter table "public"."licenses" alter column "redistribution_allowed" set not null; +---------------- +alter table "public"."licenses" alter column "share_alike_required" set not null; +---------------- +alter table "public"."sensors_rollup" alter column "datetime_first" set not null; +---------------- +alter table "public"."sensors_rollup" alter column "datetime_last" set not null; +---------------- +alter table "public"."sensors_rollup" alter column "value_avg" set not null; +---------------- +alter table "public"."sensors_rollup" alter column "value_latest" set not null; +---------------- +alter table "public"."sensors_rollup" alter column "value_max" set not null; +---------------- +alter table "public"."sensors_rollup" alter column "value_min" set not null; +---------------- +alter table "public"."sensors_rollup" alter column "value_sd" set not null; +---------------- +drop extension if exists "pgbouncer_fdw"; +---------------- +CREATE INDEX annual_data_day_idx ON public.annual_data USING btree (datetime); +---------------- +CREATE UNIQUE INDEX annual_data_queue_datetime_tz_offset_key ON 
public.annual_data_queue USING btree (datetime, tz_offset); +---------------- +CREATE UNIQUE INDEX annual_data_sensors_id_datetime_key ON public.annual_data USING btree (sensors_id, datetime); +---------------- +CREATE INDEX annual_data_sensors_id_idx ON public.annual_data USING btree (sensors_id); +---------------- +CREATE UNIQUE INDEX annual_stats_pkey ON public.annual_stats USING btree (datetime); +---------------- +CREATE UNIQUE INDEX daily_exported_stats_day_key ON public.daily_exported_stats USING btree (day); +---------------- +alter table "public"."annual_stats" add constraint "annual_stats_pkey" PRIMARY KEY using index "annual_stats_pkey"; +---------------- +alter table "public"."annual_data" add constraint "annual_data_sensors_id_datetime_key" UNIQUE using index "annual_data_sensors_id_datetime_key"; +---------------- +alter table "public"."annual_data" add constraint "annual_data_sensors_id_fkey" FOREIGN KEY (sensors_id) REFERENCES sensors(sensors_id) ON DELETE CASCADE not valid; +---------------- +alter table "public"."annual_data" validate constraint "annual_data_sensors_id_fkey"; +---------------- +alter table "public"."annual_data_queue" add constraint "annual_data_queue_datetime_tz_offset_key" UNIQUE using index "annual_data_queue_datetime_tz_offset_key"; +---------------- +alter table "public"."daily_data" add constraint "daily_data_sensors_id_fkey" FOREIGN KEY (sensors_id) REFERENCES sensors(sensors_id) ON DELETE CASCADE not valid; +---------------- +alter table "public"."daily_data" validate constraint "daily_data_sensors_id_fkey"; +---------------- +alter table "public"."daily_exported_stats" add constraint "daily_exported_stats_day_key" UNIQUE using index "daily_exported_stats_day_key"; +---------------- +set check_function_bodies = off; +---------------- +CREATE OR REPLACE FUNCTION public.as_local(dt timestamp with time zone, tz text) + RETURNS timestamp with time zone + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT +AS $function$ +SELECT timezone(tz, dt); +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.as_year(tstz timestamp with time zone, tz text) + RETURNS date + LANGUAGE sql + IMMUTABLE PARALLEL SAFE +AS $function$ +SELECT date_trunc('year', timezone(tz, tstz + '-1sec'::interval))::date; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.calculate_annual_data_by_offset(dy date DEFAULT (CURRENT_DATE - 1), tz_offset integer DEFAULT 0) + RETURNS TABLE(sensors_id integer, sensor_nodes_id integer, datetime date, updated_on timestamp with time zone, datetime_first timestamp with time zone, datetime_last timestamp with time zone, value_count bigint, value_avg real, value_sd real, value_min real, value_max real, value_raw_count bigint, value_raw_avg real, value_raw_min real, value_raw_max real, value_p02 real, value_p25 real, value_p50 real, value_p75 real, value_p98 real, error_raw_count bigint, error_count bigint) + LANGUAGE sql +AS $function$ +SELECT + m.sensors_id +, sn.sensor_nodes_id +, as_year(m.datetime, t.tzid) as datetime +, MAX(m.updated_on) as updated_on +, MIN(datetime_first) as datetime_first +, MAX(datetime_last) as datetime_last +, COUNT(1) AS value_count +, AVG(value_avg) as value_avg +, STDDEV(value_avg) as value_sd +, MIN(value_avg) as value_min +, MAX(value_avg) as value_max +, SUM(value_count) as value_raw_count +, SUM(value_avg*value_count)/SUM(value_count) as value_raw_avg +, MIN(value_min) as value_raw_min +, MAX(value_max) as value_raw_max +, PERCENTILE_CONT(0.02) WITHIN GROUP(ORDER BY value_avg) as value_p02 
+, PERCENTILE_CONT(0.25) WITHIN GROUP(ORDER BY value_avg) as value_p25 +, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY value_avg) as value_p50 +, PERCENTILE_CONT(0.75) WITHIN GROUP(ORDER BY value_avg) as value_p75 +, PERCENTILE_CONT(0.98) WITHIN GROUP(ORDER BY value_avg) as value_p98 +, SUM(error_count) as error_raw_count +, SUM((value_avg IS NULL)::int) as error_count +FROM hourly_data m +JOIN sensors s ON (m.sensors_id = s.sensors_id) +JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) +JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) +JOIN timezones t ON (sn.timezones_id = t.timezones_id) +WHERE value_count > 0 +AND datetime > as_utc(date_trunc('year', dy), t.tzid) +AND datetime <= as_utc(date_trunc('year', dy + '1year'::interval), t.tzid) +AND utc_offset_hours(dy, t.tzid) = tz_offset +GROUP BY 1,2,3 +HAVING COUNT(1) > 0; + $function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.calculate_daily_data_by_offset(dy date DEFAULT (CURRENT_DATE - 1), tz_offset integer DEFAULT 0) + RETURNS TABLE(sensors_id integer, sensor_nodes_id integer, datetime date, updated_on timestamp with time zone, datetime_first timestamp with time zone, datetime_last timestamp with time zone, value_count bigint, value_avg real, value_sd real, value_min real, value_max real, value_raw_count bigint, value_raw_avg real, value_raw_min real, value_raw_max real, value_p02 real, value_p25 real, value_p50 real, value_p75 real, value_p98 real, error_raw_count bigint, error_count bigint) + LANGUAGE sql +AS $function$ +SELECT + m.sensors_id +, sn.sensor_nodes_id +, as_date(m.datetime, t.tzid) as datetime +, MAX(m.updated_on) as updated_on +, MIN(datetime_first) as datetime_first +, MAX(datetime_last) as datetime_last +, COUNT(1) AS value_count +, AVG(value_avg) as value_avg +, STDDEV(value_avg) as value_sd +, MIN(value_avg) as value_min +, MAX(value_avg) as value_max +, SUM(value_count) as value_raw_count +, SUM(value_avg*value_count)/SUM(value_count) as value_raw_avg +, MIN(value_min) as value_raw_min +, MAX(value_max) as value_raw_max +, PERCENTILE_CONT(0.02) WITHIN GROUP(ORDER BY value_avg) as value_p02 +, PERCENTILE_CONT(0.25) WITHIN GROUP(ORDER BY value_avg) as value_p25 +, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY value_avg) as value_p50 +, PERCENTILE_CONT(0.75) WITHIN GROUP(ORDER BY value_avg) as value_p75 +, PERCENTILE_CONT(0.98) WITHIN GROUP(ORDER BY value_avg) as value_p98 +, SUM(error_count) as error_raw_count +, SUM((value_avg IS NULL)::int) as error_count +FROM hourly_data m +JOIN sensors s ON (m.sensors_id = s.sensors_id) +JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) +JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) +JOIN timezones t ON (sn.timezones_id = t.timezones_id) +WHERE value_count > 0 +AND datetime > as_utc(dy, t.tzid) +AND datetime <= as_utc(dy + 1, t.tzid) +AND utc_offset_hours(dy, t.tzid) = tz_offset +GROUP BY 1,2,3 +HAVING COUNT(1) > 0; + $function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.calculate_rollup_daily_exported_stats(day date) + RETURNS bigint + LANGUAGE sql +AS $function$ +WITH data AS ( + SELECT (datetime - '1sec'::interval)::date as day + , h.sensors_id + , sensor_nodes_id + , value_count + FROM hourly_data h + JOIN sensors s ON (h.sensors_id = s.sensors_id) + JOIN sensor_systems ss ON (s.sensor_systems_id = ss.sensor_systems_id) + WHERE datetime > day::timestamp + AND datetime <= day + '1day'::interval +), inserts AS ( +INSERT INTO daily_exported_stats ( + day +, sensor_nodes_count 
+, sensors_count +, hours_count +, measurements_count +, calculated_on +) +SELECT day +, COUNT(DISTINCT sensor_nodes_id) as sensor_nodes_count +, COUNT(DISTINCT sensors_id) as sensors_count +, COUNT(1) as hours_count +, SUM(value_count) as measurements_count +, current_timestamp +FROM data +GROUP BY day +ON CONFLICT (day) DO UPDATE +SET sensor_nodes_count = EXCLUDED.sensor_nodes_count +, sensors_count = EXCLUDED.sensors_count +, hours_count = EXCLUDED.hours_count +, measurements_count = EXCLUDED.measurements_count +, calculated_on = EXCLUDED.calculated_on +RETURNING measurements_count) +SELECT measurements_count +FROM inserts; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.daily_data_updated_event(dy date, tz_offset_int integer) + RETURNS boolean + LANGUAGE sql +AS $function$ + SELECT update_annual_data_queue(dy, tz_offset_int)>0; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.fetch_annual_data_jobs(n integer DEFAULT 1, min_day date DEFAULT NULL::date, max_day date DEFAULT NULL::date) + RETURNS TABLE(datetime date, tz_offset integer, queued_on timestamp with time zone) + LANGUAGE plpgsql +AS $function$ + BEGIN + RETURN QUERY + UPDATE annual_data_queue + SET queued_on = CURRENT_TIMESTAMP + , calculated_count = calculated_count + 1 + FROM ( + SELECT q.datetime + , q.tz_offset + FROM annual_data_queue q + -- Its either not been calculated or its been modified + WHERE q.datetime >= COALESCE(min_day, '-infinity'::date) + AND q.datetime <= COALESCE(max_day, current_date - '1year'::interval) + AND (q.calculated_on IS NULL OR q.modified_on > q.calculated_on) + -- either its never been or it was resently modified but not queued + AND (q.queued_on IS NULL -- has not been queued + OR ( + q.queued_on < now() - '1h'::interval -- a set amount of time has passed AND + AND ( + q.queued_on < q.modified_on -- its been changed since being queued + OR calculated_on IS NULL -- it was never calculated + ) + )) + ORDER BY q.datetime, q.tz_offset + LIMIT n + FOR UPDATE SKIP LOCKED + ) as d + WHERE d.datetime = annual_data_queue.datetime + AND d.tz_offset = annual_data_queue.tz_offset + RETURNING annual_data_queue.datetime + , annual_data_queue.tz_offset + , annual_data_queue.queued_on; + END; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.get_datetime_object(tstz date, tz text DEFAULT 'UTC'::text) + RETURNS json + LANGUAGE sql + IMMUTABLE PARALLEL SAFE +AS $function$ + SELECT get_datetime_object(tstz::timestamp, tz); +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.get_measurands_id(m text) + RETURNS integer + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT +AS $function$ +SELECT measurands_id +FROM measurands +WHERE lower(measurand) = lower(m) +LIMIT 1; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.hourly_data_updated_event(hr timestamp with time zone) + RETURNS boolean + LANGUAGE sql +AS $function$ + SELECT update_daily_data_queue(hr)>0; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.initialize_daily_exported_stats(sd date DEFAULT '-infinity'::date, ed date DEFAULT 'infinity'::date) + RETURNS bigint + LANGUAGE sql +AS $function$ +WITH first_and_last AS ( +SELECT MIN(datetime) as datetime_first +, MAX(datetime) as datetime_last +FROM measurements +WHERE datetime >= sd +AND datetime <= ed +), datetimes AS ( +SELECT generate_series( + date_trunc('day', datetime_first) + , date_trunc('day', datetime_last) + , '1day'::interval) as day +FROM first_and_last +), inserts AS ( +INSERT INTO 
daily_exported_stats (day, sensor_nodes_count, sensors_count, measurements_count, hours_count) +SELECT day::date, -1, -1, -1, -1 +FROM datetimes +WHERE has_measurement(day::date) +ON CONFLICT (day) DO NOTHING +RETURNING 1) +SELECT COUNT(1) FROM inserts; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.insert_annual_data_by_offset(dy date DEFAULT (CURRENT_DATE - 1), tz_offset integer DEFAULT 0) + RETURNS TABLE(sensor_nodes_count bigint, sensors_count bigint, measurements_hourly_count bigint, measurements_count bigint) + LANGUAGE sql +AS $function$ +SET LOCAL work_mem = '512MB'; +WITH data_rollup AS ( + SELECT * + FROM calculate_annual_data_by_offset(dy, tz_offset) +), data_inserted AS ( +INSERT INTO annual_data ( + sensors_id +, datetime +, updated_on +, datetime_first +, datetime_last +, value_count +, value_avg +, value_sd +, value_min +, value_max +, value_raw_count +, value_raw_avg +, value_raw_min +, value_raw_max +, value_p02 +, value_p25 +, value_p50 +, value_p75 +, value_p98 +, error_count +, error_raw_count +, calculated_on) + SELECT sensors_id +, datetime +, updated_on +, datetime_first +, datetime_last +, value_count +, value_avg +, value_sd +, value_min +, value_max +, value_raw_count +, value_raw_avg +, value_raw_min +, value_raw_max +, value_p02 +, value_p25 +, value_p50 +, value_p75 +, value_p98 +, error_count +, error_raw_count +, current_timestamp as calculated_on + FROM data_rollup +ON CONFLICT (sensors_id, datetime) DO UPDATE +SET datetime_first = EXCLUDED.datetime_first +, datetime_last = EXCLUDED.datetime_last +, updated_on = EXCLUDED.updated_on +, value_avg = EXCLUDED.value_avg +, value_min = EXCLUDED.value_min +, value_max = EXCLUDED.value_max +, value_count = EXCLUDED.value_count +, value_raw_avg = EXCLUDED.value_raw_avg +, value_raw_min = EXCLUDED.value_raw_min +, value_raw_max = EXCLUDED.value_raw_max +, value_raw_count = EXCLUDED.value_raw_count +, value_p02 = EXCLUDED.value_p02 +, value_p25 = EXCLUDED.value_p25 +, value_p50 = EXCLUDED.value_p50 +, value_p75 = EXCLUDED.value_p75 +, value_p98 = EXCLUDED.value_p98 +, error_count = EXCLUDED.error_count +, error_raw_count = EXCLUDED.error_raw_count +, calculated_on = EXCLUDED.calculated_on + RETURNING sensors_id, value_count, value_raw_count + ) SELECT COUNT(DISTINCT sensors_id) as sensors_count + , COUNT(DISTINCT sensor_nodes_id) as sensor_nodes_count + , SUM(value_count) as measurements_hourly_count + , SUM(value_raw_count) as measurements_count + FROM data_rollup; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.insert_daily_data_by_offset(dy date DEFAULT (CURRENT_DATE - 1), tz_offset integer DEFAULT 0) + RETURNS TABLE(sensor_nodes_count bigint, sensors_count bigint, measurements_hourly_count bigint, measurements_count bigint) + LANGUAGE sql +AS $function$ +SET LOCAL work_mem = '512MB'; +WITH data_rollup AS ( + SELECT * + FROM calculate_daily_data_by_offset(dy, tz_offset) +), data_inserted AS ( +INSERT INTO daily_data ( + sensors_id +, datetime +, updated_on +, datetime_first +, datetime_last +, value_count +, value_avg +, value_sd +, value_min +, value_max +, value_raw_count +, value_raw_avg +, value_raw_min +, value_raw_max +, value_p02 +, value_p25 +, value_p50 +, value_p75 +, value_p98 +, error_count +, error_raw_count +, calculated_on) + SELECT sensors_id +, datetime +, updated_on +, datetime_first +, datetime_last +, value_count +, value_avg +, value_sd +, value_min +, value_max +, value_raw_count +, value_raw_avg +, value_raw_min +, value_raw_max +, value_p02 +, 
value_p25 +, value_p50 +, value_p75 +, value_p98 +, error_count +, error_raw_count +, current_timestamp as calculated_on + FROM data_rollup +ON CONFLICT (sensors_id, datetime) DO UPDATE +SET datetime_first = EXCLUDED.datetime_first +, datetime_last = EXCLUDED.datetime_last +, updated_on = EXCLUDED.updated_on +, value_avg = EXCLUDED.value_avg +, value_min = EXCLUDED.value_min +, value_max = EXCLUDED.value_max +, value_count = EXCLUDED.value_count +, value_raw_avg = EXCLUDED.value_raw_avg +, value_raw_min = EXCLUDED.value_raw_min +, value_raw_max = EXCLUDED.value_raw_max +, value_raw_count = EXCLUDED.value_raw_count +, value_p02 = EXCLUDED.value_p02 +, value_p25 = EXCLUDED.value_p25 +, value_p50 = EXCLUDED.value_p50 +, value_p75 = EXCLUDED.value_p75 +, value_p98 = EXCLUDED.value_p98 +, error_count = EXCLUDED.error_count +, error_raw_count = EXCLUDED.error_raw_count +, calculated_on = EXCLUDED.calculated_on + RETURNING sensors_id, value_count, value_raw_count + ) SELECT COUNT(DISTINCT sensors_id) as sensors_count + , COUNT(DISTINCT sensor_nodes_id) as sensor_nodes_count + , SUM(value_count) as measurements_hourly_count + , SUM(value_raw_count) as measurements_count + FROM data_rollup; +$function$ +; +---------------- +create or replace view "public"."location_licenses_view" as SELECT sn.sensor_nodes_id, + json_agg(json_build_object('id', pl.licenses_id, 'name', l.name, 'date_from', lower(pl.active_period), 'date_to', upper(pl.active_period), 'attribution', json_build_object('name', e.full_name, 'url', COALESCE((e.metadata ->> 'url'::text), NULL::text)))) AS licenses, + array_agg(DISTINCT pl.licenses_id) AS license_ids + FROM (((providers_licenses pl + JOIN sensor_nodes sn USING (providers_id)) + JOIN entities e ON ((sn.owner_entities_id = e.entities_id))) + JOIN licenses l ON ((l.licenses_id = pl.licenses_id))) + GROUP BY sn.sensor_nodes_id; + +---------------- +CREATE OR REPLACE PROCEDURE public.update_annual_data(IN n integer DEFAULT 5, IN min_day date DEFAULT NULL::date, IN max_day date DEFAULT NULL::date) + LANGUAGE plpgsql +AS $procedure$ +DECLARE + rw record; +BEGIN +FOR rw IN ( + SELECT datetime + , tz_offset + FROM fetch_annual_data_jobs(n, min_day, max_day)) +LOOP + RAISE NOTICE 'updating year: % - %', rw.datetime, rw.tz_offset; + PERFORM update_annual_data(rw.datetime, rw.tz_offset); + COMMIT; +END LOOP; +END; +$procedure$ +; +---------------- +CREATE OR REPLACE FUNCTION public.update_annual_data(dy date DEFAULT (CURRENT_DATE - 1), tz_offset_int integer DEFAULT 0) + RETURNS bigint + LANGUAGE plpgsql +AS $function$ +DECLARE +nw timestamptz := clock_timestamp(); +mc bigint; +BEGIN +WITH inserted AS ( + SELECT sensor_nodes_count + , sensors_count + , measurements_hourly_count + , measurements_count + FROM insert_annual_data_by_offset(dy, tz_offset_int)) + INSERT INTO annual_data_queue ( + datetime + , tz_offset + , calculated_on + , calculated_count + , sensor_nodes_count + , sensors_count + , measurements_count + , measurements_raw_count + , calculated_seconds + ) + SELECT dy + , tz_offset_int + , now() + , 1 + , sensor_nodes_count + , sensors_count + , measurements_hourly_count + , measurements_count + , EXTRACT(EPOCH FROM clock_timestamp() - nw) + FROM inserted + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET calculated_on = EXCLUDED.calculated_on + , calculated_count = annual_data_queue.calculated_count + 1 + , measurements_count = EXCLUDED.measurements_count + , sensors_count = EXCLUDED.sensors_count + , calculated_seconds = EXCLUDED.calculated_seconds + RETURNING 
measurements_count INTO mc; + -- PERFORM annual_data_updated_event(dy, tz_offset_int); + RETURN mc; +END; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.update_annual_data_queue(dt date, tz_offset_int integer) + RETURNS bigint + LANGUAGE sql +AS $function$ + WITH annual_inserts AS ( + INSERT INTO annual_data_queue (datetime, tz_offset) VALUES + (date_trunc('year', dt + make_interval(hours=>tz_offset_int, secs=>-1)) + , tz_offset_int) + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET modified_on = now() + , modified_count = annual_data_queue.modified_count + 1 + RETURNING datetime, tz_offset + ) SELECT COUNT(*) + FROM annual_inserts; + $function$ +; +---------------- +CREATE OR REPLACE PROCEDURE public.update_daily_data(IN n integer DEFAULT 5, IN min_day date DEFAULT NULL::date, IN max_day date DEFAULT NULL::date) + LANGUAGE plpgsql +AS $procedure$ +DECLARE + rw record; +BEGIN +FOR rw IN ( + SELECT datetime + , tz_offset + FROM fetch_daily_data_jobs(n, min_day, max_day)) +LOOP + RAISE NOTICE 'updating day: % - %', rw.datetime, rw.tz_offset; + PERFORM update_daily_data(rw.datetime, rw.tz_offset); + COMMIT; +END LOOP; +END; +$procedure$ +; +---------------- +CREATE OR REPLACE FUNCTION public.update_daily_data(dy date DEFAULT (CURRENT_DATE - 1), tz_offset_int integer DEFAULT 0) + RETURNS bigint + LANGUAGE plpgsql +AS $function$ +DECLARE +nw timestamptz := clock_timestamp(); +mc bigint; +BEGIN +WITH inserted AS ( + SELECT sensor_nodes_count + , sensors_count + , measurements_hourly_count + , measurements_count + FROM insert_daily_data_by_offset(dy, tz_offset_int)) + INSERT INTO daily_data_queue ( + datetime + , tz_offset + , calculated_on + , calculated_count + , sensor_nodes_count + , sensors_count + , measurements_count + , measurements_raw_count + , calculated_seconds + ) + SELECT dy + , tz_offset_int + , now() + , 1 + , sensor_nodes_count + , sensors_count + , measurements_hourly_count + , measurements_count + , EXTRACT(EPOCH FROM clock_timestamp() - nw) + FROM inserted + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET calculated_on = EXCLUDED.calculated_on + , calculated_count = daily_data_queue.calculated_count + 1 + , measurements_count = EXCLUDED.measurements_count + , sensors_count = EXCLUDED.sensors_count + , calculated_seconds = EXCLUDED.calculated_seconds + RETURNING measurements_count INTO mc; + PERFORM daily_data_updated_event(dy, tz_offset_int); + RETURN mc; +END; +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.update_daily_data_queue(dt timestamp with time zone) + RETURNS bigint + LANGUAGE sql +AS $function$ + WITH affected_offsets AS ( + -- the following will just queue up every possible offset + -- regardless of whether we have a sensor node + -- SELECT generate_series(-12, 14, 1) as tz_offset + -- and this will only do the queue the offsets that we have + SELECT utc_offset_hours(dt, tzid) as tz_offset + FROM sensor_nodes n + JOIN timezones t USING (timezones_id) + GROUP BY 1 + ), daily_inserts AS ( + INSERT INTO daily_data_queue (datetime, tz_offset) + SELECT (dt + make_interval(hours=>tz_offset::int, secs=>-1))::date + , tz_offset + FROM affected_offsets + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET modified_on = now() + , modified_count = daily_data_queue.modified_count + 1 + RETURNING datetime, tz_offset + ) SELECT COUNT(*) + FROM daily_inserts; + $function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.utc_offset_hours(dt date, tz text) + RETURNS integer + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT +AS 
$function$ +SELECT date_part('hours', utc_offset(dt::timestamptz,tz)) as tz_offset +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.utc_offset_hours(dt timestamp with time zone, tz text) + RETURNS integer + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT +AS $function$ +SELECT date_part('hours', utc_offset(dt,tz)) as tz_offset +$function$ +; +---------------- +CREATE OR REPLACE FUNCTION public.utc_offset_hours(tz text) + RETURNS integer + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT +AS $function$ +SELECT date_part('hours', utc_offset(tz)) as tz_offset +$function$ +; +---------------- +create or replace view "public"."locations_view" as WITH nodes_instruments AS ( + SELECT sn.sensor_nodes_id, + bool_or(i.is_monitor) AS is_monitor, + json_agg(json_build_object('id', i.instruments_id, 'name', i.label, 'manufacturer', jsonb_build_object('id', i.manufacturer_entities_id, 'name', mc.full_name))) AS instruments, + array_agg(DISTINCT i.instruments_id) AS instrument_ids, + array_agg(DISTINCT mc.full_name) AS manufacturers, + array_agg(DISTINCT i.manufacturer_entities_id) AS manufacturer_ids + FROM (((sensor_nodes sn + JOIN sensor_systems ss USING (sensor_nodes_id)) + JOIN instruments i USING (instruments_id)) + JOIN entities mc ON ((mc.entities_id = i.manufacturer_entities_id))) + GROUP BY sn.sensor_nodes_id + ), nodes_sensors AS ( + SELECT sn.sensor_nodes_id, + min(sl.datetime_first) AS datetime_first, + max(sl.datetime_last) AS datetime_last, + json_agg(jsonb_build_object('id', s.sensors_id, 'name', ((m.measurand || ' '::text) || m.units), 'parameter', jsonb_build_object('id', m.measurands_id, 'name', m.measurand, 'units', m.units, 'value_last', sl.value_latest, 'datetime_last', sl.datetime_last, 'display_name', m.display))) AS sensors, + array_agg(DISTINCT m.measurand) AS parameters, + array_agg(DISTINCT m.measurands_id) AS parameter_ids + FROM ((((sensor_nodes sn + JOIN sensor_systems ss USING (sensor_nodes_id)) + JOIN sensors s USING (sensor_systems_id)) + LEFT JOIN sensors_rollup sl USING (sensors_id)) + JOIN measurands m USING (measurands_id)) + GROUP BY sn.sensor_nodes_id + ) + SELECT l.sensor_nodes_id AS id, + l.site_name AS name, + l.ismobile, + t.tzid AS timezone, + ni.is_monitor AS ismonitor, + l.city, + jsonb_build_object('id', c.countries_id, 'code', c.iso, 'name', c.name) AS country, + jsonb_build_object('id', oc.entities_id, 'name', oc.full_name, 'type', oc.entity_type) AS owner, + jsonb_build_object('id', p.providers_id, 'name', p.label) AS provider, + jsonb_build_object('latitude', st_y(l.geom), 'longitude', st_x(l.geom)) AS coordinates, + ni.instruments, + ns.sensors, + get_datetime_object(ns.datetime_first, t.tzid) AS datetime_first, + get_datetime_object(ns.datetime_last, t.tzid) AS datetime_last, + l.geom, + (l.geom)::geography AS geog, + c.countries_id, + ns.parameters, + ns.parameter_ids, + ((oc.entity_type)::text ~* 'research'::text) AS is_analysis, + ni.manufacturers, + ni.manufacturer_ids, + ni.instrument_ids, + ll.licenses, + ll.license_ids, + l.providers_id + FROM (((((((sensor_nodes l + JOIN timezones t ON ((l.timezones_id = t.timezones_id))) + JOIN countries c ON ((c.countries_id = l.countries_id))) + JOIN entities oc ON ((oc.entities_id = l.owner_entities_id))) + JOIN providers p ON ((p.providers_id = l.providers_id))) + JOIN nodes_instruments ni USING (sensor_nodes_id)) + JOIN nodes_sensors ns USING (sensor_nodes_id)) + LEFT JOIN location_licenses_view ll USING (sensor_nodes_id)) + WHERE l.is_public; + +---------------- +create materialized view 
"public"."locations_view_cached" as SELECT id, + name, + ismobile, + timezone, + ismonitor, + city, + country, + owner, + provider, + coordinates, + instruments, + sensors, + datetime_first, + datetime_last, + geom, + geog, + countries_id, + parameters, + parameter_ids, + is_analysis, + manufacturers, + manufacturer_ids, + instrument_ids, + licenses, + license_ids, + providers_id + FROM locations_view; + +---------------- +create or replace view "public"."provider_licenses_view" as SELECT p.providers_id, + json_agg(json_build_object('id', p.licenses_id, 'name', l.name, 'date_from', lower(p.active_period), 'date_to', upper(p.active_period))) AS licenses + FROM (providers_licenses p + JOIN licenses l ON ((l.licenses_id = p.licenses_id))) + GROUP BY p.providers_id; + diff --git a/openaqdb/cron.sql b/openaqdb/cron.sql index 481c5aa..8512755 100644 --- a/openaqdb/cron.sql +++ b/openaqdb/cron.sql @@ -19,22 +19,20 @@ SELECT cron.schedule_in_database( , 'openaq' ); --- every hour on the 1/2 hour SELECT cron.schedule_in_database( - 'rollup-daily-data-leading' - , '30 * * * *' - , $$SELECT calculate_daily_data_full((now() + '15h'::interval)::date)$$ + 'rollup-daily-data' + , '*/20 * * * *' + , $$CALL update_daily_data(500)$$ , 'openaq' ); SELECT cron.schedule_in_database( - 'rollup-daily-data-trailing' - , '10 * * * *' - , $$SELECT calculate_daily_data_full((now() - '13h'::interval)::date)$$ + 'rollup-annual-data' + , '0 * * * *' + , $$CALL update_annual_data(25)$$ , 'openaq' ); - -- at quarter past each hour calculate -- the latest 10 hours that need updating SELECT cron.schedule_in_database( @@ -168,18 +166,6 @@ SELECT cron.schedule_in_database( $$ LANGUAGE SQL; - - - SELECT jobid - , start_time - , age(end_time, start_time) as duration - FROM cron.job_run_details - WHERE start_time > current_date - AND jobid = 2 - ORDER BY start_time DESC - LIMIT 30; - - CREATE OR REPLACE VIEW recent_jobs_summary AS WITH jobs AS ( SELECT d.jobid @@ -204,14 +190,3 @@ SELECT cron.schedule_in_database( FROM jobs GROUP BY jobid, jobname, active ORDER BY 1,2 DESC; - - - - SELECT command - , end_time - , return_message - FROM cron.job_run_details - WHERE status = 'failed' - AND start_time > now() - '12h'::interval - ORDER BY end_time DESC - LIMIT 10; diff --git a/openaqdb/idempotent/update_rollups.sql b/openaqdb/idempotent/update_rollups.sql index eb9661d..1e200a8 100644 --- a/openaqdb/idempotent/update_rollups.sql +++ b/openaqdb/idempotent/update_rollups.sql @@ -257,8 +257,8 @@ SELECT 'day' as rollup, _st as st, _et as et, - min(datetime) as first_datetime, - max(datetime) as last_datetime, + min(datetime) as datetime_first, + max(datetime) as datetime_last, count(*) as value_count, sum(value) as value_sum, last(value, datetime) as last_value, @@ -282,20 +282,20 @@ SELECT SELECT groups_id, measurands_id, - last(sensors_id, last_datetime) as sensors_id, + last(sensors_id, datetime_last) as sensors_id, rollup, st, et, - min(first_datetime) as first_datetime, - max(last_datetime) as last_datetime, + min(datetime_first) as datetime_first, + max(datetime_last) as datetime_last, sum(value_count) as value_count, sum(value_sum) as value_sum, - last(last_value, last_datetime) as last_value, + last(last_value, datetime_last) as last_value, min(minx) as minx, min(miny) as miny, max(maxx) as maxx, max(maxy) as maxy, - last(last_point, last_datetime) as last_point + last(last_point, datetime_last) as last_point FROM dailyrolluptemp_by_sensor JOIN groups_sensors USING (sensors_id) JOIN sensors USING (sensors_id) @@ -312,8 +312,8 @@ 
SELECT rollup, st, et, - first_datetime, - last_datetime, + datetime_first, + datetime_last, value_count, value_sum, last_value, @@ -370,8 +370,8 @@ RAISE NOTICE '% %', _st, _et; rollup, st, et, - first_datetime, - last_datetime, + datetime_first, + datetime_last, value_count, value_sum, last_value, @@ -383,20 +383,20 @@ RAISE NOTICE '% %', _st, _et; ) SELECT groups_id, measurands_id, - last(sensors_id, last_datetime), + last(sensors_id, datetime_last), 'month', _st, _et, - min(first_datetime), - max(last_datetime), + min(datetime_first), + max(datetime_last), sum(value_count), sum(value_sum), - last(last_value, last_datetime), + last(last_value, datetime_last), min(minx), min(miny), max(maxx), max(maxy), - last(last_point, last_datetime) + last(last_point, datetime_last) FROM rollups WHERE rollup = 'day' AND @@ -433,8 +433,8 @@ RAISE NOTICE '% %', _st, _et; rollup, st, et, - first_datetime, - last_datetime, + datetime_first, + datetime_last, value_count, value_sum, last_value, @@ -446,20 +446,20 @@ RAISE NOTICE '% %', _st, _et; ) SELECT groups_id, measurands_id, - last(sensors_id, last_datetime), + last(sensors_id, datetime_last), 'year', _st, _et, - min(first_datetime), - max(last_datetime), + min(datetime_first), + max(datetime_last), sum(value_count), sum(value_sum), - last(last_value, last_datetime), + last(last_value, datetime_last), min(minx), min(miny), max(maxx), max(maxy), - last(last_point, last_datetime) + last(last_point, datetime_last) FROM rollups WHERE rollup = 'month' AND @@ -488,8 +488,8 @@ RAISE NOTICE 'Updating total Rollups --- %', clock_timestamp(); rollup, st, et, - first_datetime, - last_datetime, + datetime_first, + datetime_last, value_count, value_sum, last_value, @@ -501,20 +501,20 @@ RAISE NOTICE 'Updating total Rollups --- %', clock_timestamp(); ) SELECT groups_id, measurands_id, - last(sensors_id, last_datetime), + last(sensors_id, datetime_last), 'total', '1970-01-01'::timestamptz, '2999-01-01'::timestamptz, - min(first_datetime), - max(last_datetime), + min(datetime_first), + max(datetime_last), sum(value_count), sum(value_sum), - last(last_value, last_datetime), + last(last_value, datetime_last), min(minx), min(miny), max(maxx), max(maxy), - last(last_point, last_datetime) + last(last_point, datetime_last) FROM rollups WHERE rollup = 'year' @@ -789,6 +789,12 @@ BEGIN AND data_averaging_period_seconds IS NULL; ----------- UPDATE sensors + SET data_averaging_period_seconds = 3600 + , data_logging_period_seconds = 3600 + WHERE source_id ~* '^smartsense' + AND data_averaging_period_seconds IS NULL; + ----------- + UPDATE sensors SET data_averaging_period_seconds = 1 , data_logging_period_seconds = 1 WHERE source_id ~* 'habitatmap' @@ -808,16 +814,17 @@ $$ LANGUAGE plpgsql; -- run after you have just imported data outside of the fetcher -CREATE OR REPLACE PROCEDURE intialize_sensors_rollup() AS $$ +CREATE OR REPLACE PROCEDURE initialize_sensors_rollup() AS $$ DECLARE BEGIN - CREATE TEMP TABLE sensors_missing_from_rollup AS + CREATE TEMP TABLE sensors_missing_from_rollup ON COMMIT DROP AS -- Get a list of all sensors missing data WITH missing AS ( SELECT sensors_id FROM sensors LEFT JOIN sensors_rollup s USING (sensors_id) WHERE s.sensors_id IS NULL + OR value_avg IS NULL ), data AS ( -- use that list to aggregate based on the measurements SELECT m.sensors_id @@ -865,22 +872,20 @@ BEGIN , value_max , value_latest FROM sensors_missing_from_rollup - ON CONFLICT DO NOTHING; +ON CONFLICT (sensors_id) DO UPDATE +SET datetime_first = EXCLUDED.datetime_first +, 
datetime_last = EXCLUDED.datetime_last +, value_count = EXCLUDED.value_count +, value_min = EXCLUDED.value_min +, value_max = EXCLUDED.value_max +, value_avg = EXCLUDED.value_avg +, value_sd = EXCLUDED.value_sd +, value_latest = COALESCE(sensors_rollup.value_latest, EXCLUDED.value_latest) +, modified_on = now(); END; $$ LANGUAGE plpgsql; -SELECT sensors_id -, MIN(first_datetime) as first_datetime -, MAX(last_datetime) as last_datetime -, COUNT(1) as value_count -, STDDEV(value_avg) as value_sd -, AVG(value_avg) as value_avg -INTO TEMP sensors_temp_table -FROM hourly_data -GROUP BY 1; - - CREATE OR REPLACE FUNCTION reset_hourly_stats( st timestamptz DEFAULT '-infinity' , et timestamptz DEFAULT 'infinity' diff --git a/openaqdb/idempotent/util_functions.sql b/openaqdb/idempotent/util_functions.sql index 4b2394e..5f38b30 100644 --- a/openaqdb/idempotent/util_functions.sql +++ b/openaqdb/idempotent/util_functions.sql @@ -99,11 +99,26 @@ CREATE OR REPLACE FUNCTION utc_offset(tz text) RETURNS interval AS $$ SELECT timezone(tz, now()) - timezone('UTC', now()); $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; + +CREATE OR REPLACE FUNCTION utc_offset_hours(tz text) RETURNS int AS $$ +SELECT date_part('hours', utc_offset(tz)) as tz_offset +$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; + -- get the offset for a specific date/time CREATE OR REPLACE FUNCTION utc_offset(dt timestamptz, tz text) RETURNS interval AS $$ SELECT timezone(tz, dt) - timezone('UTC', dt); $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; +CREATE OR REPLACE FUNCTION utc_offset_hours(dt timestamptz, tz text) RETURNS int AS $$ +SELECT date_part('hours', utc_offset(dt,tz)) as tz_offset +$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; + +CREATE OR REPLACE FUNCTION utc_offset_hours(dt date, tz text) RETURNS int AS $$ +SELECT date_part('hours', utc_offset(dt::timestamptz,tz)) as tz_offset +$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; + + + -- same but supplying the sensor_nodes_id CREATE OR REPLACE FUNCTION utc_offset(dt timestamptz, sn int) RETURNS interval AS $$ SELECT utc_offset(dt, t.tzid) @@ -113,8 +128,8 @@ FROM sensor_nodes n $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; CREATE OR REPLACE FUNCTION utc_offset(sn int) RETURNS interval AS $$ -SELECT utc_offset(t.tzid) -FROM sensor_nodes n + SELECT utc_offset(t.tzid) + FROM sensor_nodes n JOIN timezones t ON (t.timezones_id = n.timezones_id) WHERE sensor_nodes_id = sn; $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; @@ -127,6 +142,11 @@ CREATE OR REPLACE FUNCTION as_utc(dt timestamptz, tz text) RETURNS timestamptz A SELECT timezone(tz, timezone('UTC', dt)); $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; +CREATE OR REPLACE FUNCTION as_local(dt timestamptz, tz text) RETURNS timestamptz AS $$ +SELECT timezone(tz, dt); +$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; + + CREATE OR REPLACE FUNCTION as_local_hour(dt timestamptz, tz text) RETURNS timestamptz AS $$ SELECT timezone(tz, date_trunc('hour', dt)); $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE; @@ -156,6 +176,11 @@ RETURNS date AS $$ SELECT date_trunc('day', timezone(tz, tstz + '-1sec'::interval))::date; $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; +CREATE OR REPLACE FUNCTION as_year(tstz timestamptz, tz text) +RETURNS date AS $$ +SELECT date_trunc('year', timezone(tz, tstz + '-1sec'::interval))::date; +$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; + --DROP FUNCTION IF EXISTS truncate_timestamp(timestamptz, text, text, interval); CREATE OR REPLACE FUNCTION truncate_timestamp(tstz timestamptz, period text, tz text, _offset interval)
RETURNS timestamptz AS $$ @@ -183,12 +208,13 @@ SELECT timezone(tz, tstz::timestamp); $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; -CREATE OR REPLACE FUNCTION get_datetime_object(tstz timestamptz, tz text DEFAULT 'UTC') + CREATE OR REPLACE FUNCTION get_datetime_object(tstz timestamptz, tz text DEFAULT 'UTC') RETURNS json AS $$ SELECT json_build_object( 'utc', format_timestamp(tstz, 'UTC') , 'local', format_timestamp(tstz, tz) - ); + , 'timezone', tz + ) WHERE tstz IS NOT NULL; $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; -- assume that its in the right timezone but not timestamptz @@ -199,9 +225,15 @@ SELECT json_build_object( 'utc', format_timestamp(tstz AT TIME ZONE tz, 'UTC') , 'local', format_timestamp(tstz AT TIME ZONE tz, tz) , 'timezone', tz - ); + ) WHERE tstz IS NOT NULL; $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; +CREATE OR REPLACE FUNCTION get_datetime_object(tstz date, tz text DEFAULT 'UTC') +RETURNS json AS $$ + SELECT get_datetime_object(tstz::timestamp, tz); +$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; + + CREATE OR REPLACE FUNCTION slugify("value" TEXT) RETURNS TEXT AS $$ -- removes accents (diacritic signs) from a given string -- diff --git a/openaqdb/idempotent/views.sql b/openaqdb/idempotent/views.sql index 99eff7a..73986df 100644 --- a/openaqdb/idempotent/views.sql +++ b/openaqdb/idempotent/views.sql @@ -20,8 +20,8 @@ CREATE AGGREGATE public.last(anyelement, timestamptz) ( create table if not exists analyses_summary as SELECT sensors_id -, min(datetime) as first_datetime -, max(datetime) as last_datetime +, min(datetime) as datetime_first +, max(datetime) as datetime_last , last(value,datetime) as last_value , count(*) as value_count , sum(value) as value_sum @@ -34,18 +34,6 @@ SELECT sensors_id FROM analyses GROUP BY sensors_id; -DROP MATERIALIZED VIEW IF EXISTS sensors_first_last; -CREATE MATERIALIZED VIEW sensors_first_last AS -SELECT - sensors_id, - first_datetime, - last_datetime, - last_value -FROM - rollups -WHERE rollup='total' -; -CREATE INDEX ON sensors_first_last (sensors_id); DROP VIEW IF EXISTS sensor_nodes_sources_view CASCADE; CREATE OR REPLACE VIEW sensor_nodes_sources_view AS @@ -186,155 +174,6 @@ GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12 CREATE UNIQUE INDEX ON groups_view (groups_id, measurands_id); ANALYZE groups_view; -DROP MATERIALIZED VIEW IF EXISTS sensor_stats; -CREATE MATERIALIZED VIEW sensor_stats AS -WITH - analyses AS - (SELECT - sensors_id, - value_count, - value_sum, - first_datetime, - last_datetime, - last_value, - last_point, - minx, - miny, - maxx, - maxy - FROM analyses_summary - ), - sensorsdata as ( - SELECT - sensors_id, - value_count, - value_sum, - first_datetime, - last_datetime, - last_value, - last_point, - minx, - miny, - maxx, - maxy - FROM - rollups - LEFT JOIN groups_view USING (groups_id, measurands_id) - WHERE - rollup='total' and groups_view.type='node' - ), - out as ( - SELECT * FROM analyses - UNION ALL - SELECT * FROM sensorsdata - WHERE sensors_id NOT IN (SELECT sensors_id FROM analyses) - ) - SELECT - out.*, - sensor_nodes_id, - country, - city, - measurands_id - FROM out - JOIN sensors using (sensors_id) - JOIN sensor_systems using (sensor_systems_id) - JOIN sensor_nodes using (sensor_nodes_id) - ; - -create unique index on sensor_stats (sensors_id); - -DROP VIEW IF EXISTS sensors_full CASCADE; -CREATE OR REPLACE VIEW sensors_full AS -SELECT - sensors_id, - measurands_id, - sensor_systems_id, - sensor_nodes_id, - measurand, - units, - value_count::numeric, - value_sum / value_count as values_average, - first_datetime, - null::float 
as first_value, - last_datetime, - last_value, - sensors.metadata as sensor_metadata -FROM - sensors - LEFT JOIN sensor_stats USING (sensors_id, measurands_id) - LEFT JOIN measurands USING (measurands_id) - --LEFT JOIN measurands USING (measurands_id) - --LEFT JOIN rollups USING(sensors_id, measurands_id) - --LEFT JOIN sensor_systems USING(sensor_systems_id) - --WHERE rollup='total' -; - - -DROP VIEW IF EXISTS sensors_full_flat CASCADE; -CREATE MATERIALIZED VIEW sensors_full_flat AS -SELECT - * -FROM - sensors_full s - LEFT JOIN - sensor_nodes_ext USING (sensor_nodes_id); - - -DROP VIEW IF EXISTS sensors_full_json CASCADE; -CREATE OR REPLACE VIEW sensors_full_json AS -SELECT - sensors_id, - sensor_systems_id, - ( - to_jsonb(sensors_full) - - 'sensor_metadata' - ) || coalesce(sensor_metadata, '{}'::jsonb) as json -FROM - sensors_full -; - -DROP VIEW IF EXISTS sensor_systems_json CASCADE; -CREATE OR REPLACE VIEW sensor_systems_json AS -SELECT - sensor_systems_id, - sensor_nodes_id, - (to_jsonb(sensor_systems) - 'metadata') || - coalesce(metadata, '{}'::jsonb) || - jsonb_build_object( - 'sensors', json_agg(json) - ) as json -FROM sensor_systems -LEFT JOIN sensors_full_json -USING (sensor_systems_id) -GROUP BY sensor_systems_id, sensor_nodes_id -; - - -DROP MATERIALIZED VIEW IF EXISTS sensor_nodes_json CASCADE; -CREATE MATERIALIZED VIEW sensor_nodes_json AS -SELECT - sensor_nodes_id, - geom::geography as geog, - jsonb_strip_nulls( - (to_jsonb(sensor_nodes_ext) - 'metadata') || - coalesce(metadata, '{}'::jsonb) || - jsonb_build_object( - 'sensor_systems', json_agg(json) - ) - ) as json -FROM sensor_nodes_ext -LEFT JOIN sensor_systems_json -USING (sensor_nodes_id) -GROUP BY - sensor_nodes_id, - metadata, - geog, - geom, - sensor_nodes_ext -; -CREATE INDEX ON sensor_nodes_json USING GIST (geog); -CREATE INDEX ON sensor_nodes_json USING GIN (json); - @@ -375,119 +214,6 @@ CREATE INDEX ON measurements_fastapi_base USING GIST (geog); -DROP MATERIALIZED VIEW IF EXISTS locations_base_v2; -CREATE MATERIALIZED VIEW locations_base_v2 AS -WITH base AS ( - SELECT - sensor_nodes.sensor_nodes_id, - sensors.sensors_id, - site_name, - json, - ismobile, - sensor_nodes.city, - sensor_nodes.country, - source_name, - mfr(sensor_systems.metadata) mfr, - value_count, - value_sum, - first_datetime, - last_datetime, - last_value, - measurand, - units, - sensors.measurands_id, - geom, - last_point, - minx, - miny, - maxx, - maxy, - (sensor_nodes.metadata->>'is_analysis')::bool as "isAnalysis" - FROM - sensor_nodes - JOIN sensor_nodes_json USING (sensor_nodes_id) - JOIN sensor_systems USING(sensor_nodes_id) - JOIN sensors USING (sensor_systems_id) - JOIN sensor_stats using (sensors_id) - JOIN measurands on (measurands.measurands_id = sensors.measurands_id) - ), - overall AS ( - SELECT - sensor_nodes_id as "id", - site_name as "name", - json->>'sensorType' as "sensorType", - json->>'entity' as "entity", - ismobile as "isMobile", - "isAnalysis", - city, - country, - json->'sources' as sources, - jsonb_build_object( - -- 'longitude', st_x(coalesce((last(last_point, last_datetime))::geometry, geom)), - -- 'latitude', st_y(coalesce((last(last_point, last_datetime))::geometry, geom)) - ) as coordinates, - jsonb_agg(DISTINCT mfr) FILTER (WHERE mfr is not Null) as manufacturers, - sum(value_count) as measurements, - min(first_datetime) as "firstUpdated", - max(last_datetime) as "lastUpdated", - json, - coalesce((last(last_point, last_datetime))::geometry, geom) as sgeom, - coalesce((last(last_point, last_datetime)), 
geom::geography) as geog, - CASE WHEN ismobile THEN to_jsonb(ARRAY[min(minx), min(miny), max(maxx), max(maxy)]) ELSE NULL::jsonb END as bounds - FROM base - group by id, name,city,country,json,geom,sources,"sensorType","isMobile","isAnalysis" - ), - byparameter AS ( - SELECT - sensors_id as id, - sensor_nodes_id, - measurand as parameter, - units as unit, - measurands_id as "parameterId", - value_count as count, - value_sum / value_count as average, - first_datetime as "firstUpdated", - last_datetime as "lastUpdated", - last_value as "lastValue", - jsonb_agg(DISTINCT mfr) FILTER (WHERE mfr is not Null) as manufacturers - FROM - base - GROUP BY 1,2,3,4,5,6,7,8,9,10 - ) - SELECT - overall.*, - jsonb_agg((to_jsonb(byparameter) || parameter("parameterId"))-'{sensor_nodes_id}'::text[]) as parameters - - FROM overall - LEFT JOIN byparameter ON (overall.id=sensor_nodes_id) - GROUP BY - overall.id, - name, - city, - country, - coordinates, - overall."firstUpdated", - overall."lastUpdated", - "sensorType", - "entity", - "isMobile", - "isAnalysis", - measurements, - json, - sources, - geog, - sgeom, - bounds, - overall.manufacturers - ORDER BY "lastUpdated" DESC; - - -CREATE INDEX ON locations_base_v2 ("lastUpdated"); -CREATE INDEX ON locations_base_v2 (country); -CREATE INDEX ON locations_base_v2 USING GIST(geog); -CREATE INDEX ON locations_base_v2 USING GIN(parameters); -CREATE INDEX ON locations_base_v2 (id); -CREATE INDEX ON locations_base_v2 (name); create or replace view measurements_analyses AS @@ -497,36 +223,6 @@ SELECT * FROM analyses; -/* -DROP MATERIALIZED VIEW IF EXISTS mobile_gen CASCADE; -CREATE MATERIALIZED VIEW mobile_gen AS -SELECT - sensor_nodes_id, - st_snaptogrid(pt3857(lon,lat),30) as geom, - count(*) -FROM - measurements - LEFT JOIN sensors USING (sensors_id) - LEFT JOIN sensor_systems USING (sensor_systems_id) -WHERE - lat is not null and lon is not null -GROUP BY - 1,2 -; -CREATE INDEX ON mobile_gen (sensors_id); -CREATE INDEX ON mobile_gen USING GIST (geom, sensors_id); - -CREATE MATERIALIZED VIEW mobile_gen_boxes AS -SELECT - sensor_nodes_id, - st_expand(st_extent(geom),20)::geometry as box -FROM - mobile_gen -GROUP BY 1; -CREATE INDEX ON mobile_gen_boxes (sensors_id); -CREATE INDEX ON mobile_gen_boxes USING GIST (box, sensors_id); -*/ - DROP VIEW IF EXISTS sensor_nodes_check; CREATE OR REPLACE VIEW sensor_nodes_check AS SELECT sn.sensor_nodes_id @@ -1207,4 +903,44 @@ JOIN $$ LANGUAGE SQL; + CREATE OR REPLACE FUNCTION measurements_per_hour(dur interval DEFAULT '1day') RETURNS TABLE ( + sensor_nodes_count bigint + , sensors_count bigint + , measurements_per_hour_expected double precision + , measurements_per_hour_observed double precision + ) AS $$ + SELECT COUNT(DISTINCT n.sensor_nodes_id) as sensor_nodes_count + , COUNT(DISTINCT n.sensors_id) as sensors_count + , ROUND(SUM(3600.0/s.data_logging_period_seconds)) as measurements_per_hour_expected + , ROUND(SUM((3600.0/s.data_logging_period_seconds)*(percent_complete/100.0))) as measurements_per_hour_observed + FROM sensor_nodes_check n + JOIN sensors s USING (sensors_id) + WHERE datetime_last > now() - dur; + $$ LANGUAGE SQL; + + + CREATE OR REPLACE FUNCTION measurements_per_hour(param text, dur interval DEFAULT '1day') RETURNS TABLE ( + sensor_nodes_count bigint + , sensors_count bigint + , measurements_per_hour_expected double precision + , measurements_per_hour_observed double precision + ) AS $$ + SELECT COUNT(DISTINCT n.sensor_nodes_id) as sensor_nodes_count + , COUNT(DISTINCT n.sensors_id) as sensors_count + , 
ROUND(SUM(3600.0/s.data_logging_period_seconds)) as measurements_per_hour_expected + , ROUND(SUM((3600.0/s.data_logging_period_seconds)*(percent_complete/100.0))) as measurements_per_hour_observed + FROM sensor_nodes_check n + JOIN sensors s USING (sensors_id) + WHERE datetime_last > now() - dur + AND n.parameter ~* param; + $$ LANGUAGE SQL; + + + -- SELECT * FROM measurements_per_hour('pm25', '1day'::interval); + + -- WITH gs AS ( + -- SELECT UNNEST(ARRAY['1 hour', '4 hours', '8 hours', '16 hours', '2 days', '1 week']) as dur) + -- SELECT gs.dur as lag, m.* + -- FROM gs, measurements_per_hour('^pm', dur::interval) m; + COMMIT; diff --git a/openaqdb/init.sh b/openaqdb/init.sh index 1e94e0d..ce95637 100755 --- a/openaqdb/init.sh +++ b/openaqdb/init.sh @@ -41,7 +41,6 @@ psql --single-transaction \ psql --single-transaction -v ON_ERROR_STOP=1 -f lookups/measurands.sql psql --single-transaction -v ON_ERROR_STOP=1 -f lookups/measurands_map.sql psql --single-transaction -v ON_ERROR_STOP=1 -f lookups/thresholds.sql -psql --single-transaction -v ON_ERROR_STOP=1 -f lookups/thresholds.sql gunzip -c lookups/countries.csv.gz | psql --single-transaction -v ON_ERROR_STOP=1 -c "copy countries (name, iso_a3, iso, geog) from stdin DELIMITER ',' CSV HEADER" gunzip -c lookups/timezones.csv.gz | psql --single-transaction -v ON_ERROR_STOP=1 -c "copy timezones (tzid, geog) from stdin DELIMITER ',' CSV HEADER" diff --git a/openaqdb/init.sql b/openaqdb/init.sql index 7fe33ac..43cd8eb 100644 --- a/openaqdb/init.sql +++ b/openaqdb/init.sql @@ -61,6 +61,7 @@ GRANT ALL ON FUNCTIONS to :DATABASE_WRITE_USER; \i tables/providers_views.sql \i tables/parameters_views.sql \i tables/daily_data_rollups.sql +\i tables/annual_data_rollups.sql \i tables/lists.sql \i tables/measurements_view.sql @@ -69,8 +70,7 @@ GRANT ALL ON FUNCTIONS to :DATABASE_WRITE_USER; \i refresh_idempotent.sql INSERT INTO fetchlogs (key, last_modified) VALUES - ('lcs-etl-pipeline/measures/purpleair/1664911958-z2atn.csv.gz', now()) -, ('uploaded/measures/houston/61509.csv.gz', now()) + ('uploaded/measures/houston/61509.csv.gz', now()) , ('realtime-gzipped/2022-10-04/1664912239.ndjson.gz', now()) ON CONFLICT DO NOTHING ; diff --git a/openaqdb/locations/locations.sql b/openaqdb/locations/locations.sql index 5564bde..0a5f0e7 100644 --- a/openaqdb/locations/locations.sql +++ b/openaqdb/locations/locations.sql @@ -8,17 +8,35 @@ --DROP VIEW IF EXISTS locations_view CASCADE; CREATE OR REPLACE VIEW provider_licenses_view AS - SELECT p.providers_id - , json_agg(json_build_object( - 'id', p.licenses_id - , 'url', COALESCE(p.url, l.url) - , 'description', COALESCE(p.notes, l.description) - , 'date_from', lower(active_period) - , 'date_to', upper(active_period) - )) as licenses - FROM providers_licenses p - JOIN licenses l ON (l.licenses_id = p.licenses_id) - GROUP BY providers_id; + SELECT p.providers_id + , json_agg(json_build_object( + 'id', p.licenses_id + , 'name', l.name + , 'date_from', lower(active_period) + , 'date_to', upper(active_period) + )) as licenses + FROM providers_licenses p + JOIN licenses l ON (l.licenses_id = p.licenses_id) + GROUP BY providers_id; + + + CREATE OR REPLACE VIEW location_licenses_view AS + SELECT sn.sensor_nodes_id + , json_agg(json_build_object( + 'id', pl.licenses_id + , 'name', l.name + , 'date_from', lower(pl.active_period) + , 'date_to', upper(pl.active_period) + , 'attribution', json_build_object( + 'name', e.full_name, 'url', COALESCE(e.metadata->>'url', NULL) + ) + )) as licenses + , array_agg(DISTINCT pl.licenses_id) as 
license_ids + FROM providers_licenses pl + JOIN sensor_nodes sn USING (providers_id) + JOIN entities e ON (sn.owner_entities_id = e.entities_id) + JOIN licenses l ON (l.licenses_id = pl.licenses_id) + GROUP BY sn.sensor_nodes_id; CREATE OR REPLACE VIEW locations_view AS @@ -35,6 +53,7 @@ WITH nodes_instruments AS ( , 'name', mc.full_name ) )) as instruments + , array_agg(DISTINCT i.instruments_id) as instrument_ids , array_agg(DISTINCT mc.full_name) as manufacturers , array_agg(DISTINCT i.manufacturer_entities_id) as manufacturer_ids FROM sensor_nodes sn @@ -65,7 +84,7 @@ WITH nodes_instruments AS ( FROM sensor_nodes sn JOIN sensor_systems ss USING (sensor_nodes_id) JOIN sensors s USING (sensor_systems_id) - JOIN sensors_rollup sl USING (sensors_id) + LEFT JOIN sensors_rollup sl USING (sensors_id) JOIN measurands m USING (measurands_id) GROUP BY sensor_nodes_id) ----------------------------- @@ -106,8 +125,10 @@ SELECT , oc.entity_type::text~*'research' as is_analysis , ni.manufacturers , ni.manufacturer_ids - , pl.licenses - , l.providers_id + , ni.instrument_ids + , ll.licenses + , ll.license_ids + , l.providers_id FROM sensor_nodes l JOIN timezones t ON (l.timezones_id = t.timezones_id) JOIN countries c ON (c.countries_id = l.countries_id) @@ -115,7 +136,7 @@ JOIN entities oc ON (oc.entities_id = l.owner_entities_id) JOIN providers p ON (p.providers_id = l.providers_id) JOIN nodes_instruments ni USING (sensor_nodes_id) JOIN nodes_sensors ns USING (sensor_nodes_id) -LEFT JOIN provider_licenses_view pl ON (pl.providers_id = l.providers_id) +LEFT JOIN location_licenses_view ll USING (sensor_nodes_id) WHERE l.is_public; DROP MATERIALIZED VIEW IF EXISTS locations_view_cached CASCADE; @@ -144,7 +165,7 @@ WITH locations AS ( JOIN sensor_systems ss ON (ss.sensor_nodes_id = sn.sensor_nodes_id) JOIN instruments i ON (ss.instruments_id = i.instruments_id) JOIN entities mc ON (mc.entities_id = i.manufacturer_entities_id) - WHERE sn.is_public + WHERE sn.is_public GROUP BY 1,2) SELECT id , jsonb_agg(manufacturer) as manufacturers diff --git a/openaqdb/lookups/measurands_map.sql b/openaqdb/lookups/measurands_map.sql index 98d1831..d804966 100644 --- a/openaqdb/lookups/measurands_map.sql +++ b/openaqdb/lookups/measurands_map.sql @@ -2,21 +2,21 @@ WITH map AS ( SELECT d.* , m.measurands_id FROM (VALUES -('PurpleAir', 'pm1.0', 'pm1', 'µg/m³'), -('PurpleAir', 'pm2.5','pm25', 'µg/m³'), -('PurpleAir', 'pm10.0','pm10', 'µg/m³'), -('PurpleAir', '0.3_um_count','um003', 'particles/cm³'), -('PurpleAir', '0.5_um_count','um005', 'particles/cm³'), -('PurpleAir', '1.0_um_count','um010', 'particles/cm³'), -('PurpleAir', '2.5_um_count','um025', 'particles/cm³'), -('PurpleAir', '5.0_um_count','um050', 'particles/cm³'), -('PurpleAir', '10.0_um_count','um100', 'particles/cm³'), -('PurpleAir', 'humidity','humidity', '%'), -('PurpleAir', 'temperature','temperature', 'f'), -('PurpleAir', 'pressure','pressure', 'mb'), -('PurpleAir', 'voc','voc', 'iaq'), -('PurpleAir', 'ozone1','ozone', 'ppb') -, ('clarity', 'relHumid','relativehumidity', '%') -- RelativeHumidity +-- ('PurpleAir', 'pm1.0', 'pm1', 'µg/m³'), +-- ('PurpleAir', 'pm2.5','pm25', 'µg/m³'), +-- ('PurpleAir', 'pm10.0','pm10', 'µg/m³'), +-- ('PurpleAir', '0.3_um_count','um003', 'particles/cm³'), +-- ('PurpleAir', '0.5_um_count','um005', 'particles/cm³'), +-- ('PurpleAir', '1.0_um_count','um010', 'particles/cm³'), +-- ('PurpleAir', '2.5_um_count','um025', 'particles/cm³'), +-- ('PurpleAir', '5.0_um_count','um050', 'particles/cm³'), +-- ('PurpleAir', 
'10.0_um_count','um100', 'particles/cm³'), +-- ('PurpleAir', 'humidity','humidity', '%'), +-- ('PurpleAir', 'temperature','temperature', 'f'), +-- ('PurpleAir', 'pressure','pressure', 'mb'), +-- ('PurpleAir', 'voc','voc', 'iaq'), +-- ('PurpleAir', 'ozone1','ozone', 'ppb') + ('clarity', 'relHumid','relativehumidity', '%') -- RelativeHumidity , ('clarity', 'temperature','temperature', 'c') -- Temperature , ('clarity', 'pm2_5ConcMass','pm25', 'μg/m3') -- PM2.5 mass concentration , ('clarity', 'pm1ConcMass','pm1', 'μg/m3') -- PM1 mass concentration diff --git a/openaqdb/mock.sh b/openaqdb/mock.sh new file mode 100755 index 0000000..e1e4322 --- /dev/null +++ b/openaqdb/mock.sh @@ -0,0 +1,15 @@ +#!/bin/bash +cd $( dirname "${BASH_SOURCE[0]}") + +export PGDATABASE=$DATABASE_DB +export PGHOST=localhost + +psql --single-transaction -f "mock.sql" +psql -c "CALL initialize_sensors_rollup()" \ + -c "SELECT reset_hourly_stats()" \ + -c "CALL update_hourly_data(20000)" \ + -c "CALL update_daily_data(10000)" \ + -c "CALL update_annual_data(5000)" \ + -c "CALL update_cached_tables()" \ + -c "SELECT datetime, AVG(value_avg) as value_avg, AVG(value_min) as value_min, AVG(value_max) as value_max, AVG(value_count) as value_count_avg, MAX(value_count) as value_count_min, MIN(value_count) as value_count_max FROM daily_data GROUP BY 1 ORDER BY 1;" \ + -c "SELECT datetime, AVG(value_avg) as value_avg, AVG(value_min) as value_min, AVG(value_max) as value_max, AVG(value_count) as value_count_avg, MAX(value_count) as value_count_min, MIN(value_count) as value_count_max FROM annual_data GROUP BY 1 ORDER BY 1;" diff --git a/openaqdb/mock.sql b/openaqdb/mock.sql new file mode 100644 index 0000000..5420992 --- /dev/null +++ b/openaqdb/mock.sql @@ -0,0 +1,114 @@ + + + +-- create some test sites + -- using airgradient provider just to get a license +WITH locations AS ( + SELECT * FROM + (VALUES + ('testing/site1', 'testing', 'testing site 1', ST_SetSRID(ST_Point( -151.76306, -16.51516), 4326), 'airnow', 'pm25') -- Kirbati + , ('testing/site2', 'testing', 'testing site 2', ST_SetSRID(ST_Point( -121.8418, 44.75228), 4326), 'airnow', 'pm25') -- America/Los_Angeles + , ('testing/site3', 'testing', 'testing site 3', ST_SetSRID(ST_Point( -71.104, 42.315),4326), 'airnow', 'pm25') -- America/New_York + , ('testing/site4', 'testing', 'testing site 4', ST_SetSRID(ST_Point( -0.107389, 51.487236), 4326), 'airnow', 'pm25') -- Europe/London + , ('testing/site5', 'testing', 'testing site 5', ST_SetSRID(ST_Point( 185.199922, -20.248716), 4326), 'airnow', 'pm25') + ) as t (source_id, source_name, site_name, geom, provider, measurand) + ), inserted_nodes AS ( + INSERT INTO sensor_nodes ( + source_id + , source_name + , site_name + , geom + , countries_id + , timezones_id + , providers_id + , owner_entities_id + , ismobile -- currently not defaulted in the table + ) + SELECT source_id + , source_name + , site_name + , geom + , get_countries_id(geom) + , get_timezones_id(geom) + , get_providers_id(provider) + , 1 + , 'f' + FROM locations + ON CONFLICT (source_name, source_id) DO UPDATE + SET geom = EXCLUDED.geom + , timezones_id = EXCLUDED.timezones_id + , countries_id = EXCLUDED.countries_id + , owner_entities_id = EXCLUDED.owner_entities_id + RETURNING source_id, sensor_nodes_id + ), inserted_systems AS ( + INSERT INTO sensor_systems (sensor_nodes_id, source_id, instruments_id) + SELECT sensor_nodes_id + , source_id + , 2 as instruments_id -- 2 is a monitor and not a sensor + FROM locations l + JOIN inserted_nodes n USING (source_id) + ON 
CONFLICT (sensor_nodes_id, source_id) DO UPDATE + SET instruments_id = EXCLUDED.instruments_id + RETURNING sensor_systems_id, source_id + ), inserted_sensors AS ( + INSERT INTO sensors ( + sensor_systems_id + , source_id + , measurands_id + , data_averaging_period_seconds + , data_logging_period_seconds + ) + SELECT sensor_systems_id + , source_id||'/'||measurand + , get_measurands_id(l.measurand) + , 60*30 + , 60*30 + FROM locations l + JOIN inserted_systems n USING (source_id) + ON CONFLICT (sensor_systems_id, measurands_id) DO UPDATE + SET source_id = EXCLUDED.source_id + RETURNING sensor_systems_id, source_id + ) SELECT * FROM inserted_sensors; + + +WITH fake_times AS ( +SELECT generate_series('2023-03-01'::date, '2023-04-01'::date, '30min'::interval) as datetime + ) INSERT INTO measurements (datetime, sensors_id, value) + --SELECT f.datetime, s.sensors_id, date_part('day', as_local(datetime - interval '1sec', t.tzid)) + SELECT as_utc(datetime, t.tzid), s.sensors_id, date_part('day', datetime - interval '1sec') + FROM fake_times f + JOIN sensors s ON (TRUE) + JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) + JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) + JOIN timezones t ON (sn.timezones_id = t.timezones_id) + ON CONFLICT DO NOTHING; + + + -- make sure we have something to test the moy with +WITH fake_times AS ( +--SELECT generate_series(current_date - (365 * 2), current_date, '1d'::interval) as datetime +SELECT generate_series('2021-12-25'::date, '2023-01-05'::date, '1d'::interval) as datetime + ) INSERT INTO measurements (datetime, sensors_id, value) + --SELECT f.datetime, s.sensors_id, date_part('day', as_utc(datetime - interval '1sec', t.tzid)) + SELECT as_utc(datetime, t.tzid), s.sensors_id, date_part('day', datetime - interval '1sec') + FROM fake_times xf + JOIN sensors s ON (TRUE) + JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) + JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) + JOIN timezones t ON (sn.timezones_id = t.timezones_id) + WHERE sensors_id = 1 + ON CONFLICT DO NOTHING; + + +WITH fake_times AS ( +SELECT generate_series(current_date - 7, current_timestamp, '30min'::interval) as datetime + ) INSERT INTO measurements (datetime, sensors_id, value) + --SELECT f.datetime, s.sensors_id, date_part('day', as_utc(datetime - interval '1sec', t.tzid)) + SELECT as_utc(datetime, t.tzid), s.sensors_id, date_part('day', datetime - interval '1sec') + FROM fake_times f + JOIN sensors s ON (TRUE) + JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) + JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) + JOIN timezones t ON (sn.timezones_id = t.timezones_id) + WHERE sensors_id = 1 + ON CONFLICT DO NOTHING; diff --git a/openaqdb/tables/annual_data_rollups.sql b/openaqdb/tables/annual_data_rollups.sql new file mode 100644 index 0000000..ea76c06 --- /dev/null +++ b/openaqdb/tables/annual_data_rollups.sql @@ -0,0 +1,337 @@ + +CREATE TABLE IF NOT EXISTS annual_data ( + sensors_id int NOT NULL REFERENCES sensors ON DELETE CASCADE +, datetime date NOT NULL -- time be +, datetime_first timestamptz NOT NULL +, datetime_last timestamptz NOT NULL +, value_count int NOT NULL +, value_avg double precision +, value_sd double precision +, value_min double precision +, value_max double precision +, value_p02 double precision +, value_p25 double precision +, value_p50 double precision +, value_p75 double precision +, value_p98 double precision +, value_raw_avg double precision +, 
value_raw_count double precision +, value_raw_min double precision +, value_raw_max double precision +, error_count int +, error_raw_count int +, updated_on timestamptz -- last time the sensor data was updated +, calculated_on timestamptz-- last time the row rollup was calculated +, calculated_count int DEFAULT 1 +, UNIQUE(sensors_id, datetime) +); + +CREATE INDEX IF NOT EXISTS annual_data_sensors_id_idx +ON annual_data +USING btree (sensors_id); + +CREATE INDEX IF NOT EXISTS annual_data_day_idx +ON annual_data +USING btree (datetime); + + +CREATE TABLE IF NOT EXISTS annual_data_queue ( + datetime date NOT NULL + , tz_offset int NOT NULL + , added_on timestamptz NOT NULL DEFAULT now() + , queued_on timestamptz + , modified_on timestamptz -- last time the hourly data was modified + , modified_count int NOT NULL DEFAULT 0 + , calculated_on timestamptz + , calculated_count int NOT NULL DEFAULT 0 + , calculated_seconds real + , sensor_nodes_count int + , sensors_count int + , measurements_count int + , measurements_raw_count int + , UNIQUE(datetime, tz_offset) + ); + + + CREATE TABLE IF NOT EXISTS annual_stats ( + datetime date PRIMARY KEY + , added_on timestamptz NOT NULL DEFAULT now() + , modified_on timestamptz -- last time the hourly data was modified + , calculated_count int NOT NULL DEFAULT 0 + , updated_on timestamptz -- + , calculated_on timestamptz + , sensor_nodes_count int + , measurements_count int + , measurements_raw_count int + , sensors_count int + ); + + +CREATE OR REPLACE FUNCTION fetch_annual_data_jobs(n int DEFAULT 1, min_day date DEFAULT NULL, max_day date DEFAULT NULL) RETURNS TABLE( + datetime date + , tz_offset int + , queued_on timestamptz + ) AS $$ + BEGIN + RETURN QUERY + UPDATE annual_data_queue + SET queued_on = CURRENT_TIMESTAMP + , calculated_count = calculated_count + 1 + FROM ( + SELECT q.datetime + , q.tz_offset + FROM annual_data_queue q + -- Its either not been calculated or its been modified + WHERE q.datetime >= COALESCE(min_day, '-infinity'::date) + AND q.datetime <= COALESCE(max_day, current_date - '1year'::interval) + AND (q.calculated_on IS NULL OR q.modified_on > q.calculated_on) + -- either its never been or it was resently modified but not queued + AND (q.queued_on IS NULL -- has not been queued + OR ( + q.queued_on < now() - '1h'::interval -- a set amount of time has passed AND + AND ( + q.queued_on < q.modified_on -- its been changed since being queued + OR calculated_on IS NULL -- it was never calculated + ) + )) + ORDER BY q.datetime, q.tz_offset + LIMIT n + FOR UPDATE SKIP LOCKED + ) as d + WHERE d.datetime = annual_data_queue.datetime + AND d.tz_offset = annual_data_queue.tz_offset + RETURNING annual_data_queue.datetime + , annual_data_queue.tz_offset + , annual_data_queue.queued_on; + END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION update_annual_data_queue(dt date, tz_offset_int int) RETURNS bigint AS $$ + WITH annual_inserts AS ( + INSERT INTO annual_data_queue (datetime, tz_offset) VALUES + (date_trunc('year', dt + make_interval(hours=>tz_offset_int, secs=>-1)) + , tz_offset_int) + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET modified_on = now() + , modified_count = annual_data_queue.modified_count + 1 + RETURNING datetime, tz_offset + ) SELECT COUNT(*) + FROM annual_inserts; + $$ LANGUAGE SQL; + + +CREATE OR REPLACE FUNCTION daily_data_updated_event(dy date, tz_offset_int int) RETURNS boolean AS $$ + SELECT update_annual_data_queue(dy, tz_offset_int)>0; +$$ LANGUAGE SQL; + + +CREATE OR REPLACE FUNCTION 
calculate_annual_data_by_offset(dy date DEFAULT current_date - 1, tz_offset int DEFAULT 0) + RETURNS TABLE ( + sensors_id int + , sensor_nodes_id int + , datetime date + , updated_on timestamptz + , datetime_first timestamptz + , datetime_last timestamptz + , value_count bigint + , value_avg real + , value_sd real + , value_min real + , value_max real + , value_raw_count bigint + , value_raw_avg real + , value_raw_min real + , value_raw_max real + , value_p02 real + , value_p25 real + , value_p50 real + , value_p75 real + , value_p98 real + , error_raw_count bigint + , error_count bigint + ) AS $$ +SELECT + m.sensors_id +, sn.sensor_nodes_id +, as_year(m.datetime, t.tzid) as datetime +, MAX(m.updated_on) as updated_on +, MIN(datetime_first) as datetime_first +, MAX(datetime_last) as datetime_last +, COUNT(1) AS value_count +, AVG(value_avg) as value_avg +, STDDEV(value_avg) as value_sd +, MIN(value_avg) as value_min +, MAX(value_avg) as value_max +, SUM(value_count) as value_raw_count +, SUM(value_avg*value_count)/SUM(value_count) as value_raw_avg +, MIN(value_min) as value_raw_min +, MAX(value_max) as value_raw_max +, PERCENTILE_CONT(0.02) WITHIN GROUP(ORDER BY value_avg) as value_p02 +, PERCENTILE_CONT(0.25) WITHIN GROUP(ORDER BY value_avg) as value_p25 +, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY value_avg) as value_p50 +, PERCENTILE_CONT(0.75) WITHIN GROUP(ORDER BY value_avg) as value_p75 +, PERCENTILE_CONT(0.98) WITHIN GROUP(ORDER BY value_avg) as value_p98 +, SUM(error_count) as error_raw_count +, SUM((value_avg IS NULL)::int) as error_count +FROM hourly_data m +JOIN sensors s ON (m.sensors_id = s.sensors_id) +JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) +JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) +JOIN timezones t ON (sn.timezones_id = t.timezones_id) +WHERE value_count > 0 +AND datetime > as_utc(date_trunc('year', dy), t.tzid) +AND datetime <= as_utc(date_trunc('year', dy + '1year'::interval), t.tzid) +AND utc_offset_hours(dy, t.tzid) = tz_offset +GROUP BY 1,2,3 +HAVING COUNT(1) > 0; + $$ LANGUAGE SQL; + + +CREATE OR REPLACE FUNCTION insert_annual_data_by_offset(dy date DEFAULT current_date - 1, tz_offset int DEFAULT 0) + RETURNS TABLE ( + sensor_nodes_count bigint + , sensors_count bigint + , measurements_hourly_count bigint + , measurements_count bigint + ) AS $$ +SET LOCAL work_mem = '512MB'; +WITH data_rollup AS ( + SELECT * + FROM calculate_annual_data_by_offset(dy, tz_offset) +), data_inserted AS ( +INSERT INTO annual_data ( + sensors_id +, datetime +, updated_on +, datetime_first +, datetime_last +, value_count +, value_avg +, value_sd +, value_min +, value_max +, value_raw_count +, value_raw_avg +, value_raw_min +, value_raw_max +, value_p02 +, value_p25 +, value_p50 +, value_p75 +, value_p98 +, error_count +, error_raw_count +, calculated_on) + SELECT sensors_id +, datetime +, updated_on +, datetime_first +, datetime_last +, value_count +, value_avg +, value_sd +, value_min +, value_max +, value_raw_count +, value_raw_avg +, value_raw_min +, value_raw_max +, value_p02 +, value_p25 +, value_p50 +, value_p75 +, value_p98 +, error_count +, error_raw_count +, current_timestamp as calculated_on + FROM data_rollup +ON CONFLICT (sensors_id, datetime) DO UPDATE +SET datetime_first = EXCLUDED.datetime_first +, datetime_last = EXCLUDED.datetime_last +, updated_on = EXCLUDED.updated_on +, value_avg = EXCLUDED.value_avg +, value_min = EXCLUDED.value_min +, value_max = EXCLUDED.value_max +, value_count = EXCLUDED.value_count +, 
value_raw_avg = EXCLUDED.value_raw_avg +, value_raw_min = EXCLUDED.value_raw_min +, value_raw_max = EXCLUDED.value_raw_max +, value_raw_count = EXCLUDED.value_raw_count +, value_p02 = EXCLUDED.value_p02 +, value_p25 = EXCLUDED.value_p25 +, value_p50 = EXCLUDED.value_p50 +, value_p75 = EXCLUDED.value_p75 +, value_p98 = EXCLUDED.value_p98 +, error_count = EXCLUDED.error_count +, error_raw_count = EXCLUDED.error_raw_count +, calculated_on = EXCLUDED.calculated_on + RETURNING sensors_id, value_count, value_raw_count + ) SELECT COUNT(DISTINCT sensors_id) as sensors_count + , COUNT(DISTINCT sensor_nodes_id) as sensor_nodes_count + , SUM(value_count) as measurements_hourly_count + , SUM(value_raw_count) as measurements_count + FROM data_rollup; +$$ LANGUAGE SQL; + + +CREATE OR REPLACE FUNCTION update_annual_data(dy date DEFAULT current_date - 1, tz_offset_int int DEFAULT 0) RETURNS bigint AS $$ +DECLARE +nw timestamptz := clock_timestamp(); +mc bigint; +BEGIN +WITH inserted AS ( + SELECT sensor_nodes_count + , sensors_count + , measurements_hourly_count + , measurements_count + FROM insert_annual_data_by_offset(dy, tz_offset_int)) + INSERT INTO annual_data_queue ( + datetime + , tz_offset + , calculated_on + , calculated_count + , sensor_nodes_count + , sensors_count + , measurements_count + , measurements_raw_count + , calculated_seconds + ) + SELECT dy + , tz_offset_int + , now() + , 1 + , sensor_nodes_count + , sensors_count + , measurements_hourly_count + , measurements_count + , EXTRACT(EPOCH FROM clock_timestamp() - nw) + FROM inserted + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET calculated_on = EXCLUDED.calculated_on + , calculated_count = annual_data_queue.calculated_count + 1 + , measurements_count = EXCLUDED.measurements_count + , sensors_count = EXCLUDED.sensors_count + , calculated_seconds = EXCLUDED.calculated_seconds + RETURNING measurements_count INTO mc; + -- PERFORM annual_data_updated_event(dy, tz_offset_int); + RETURN mc; +END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE PROCEDURE update_annual_data(n int DEFAULT 5, min_day date DEFAULT NULL, max_day date DEFAULT NULL) AS $$ +DECLARE + rw record; +BEGIN +FOR rw IN ( + SELECT datetime + , tz_offset + FROM fetch_annual_data_jobs(n, min_day, max_day)) +LOOP + RAISE NOTICE 'updating year: % - %', rw.datetime, rw.tz_offset; + PERFORM update_annual_data(rw.datetime, rw.tz_offset); + COMMIT; +END LOOP; +END; +$$ LANGUAGE plpgsql; diff --git a/openaqdb/tables/countries_views.sql b/openaqdb/tables/countries_views.sql index ed0d3b5..319bce4 100644 --- a/openaqdb/tables/countries_views.sql +++ b/openaqdb/tables/countries_views.sql @@ -7,6 +7,15 @@ RETURNS int LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE AS $$ SELECT countries_id from countries WHERE st_intersects(g::geography, geog) LIMIT 1; $$; +CREATE OR REPLACE FUNCTION get_closest_countries_id(g geometry, w int DEFAULT 1000) + RETURNS int LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE AS $$ + SELECT c.countries_id + FROM countries c + WHERE ST_DWithin(c.geog, g::geography, w) + ORDER BY ST_Distance(c.geog, g::geography) ASC + LIMIT 1; + $$; + CREATE OR REPLACE FUNCTION country(g geography) RETURNS text LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE AS $$ SELECT iso from countries WHERE st_intersects(g, geog) LIMIT 1; diff --git a/openaqdb/tables/daily_data_rollups.sql b/openaqdb/tables/daily_data_rollups.sql index e074c5e..e09bd43 100644 --- a/openaqdb/tables/daily_data_rollups.sql +++ b/openaqdb/tables/daily_data_rollups.sql @@ -47,6 +47,7 @@ , modified_count int NOT NULL DEFAULT 0 
, calculated_on timestamptz , calculated_count int NOT NULL DEFAULT 0 + , calculated_seconds real , sensor_nodes_count int , sensors_count int , measurements_count int @@ -56,16 +57,18 @@ + CREATE OR REPLACE FUNCTION update_daily_data_queue() RETURNS bigint AS $$ WITH data_min AS ( SELECT MIN(datetime) as min_date + , MAX(datetime) as max_date FROM measurements ), days AS ( - SELECT generate_series(min_date, current_date, '1day'::interval) as datetime + SELECT generate_series(min_date, max_date, '1day'::interval) as datetime FROM data_min ), daily_inserts AS ( INSERT INTO daily_data_queue (datetime, tz_offset) - SELECT datetime, generate_series(0,23,1) as tz_offset + SELECT datetime, generate_series(-12,14,1) as tz_offset FROM days ON CONFLICT DO NOTHING RETURNING datetime, tz_offset @@ -76,9 +79,39 @@ CREATE OR REPLACE FUNCTION update_daily_data_queue() RETURNS bigint AS $$ +CREATE OR REPLACE FUNCTION update_daily_data_queue(dt timestamptz) RETURNS bigint AS $$ + WITH affected_offsets AS ( + -- the following will just queue up every possible offset + -- regardless of whether we have a sensor node + -- SELECT generate_series(-12, 14, 1) as tz_offset + -- and this will only do the queue the offsets that we have + SELECT utc_offset_hours(dt, tzid) as tz_offset + FROM sensor_nodes n + JOIN timezones t USING (timezones_id) + GROUP BY 1 + ), daily_inserts AS ( + INSERT INTO daily_data_queue (datetime, tz_offset) + SELECT (dt + make_interval(hours=>tz_offset::int, secs=>-1))::date + , tz_offset + FROM affected_offsets + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET modified_on = now() + , modified_count = daily_data_queue.modified_count + 1 + RETURNING datetime, tz_offset + ) SELECT COUNT(*) + FROM daily_inserts; + $$ LANGUAGE SQL; + + +CREATE OR REPLACE FUNCTION hourly_data_updated_event(hr timestamptz) RETURNS boolean AS $$ + SELECT update_daily_data_queue(hr)>0; +$$ LANGUAGE SQL; + + + DROP FUNCTION IF EXISTS fetch_daily_data_jobs(int, date, date); -CREATE OR REPLACE FUNCTION fetch_daily_data_jobs(n int, min_day date DEFAULT '-infinity', max_day date DEFAULT 'infinity') RETURNS TABLE( +CREATE OR REPLACE FUNCTION fetch_daily_data_jobs(n int DEFAULT 1, min_day date DEFAULT NULL, max_day date DEFAULT NULL) RETURNS TABLE( datetime date , tz_offset int , queued_on timestamptz @@ -94,10 +127,17 @@ CREATE OR REPLACE FUNCTION fetch_daily_data_jobs(n int, min_day date DEFAULT '-i FROM daily_data_queue q -- Its either not been calculated or its been modified WHERE q.datetime >= COALESCE(min_day, '-infinity'::date) - AND q.datetime <= COALESCE(min_day, 'infinity'::date) + AND q.datetime <= COALESCE(max_day, current_date - '1day'::interval) AND (q.calculated_on IS NULL OR q.modified_on > q.calculated_on) -- either its never been or it was resently modified but not queued - AND (q.queued_on IS NULL OR (q.queued_on < q.modified_on AND q.queued_on < now() - '1h'::interval)) + AND (q.queued_on IS NULL -- has not been queued + OR ( + q.queued_on < now() - '1h'::interval -- a set amount of time has passed AND + AND ( + q.queued_on < q.modified_on -- its been changed since being queued + OR calculated_on IS NULL -- it was never calculated + ) + )) ORDER BY q.datetime, q.tz_offset LIMIT n FOR UPDATE SKIP LOCKED @@ -128,10 +168,10 @@ $$ LANGUAGE plpgsql; -- The daily data will be similar to the hourly data and include -- summary data for that day in the appropriate timezone CREATE TABLE IF NOT EXISTS daily_data ( - sensors_id int NOT NULL --REFERENCES sensors ON DELETE CASCADE + sensors_id int NOT NULL 
REFERENCES sensors ON DELETE CASCADE , datetime date NOT NULL -- keeping the name datetime makes dynamic queries easier -, first_datetime timestamptz NOT NULL -, last_datetime timestamptz NOT NULL +, datetime_first timestamptz NOT NULL +, datetime_last timestamptz NOT NULL , value_count int NOT NULL , value_avg double precision , value_sd double precision @@ -154,10 +194,6 @@ CREATE TABLE IF NOT EXISTS daily_data ( , UNIQUE(sensors_id, datetime) ); ---ALTER TABLE daily_data --- ADD COLUMN error_count int NOT NULL DEFAULT 0 ---, ADD COLUMN error_raw_count int NOT NULL DEFAULT 0; - CREATE INDEX IF NOT EXISTS daily_data_sensors_id_idx ON daily_data USING btree (sensors_id); @@ -189,8 +225,8 @@ SELECT , sn.sensor_nodes_id , as_date(m.datetime, t.tzid) as datetime , utc_offset(t.tzid) as utc_offset -, MIN(datetime) as first_datetime -, MAX(datetime) as last_datetime +, MIN(datetime) as datetime_first +, MAX(datetime) as datetime_last , COUNT(1) AS value_count , SUM(value_count) as value_raw_count , AVG(value_avg) as value_avg @@ -227,8 +263,8 @@ SELECT , sn.sensor_nodes_id , as_date(m.datetime, t.tzid) as datetime , MAX(m.updated_on) as updated_on -, MIN(first_datetime) as first_datetime -, MAX(last_datetime) as last_datetime +, MIN(datetime_first) as datetime_first +, MAX(datetime_last) as datetime_last , COUNT(1) AS value_count , AVG(value_avg) as value_avg , STDDEV(value_avg) as value_sd @@ -261,8 +297,8 @@ INSERT INTO daily_data ( sensors_id , datetime , updated_on -, first_datetime -, last_datetime +, datetime_first +, datetime_last , value_count , value_avg , value_sd @@ -283,8 +319,8 @@ INSERT INTO daily_data ( SELECT sensors_id , datetime , updated_on -, first_datetime -, last_datetime +, datetime_first +, datetime_last , value_count , value_avg , value_sd @@ -304,8 +340,8 @@ INSERT INTO daily_data ( , current_timestamp as calculated_on FROM sensors_rollup ON CONFLICT (sensors_id, datetime) DO UPDATE -SET first_datetime = EXCLUDED.first_datetime -, last_datetime = EXCLUDED.last_datetime +SET datetime_first = EXCLUDED.datetime_first +, datetime_last = EXCLUDED.datetime_last , updated_on = EXCLUDED.updated_on , value_avg = EXCLUDED.value_avg , value_min = EXCLUDED.value_min @@ -331,6 +367,223 @@ SET first_datetime = EXCLUDED.first_datetime $$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION calculate_daily_data_by_offset(dy date DEFAULT current_date - 1, tz_offset int DEFAULT 0) + RETURNS TABLE ( + sensors_id int + , sensor_nodes_id int + , datetime date + , updated_on timestamptz + , datetime_first timestamptz + , datetime_last timestamptz + , value_count bigint + , value_avg double precision + , value_sd double precision + , value_min double precision + , value_max double precision + , value_raw_count bigint + , value_raw_avg double precision + , value_raw_min double precision + , value_raw_max double precision + , value_p02 double precision + , value_p25 double precision + , value_p50 double precision + , value_p75 double precision + , value_p98 double precision + , error_raw_count bigint + , error_count bigint + ) AS $$ +SELECT + m.sensors_id +, sn.sensor_nodes_id +, as_date(m.datetime, t.tzid) as datetime +, MAX(m.updated_on) as updated_on +, MIN(datetime_first) as datetime_first +, MAX(datetime_last) as datetime_last +, COUNT(1) AS value_count +, AVG(value_avg) as value_avg +, STDDEV(value_avg) as value_sd +, MIN(value_avg) as value_min +, MAX(value_avg) as value_max +, SUM(value_count) as value_raw_count +, SUM(value_avg*value_count)/SUM(value_count) as value_raw_avg +, MIN(value_min) as 
value_raw_min +, MAX(value_max) as value_raw_max +, PERCENTILE_CONT(0.02) WITHIN GROUP(ORDER BY value_avg) as value_p02 +, PERCENTILE_CONT(0.25) WITHIN GROUP(ORDER BY value_avg) as value_p25 +, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY value_avg) as value_p50 +, PERCENTILE_CONT(0.75) WITHIN GROUP(ORDER BY value_avg) as value_p75 +, PERCENTILE_CONT(0.98) WITHIN GROUP(ORDER BY value_avg) as value_p98 +, SUM(error_count) as error_raw_count +, SUM((value_avg IS NULL)::int) as error_count +FROM hourly_data m +JOIN sensors s ON (m.sensors_id = s.sensors_id) +JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) +JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) +JOIN timezones t ON (sn.timezones_id = t.timezones_id) +WHERE value_count > 0 +AND datetime > as_utc(dy, t.tzid) +AND datetime <= as_utc(dy + 1, t.tzid) +AND utc_offset_hours(dy, t.tzid) = tz_offset +GROUP BY 1,2,3 +HAVING COUNT(1) > 0; + $$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION insert_daily_data_by_offset(dy date DEFAULT current_date - 1, tz_offset int DEFAULT 0) + RETURNS TABLE ( + sensor_nodes_count bigint + , sensors_count bigint + , measurements_hourly_count bigint + , measurements_count bigint + ) AS $$ +SET LOCAL work_mem = '512MB'; +WITH data_rollup AS ( + SELECT * + FROM calculate_daily_data_by_offset(dy, tz_offset) +), data_inserted AS ( +INSERT INTO daily_data ( + sensors_id +, datetime +, updated_on +, datetime_first +, datetime_last +, value_count +, value_avg +, value_sd +, value_min +, value_max +, value_raw_count +, value_raw_avg +, value_raw_min +, value_raw_max +, value_p02 +, value_p25 +, value_p50 +, value_p75 +, value_p98 +, error_count +, error_raw_count +, calculated_on) + SELECT sensors_id +, datetime +, updated_on +, datetime_first +, datetime_last +, value_count +, value_avg +, value_sd +, value_min +, value_max +, value_raw_count +, value_raw_avg +, value_raw_min +, value_raw_max +, value_p02 +, value_p25 +, value_p50 +, value_p75 +, value_p98 +, error_count +, error_raw_count +, current_timestamp as calculated_on + FROM data_rollup +ON CONFLICT (sensors_id, datetime) DO UPDATE +SET datetime_first = EXCLUDED.datetime_first +, datetime_last = EXCLUDED.datetime_last +, updated_on = EXCLUDED.updated_on +, value_avg = EXCLUDED.value_avg +, value_min = EXCLUDED.value_min +, value_max = EXCLUDED.value_max +, value_count = EXCLUDED.value_count +, value_raw_avg = EXCLUDED.value_raw_avg +, value_raw_min = EXCLUDED.value_raw_min +, value_raw_max = EXCLUDED.value_raw_max +, value_raw_count = EXCLUDED.value_raw_count +, value_p02 = EXCLUDED.value_p02 +, value_p25 = EXCLUDED.value_p25 +, value_p50 = EXCLUDED.value_p50 +, value_p75 = EXCLUDED.value_p75 +, value_p98 = EXCLUDED.value_p98 +, error_count = EXCLUDED.error_count +, error_raw_count = EXCLUDED.error_raw_count +, calculated_on = EXCLUDED.calculated_on + RETURNING sensors_id, value_count, value_raw_count + ) SELECT COUNT(DISTINCT sensors_id) as sensors_count + , COUNT(DISTINCT sensor_nodes_id) as sensor_nodes_count + , SUM(value_count) as measurements_hourly_count + , SUM(value_raw_count) as measurements_count + FROM data_rollup; +$$ LANGUAGE SQL; + + +CREATE OR REPLACE FUNCTION daily_data_updated_event(dy date, tz_offset_int int) RETURNS boolean AS $$ + SELECT 't'::boolean; +$$ LANGUAGE SQL; + + + +CREATE OR REPLACE FUNCTION update_daily_data(dy date DEFAULT current_date - 1, tz_offset_int int DEFAULT 0) RETURNS bigint AS $$ +DECLARE +nw timestamptz := clock_timestamp(); +mc bigint; +BEGIN +WITH inserted AS ( + SELECT 
sensor_nodes_count + , sensors_count + , measurements_hourly_count + , measurements_count + FROM insert_daily_data_by_offset(dy, tz_offset_int)) + INSERT INTO daily_data_queue ( + datetime + , tz_offset + , calculated_on + , calculated_count + , sensor_nodes_count + , sensors_count + , measurements_count + , measurements_raw_count + , calculated_seconds + ) + SELECT dy + , tz_offset_int + , now() + , 1 + , sensor_nodes_count + , sensors_count + , measurements_hourly_count + , measurements_count + , EXTRACT(EPOCH FROM clock_timestamp() - nw) + FROM inserted + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET calculated_on = EXCLUDED.calculated_on + , calculated_count = daily_data_queue.calculated_count + 1 + , measurements_count = EXCLUDED.measurements_count + , sensors_count = EXCLUDED.sensors_count + , calculated_seconds = EXCLUDED.calculated_seconds + RETURNING measurements_count INTO mc; + PERFORM daily_data_updated_event(dy, tz_offset_int); + RETURN mc; +END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE PROCEDURE update_daily_data(n int DEFAULT 5, min_day date DEFAULT NULL, max_day date DEFAULT NULL) AS $$ +DECLARE + rw record; +BEGIN +FOR rw IN ( + SELECT datetime + , tz_offset + FROM fetch_daily_data_jobs(n, min_day, max_day)) +LOOP + RAISE NOTICE 'updating day: % - %', rw.datetime, rw.tz_offset; + PERFORM update_daily_data(rw.datetime, rw.tz_offset); + COMMIT; +END LOOP; +END; +$$ LANGUAGE plpgsql; + + + CREATE OR REPLACE FUNCTION calculate_sensor_daily_data(id int, sd date, ed date) RETURNS TABLE ( sensor_nodes_count bigint @@ -345,8 +598,8 @@ SELECT , sn.sensor_nodes_id , as_date(m.datetime, t.tzid) as datetime , MAX(m.updated_on) as updated_on -, MIN(first_datetime) as first_datetime -, MAX(last_datetime) as last_datetime +, MIN(datetime_first) as datetime_first +, MAX(datetime_last) as datetime_last , COUNT(1) AS value_count , AVG(value_avg) as value_avg , STDDEV(value_avg) as value_sd @@ -379,8 +632,8 @@ INSERT INTO daily_data ( sensors_id , datetime , updated_on -, first_datetime -, last_datetime +, datetime_first +, datetime_last , value_count , value_avg , value_sd @@ -401,8 +654,8 @@ INSERT INTO daily_data ( SELECT sensors_id , datetime , updated_on -, first_datetime -, last_datetime +, datetime_first +, datetime_last , value_count , value_avg , value_sd @@ -422,8 +675,8 @@ INSERT INTO daily_data ( , current_timestamp as calculated_on FROM sensors_rollup ON CONFLICT (sensors_id, datetime) DO UPDATE -SET first_datetime = EXCLUDED.first_datetime -, last_datetime = EXCLUDED.last_datetime +SET datetime_first = EXCLUDED.datetime_first +, datetime_last = EXCLUDED.datetime_last , updated_on = EXCLUDED.updated_on , value_avg = EXCLUDED.value_avg , value_min = EXCLUDED.value_min diff --git a/openaqdb/tables/exports.sql b/openaqdb/tables/exports.sql index db9fc9f..d170626 100644 --- a/openaqdb/tables/exports.sql +++ b/openaqdb/tables/exports.sql @@ -167,7 +167,7 @@ CREATE OR REPLACE FUNCTION get_providers_id(p text) RETURNS int LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE AS $$ SELECT providers_id FROM providers -WHERE source_name = p +WHERE lower(source_name) = lower(p) LIMIT 1; $$; diff --git a/openaqdb/tables/licenses.sql b/openaqdb/tables/licenses.sql index 696bcd4..434641e 100644 --- a/openaqdb/tables/licenses.sql +++ b/openaqdb/tables/licenses.sql @@ -17,11 +17,11 @@ CREATE TABLE IF NOT EXISTS licenses ( , description text -- short descriptive summary , url text -- link to the actual text/pdf/copy of license -- , author_entities_id int NOT NULL REFERENCES entities - , 
attribution_required boolean DEFAULT 't' - , share_alike_required boolean DEFAULT 't' - , commercial_use_allowed boolean DEFAULT 'f' - , redistribution_allowed boolean DEFAULT 'f' - , modification_allowed boolean DEFAULT 'f' + , attribution_required boolean NOT NULL DEFAULT 't' + , share_alike_required boolean NOT NULL DEFAULT 't' + , commercial_use_allowed boolean NOT NULL DEFAULT 'f' + , redistribution_allowed boolean NOT NULL DEFAULT 'f' + , modification_allowed boolean NOT NULL DEFAULT 'f' , metadata jsonb ); diff --git a/openaqdb/tables/measurands.sql b/openaqdb/tables/measurands.sql index 6363761..57fd8dd 100644 --- a/openaqdb/tables/measurands.sql +++ b/openaqdb/tables/measurands.sql @@ -31,3 +31,12 @@ SELECT measurands_id , concat(measurand, units) FROM measurands GROUP BY 1,2; + + +CREATE OR REPLACE FUNCTION get_measurands_id(m text) +RETURNS int LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE AS $$ +SELECT measurands_id +FROM measurands +WHERE lower(measurand) = lower(m) +LIMIT 1; +$$; diff --git a/openaqdb/tables/metadata.sql b/openaqdb/tables/metadata.sql index fa10cc6..4d13870 100644 --- a/openaqdb/tables/metadata.sql +++ b/openaqdb/tables/metadata.sql @@ -259,7 +259,8 @@ INSERT INTO instruments (instruments_id , description , manufacturer_entities_id , is_monitor) VALUES -(1, 'N/A', 'Instrument is not available', 1, 'f') +(1, 'Unknown Sensor', 'Instrument details not available', 1, 'f') +, (2, 'Government Monitor', 'Instrument details are not available', 1, 't') ON CONFLICT DO NOTHING; @@ -278,7 +279,7 @@ INSERT INTO providers (providers_id , source_name , export_prefix , owner_entities_id) VALUES -(1, 'N/A', 'Provider is not available', 'na', 'na_', 1); +(1, 'Unknown Provider', 'Provider details are not available', 'na', 'na_', 1); DO $$ BEGIN diff --git a/openaqdb/tables/rollups.sql b/openaqdb/tables/rollups.sql index 2127343..b226807 100644 --- a/openaqdb/tables/rollups.sql +++ b/openaqdb/tables/rollups.sql @@ -5,62 +5,20 @@ CREATE SEQUENCE IF NOT EXISTS sensors_rollup_sq START 10; CREATE TABLE IF NOT EXISTS sensors_rollup ( sensors_id int PRIMARY KEY REFERENCES sensors - , datetime_first timestamptz -- first recorded measument datetime (@ingest) - , datetime_last timestamptz -- last recorded measurement time (@ingest) - , geom_latest geometry -- last recorded point (@ingest) - , value_latest double precision -- last recorded measurement (@ingest) - , value_count int NOT NULL -- total count of measurements (@ingest, @rollup) - , value_avg double precision -- average of all measurements (@ingest, @rollup) - , value_sd double precision -- sd of all measurements (@ingest, @rollup) - , value_min double precision -- lowest measurement value (@ingest, @rollup) - , value_max double precision -- highest value measured (@ingest, @rollup) - --, value_p05 double precision -- 5th percentile (@rollup) - --, value_p50 double precision -- median (@rollup) - --, value_p95 double precision -- 95th percentile (@rollup) - , added_on timestamptz NOT NULL DEFAULT now() -- first time measurements were added (@ingest) - , modified_on timestamptz NOT NULL DEFAULT now() -- last time we measurements were added (@ingest) - --, calculated_on timestamptz -- last time data was rolled up (@rollup) + , datetime_first timestamptz NOT NULL -- first recorded measument datetime (@ingest) + , datetime_last timestamptz NOT NULL -- last recorded measurement time (@ingest) + , geom_latest geometry -- last recorded point (@ingest) + , value_latest double precision NOT NULL -- last recorded measurement (@ingest) + , 
value_count int NOT NULL -- total count of measurements (@ingest, @rollup) + , value_avg double precision NOT NULL -- average of all measurements (@ingest, @rollup) + , value_sd double precision NOT NULL -- sd of all measurements (@ingest, @rollup) + , value_min double precision NOT NULL -- lowest measurement value (@ingest, @rollup) + , value_max double precision NOT NULL -- highest value measured (@ingest, @rollup) + , added_on timestamptz NOT NULL DEFAULT now() -- first time measurements were added (@ingest) + , modified_on timestamptz NOT NULL DEFAULT now() -- last time measurements were added (@ingest) + ); --- Sensors latest will act as a cache for the most recent --- sensor value, managed by the ingester -CREATE TABLE IF NOT EXISTS sensors_latest ( - sensors_id int PRIMARY KEY NOT NULL REFERENCES sensors - , datetime timestamptz - , value double precision NOT NULL - , lat double precision -- so that nulls dont take up space - , lon double precision - , modified_on timestamptz DEFAULT now() - , fetchlogs_id int -- for debugging issues, no reference constraint -); - - -CREATE TABLE IF NOT EXISTS rollups ( - groups_id int REFERENCES groups (groups_id), - measurands_id int, - sensors_id int, - rollup text, - st timestamptz, - et timestamptz, - first_datetime timestamptz, - last_datetime timestamptz, - value_count bigint, - value_sum float, - last_value float, - minx float, - miny float, - maxx float, - maxy float, - last_point geography, - PRIMARY KEY (groups_id, measurands_id, rollup, et) -); - -CREATE INDEX rollups_measurands_id_idx ON rollups USING btree (measurands_id); -CREATE INDEX rollups_rollup_idx ON rollups USING btree (rollup); -CREATE INDEX rollups_sensors_id_idx ON rollups USING btree (sensors_id); -CREATE INDEX rollups_st_idx ON rollups USING btree (st); - -- The following tables, functions and views are to handle -- tracking coverage for the system. 
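-- The hourly/daily/annual rollups introduced in this patch are queue driven:
-- update_hourly_data() calls hourly_data_updated_event(), which queues the affected
-- day in daily_data_queue, and update_daily_data() calls daily_data_updated_event(),
-- which queues the affected year in annual_data_queue (assuming the later definitions
-- of those hooks in daily_data_rollups.sql and annual_data_rollups.sql replace the
-- placeholder versions). A rough end-to-end pass, mirroring the sequence in mock.sh,
-- might look like:
-- CALL update_hourly_data(20000);  -- roll up pending hours, queue the affected days
-- CALL update_daily_data(10000);   -- roll up queued days, queue the affected years
-- CALL update_annual_data(5000);   -- roll up queued years
-- SELECT datetime, tz_offset, calculated_on, measurements_count
--   FROM annual_data_queue ORDER BY datetime;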
If possibly we may also want to replace -- the rollups table above (which uses groups) with the hourly_data table @@ -72,8 +30,8 @@ CREATE TABLE IF NOT EXISTS hourly_data ( sensors_id int NOT NULL --REFERENCES sensors ON DELETE CASCADE , datetime timestamptz NOT NULL , measurands_id int NOT NULL --REFERENCES measurands -- required for partition -, first_datetime timestamptz NOT NULL -, last_datetime timestamptz NOT NULL +, datetime_first timestamptz NOT NULL +, datetime_last timestamptz NOT NULL , value_count int NOT NULL , value_avg double precision , value_sd double precision @@ -378,8 +336,8 @@ INSERT INTO hourly_data ( sensors_id , measurands_id , datetime -, first_datetime -, last_datetime +, datetime_first +, datetime_last , value_count , value_avg , value_sd @@ -396,8 +354,8 @@ INSERT INTO hourly_data ( sensors_id , measurands_id , datetime -, MIN(datetime) as first_datetime -, MAX(datetime) as last_datetime +, MIN(datetime) as datetime_first +, MAX(datetime) as datetime_last , COUNT(1) as value_count , AVG(value) as value_avg , STDDEV(value) as value_sd @@ -414,8 +372,8 @@ FROM measurements_filtered m GROUP BY 1,2,3 HAVING COUNT(1) > 0 ON CONFLICT (sensors_id, measurands_id, datetime) DO UPDATE -SET first_datetime = EXCLUDED.first_datetime -, last_datetime = EXCLUDED.last_datetime +SET datetime_first = EXCLUDED.datetime_first +, datetime_last = EXCLUDED.datetime_last , value_avg = EXCLUDED.value_avg , value_min = EXCLUDED.value_min , value_max = EXCLUDED.value_max @@ -459,8 +417,8 @@ INSERT INTO hourly_data ( sensors_id , measurands_id , datetime -, first_datetime -, last_datetime +, datetime_first +, datetime_last , value_count , value_avg , value_sd @@ -477,8 +435,8 @@ INSERT INTO hourly_data ( sensors_id , measurands_id , datetime -, MIN(datetime) as first_datetime -, MAX(datetime) as last_datetime +, MIN(datetime) as datetime_first +, MAX(datetime) as datetime_last , COUNT(1) as value_count , AVG(value) as value_avg , STDDEV(value) as value_sd @@ -495,8 +453,8 @@ FROM measurements_filtered m GROUP BY 1,2,3 HAVING COUNT(1) > 0 ON CONFLICT (sensors_id, measurands_id, datetime) DO UPDATE -SET first_datetime = EXCLUDED.first_datetime -, last_datetime = EXCLUDED.last_datetime +SET datetime_first = EXCLUDED.datetime_first +, datetime_last = EXCLUDED.datetime_last , value_avg = EXCLUDED.value_avg , value_min = EXCLUDED.value_min , value_max = EXCLUDED.value_max @@ -532,7 +490,14 @@ SELECT * FROM calculate_hourly_data(dt::timestamptz, dt + '1day'::interval); $$ LANGUAGE SQL; +-- create a dummy method for triggering another event + CREATE OR REPLACE FUNCTION hourly_data_updated_event(hr timestamptz) RETURNS boolean AS $$ + SELECT 't'::boolean; + $$ LANGUAGE SQL; + + --DROP FUNCTION IF EXISTS update_hourly_data(timestamptz); +-- this is the root method CREATE OR REPLACE FUNCTION update_hourly_data(hr timestamptz DEFAULT now() - '1hour'::interval) RETURNS bigint AS $$ DECLARE nw timestamptz := clock_timestamp(); @@ -542,28 +507,29 @@ WITH inserted AS ( SELECT COALESCE(measurements_count, 0) as measurements_count , COALESCE(sensors_count, 0) as sensors_count FROM calculate_hourly_data(hr)) -INSERT INTO hourly_stats ( - datetime -, calculated_on -, measurements_count -, sensors_count -, calculated_count -, calculated_seconds) -SELECT date_trunc('hour', hr) -, now() -, measurements_count -, sensors_count -, 1 -, EXTRACT(EPOCH FROM clock_timestamp() - nw) -FROM inserted -ON CONFLICT (datetime) DO UPDATE -SET calculated_on = EXCLUDED.calculated_on -, calculated_count = 
hourly_stats.calculated_count + 1 -, measurements_count = EXCLUDED.measurements_count -, sensors_count = EXCLUDED.sensors_count -, calculated_seconds = EXCLUDED.calculated_seconds -RETURNING measurements_count INTO mc; -RETURN mc; + INSERT INTO hourly_stats ( + datetime + , calculated_on + , measurements_count + , sensors_count + , calculated_count + , calculated_seconds) + SELECT date_trunc('hour', hr) + , now() + , measurements_count + , sensors_count + , 1 + , EXTRACT(EPOCH FROM clock_timestamp() - nw) + FROM inserted + ON CONFLICT (datetime) DO UPDATE + SET calculated_on = EXCLUDED.calculated_on + , calculated_count = hourly_stats.calculated_count + 1 + , measurements_count = EXCLUDED.measurements_count + , sensors_count = EXCLUDED.sensors_count + , calculated_seconds = EXCLUDED.calculated_seconds + RETURNING measurements_count INTO mc; + PERFORM hourly_data_updated_event(hr); + RETURN mc; END; $$ LANGUAGE plpgsql; @@ -704,51 +670,6 @@ END; $$ LANGUAGE plpgsql; -DROP TABLE IF EXISTS sensors_rollup_patch; -SELECT sensors_id -, MIN(first_datetime) as first_datetime -, MAX(last_datetime) as last_datetime -, SUM(value_count) as value_count -, AVG(value_avg) as value_avg -, MIN(value_min) as value_min -, MAX(value_max) as value_max -INTO sensors_rollup_patch -FROM hourly_data -GROUP BY sensors_id; - -SELECT COUNT(1) -FROM sensors_rollup; - -INSERT INTO sensors_rollup ( - sensors_id - , datetime_first - , datetime_last - , value_count - , value_avg - , value_min - , value_max - , value_latest -) -SELECT s.sensors_id - , s.first_datetime - , s.last_datetime - , s.value_count - , s.value_avg - , s.value_min - , s.value_max - , m.value -FROM sensors_rollup_patch s -JOIN measurements m ON (s.sensors_id = m.sensors_id AND s.last_datetime = m.datetime) -ON CONFLICT (sensors_id) DO UPDATE -SET datetime_first = EXCLUDED.datetime_first -, datetime_last = EXCLUDED.datetime_last -, value_count = EXCLUDED.value_count -, value_min = EXCLUDED.value_min -, value_max = EXCLUDED.value_max -, value_avg = EXCLUDED.value_avg -, value_latest = COALESCE(sensors_rollup.value_latest, EXCLUDED.value_latest); - - -- when was it last updated DO $$ @@ -816,12 +737,3 @@ BEGIN WHERE datetime = st; END; $$ LANGUAGE plpgsql; - - - - - -SELECT * - FROM hourly_stats - WHERE calculated_on IS NULL - LIMIT 10; diff --git a/openaqdb/tables/users.sql b/openaqdb/tables/users.sql index 7e898c5..ffc4eaa 100644 --- a/openaqdb/tables/users.sql +++ b/openaqdb/tables/users.sql @@ -107,6 +107,7 @@ CREATE TABLE IF NOT EXISTS entities ( , added_by int NOT NULL REFERENCES users DEFAULT 1 , modified_on timestamptz , modified_by int REFERENCES users + , metadata jsonb ); diff --git a/poetry.lock b/poetry.lock index fa60e37..64f13ed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -11,6 +11,154 @@ files = [ {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, ] +[[package]] +name = "boto3" +version = "1.35.0" +description = "The AWS SDK for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "boto3-1.35.0-py3-none-any.whl", hash = "sha256:ada32dab854c46a877cf967b8a55ab1a7d356c3c87f1c8bd556d446ff03dfd95"}, + {file = "boto3-1.35.0.tar.gz", hash = "sha256:bdc242e3ea81decc6ea551b04b2c122f088c29269d8e093b55862946aa0fcfc6"}, +] + +[package.dependencies] +botocore = ">=1.35.0,<1.36.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.10.0,<0.11.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.35.0" 
+description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">=3.8" +files = [ + {file = "botocore-1.35.0-py3-none-any.whl", hash = "sha256:a3c96fe0b6afe7d00bad6ffbe73f2610953065fcdf0ed697eba4e1e5287cc84f"}, + {file = "botocore-1.35.0.tar.gz", hash = "sha256:6ab2f5a5cbdaa639599e3478c65462c6d6a10173dc8b941bfc69b0c9eb548f45"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.21.2)"] + +[[package]] +name = "certifi" +version = "2024.7.4" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = 
"charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = 
"charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -22,6 +170,88 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "greenlet" +version = "3.0.3" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, + {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, + {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, + {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, + {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, + {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, + {file = "greenlet-3.0.3-cp37-cp37m-macosx_11_0_universal2.whl", hash = "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6"}, + {file = "greenlet-3.0.3-cp37-cp37m-win32.whl", hash = "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d"}, + {file = "greenlet-3.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67"}, + {file = "greenlet-3.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da"}, + {file = "greenlet-3.0.3-cp38-cp38-win32.whl", hash = "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3"}, + {file = "greenlet-3.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf"}, + {file = "greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61"}, + {file = 
"greenlet-3.0.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113"}, + {file = "greenlet-3.0.3-cp39-cp39-win32.whl", hash = "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e"}, + {file = "greenlet-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067"}, + {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -33,6 +263,36 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + +[[package]] +name = "migra" +version = "3.0.1663481299" +description = "Like `diff` but for PostgreSQL schemas" +optional = false +python-versions = ">=3.7,<4" +files = [ + {file = "migra-3.0.1663481299-py3-none-any.whl", hash = "sha256:061643e9af63488e085d729f267ed4af4249789979732b703ddeb2c478ec9a93"}, + {file = "migra-3.0.1663481299.tar.gz", hash = "sha256:0cf0c125d553008d9ff5402663a51703ccc474bb65b5a4f4727906dbf58e217f"}, +] + +[package.dependencies] +schemainspect = ">=3.1.1663480743" +six = "*" +sqlbag = "*" + +[package.extras] +pg = ["psycopg2-binary"] + [[package]] name = "packaging" version = "24.1" @@ -144,6 +404,28 @@ files = [ {file = "psycopg_binary-3.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:921f0c7f39590763d64a619de84d1b142587acc70fd11cbb5ba8fa39786f3073"}, ] +[[package]] +name = "psycopg2" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-2.9.9-cp310-cp310-win32.whl", hash = "sha256:38a8dcc6856f569068b47de286b472b7c473ac7977243593a288ebce0dc89516"}, + {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, + {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, + {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = 
"sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, + {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, + {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, + {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, + {file = "psycopg2-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:bac58c024c9922c23550af2a581998624d6e02350f4ae9c5f0bc642c633a2d5e"}, + {file = "psycopg2-2.9.9-cp39-cp39-win32.whl", hash = "sha256:c92811b2d4c9b6ea0285942b2e7cac98a59e166d59c588fe5cfe1eda58e72d59"}, + {file = "psycopg2-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:de80739447af31525feddeb8effd640782cf5998e1a4e9192ebdf829717e3913"}, + {file = "psycopg2-2.9.9.tar.gz", hash = "sha256:d1454bde93fb1e224166811694d600e746430c006fbb031ea06ecc2ea41bf156"}, +] + [[package]] name = "pydantic" version = "2.8.2" @@ -306,6 +588,20 @@ pluggy = ">=1.5,<2.0" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = "python-dotenv" version = "1.0.1" @@ -320,6 +616,177 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "s3transfer" +version = "0.10.2" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">=3.8" +files = [ + {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, + {file = "s3transfer-0.10.2.tar.gz", hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, +] + +[package.dependencies] +botocore = ">=1.33.2,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] + +[[package]] +name = "schemainspect" +version = "3.1.1663587362" +description = "Schema inspection for PostgreSQL (and possibly others)" +optional = false +python-versions = ">=3.7,<4" +files = [ + {file = "schemainspect-3.1.1663587362-py3-none-any.whl", hash = "sha256:3071265712863c4d4e742940a4b44ac685135af3c93416872ec1bb6c822c4aca"}, + {file = "schemainspect-3.1.1663587362.tar.gz", hash = "sha256:a295ad56f7a19c09e5e1ef9f16dadbf6392e26196cb5f05b5afe613c99ce7468"}, +] + +[package.dependencies] +sqlalchemy = "*" + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.32" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.32-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0c9045ecc2e4db59bfc97b20516dfdf8e41d910ac6fb667ebd3a79ea54084619"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1467940318e4a860afd546ef61fefb98a14d935cd6817ed07a228c7f7c62f389"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5954463675cb15db8d4b521f3566a017c8789222b8316b1e6934c811018ee08b"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:167e7497035c303ae50651b351c28dc22a40bb98fbdb8468cdc971821b1ae533"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b27dfb676ac02529fb6e343b3a482303f16e6bc3a4d868b73935b8792edb52d0"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bf2360a5e0f7bd75fa80431bf8ebcfb920c9f885e7956c7efde89031695cafb8"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-win32.whl", hash = "sha256:306fe44e754a91cd9d600a6b070c1f2fadbb4a1a257b8781ccf33c7067fd3e4d"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-win_amd64.whl", hash = "sha256:99db65e6f3ab42e06c318f15c98f59a436f1c78179e6a6f40f529c8cc7100b22"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:21b053be28a8a414f2ddd401f1be8361e41032d2ef5884b2f31d31cb723e559f"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b178e875a7a25b5938b53b006598ee7645172fccafe1c291a706e93f48499ff5"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723a40ee2cc7ea653645bd4cf024326dea2076673fc9d3d33f20f6c81db83e1d"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:295ff8689544f7ee7e819529633d058bd458c1fd7f7e3eebd0f9268ebc56c2a0"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:49496b68cd190a147118af585173ee624114dfb2e0297558c460ad7495f9dfe2"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:acd9b73c5c15f0ec5ce18128b1fe9157ddd0044abc373e6ecd5ba376a7e5d961"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-win32.whl", hash = "sha256:9365a3da32dabd3e69e06b972b1ffb0c89668994c7e8e75ce21d3e5e69ddef28"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-win_amd64.whl", hash = "sha256:8bd63d051f4f313b102a2af1cbc8b80f061bf78f3d5bd0843ff70b5859e27924"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6bab3db192a0c35e3c9d1560eb8332463e29e5507dbd822e29a0a3c48c0a8d92"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:19d98f4f58b13900d8dec4ed09dd09ef292208ee44cc9c2fe01c1f0a2fe440e9"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cd33c61513cb1b7371fd40cf221256456d26a56284e7d19d1f0b9f1eb7dd7e8"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d6ba0497c1d066dd004e0f02a92426ca2df20fac08728d03f67f6960271feec"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2b6be53e4fde0065524f1a0a7929b10e9280987b320716c1509478b712a7688c"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:916a798f62f410c0b80b63683c8061f5ebe237b0f4ad778739304253353bc1cb"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-win32.whl", hash = "sha256:31983018b74908ebc6c996a16ad3690301a23befb643093fcfe85efd292e384d"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-win_amd64.whl", hash = "sha256:4363ed245a6231f2e2957cccdda3c776265a75851f4753c60f3004b90e69bfeb"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b8afd5b26570bf41c35c0121801479958b4446751a3971fb9a480c1afd85558e"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c750987fc876813f27b60d619b987b057eb4896b81117f73bb8d9918c14f1cad"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada0102afff4890f651ed91120c1120065663506b760da4e7823913ebd3258be"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:78c03d0f8a5ab4f3034c0e8482cfcc415a3ec6193491cfa1c643ed707d476f16"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:3bd1cae7519283ff525e64645ebd7a3e0283f3c038f461ecc1c7b040a0c932a1"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-win32.whl", hash = "sha256:01438ebcdc566d58c93af0171c74ec28efe6a29184b773e378a385e6215389da"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-win_amd64.whl", hash = "sha256:4979dc80fbbc9d2ef569e71e0896990bc94df2b9fdbd878290bd129b65ab579c"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:6c742be912f57586ac43af38b3848f7688863a403dfb220193a882ea60e1ec3a"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:62e23d0ac103bcf1c5555b6c88c114089587bc64d048fef5bbdb58dfd26f96da"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:251f0d1108aab8ea7b9aadbd07fb47fb8e3a5838dde34aa95a3349876b5a1f1d"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef18a84e5116340e38eca3e7f9eeaaef62738891422e7c2a0b80feab165905f"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3eb6a97a1d39976f360b10ff208c73afb6a4de86dd2a6212ddf65c4a6a2347d5"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0c1c9b673d21477cec17ab10bc4decb1322843ba35b481585facd88203754fc5"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-win32.whl", hash = "sha256:c41a2b9ca80ee555decc605bd3c4520cc6fef9abde8fd66b1cf65126a6922d65"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-win_amd64.whl", hash = "sha256:8a37e4d265033c897892279e8adf505c8b6b4075f2b40d77afb31f7185cd6ecd"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:52fec964fba2ef46476312a03ec8c425956b05c20220a1a03703537824b5e8e1"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:328429aecaba2aee3d71e11f2477c14eec5990fb6d0e884107935f7fb6001632"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85a01b5599e790e76ac3fe3aa2f26e1feba56270023d6afd5550ed63c68552b3"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaf04784797dcdf4c0aa952c8d234fa01974c4729db55c45732520ce12dd95b4"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4488120becf9b71b3ac718f4138269a6be99a42fe023ec457896ba4f80749525"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:14e09e083a5796d513918a66f3d6aedbc131e39e80875afe81d98a03312889e6"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-win32.whl", hash = "sha256:0d322cc9c9b2154ba7e82f7bf25ecc7c36fbe2d82e2933b3642fc095a52cfc78"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-win_amd64.whl", hash = "sha256:7dd8583df2f98dea28b5cd53a1beac963f4f9d087888d75f22fcc93a07cf8d84"}, + {file = "SQLAlchemy-2.0.32-py3-none-any.whl", hash = "sha256:e567a8793a692451f706b363ccf3c45e056b67d90ead58c3bc9471af5d212202"}, + {file = "SQLAlchemy-2.0.32.tar.gz", hash = "sha256:c1b88cc8b02b6a5f0efb0345a03672d4c897dc7d92585176f88c67346f565ea8"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle 
(>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + +[[package]] +name = "sqlbag" +version = "0.1.1617247075" +description = "various snippets of SQL-related boilerplate" +optional = false +python-versions = "*" +files = [ + {file = "sqlbag-0.1.1617247075-py2.py3-none-any.whl", hash = "sha256:ecdef26d661f8640711030ac6ee618deb92b91f9f0fc2efbf8a3b133af13092d"}, + {file = "sqlbag-0.1.1617247075.tar.gz", hash = "sha256:b9d7862c3b2030356d796ca872907962fd54704066978d7ae89383f5123366ed"}, +] + +[package.dependencies] +packaging = "*" +six = "*" +sqlalchemy = "*" + +[package.extras] +maria = ["pymysql"] +pendulum = ["pendulum", "relativedelta"] +pg = ["psycopg2"] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -342,7 +809,24 @@ files = [ {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] +[[package]] +name = "urllib3" +version = "2.2.2" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "da6bf0ca52b92a9675a3ecfff9c719a068b93f64d015b31245e780b832722085" +content-hash = "76cb8c2e62bedbbec84f213c69ec4374a9c48ca909480d7e28fc34d3fe714d36" diff --git a/pyproject.toml b/pyproject.toml index b613b4b..f8bf3c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,12 @@ description = "" authors = ["OpenAQ "] readme = "README.md" +[tool.poetry.group.dev.dependencies] +boto3 = "^1.35.0" +requests = "^2.32.3" +migra = "^3.0.1663481299" +psycopg2 = "^2.9.9" + [tool.pytest.ini_options] pythonpath = "api" log_format = "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s"