Skip to content

Commit

Permalink
#49 Monthly citywide and corridor tti/bi function
Browse files Browse the repository at this point in the history
  • Loading branch information
chmnata committed Aug 31, 2020
1 parent 80cc0dd commit 48a9c07
Show file tree
Hide file tree
Showing 5 changed files with 283 additions and 0 deletions.
16 changes: 16 additions & 0 deletions congestion_data_aggregation/sql/ReadMe.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Data source and data flow

## Automatic Pipeline

1) Citywide Travel Time Index
- Function `congestion.generate_citywide_tti_hourly` inserts the monthly average of the hourly citywide travel time index, from 7am to 11pm, every month. It aggregates travel time from the link level up to segments, filtering out segments that do not have at least 80% of their length worth of links. Highways use a baseline of the 25th percentile of travel time, while the rest of the streets use a baseline of the 10th percentile of travel time. Highways are defined in `congestion.highway_segments_v5`. This function runs monthly with the `congestion_refresh` DAG.

2) Citywide Buffer Index
- Function `congestion.generate_citywide_bi_hourly` inserts the monthly average of the hourly citywide buffer index, from 7am to 11pm, every month. It aggregates travel time from the link level up to segments, filtering out segments that do not have at least 80% of their length worth of links. This function runs monthly with the `congestion_refresh` DAG.

3) Corridor Travel Time Index
- Function `congestion.generate_corridor_tti_hourly` inserts the monthly average of the hourly corridor travel time index, from 7am to 11pm, every month for each corridor. It first aggregates travel time from the link level up to segments, filtering out segments that do not have at least 80% of their length worth of links. It then aggregates segments up to corridors, also filtering out corridors that do not have at least 80% of their length worth of segments. Highways use a baseline of the 25th percentile of travel time, while the rest of the streets use a baseline of the 10th percentile of travel time. Highways are defined in `congestion.highway_segments_v5`. This function runs monthly with the `congestion_refresh` DAG.

4) Corridor Buffer Index
- Function `congestion.generate_corridor_bi_hourly` inserts the monthly average of the hourly corridor buffer index, from 7am to 11pm, every month for each corridor. It first aggregates travel time from the link level up to segments, filtering out segments that do not have at least 80% of their length worth of links. It then aggregates segments up to corridors, also filtering out corridors that do not have at least 80% of their length worth of segments. This function runs monthly with the `congestion_refresh` DAG.

64 changes: 64 additions & 0 deletions congestion_data_aggregation/sql/generate_citywide_bi_hourly.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
-- FUNCTION: congestion.generate_citywide_bi_hourly(date)
--
-- Purpose:
--   Inserts the citywide buffer index (BI) into congestion.citywide_bi_hourly,
--   one row per (month, hourly time bin) for time bins in [07:00, 23:00).
--
-- Parameter:
--   _dt  start of the window; here.ta rows with _dt <= tx < _dt + 1 month are read.
--
-- Returns:
--   void; the only effect is the INSERT.
--
-- Pipeline:
--   link_speeds  hourly harmonic-mean speed per link, non-holiday ISO weekdays
--                (isodow 1-5, i.e. Monday to Friday) only.
--   segment_tt   hourly segment travel time, scaled up to the full segment length;
--                NULL unless links with data cover at least 80% of the segment.
--   segment_bi   per segment, month and time of day:
--                (95th percentile of travel time - mean travel time) / mean travel time.
--   final INSERT citywide average of segment BI weighted by segment length * AADT.
--
-- NOTE(review): `month` is date_part('month') cast to int, so the year is not stored;
--   confirm this matches how congestion.citywide_bi_hourly is meant to be keyed.
-- NOTE(review): the weight here is length * aadt, whereas
--   generate_citywide_tti_hourly weights by length * sqrt(aadt) - confirm intended.
-- NOTE(review): the function is SECURITY DEFINER but harmean(), datetime_bin() and
--   the timerange type are referenced without a schema and no search_path is pinned.

-- DROP FUNCTION congestion.generate_citywide_bi_hourly(date);

CREATE OR REPLACE FUNCTION congestion.generate_citywide_bi_hourly(
    _dt date)
    RETURNS void
    LANGUAGE 'sql'

    COST 100
    VOLATILE SECURITY DEFINER
AS $BODY$
-- Hourly harmonic-mean speed of every link that belongs to a segment.
WITH link_speeds AS (
    SELECT
        segment_id,
        link_dir,
        datetime_bin(tx, 60) AS datetime_bin,
        harmean(mean) AS spd_avg_all,
        length AS link_length
    FROM here.ta
    INNER JOIN congestion.segment_links_v5_19_4 USING (link_dir)
    LEFT JOIN ref.holiday AS hol ON hol.dt = tx::date
    WHERE hol.dt IS NULL                               -- anti-join: drop holidays
        AND date_part('isodow'::text, tx::date) < 6    -- Monday (1) to Friday (5)
        AND tx >= _dt
        AND tx < (_dt + '1 mon'::interval)             -- half-open one-month window
    GROUP BY segment_id, link_dir, datetime_bin(tx, 60), link_length
)
-- Hourly travel time per segment.
-- The 3.6 factor presumably converts metres / (km/h) to seconds - TODO confirm units.
, segment_tt AS (
    SELECT
        l.segment_id,
        l.datetime_bin,
        CASE
            WHEN SUM(l.link_length) >= 0.8 * b.length
                -- travel time over the covered links, extrapolated to the whole segment
                THEN SUM(l.link_length / l.spd_avg_all * 3.6) * b.length / SUM(l.link_length)
        END AS segment_tt
    FROM link_speeds AS l
    INNER JOIN congestion.segments_v5 AS b USING (segment_id)
    WHERE l.link_length / l.spd_avg_all IS NOT NULL
    GROUP BY l.segment_id, l.datetime_bin, b.length
)
-- Buffer index per segment, calendar month number and time of day.
, segment_bi AS (
    SELECT
        s.segment_id,
        date_part('month'::text, s.datetime_bin) AS month,
        s.datetime_bin::time without time zone AS time_bin,
        (
            percentile_cont(0.95::double precision) WITHIN GROUP (ORDER BY s.segment_tt)
            - avg(s.segment_tt)
        ) / avg(s.segment_tt) AS bi
    FROM segment_tt AS s
    WHERE s.segment_tt IS NOT NULL                     -- segments below 80% coverage
    GROUP BY
        s.segment_id,
        s.datetime_bin::time without time zone,
        date_part('month'::text, s.datetime_bin)
)

INSERT INTO congestion.citywide_bi_hourly (month, time_bin, num_segments, bi)
SELECT
    month::int,
    time_bin,
    count(segment_id) AS num_segments,
    sum(bi * segments_v5.length * segment_aadt_final.aadt)
        / sum(segments_v5.length * segment_aadt_final.aadt) AS bi
FROM segment_bi
INNER JOIN congestion.segments_v5 USING (segment_id)
INNER JOIN covid.segment_aadt_final USING (segment_id)
WHERE time_bin <@ '[07:00:00,23:00:00)'::timerange
GROUP BY month, time_bin
ORDER BY month, time_bin

$BODY$;

ALTER FUNCTION congestion.generate_citywide_bi_hourly(date)
    OWNER TO natalie;
64 changes: 64 additions & 0 deletions congestion_data_aggregation/sql/generate_citywide_tti_hourly.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
-- FUNCTION: congestion.generate_citywide_tti_hourly(date)
--
-- Purpose:
--   Inserts the citywide travel time index (TTI) into congestion.citywide_tti_hourly,
--   one row per (month, hourly time bin) for time bins in [07:00, 23:00).
--
-- Parameter:
--   _dt  start of the window; here.ta rows with _dt <= tx < _dt + 1 month are read.
--
-- Returns:
--   void; the only effect is the INSERT.
--
-- Pipeline:
--   link_speeds         hourly harmonic-mean speed per link, non-holiday ISO weekdays
--                       (isodow 1-5, i.e. Monday to Friday) only.
--   segment_tt_hourly   hourly segment travel time, scaled up to the full segment
--                       length; NULL unless links with data cover >= 80% of the segment.
--   segment_tt_monthly  mean travel time per segment, month and time of day.
--   segment_tti         travel time divided by the segment's baseline travel time.
--   final INSERT        citywide average of segment TTI weighted by
--                       segment length * sqrt(AADT).
--
-- NOTE(review): segments listed in congestion.highway_segments_v5 are divided by
--   tt_baseline_10pct_corr and all other segments by tt_baseline_25pct_corr. The
--   accompanying ReadMe states the opposite (highway = 25th percentile) - confirm
--   which one is intended.
-- NOTE(review): the function is SECURITY DEFINER but harmean() and the timerange type
--   are referenced without a schema and no search_path is pinned.

-- DROP FUNCTION congestion.generate_citywide_tti_hourly(date);

CREATE OR REPLACE FUNCTION congestion.generate_citywide_tti_hourly(
    _dt date)
    RETURNS void
    LANGUAGE 'sql'

    COST 100
    VOLATILE SECURITY DEFINER
AS $BODY$

-- Hourly harmonic-mean speed of every link that belongs to a segment.
WITH link_speeds AS (
    SELECT
        segment_id,
        link_dir,
        length AS link_length,
        date_trunc('hour', tx) AS datetime_bin,
        harmean(mean) AS spd_avg
    FROM here.ta
    INNER JOIN congestion.segment_links_v5_19_4 USING (link_dir)
    LEFT JOIN ref.holiday AS hol ON hol.dt = tx::date
    WHERE hol.dt IS NULL                               -- anti-join: drop holidays
        AND date_part('isodow'::text, tx::date) < 6    -- Monday (1) to Friday (5)
        AND tx >= _dt
        AND tx < (_dt + '1 mon'::interval)             -- half-open one-month window
    GROUP BY segment_id, link_dir, datetime_bin, length
)
-- Hourly travel time per segment.
-- The 3.6 factor presumably converts metres / (km/h) to seconds - TODO confirm units.
, segment_tt_hourly AS (
    SELECT
        l.segment_id,
        l.datetime_bin,
        CASE
            WHEN SUM(l.link_length) >= 0.8 * b.length
                -- travel time over the covered links, extrapolated to the whole segment
                THEN SUM(l.link_length / l.spd_avg * 3.6) * b.length / SUM(l.link_length)
            ELSE NULL
        END AS segment_tt_avg
    FROM link_speeds AS l
    INNER JOIN congestion.segments_v5 AS b USING (segment_id)
    WHERE l.link_length / l.spd_avg IS NOT NULL
    GROUP BY l.segment_id, l.datetime_bin, b.length
)
-- Mean travel time per segment, month and time of day.
, segment_tt_monthly AS (
    SELECT
        segment_id,
        date_trunc('month', datetime_bin) AS month,
        datetime_bin::time without time zone AS time_bin,
        avg(segment_tt_avg) AS segment_tt_avg
    FROM segment_tt_hourly
    WHERE segment_tt_avg IS NOT NULL                   -- segments below 80% coverage
    GROUP BY month, time_bin, segment_id
)
-- TTI per segment. Both joins are LEFT JOINs, so a segment without a baseline row
-- yields a NULL tti here.
, segment_tti AS (
    SELECT
        segment_id,
        tti.month,
        tti.time_bin,
        CASE
            WHEN highway.segment_id IS NOT NULL
                THEN tti.segment_tt_avg / b.tt_baseline_10pct_corr
            ELSE tti.segment_tt_avg / b.tt_baseline_25pct_corr
        END AS tti
    FROM segment_tt_monthly AS tti
    LEFT JOIN congestion.tt_segments_baseline_v5_2019_af AS b USING (segment_id)
    LEFT JOIN congestion.highway_segments_v5 AS highway USING (segment_id)
)

INSERT INTO congestion.citywide_tti_hourly (month, time_bin, num_segments, tti)
SELECT
    month,
    time_bin,
    count(segment_tti.segment_id) AS num_segments,
    sum(segment_tti.tti * segments_v5.length * sqrt(segment_aadt_final.aadt))
        / sum(segments_v5.length * sqrt(segment_aadt_final.aadt)) AS tti
FROM segment_tti
INNER JOIN congestion.segments_v5 USING (segment_id)
INNER JOIN covid.segment_aadt_final USING (segment_id)
WHERE time_bin <@ '[07:00:00, 23:00:00)'::timerange
    -- A NULL tti is skipped by the numerator's sum() but its weight would still be
    -- added to the denominator (and to num_segments), dragging the average down.
    AND segment_tti.tti IS NOT NULL
GROUP BY month, time_bin
ORDER BY month, time_bin

$BODY$;

ALTER FUNCTION congestion.generate_citywide_tti_hourly(date)
    OWNER TO natalie;
69 changes: 69 additions & 0 deletions congestion_data_aggregation/sql/generate_corridor_bi_hourly.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
-- FUNCTION: congestion.generate_corridor_bi_hourly(date)
--
-- Purpose:
--   Inserts the buffer index (BI) of every corridor into congestion.corridor_bi_hourly,
--   one row per (corridor, month, hourly time bin) for time bins in [07:00, 23:00).
--   The bi value is NULL when segments with data cover less than 80% of the corridor.
--
-- Parameter:
--   _dt  start of the window; here.ta rows with _dt <= tx < _dt + 1 month are read.
--
-- Returns:
--   void; the only effect is the INSERT.
--
-- Pipeline:
--   link_speeds  hourly harmonic-mean speed per link, non-holiday ISO weekdays
--                (isodow 1-5, i.e. Monday to Friday) only.
--   segment_tt   hourly segment travel time, scaled up to the full segment length;
--                NULL unless links with data cover at least 80% of the segment.
--   segment_bi   per segment, month and time of day:
--                (95th percentile of travel time - mean travel time) / mean travel time.
--   corridor_bi  length-weighted mean of segment BI per corridor.
--
-- NOTE(review): the INSERT has no column list, so it relies on the physical column
--   order of congestion.corridor_bi_hourly (corridor_id, month, time_bin, bi is what
--   is supplied). Add an explicit list once the table definition is confirmed.
-- NOTE(review): the function is SECURITY DEFINER but harmean(), datetime_bin() and
--   the timerange type are referenced without a schema and no search_path is pinned.

-- DROP FUNCTION congestion.generate_corridor_bi_hourly(date);

CREATE OR REPLACE FUNCTION congestion.generate_corridor_bi_hourly(
    _dt date)
    RETURNS void
    LANGUAGE 'sql'

    COST 100
    VOLATILE SECURITY DEFINER
AS $BODY$
-- Hourly harmonic-mean speed of every link that belongs to a segment.
WITH link_speeds AS (
    SELECT
        segment_id,
        link_dir,
        datetime_bin(tx, 60) AS datetime_bin,
        harmean(mean) AS spd_avg_all,
        length AS link_length
    FROM here.ta
    INNER JOIN congestion.segment_links_v5_19_4 USING (link_dir)
    LEFT JOIN ref.holiday AS hol ON hol.dt = tx::date
    WHERE hol.dt IS NULL                               -- anti-join: drop holidays
        AND date_part('isodow'::text, tx::date) < 6    -- Monday (1) to Friday (5)
        AND tx >= _dt
        AND tx < (_dt + '1 mon'::interval)             -- half-open one-month window
    GROUP BY segment_id, link_dir, datetime_bin(tx, 60), link_length
)
-- Hourly travel time per segment.
-- The 3.6 factor presumably converts metres / (km/h) to seconds - TODO confirm units.
, segment_tt AS (
    SELECT
        l.segment_id,
        l.datetime_bin,
        CASE
            WHEN SUM(l.link_length) >= 0.8 * b.length
                -- travel time over the covered links, extrapolated to the whole segment
                THEN SUM(l.link_length / l.spd_avg_all * 3.6) * b.length / SUM(l.link_length)
        END AS segment_tt
    FROM link_speeds AS l
    INNER JOIN congestion.segments_v5 AS b USING (segment_id)
    WHERE l.link_length / l.spd_avg_all IS NOT NULL
    GROUP BY l.segment_id, l.datetime_bin, b.length
)
-- Buffer index per segment, month and time of day.
, segment_bi AS (
    SELECT
        s.segment_id,
        date_trunc('month', s.datetime_bin) AS month,
        s.datetime_bin::time without time zone AS time_bin,
        (
            percentile_cont(0.95::double precision) WITHIN GROUP (ORDER BY s.segment_tt)
            - avg(s.segment_tt)
        ) / avg(s.segment_tt) AS bi
    FROM segment_tt AS s
    WHERE s.segment_tt IS NOT NULL                     -- segments below 80% coverage
    GROUP BY
        s.segment_id,
        s.datetime_bin::time without time zone,
        date_trunc('month', s.datetime_bin)
)
-- Length-weighted mean BI per corridor.
-- The weights are normalised by the length of the segments that actually have data
-- (sum(seg.length)), not by the full corridor length: dividing by cor.length would
-- scale the index down by the coverage ratio whenever coverage is below 100%.
, corridor_bi AS (
    SELECT
        corridor_id,
        month,
        time_bin,
        sum(bi * seg.length) / sum(seg.length) AS bi,
        sum(seg.length) AS seg_length,
        cor.length AS cor_length
    FROM segment_bi
    INNER JOIN congestion.segments_v5 AS seg USING (segment_id)
    INNER JOIN congestion.corridors_v1_merged_lookup USING (segment_id)
    INNER JOIN congestion.corridors_v1_merged AS cor USING (corridor_id)
    GROUP BY corridor_id, month, time_bin, cor.length
)
-- corridor_bi is already unique per (corridor_id, month, time_bin, cor_length),
-- so no further grouping is needed here.
INSERT INTO congestion.corridor_bi_hourly
SELECT
    corridor_id,
    month,
    time_bin,
    -- report a value only when at least 80% of the corridor length has data
    CASE WHEN seg_length >= 0.8 * cor_length THEN bi ELSE NULL END AS bi
FROM corridor_bi
WHERE time_bin <@ '[07:00:00, 23:00:00)'::timerange
ORDER BY corridor_id, month, time_bin

$BODY$;

ALTER FUNCTION congestion.generate_corridor_bi_hourly(date)
    OWNER TO natalie;
70 changes: 70 additions & 0 deletions congestion_data_aggregation/sql/generate_corridor_tti_hourly.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
-- FUNCTION: congestion.generate_corridor_tti_hourly(date)
--
-- Purpose:
--   Inserts the travel time index (TTI) of every corridor into
--   congestion.corridor_tti_hourly, one row per (corridor, month, hourly time bin)
--   for time bins in [07:00, 23:00). The tti value is NULL when segments with a
--   usable TTI cover less than 80% of the corridor.
--
-- Parameter:
--   _dt  start of the window; here.ta rows with _dt <= tx < _dt + 1 month are read.
--
-- Returns:
--   void; the only effect is the INSERT.
--
-- Pipeline:
--   link_speeds         hourly harmonic-mean speed per link, non-holiday ISO weekdays
--                       (isodow 1-5, i.e. Monday to Friday) only.
--   segment_tt_hourly   hourly segment travel time, scaled up to the full segment
--                       length; NULL unless links with data cover >= 80% of the segment.
--   segment_tt_monthly  mean travel time per segment, month and time of day.
--   segment_tti         travel time divided by the segment's baseline travel time.
--   corridor_tti        length-weighted mean of segment TTI per corridor.
--
-- NOTE(review): segments listed in congestion.highway_segments_v5 are divided by
--   tt_baseline_10pct_corr and all other segments by tt_baseline_25pct_corr. The
--   accompanying ReadMe states the opposite (highway = 25th percentile) - confirm
--   which one is intended.
-- NOTE(review): the INSERT has no column list, so it relies on the physical column
--   order of congestion.corridor_tti_hourly (corridor_id, month, time_bin, tti is
--   what is supplied). Add an explicit list once the table definition is confirmed.
-- NOTE(review): the function is SECURITY DEFINER but harmean() and the timerange type
--   are referenced without a schema and no search_path is pinned.

-- DROP FUNCTION congestion.generate_corridor_tti_hourly(date);

CREATE OR REPLACE FUNCTION congestion.generate_corridor_tti_hourly(
    _dt date)
    RETURNS void
    LANGUAGE 'sql'

    COST 100
    VOLATILE SECURITY DEFINER
AS $BODY$

-- Hourly harmonic-mean speed of every link that belongs to a segment.
WITH link_speeds AS (
    SELECT
        segment_id,
        link_dir,
        length AS link_length,
        date_trunc('hour', tx) AS datetime_bin,
        harmean(mean) AS spd_avg
    FROM here.ta
    INNER JOIN congestion.segment_links_v5_19_4 USING (link_dir)
    LEFT JOIN ref.holiday AS hol ON hol.dt = tx::date
    WHERE hol.dt IS NULL                               -- anti-join: drop holidays
        AND date_part('isodow'::text, tx::date) < 6    -- Monday (1) to Friday (5)
        AND tx >= _dt
        AND tx < (_dt + '1 mon'::interval)             -- half-open one-month window
    GROUP BY segment_id, link_dir, datetime_bin, length
)
-- Hourly travel time per segment.
-- The 3.6 factor presumably converts metres / (km/h) to seconds - TODO confirm units.
, segment_tt_hourly AS (
    SELECT
        l.segment_id,
        l.datetime_bin,
        CASE
            WHEN SUM(l.link_length) >= 0.8 * b.length
                -- travel time over the covered links, extrapolated to the whole segment
                THEN SUM(l.link_length / l.spd_avg * 3.6) * b.length / SUM(l.link_length)
            ELSE NULL
        END AS segment_tt_avg
    FROM link_speeds AS l
    INNER JOIN congestion.segments_v5 AS b USING (segment_id)
    WHERE l.link_length / l.spd_avg IS NOT NULL
    GROUP BY l.segment_id, l.datetime_bin, b.length
)
-- Mean travel time per segment, month and time of day.
, segment_tt_monthly AS (
    SELECT
        segment_id,
        date_trunc('month', datetime_bin) AS month,
        datetime_bin::time without time zone AS time_bin,
        avg(segment_tt_avg) AS segment_tt_avg
    FROM segment_tt_hourly
    WHERE segment_tt_avg IS NOT NULL                   -- segments below 80% coverage
    GROUP BY month, time_bin, segment_id
)
-- TTI per segment. Both joins are LEFT JOINs, so a segment without a baseline row
-- yields a NULL tti here.
, segment_tti AS (
    SELECT
        segment_id,
        tti.month,
        tti.time_bin,
        CASE
            WHEN highway.segment_id IS NOT NULL
                THEN tti.segment_tt_avg / b.tt_baseline_10pct_corr
            ELSE tti.segment_tt_avg / b.tt_baseline_25pct_corr
        END AS tti
    FROM segment_tt_monthly AS tti
    LEFT JOIN congestion.tt_segments_baseline_v5_2019_af AS b USING (segment_id)
    LEFT JOIN congestion.highway_segments_v5 AS highway USING (segment_id)
)
-- Length-weighted mean TTI per corridor.
-- The weights are normalised by the length of the segments that actually contribute
-- (sum(seg.length)), not by the full corridor length: dividing by cor.length would
-- scale the index down by the coverage ratio whenever coverage is below 100%.
, corridor_tti AS (
    SELECT
        corridor_id,
        month,
        time_bin,
        sum(segment_tti.tti * seg.length) / sum(seg.length) AS tti,
        sum(seg.length) AS seg_length,
        cor.length AS cor_length
    FROM segment_tti
    INNER JOIN congestion.segments_v5 AS seg USING (segment_id)
    INNER JOIN congestion.corridors_v1_merged_lookup USING (segment_id)
    INNER JOIN congestion.corridors_v1_merged AS cor USING (corridor_id)
    -- segments without a usable TTI must not count towards the average or coverage
    WHERE segment_tti.tti IS NOT NULL
    GROUP BY corridor_id, month, time_bin, cor.length
)
-- corridor_tti is already unique per (corridor_id, month, time_bin, cor_length),
-- so no further grouping is needed here.
INSERT INTO congestion.corridor_tti_hourly
SELECT
    corridor_id,
    month,
    time_bin,
    -- report a value only when at least 80% of the corridor length has data
    CASE WHEN seg_length >= 0.8 * cor_length THEN tti ELSE NULL END AS tti
FROM corridor_tti
WHERE time_bin <@ '[07:00:00, 23:00:00)'::timerange
ORDER BY corridor_id, month, time_bin

$BODY$;

ALTER FUNCTION congestion.generate_corridor_tti_hourly(date)
    OWNER TO natalie;

0 comments on commit 48a9c07

Please sign in to comment.