From a52c27105c93983a1887fb31fbcd2f802f36a999 Mon Sep 17 00:00:00 2001 From: billy-the-fish Date: Wed, 17 Sep 2025 10:03:01 +0100 Subject: [PATCH 01/17] chore: more migration. --- .../hyperfunctions/approximate_row_count.mdx | 47 ++++ .../hyperfunctions/candlestick_agg.mdx | 43 ++++ .../hyperfunctions/compact_state_agg.mdx | 38 ++++ .../hyperfunctions/count_min_sketch.mdx | 35 +++ .../hyperfunctions/counter_agg.mdx | 49 ++++ .../hyperfunctions/days_in_month.mdx | 38 ++++ .../timescaledb/hyperfunctions/first.mdx | 57 +++++ .../timescaledb/hyperfunctions/freq_agg.mdx | 40 ++++ .../timescaledb/hyperfunctions/gauge_agg.mdx | 49 ++++ .../hyperfunctions/heartbeat_agg.mdx | 45 ++++ .../timescaledb/hyperfunctions/histogram.mdx | 63 ++++++ .../hyperfunctions/hyperfunctions.mdx | 25 +++ .../hyperfunctions/hyperloglog.mdx | 56 +++++ .../timescaledb/hyperfunctions/last.mdx | 60 +++++ .../timescaledb/hyperfunctions/max_n.mdx | 30 +++ .../timescaledb/hyperfunctions/max_n_by.mdx | 31 +++ .../timescaledb/hyperfunctions/min_n.mdx | 30 +++ .../timescaledb/hyperfunctions/min_n_by.mdx | 31 +++ .../hyperfunctions/month_normalize.mdx | 57 +++++ .../timescaledb/hyperfunctions/state_agg.mdx | 40 ++++ .../hyperfunctions/stats_agg-one-variable.mdx | 41 ++++ .../stats_agg-two-variables.mdx | 35 +++ .../timescaledb/hyperfunctions/tdigest.mdx | 44 ++++ .../hyperfunctions/time_bucket.mdx | 126 +++++++++++ .../hyperfunctions/time_bucket_gapfill.mdx | 31 +++ .../hyperfunctions/time_bucket_ng.mdx | 211 ++++++++++++++++++ .../hyperfunctions/time_weight.mdx | 49 ++++ .../timescaledb/hyperfunctions/uddsketch.mdx | 52 +++++ .../timescaledb/hypertables/create_table.mdx | 16 +- claude.md | 27 ++- docs.json | 83 +++++++ .../configuration-deployment/terraform.mdx | 147 ++++++++++++ 32 files changed, 1724 insertions(+), 2 deletions(-) create mode 100644 api-reference/timescaledb/hyperfunctions/approximate_row_count.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/candlestick_agg.mdx create 
mode 100644 api-reference/timescaledb/hyperfunctions/compact_state_agg.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/count_min_sketch.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/counter_agg.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/days_in_month.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/first.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/freq_agg.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/gauge_agg.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/heartbeat_agg.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/histogram.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/hyperfunctions.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/hyperloglog.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/last.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/max_n.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/max_n_by.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/min_n.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/min_n_by.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/month_normalize.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/state_agg.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/stats_agg-one-variable.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/stats_agg-two-variables.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/tdigest.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/time_bucket.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/time_bucket_gapfill.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/time_bucket_ng.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/time_weight.mdx create mode 100644 api-reference/timescaledb/hyperfunctions/uddsketch.mdx create mode 100644 
integrations/configuration-deployment/terraform.mdx diff --git a/api-reference/timescaledb/hyperfunctions/approximate_row_count.mdx b/api-reference/timescaledb/hyperfunctions/approximate_row_count.mdx new file mode 100644 index 0000000..bd1b4e4 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/approximate_row_count.mdx @@ -0,0 +1,47 @@ +--- +title: approximate_row_count() +description: Estimate the number of rows in a table +topics: [hyperfunctions] +keywords: [count, hyperfunctions] +tags: [approximate, rows] +license: apache +type: function +hyperfunction: + type: one-step aggregate +products: [cloud, mst, self_hosted] +--- + + Since 0.10.0 + +Get approximate row count for hypertable, distributed hypertable, or regular {PG} table based on catalog estimates. +This function supports tables with nested inheritance and declarative partitioning. + +The accuracy of `approximate_row_count` depends on the database having up-to-date statistics about the table or hypertable, which are updated by `VACUUM`, `ANALYZE`, and a few DDL commands. If you have auto-vacuum configured on your table or hypertable, or changes to the table are relatively infrequent, you might not need to explicitly `ANALYZE` your table as shown below. Otherwise, if your table statistics are too out-of-date, running this command updates your statistics and yields more accurate approximation results. + +## Samples + +Get the approximate row count for a single hypertable. + +```sql +ANALYZE conditions; + +SELECT * FROM approximate_row_count('conditions'); +``` + +The expected output: + +``` +approximate_row_count +--------------------- + 240000 +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `relation` | REGCLASS | - | ✔ | Hypertable or regular {PG} table to get row count for. | + +## Returns + +A numeric estimate of the number of rows in the specified table or hypertable. 
\ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/candlestick_agg.mdx b/api-reference/timescaledb/hyperfunctions/candlestick_agg.mdx new file mode 100644 index 0000000..07ba77c --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/candlestick_agg.mdx @@ -0,0 +1,43 @@ +--- +title: candlestick_agg() +description: Aggregate tick data into an intermediate form for further calculation +topics: [hyperfunctions] +tags: [hyperfunctions, finance, candlestick, open, high, low, close] +license: community +type: function +experimental: false +toolkit: true +hyperfunction: + family: financial analysis + type: aggregate + aggregates: + - candlestick_agg() +products: [cloud, mst, self_hosted] +--- + + Since 1.14.0 + +This is the first step for performing financial calculations on raw tick +data. Use `candlestick_agg` to create an intermediate aggregate from your +tick data. This intermediate form can then be used by one or more accessors +in this group to compute final results. + +Optionally, multiple such intermediate aggregate objects can be combined +using [`rollup()`](#rollup) before an accessor is applied. + +If you're starting with pre-aggregated candlestick data rather than raw tick +data, use the companion [`candlestick()`](#candlestick) function instead. +This function transforms the existing aggregated data into the correct form +for use with the candlestick accessors. + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `ts` | TIMESTAMPTZ | - | ✔ | Timestamp associated with stock price | +| `price` | DOUBLE PRECISION | - | ✔ | Stock quote/price at the given time | +| `volume` | DOUBLE PRECISION | - | ✔ | Volume of the trade | + +## Returns + +An object storing `(timestamp, value)` pairs for each of the opening, high, low, and closing prices, in addition to information used to calculate the total volume and Volume Weighted Average Price. 
\ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/compact_state_agg.mdx b/api-reference/timescaledb/hyperfunctions/compact_state_agg.mdx new file mode 100644 index 0000000..a10d5ad --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/compact_state_agg.mdx @@ -0,0 +1,38 @@ +--- +title: compact_state_agg() +description: Aggregate state data into a state aggregate for further analysis +topics: [hyperfunctions] +license: community +type: function +toolkit: true +experimental: true +hyperfunction: + family: state tracking + type: aggregate + aggregates: + - compact_state_agg() +products: [cloud, mst, self_hosted] +--- + + Early access 1.5.0 + +Aggregate a dataset containing state data into a state aggregate to track the time spent in each state. + +## Samples + +Create a state aggregate to track the status of some devices. + +```sql +SELECT toolkit_experimental.compact_state_agg(time, status) FROM devices; +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `ts` | TIMESTAMPTZ | - | ✔ | Timestamps associated with each state reading | +| `value` | TEXT \| BIGINT | - | ✔ | The state at that time | + +## Returns + +An object storing the total time spent in each state \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/count_min_sketch.mdx b/api-reference/timescaledb/hyperfunctions/count_min_sketch.mdx new file mode 100644 index 0000000..329a14d --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/count_min_sketch.mdx @@ -0,0 +1,35 @@ +--- +title: count_min_sketch() +description: Aggregate data into a `CountMinSketch` for approximate counting +topics: [hyperfunctions] +license: community +type: function +toolkit: true +experimental: true +hyperfunction: + family: frequency analysis + type: aggregate + aggregates: + - count_min_sketch() +products: [cloud, mst, self_hosted] +--- + + Early access 1.8.0 + +Aggregate data into a `CountMinSketch` object, which 
you can use to estimate the number of times a given item appears in a column. +The sketch produces a biased estimator of frequency. +It might overestimate the item count, but it can't underestimate. + +You can control the relative error and the probability that the estimate falls outside the error bounds. + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `values` | TEXT | - | ✔ | The column of values to count | +| `error` | DOUBLE PRECISION | - | ✔ | Error tolerance in estimate, calculated relative to the number of values added to the sketch | +| `probability` | DOUBLE PRECISION | - | ✔ | Probability that an estimate falls outside the error bounds | + +## Returns + +An object storing a table of counters \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/counter_agg.mdx b/api-reference/timescaledb/hyperfunctions/counter_agg.mdx new file mode 100644 index 0000000..d70df05 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/counter_agg.mdx @@ -0,0 +1,49 @@ +--- +title: counter_agg() +description: Aggregate counter data into an intermediate form for further analysis +topics: [hyperfunctions] +license: community +type: function +toolkit: true +experimental: false +hyperfunction: + family: counters and gauges + type: aggregate + aggregates: + - counter_agg() +products: [cloud, mst, self_hosted] +--- + + Since 1.3.0 + +This is the first step for performing any aggregate calculations +on counter data. Use `counter_agg` to create an intermediate aggregate +from your data. This intermediate form can then be used +by one or more accessors in this group to compute final results. Optionally, +you can combine multiple intermediate aggregate objects using +[`rollup()`](#rollup) before an accessor is applied. + +## Samples + +Create a counter aggregate to summarize daily counter data. 
+ +```sql +SELECT + time_bucket('1 day'::interval, ts) as dt, + counter_agg(ts, val) AS cs +FROM foo +WHERE id = 'bar' +GROUP BY time_bucket('1 day'::interval, ts) +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `ts` | TIMESTAMPTZ | - | ✔ | The time at each point | +| `value` | DOUBLE PRECISION | - | ✔ | The value of the counter at each point | +| `bounds` | TSTZRANGE | - | ❌ | The smallest and largest possible times that can be input to this aggregate. Bounds are required for extrapolation, but not for other accessor functions. If you don't specify bounds at aggregate creation time, you can add them later using the [`with_bounds`](#with_bounds) function. | + +## Returns + +The counter aggregate, containing data about the variables in an intermediate form. Pass the aggregate to accessor functions in the counter aggregates API to perform final calculations. Or, pass the aggregate to rollup functions to combine multiple counter aggregates into larger aggregates. \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/days_in_month.mdx b/api-reference/timescaledb/hyperfunctions/days_in_month.mdx new file mode 100644 index 0000000..105667c --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/days_in_month.mdx @@ -0,0 +1,38 @@ +--- +title: days_in_month() +description: Calculates days in month given a timestamptz +topics: [hyperfunctions] +keywords: [hyperfunctions, Toolkit, normalization] +license: community +type: function +toolkit: true +hyperfunction: + type: one-step operation +products: [cloud, mst, self_hosted] +--- + + Since 1.16.0 + +Given a timestamptz, returns how many days are in that month. 
+ +## Samples + +Calculate how many days are in the month of January 1, 2021: + +```sql +SELECT days_in_month('2021-01-01 00:00:00+03'::timestamptz) +``` + +The output looks like this: + +```sql +days_in_month +--------------------- +31 +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `date` | TIMESTAMPTZ | - | ✔ | Timestamp to use to calculate how many days in the month | \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/first.mdx b/api-reference/timescaledb/hyperfunctions/first.mdx new file mode 100644 index 0000000..e9286d8 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/first.mdx @@ -0,0 +1,57 @@ +--- +title: first() +description: Get the first value in one column when rows are ordered by another column +topics: [hyperfunctions] +keywords: [hyperfunctions] +license: apache +type: function +hyperfunction: + type: one-step aggregate +products: [cloud, mst, self_hosted] +--- + + Since 0.0.11-beta + +The `first` aggregate allows you to get the value of one column +as ordered by another. For example, `first(temperature, time)` returns the +earliest temperature value based on time within an aggregate group. + + + +The `last` and `first` commands do not use indexes, they perform a sequential +scan through the group. They are primarily used for ordered selection within a +`GROUP BY` aggregate, and not as an alternative to an +`ORDER BY time DESC LIMIT 1` clause to find the latest value, which uses +indexes. 
+ + + +## Samples + +Get the earliest temperature by device_id: + +```sql +SELECT device_id, first(temp, time) +FROM metrics +GROUP BY device_id; +``` + +This example uses first and last with an aggregate filter, and avoids null +values in the output: + +```sql +SELECT + TIME_BUCKET('5 MIN', time_column) AS interv, + AVG(temperature) as avg_temp, + first(temperature,time_column) FILTER(WHERE time_column IS NOT NULL) AS beg_temp, + last(temperature,time_column) FILTER(WHERE time_column IS NOT NULL) AS end_temp +FROM sensors +GROUP BY interv +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `value` | ANY ELEMENT | - | ✔ | The value to return | +| `time` | TIMESTAMP or INTEGER | - | ✔ | The timestamp to use for comparison | \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/freq_agg.mdx b/api-reference/timescaledb/hyperfunctions/freq_agg.mdx new file mode 100644 index 0000000..b73a08c --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/freq_agg.mdx @@ -0,0 +1,40 @@ +--- +title: freq_agg() +description: Aggregate data into a space-saving aggregate for further frequency analysis +topics: [hyperfunctions] +license: community +type: function +toolkit: true +experimental: true +hyperfunction: + family: frequency analysis + type: aggregate + aggregates: + - freq_agg() +products: [cloud, mst, self_hosted] +--- + + Early access 1.5.0 + +Aggregate data into a space-saving aggregate object, which stores frequency information in an intermediate form. +You can then use any of the accessors in this group to return estimated frequencies or the most common elements. + +## Samples + +Create a space-saving aggregate over a field `ZIP` in a `HomeSales` table. +This aggregate tracks any `ZIP` value that occurs in at least 5% of rows. 
+ +```sql +SELECT toolkit_experimental.freq_agg(0.05, ZIP) FROM HomeSales; +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `min_freq` | DOUBLE PRECISION | - | ✔ | Frequency cutoff for keeping track of a value. Values that occur less frequently than the cutoff are not stored. | +| `value` | AnyElement | - | ✔ | The column to store frequencies for | + +## Returns + +An object storing the most common elements of the given table and their estimated frequency. You can pass this object to any of the accessor functions to get a final result. \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/gauge_agg.mdx b/api-reference/timescaledb/hyperfunctions/gauge_agg.mdx new file mode 100644 index 0000000..2153dd4 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/gauge_agg.mdx @@ -0,0 +1,49 @@ +--- +title: gauge_agg() +description: Aggregate gauge data into an intermediate form for further analysis +topics: [hyperfunctions] +license: community +type: function +toolkit: true +experimental: true +hyperfunction: + family: counters and gauges + type: aggregate + aggregates: + - gauge_agg() +products: [cloud, mst, self_hosted] +--- + + Early access 1.6.0 + +This is the first step for performing any aggregate calculations +on gauge data. Use `gauge_agg` to create an intermediate aggregate +from your data. This intermediate form can then be used +by one or more accessors in this group to compute final results. Optionally, +you can combine multiple intermediate aggregate objects with +[`rollup()`](#rollup) before an accessor is applied. + +## Samples + +Create a gauge aggregate to summarize daily gauge data. 
+ +```sql +SELECT + time_bucket('1 day'::interval, ts) as dt, + gauge_agg(ts, val) AS cs +FROM foo +WHERE id = 'bar' +GROUP BY time_bucket('1 day'::interval, ts) +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `ts` | TIMESTAMPTZ | - | ✔ | The time at each point | +| `value` | DOUBLE PRECISION | - | ✔ | The value of the gauge at each point | +| `bounds` | TSTZRANGE | - | ❌ | The smallest and largest possible times that can be input to this aggregate. Bounds are required for extrapolation, but not for other accessor functions. If you don't specify bounds at aggregate creation time, you can add them later using the [`with_bounds`](#with_bounds) function. | + +## Returns + +The gauge aggregate, containing data about the variables in an intermediate form. Pass the aggregate to accessor functions in the gauge aggregates API to perform final calculations. Or, pass the aggregate to rollup functions to combine multiple gauge aggregates into larger aggregates. \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/heartbeat_agg.mdx b/api-reference/timescaledb/hyperfunctions/heartbeat_agg.mdx new file mode 100644 index 0000000..82019f5 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/heartbeat_agg.mdx @@ -0,0 +1,45 @@ +--- +title: heartbeat_agg() +description: Create a liveness aggregate from a set of heartbeats +topics: [hyperfunctions] +license: community +type: function +toolkit: true +hyperfunction: + family: state tracking + type: aggregate + aggregates: + - heartbeat_agg() +products: [cloud, mst, self_hosted] +--- + + Since 1.15.0 + +This takes a set of heartbeat timestamps and aggregates the liveness state of +the underlying system for the specified time range. + +## Samples + +Given a table called `system_health` with a `ping_time` column, construct an aggregate of system liveness for 10 days starting from Jan 1, 2022. 
This assumes a system is unhealthy if it hasn't been heard from in a 5-minute window. + +```sql +SELECT heartbeat_agg( + ping_time, + '01-01-2022 UTC', + '10 days', + '5 min') +FROM system_health; +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `heartbeat` | TIMESTAMPTZ | - | ✔ | The column containing the timestamps of the heartbeats. | +| `agg_start` | TIMESTAMPTZ | - | ✔ | The start of the time range over which this aggregate is tracking liveness. | +| `agg_duration` | INTERVAL | - | ✔ | The length of the time range over which this aggregate is tracking liveness. Any point in this range that doesn't closely follow a heartbeat is considered to be dead. | +| `heartbeat_liveness` | INTERVAL | - | ✔ | How long the system is considered to be live after each heartbeat. | + +## Returns + +The liveness data for the heartbeated system over the provided interval. \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/histogram.mdx b/api-reference/timescaledb/hyperfunctions/histogram.mdx new file mode 100644 index 0000000..57dfa22 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/histogram.mdx @@ -0,0 +1,63 @@ +--- +title: histogram() +description: Partition the dataset into buckets and get the number of counts in each bucket +topics: [hyperfunctions] +keywords: [histogram, hyperfunctions] +license: apache +type: function +hyperfunction: + type: one-step aggregate +products: [cloud, mst, self_hosted] +--- + + Since 0.5.0 + +The `histogram()` function represents the distribution of a set of +values as an array of equal-width buckets. It partitions the dataset +into a specified number of buckets (`nbuckets`) ranging between the +specified `min` and `max` values. 
+ +The return value is an array containing `nbuckets`+2 buckets, with the +middle `nbuckets` bins for values in the stated range, the first +bucket at the head of the array for values under the lower `min` bound, +and the last bucket for values greater than or equal to the `max` bound. +Each bucket is inclusive on its lower bound, and exclusive on its upper +bound. Therefore, values equal to the `min` are included in the bucket +starting with `min`, but values equal to the `max` are in the last bucket. + +## Samples + +A simple bucketing of device's battery levels from the `readings` dataset: + +```sql +SELECT device_id, histogram(battery_level, 20, 60, 5) +FROM readings +GROUP BY device_id +LIMIT 10; +``` + +The expected output: + +```sql + device_id | histogram +------------+------------------------------ + demo000000 | {0,0,0,7,215,206,572} + demo000001 | {0,12,173,112,99,145,459} + demo000002 | {0,0,187,167,68,229,349} + demo000003 | {197,209,127,221,106,112,28} + demo000004 | {0,0,0,0,0,39,961} + demo000005 | {12,225,171,122,233,80,157} + demo000006 | {0,78,176,170,8,40,528} + demo000007 | {0,0,0,126,239,245,390} + demo000008 | {0,0,311,345,116,228,0} + demo000009 | {295,92,105,50,8,8,442} +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `value` | ANY VALUE | - | ✔ | A set of values to partition into a histogram | +| `min` | NUMERIC | - | ✔ | The histogram's lower bound used in bucketing (inclusive) | +| `max` | NUMERIC | - | ✔ | The histogram's upper bound used in bucketing (exclusive) | +| `nbuckets` | INTEGER | - | ✔ | The integer value for the number of histogram buckets (partitions) | \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/hyperfunctions.mdx b/api-reference/timescaledb/hyperfunctions/hyperfunctions.mdx new file mode 100644 index 0000000..8620661 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/hyperfunctions.mdx @@ -0,0 +1,25 @@ +--- +title: 
Hyperfunctions +description: The full list of hyperfunctions available in TimescaleDB and TimescaleDB Toolkit, with required arguments, returns, and complete use examples +keywords: [hyperfunctions, Toolkit] +products: [cloud, mst, self_hosted] +--- + +Hyperfunctions in {TIMESCALE_DB} are a specialized set of functions that allow you to +analyze time-series data. You can use hyperfunctions to analyze anything you +have stored as time-series data, including IoT devices, IT systems, marketing +analytics, user behavior, financial metrics, and cryptocurrency. + +Some hyperfunctions are included by default in {TIMESCALE_DB}. For +additional hyperfunctions, you need to install the +[{TOOLKIT_LONG}][install-toolkit] {PG} extension. + +For more information, see the [hyperfunctions +documentation][hyperfunctions-howto]. + + + +[hyperfunctions-howto]: /manage-data/timescaledb/data-management/hyperfunctions/index +[install-toolkit]: /self-host/timescaledb/install-and-update/install-toolkit \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/hyperloglog.mdx b/api-reference/timescaledb/hyperfunctions/hyperloglog.mdx new file mode 100644 index 0000000..29dcf67 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/hyperloglog.mdx @@ -0,0 +1,56 @@ +--- +title: hyperloglog() +description: Aggregate data into a hyperloglog for approximate counting +topics: [hyperfunctions] +license: community +type: function +toolkit: true +hyperfunction: + family: approximate count distinct + type: aggregate + aggregates: + - hyperloglog() +products: [cloud, mst, self_hosted] +--- + + Since 1.3.0 + +This is the first step for estimating the approximate number of distinct +values using the `hyperloglog` algorithm. Use `hyperloglog` to create an +intermediate aggregate from your raw data. This intermediate form can then +be used by one or more accessors in this group to compute final results. 
+ +Optionally, multiple such intermediate aggregate objects can be combined +using [`rollup()`](#rollup) before an accessor is applied. + +If you're not sure what value to set for `buckets`, try using the alternate +aggregate function, [`approx_count_distinct()`](#approx_count_distinct). +`approx_count_distinct` also creates a `hyperloglog`, but it sets a +default bucket value that should work for many use cases. + +## Samples + +Given a table called `samples`, with a column called `weights`, return +a `hyperloglog` over the `weights` column. + +```sql +SELECT hyperloglog(32768, weights) FROM samples; +``` + +Using the same data, build a view from the aggregate that you can pass +to other `hyperloglog` functions. + +```sql +CREATE VIEW hll AS SELECT hyperloglog(32768, data) FROM samples; +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `buckets` | INTEGER | - | ✔ | Number of buckets in the hyperloglog. Increasing the number of buckets improves accuracy but increases memory use. Value is rounded up to the next power of 2, and must be between 2^4 (16) and 2^18. Setting a value less than 2^10 (1,024) may result in poor accuracy if the true cardinality is high and is not recommended. If unsure, start experimenting with 8,192 (2^13) which has an approximate error rate of 1.15%. | +| `value` | AnyElement | - | ✔ | The column containing the elements to count. The type must have an extended, 64-bit, hash function. 
| + +## Returns + +A `hyperloglog` object which can be passed to other hyperloglog APIs for rollups and final calculation \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/last.mdx b/api-reference/timescaledb/hyperfunctions/last.mdx new file mode 100644 index 0000000..f9c6ca0 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/last.mdx @@ -0,0 +1,60 @@ +--- +title: last() +description: Get the last value in one column when rows are ordered by another column +topics: [hyperfunctions] +keywords: [hyperfunctions] +license: apache +type: function +hyperfunction: + type: one-step aggregate +products: [cloud, mst, self_hosted] +--- + + Since 0.0.11-beta + +The `last` aggregate allows you to get the value of one column +as ordered by another. For example, `last(temperature, time)` returns the +latest temperature value based on time within an aggregate group. + + + +The `last` and `first` commands do not use indexes, they perform a sequential +scan through the group. They are primarily used for ordered selection within a +`GROUP BY` aggregate, and not as an alternative to an +`ORDER BY time DESC LIMIT 1` clause to find the latest value, which uses +indexes. 
+ + + +## Samples + +Get the temperature every 5 minutes for each device over the past day: + +```sql +SELECT device_id, time_bucket('5 minutes', time) AS interval, + last(temp, time) +FROM metrics +WHERE time > now () - INTERVAL '1 day' +GROUP BY device_id, interval +ORDER BY interval DESC; +``` + +This example uses first and last with an aggregate filter, and avoids null +values in the output: + +```sql +SELECT + TIME_BUCKET('5 MIN', time_column) AS interv, + AVG(temperature) as avg_temp, + first(temperature,time_column) FILTER(WHERE time_column IS NOT NULL) AS beg_temp, + last(temperature,time_column) FILTER(WHERE time_column IS NOT NULL) AS end_temp +FROM sensors +GROUP BY interv +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `value` | ANY ELEMENT | - | ✔ | The value to return | +| `time` | TIMESTAMP or INTEGER | - | ✔ | The timestamp to use for comparison | \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/max_n.mdx b/api-reference/timescaledb/hyperfunctions/max_n.mdx new file mode 100644 index 0000000..1071bd3 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/max_n.mdx @@ -0,0 +1,30 @@ +--- +title: max_n() +description: Find the largest values in a set of data +topics: [hyperfunctions] +tags: [hyperfunctions, toolkit, maximum] +license: community +type: function +toolkit: true +hyperfunction: + family: minimum and maximum + type: aggregate + aggregates: + - max_n() +products: [cloud, mst, self_hosted] +--- + + Since 1.16.0 + +Construct an aggregate which will keep track of the largest values passed through it. + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `value` | BIGINT \| DOUBLE PRECISION \| TIMESTAMPTZ | - | ✔ | The values passed into the aggregate | +| `capacity` | BIGINT | - | ✔ | The number of values to retain. | + +## Returns + +The compiled aggregate. 
Note that the exact type will be `MaxInts`, `MaxFloats`, or `MaxTimes` depending on the input type \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/max_n_by.mdx b/api-reference/timescaledb/hyperfunctions/max_n_by.mdx new file mode 100644 index 0000000..00fdfe1 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/max_n_by.mdx @@ -0,0 +1,31 @@ +--- +title: max_n_by() +description: Track the largest values and associated data in a set of values +topics: [hyperfunctions] +tags: [hyperfunctions, toolkit, maximum] +license: community +type: function +toolkit: true +hyperfunction: + family: minimum and maximum + type: aggregate + aggregates: + - max_n_by() +products: [cloud, mst, self_hosted] +--- + + Since 1.16.0 + +Construct an aggregate that keeps track of the largest values passed through it, as well as some associated data which is passed alongside the value. + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `value` | BIGINT \| DOUBLE PRECISION \| TIMESTAMPTZ | - | ✔ | The values passed into the aggregate | +| `data` | ANYELEMENT | - | ✔ | The data associated with a particular value | +| `capacity` | BIGINT | - | ✔ | The number of values to retain. | + +## Returns + +The compiled aggregate. 
Note that the exact type will be `MaxByInts`, `MaxByFloats`, or `MaxByTimes` depending on the input type \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/min_n.mdx b/api-reference/timescaledb/hyperfunctions/min_n.mdx new file mode 100644 index 0000000..22955d2 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/min_n.mdx @@ -0,0 +1,30 @@ +--- +title: min_n() +description: Find the smallest values in a set of data +topics: [hyperfunctions] +tags: [hyperfunctions, toolkit, minimum] +license: community +type: function +toolkit: true +hyperfunction: + family: minimum and maximum + type: aggregate + aggregates: + - min_n() +products: [cloud, mst, self_hosted] +--- + + Since 1.16.0 + +Construct an aggregate that keeps track of the smallest values passed through it. + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `value` | BIGINT \| DOUBLE PRECISION \| TIMESTAMPTZ | - | ✔ | The values passed into the aggregate | +| `capacity` | BIGINT | - | ✔ | The number of values to retain. | + +## Returns + +The compiled aggregate. 
Note that the exact type is `MinInts`, `MinFloats`, or `MinTimes` depending on the input type \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/min_n_by.mdx b/api-reference/timescaledb/hyperfunctions/min_n_by.mdx new file mode 100644 index 0000000..4797c26 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/min_n_by.mdx @@ -0,0 +1,31 @@ +--- +title: min_n_by() +description: Track the smallest values and associated data in a set of values +topics: [hyperfunctions] +tags: [hyperfunctions, toolkit, minimum] +license: community +type: function +toolkit: true +hyperfunction: + family: minimum and maximum + type: aggregate + aggregates: + - min_n_by() +products: [cloud, mst, self_hosted] +--- + + Since 1.16.0 + +Construct an aggregate that keeps track of the smallest values passed through it, as well as some associated data which is passed alongside the value. + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `value` | BIGINT \| DOUBLE PRECISION \| TIMESTAMPTZ | - | ✔ | The values passed into the aggregate | +| `data` | ANYELEMENT | - | ✔ | The data associated with a particular value | +| `capacity` | BIGINT | - | ✔ | The number of values to retain. | + +## Returns + +The compiled aggregate. 
Note that the exact type is `MinByInts`, `MinByFloats`, or `MinByTimes` depending on the input type \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/month_normalize.mdx b/api-reference/timescaledb/hyperfunctions/month_normalize.mdx new file mode 100644 index 0000000..92f59e6 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/month_normalize.mdx @@ -0,0 +1,57 @@ +--- +title: month_normalize() +description: Normalize a monthly metric based on number of days in month +topics: [hyperfunctions] +keywords: [hyperfunctions, Toolkit, normalization] +license: community +type: function +toolkit: true +hyperfunction: + type: one-step operation +products: [cloud, mst, self_hosted] +--- + + Since 1.16.0 + +Translate a metric to a standard month. A standard month is calculated as the exact number of days in a year divided by the number of months in a year, so 365.25/12 = 30.4375. `month_normalize()` divides a metric by the number of days in the corresponding calendar month and multiplies it by 30.4375. + +This enables you to compare metrics for different months and decide which one performed better, objectively. 
For example, in the following table that summarizes the number of sales for three months, January has the highest number of total sales: + +| Month | Sales | +|-------|-------| +| Jan | 3000 | +| Feb | 2900 | +| Mar | 2900 | + +When you normalize the sales metrics, you get the following result, showing that February in fact performed better: + +| Month | Normalized sales | +|-------|-------------------| +| Jan | 2945.56 | +| Feb | 3152.46 | +| Mar | 2847.38 | + +## Samples + +Get the normalized value for a metric of 1000, and a reference date of January +1, 2021: + +```sql +SELECT month_normalize(1000,'2021-01-01 00:00:00+03'::timestamptz) +``` + +The output looks like this: + +```sql +month_normalize +--------------------- +981.8548387096774 +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `metric` | float8 | - | ✔ | The metric value to normalize | +| `reference_date` | TIMESTAMPTZ | - | ✔ | Timestamp to normalize the metric with | +| `days` | float8 | 365.25/12 | ❌ | Number of days to use for normalization | \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/state_agg.mdx b/api-reference/timescaledb/hyperfunctions/state_agg.mdx new file mode 100644 index 0000000..20a674d --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/state_agg.mdx @@ -0,0 +1,40 @@ +--- +title: state_agg() +description: Aggregate state data into a state aggregate for further analysis +topics: [hyperfunctions] +license: community +type: function +toolkit: true +hyperfunction: + family: state tracking + type: aggregate + aggregates: + - state_agg() +products: [cloud, mst, self_hosted] +--- + + Since 1.15.0 + +Aggregate state data into a state aggregate to track state transitions. +Unlike [`compact_state_agg`](/api-reference/timescaledb/hyperfunctions/compact_state_agg), +which only stores durations, `state_agg` also stores the timestamps of +state transitions. 
+ +## Samples + +Create a state aggregate to track the status of some devices. + +```sql +SELECT state_agg(time, status) FROM devices; +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `ts` | TIMESTAMPTZ | - | ✔ | Timestamps associated with each state reading | +| `value` | TEXT \| BIGINT | - | ✔ | The state at that time | + +## Returns + +An object storing the periods spent in each state, including timestamps of state transitions \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/stats_agg-one-variable.mdx b/api-reference/timescaledb/hyperfunctions/stats_agg-one-variable.mdx new file mode 100644 index 0000000..ec94fb4 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/stats_agg-one-variable.mdx @@ -0,0 +1,41 @@ +--- +title: stats_agg() (one variable) +description: Aggregate data into an intermediate statistical aggregate form for further calculation +topics: [hyperfunctions] +keywords: [statistics, hyperfunctions, Toolkit] +license: community +type: function +toolkit: true +hyperfunction: + family: statistical and regression analysis + type: aggregate + aggregates: + - stats_agg() (one variable) +products: [cloud, mst, self_hosted] +--- + + Since 1.3.0 + +This is the first step for performing any statistical aggregate calculations +on one-dimensional data. Use `stats_agg` to create an intermediate aggregate +(`StatsSummary1D`) from your data. This intermediate form can then be used +by one or more accessors in this group to compute final results. Optionally, +multiple such intermediate aggregate objects can be combined using +[`rollup()`](#rollup) or [`rolling()`](#rolling) before an accessor is +applied. + +`stats_agg` is well suited for creating a continuous aggregate that can +serve multiple purposes later. For example, you can create a continuous +aggregate using `stats_agg` to calculate average and sum. 
Later, you can +reuse the same `StatsSummary1D` objects to calculate standard deviation from +the same continuous aggregate. + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `value` | DOUBLE PRECISION | - | ✔ | The variable to use for the statistical aggregate. | + +## Returns + +The statistical aggregate, containing data about the variables in an intermediate form. Pass the aggregate to accessor functions in the statistical aggregates API to perform final calculations. Or, pass the aggregate to rollup functions to combine multiple statistical aggregates into larger aggregates. \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/stats_agg-two-variables.mdx b/api-reference/timescaledb/hyperfunctions/stats_agg-two-variables.mdx new file mode 100644 index 0000000..9760032 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/stats_agg-two-variables.mdx @@ -0,0 +1,35 @@ +--- +title: stats_agg() (two variables) +description: Aggregate data into an intermediate statistical aggregate form for further calculation +topics: [hyperfunctions] +keywords: [statistics, hyperfunctions, Toolkit] +license: community +type: function +toolkit: true +hyperfunction: + family: statistical and regression analysis + type: aggregate + aggregates: + - stats_agg() (two variables) +products: [cloud, mst, self_hosted] +--- + + Since 1.3.0 + +This is the first step for performing any statistical aggregate calculations +on two-dimensional data. Use `stats_agg` to create an intermediate aggregate +(`StatsSummary2D`) from your data. This intermediate form can then be used +by one or more accessors in this group to compute the final results. +Optionally, multiple such intermediate aggregate objects can be combined +using [`rollup()`](#rollup) or [`rolling()`](#rolling) before an accessor is +applied. 
+ +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `y, x` | DOUBLE PRECISION | - | ✔ | The variables to use for the statistical aggregate. | + +## Returns + +The statistical aggregate, containing data about the variables in an intermediate form. Pass the aggregate to accessor functions in the statistical aggregates API to perform final calculations. Or, pass the aggregate to rollup functions to combine multiple statistical aggregates into larger aggregates. \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/tdigest.mdx b/api-reference/timescaledb/hyperfunctions/tdigest.mdx new file mode 100644 index 0000000..7215e78 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/tdigest.mdx @@ -0,0 +1,44 @@ +--- +title: tdigest() +description: Aggregate data in a `tdigest` for further calculation of percentile estimates +topics: [hyperfunctions] +license: community +type: function +toolkit: true +hyperfunction: + family: percentile approximation + type: aggregate + aggregates: + - tdigest() +products: [cloud, mst, self_hosted] +--- + + Since 1.0.0 + +This is the first step for calculating approximate percentiles with the +`tdigest` algorithm. Use `tdigest` to create an intermediate aggregate from +your raw data. This intermediate form can then be used by one or more +accessors in this group to compute final results. + +Optionally, multiple such intermediate aggregate objects can be combined +using [`rollup()`](#rollup) before an accessor is applied. + +## Samples + +Given a table called `samples`, with a column called `data`, build a +`tdigest` using the `data` column. Use 100 buckets for the approximation. + +```sql +SELECT tdigest(100, data) FROM samples; +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `buckets` | INTEGER | - | ✔ | Number of buckets in the digest. Increasing this provides more accurate quantile estimates, but requires more memory. 
| +| `value` | DOUBLE PRECISION | - | ✔ | Column of values to aggregate for the `tdigest` object. | + +## Returns + +A percentile estimator object created to calculate percentiles using the `tdigest` algorithm \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/time_bucket.mdx b/api-reference/timescaledb/hyperfunctions/time_bucket.mdx new file mode 100644 index 0000000..2a95291 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/time_bucket.mdx @@ -0,0 +1,126 @@ +--- +title: time_bucket() +description: Bucket rows by time interval to calculate aggregates +topics: [hyperfunctions] +keywords: [aggregate, hyperfunctions] +tags: [time buckets, date_trunc, date_bin] +license: apache +type: function +hyperfunction: + type: bucket +products: [cloud, mst, self_hosted] +--- + + Since 0.0.10-beta + +The `time_bucket` function is similar to the standard {PG} `date_bin` +function. Unlike `date_bin`, it allows for arbitrary time intervals of months or +longer. The return value is the bucket's start time. + +Buckets are aligned to start at midnight in UTC+0. The time bucket size (`bucket_width`) can be set as INTERVAL or INTEGER. For INTERVAL-type `bucket_width`, you can change the time zone with the optional `timezone` parameter. In this case, the buckets are realigned to start at midnight in the time zone you specify. + +Note that during shifts to and from daylight savings, the amount of data +aggregated into the corresponding buckets can be irregular. For example, if the +`bucket_width` is 2 hours, the number of bucketed hours is either three hours or one hour. 
+ + +## Samples + +Simple five-minute averaging: + +```sql +SELECT time_bucket('5 minutes', time) AS five_min, avg(cpu) +FROM metrics +GROUP BY five_min +ORDER BY five_min DESC LIMIT 10; +``` + +To report the middle of the bucket, instead of the left edge: + +```sql +SELECT time_bucket('5 minutes', time) + '2.5 minutes' + AS five_min, avg(cpu) +FROM metrics +GROUP BY five_min +ORDER BY five_min DESC LIMIT 10; +``` + +For rounding, move the alignment so that the middle of the bucket is at the +five-minute mark, and report the middle of the bucket: + +```sql +SELECT time_bucket('5 minutes', time, '-2.5 minutes'::INTERVAL) + '2.5 minutes' + AS five_min, avg(cpu) +FROM metrics +GROUP BY five_min +ORDER BY five_min DESC LIMIT 10; +``` + +In this example, add the explicit cast to ensure that {PG} chooses the +correct function. + +To shift the alignment of the buckets, you can use the origin parameter passed as +a timestamp, timestamptz, or date type. This example shifts the start of the +week to a Sunday, instead of the default of Monday: + +```sql +SELECT time_bucket('1 week', timetz, TIMESTAMPTZ '2017-12-31') + AS one_week, avg(cpu) +FROM metrics +WHERE time > TIMESTAMPTZ '2017-12-01' AND time < TIMESTAMPTZ '2018-01-03' +GROUP BY one_week +ORDER BY one_week DESC LIMIT 10; +``` + +The value of the origin parameter in this example is `2017-12-31`, a Sunday +within the period being analyzed. However, the origin provided to the function +can be before, during, or after the data being analyzed. All buckets are +calculated relative to this origin. So, in this example, any Sunday could have +been used. Note that because `time < TIMESTAMPTZ '2018-01-03'` is used in this +example, the last bucket would have only 3 days of data. The following example +casts to TIMESTAMP, which converts the time to local time according to the +server's time zone setting. 
+ +```sql +SELECT time_bucket(INTERVAL '2 hours', timetz::TIMESTAMP) + AS five_min, avg(cpu) +FROM metrics +GROUP BY five_min +ORDER BY five_min DESC LIMIT 10; +``` + +Bucket temperature values to calculate the average monthly temperature. Set the +time zone to 'Europe/Berlin' so bucket start and end times are aligned to +midnight in Berlin. + +```sql +SELECT time_bucket('1 month', ts, 'Europe/Berlin') AS month_bucket, + avg(temperature) AS avg_temp +FROM weather +GROUP BY month_bucket +ORDER BY month_bucket DESC LIMIT 10; +``` + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `bucket_width` | INTERVAL | - | ✔ | A {PG} time interval for how long each bucket is | +| `ts` | DATE, TIMESTAMP, or TIMESTAMPTZ | - | ✔ | The timestamp to bucket | +| `timezone` | TEXT | UTC+0 | ❌ | The time zone for calculating bucket start and end times. Can only be used with `TIMESTAMPTZ`. | +| `origin` | DATE, TIMESTAMP, or TIMESTAMPTZ | midnight on January 3, 2000 (for buckets < month) or midnight on January 1, 2000 (for month/year/century buckets) | ❌ | Buckets are aligned relative to this timestamp | +| `offset` | INTERVAL | - | ❌ | The time interval to offset all time buckets by. A positive value shifts bucket start and end times later. A negative value shifts bucket start and end times earlier. `offset` must be surrounded with double quotes when used as a named argument, because it is a reserved key word in {PG}. | + +**For integer time inputs:** + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `bucket_width` | INTEGER | - | ✔ | The bucket width | +| `ts` | INTEGER | - | ✔ | The timestamp to bucket | +| `offset` | INTEGER | - | ❌ | The amount to offset all buckets by. A positive value shifts bucket start and end times later. A negative value shifts bucket start and end times earlier. `offset` must be surrounded with double quotes when used as a named argument, because it is a reserved key word in {PG}. 
| + + + +If you use months as an interval for `bucket_width`, you cannot combine it with +a non-month component. For example, `1 month` and `3 months` are both valid +bucket widths, but `1 month 1 day` and `3 months 2 weeks` are not. + + \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/time_bucket_gapfill.mdx b/api-reference/timescaledb/hyperfunctions/time_bucket_gapfill.mdx new file mode 100644 index 0000000..ebad0f8 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/time_bucket_gapfill.mdx @@ -0,0 +1,31 @@ +--- +title: time_bucket_gapfill() +description: Bucket rows by time interval while filling gaps in data +topics: [hyperfunctions] +license: community +type: function +hyperfunction: + family: gapfilling + type: bucket + bucket_function: time_bucket_gapfill() +products: [cloud, mst, self_hosted] +--- + + Since 1.2.0 + +Group data into buckets based on time interval, while filling in gaps of missing data. +If you don't provide a gapfilling algorithm, such as `locf` or `interpolate`, gaps are left as `NULL` in the returned data. + +## Arguments + +| Name | Type | Default | Required | Description | +|--|--|--|--|--| +| `bucket_width` | INTERVAL \| INTEGER | - | ✔ | A Postgres time interval to specify the length of each bucket. For example, use `1 day` to get daily buckets. Use `INTEGER` only if your time column is integer-based. | +| `time` | TIMESTAMPTZ \| INTEGER | - | ✔ | The timestamp on which to base the bucket | +| `timezone` | TEXT | - | ❌ | The timezone to use for bucketing. For example, `Europe/Berlin`. Available in TimescaleDB 2.9 or later. Does not work for integer-based time. If you have an untyped `start` or `finish` argument and a `timezone` argument, you might run into a problem where you are not passing your arguments for the parameter that you expect. To solve this, either name your arguments or explicitly type cast them. | +| `start` | TIMESTAMPTZ \| INTEGER | - | ❌ | The start of the period to gapfill. 
Values before `start` are passed through, but no gapfilling is performed. Use `INTEGER` only if your time column is integer-based. Best practice is to use the `WHERE` clause. Specifying `start` is legacy. The `WHERE` is more performant, because the query planner can filter out chunks by constraint exclusion. | +| `finish` | TIMESTAMPTZ \| INTEGER | - | ❌ | The end of the period to gapfill. Values after `finish` are passed through, but no gapfilling is performed. Use `INTEGER` only if your time column is integer-based. Best practice is to use the `WHERE` clause. Specifying `finish` is legacy. The `WHERE` is more performant, because the query planner can filter out chunks by constraint exclusion. | + +## Returns + +The start time of the time bucket. \ No newline at end of file diff --git a/api-reference/timescaledb/hyperfunctions/time_bucket_ng.mdx b/api-reference/timescaledb/hyperfunctions/time_bucket_ng.mdx new file mode 100644 index 0000000..b79ae81 --- /dev/null +++ b/api-reference/timescaledb/hyperfunctions/time_bucket_ng.mdx @@ -0,0 +1,211 @@ +--- +title: time_bucket_ng() +description: Bucket rows by time interval with support for time zones, months, and years +topics: [hyperfunctions] +keywords: [aggregate, hyperfunctions] +tags: [time buckets] +license: apache +type: function +experimental: true +deprecated: true +hyperfunction: + type: bucket +products: [cloud, mst, self_hosted] +--- + + Deprecated + +The `time_bucket_ng()` function is an experimental version of the +[`time_bucket()`][time_bucket] function. It introduced some new capabilities, +such as monthly buckets and timezone support. Those features are now part of the +regular `time_bucket()` function. + + + +The `time_bucket()` and `time_bucket_ng()` functions are similar, but not +completely compatible. There are two main differences. + +Firstly, `time_bucket_ng()` doesn't work with timestamps prior to `origin`, +while `time_bucket()` does. + +Secondly, the default `origin` values differ. 
`time_bucket()` uses an origin +date of January 3, 2000, for buckets shorter than a month. `time_bucket_ng()` +uses an origin date of January 1, 2000, for all bucket sizes. + + + +## Samples + +In this example, `time_bucket_ng()` is used to create bucket data in three month +intervals: + +```sql +SELECT timescaledb_experimental.time_bucket_ng('3 month', date '2021-08-01'); + time_bucket_ng +---------------- + 2021-07-01 +(1 row) +``` + +This example uses `time_bucket_ng()` to bucket data in one year intervals: + +```sql +SELECT timescaledb_experimental.time_bucket_ng('1 year', date '2021-08-01'); + time_bucket_ng +---------------- + 2021-01-01 +(1 row) +``` + +To split time into buckets, `time_bucket_ng()` uses a starting point in time +called `origin`. The default origin is `2000-01-01`. `time_bucket_ng` cannot use +timestamps earlier than `origin`: + +```sql +SELECT timescaledb_experimental.time_bucket_ng('100 years', timestamp '1988-05-08'); +ERROR: origin must be before the given date +``` + +Going back in time from `origin` isn't usually possible, especially when you +consider timezones and daylight savings time (DST). Note also that there is no +reasonable way to split time in variable-sized buckets (such as months) from an +arbitrary `origin`, so `origin` defaults to the first day of the month. 
+ +To bypass these limitations, you can override the default `origin`: + +```sql +-- working with timestamps before 2000-01-01 +SELECT timescaledb_experimental.time_bucket_ng('100 years', timestamp '1988-05-08', origin => '1900-01-01'); + time_bucket_ng +--------------------- + 1900-01-01 00:00:00 + +-- unlike the default origin, which is Saturday, 2000-01-03 is Monday +SELECT timescaledb_experimental.time_bucket_ng('1 week', timestamp '2021-08-26', origin => '2000-01-03'); + time_bucket_ng +--------------------- + 2021-08-23 00:00:00 +``` + +This example shows how `time_bucket_ng()` is used to bucket data +by months in a specified timezone: + +```sql +-- note that timestamptz is displayed differently depending on the session parameters +SET TIME ZONE 'Europe/Moscow'; +SET + +SELECT timescaledb_experimental.time_bucket_ng('1 month', timestamptz '2001-02-03 12:34:56 MSK', timezone => 'Europe/Moscow'); + time_bucket_ng +------------------------ + 2001-02-01 00:00:00+03 +``` + +You can use `time_bucket_ng()` with continuous aggregates. 
This example tracks +the temperature in Moscow over seven day intervals: + +```sql +CREATE TABLE conditions( + day DATE NOT NULL, + city text NOT NULL, + temperature INT NOT NULL); + +SELECT create_hypertable( + 'conditions', by_range('day', INTERVAL '1 day') +); + +INSERT INTO conditions (day, city, temperature) VALUES + ('2021-06-14', 'Moscow', 26), + ('2021-06-15', 'Moscow', 22), + ('2021-06-16', 'Moscow', 24), + ('2021-06-17', 'Moscow', 24), + ('2021-06-18', 'Moscow', 27), + ('2021-06-19', 'Moscow', 28), + ('2021-06-20', 'Moscow', 30), + ('2021-06-21', 'Moscow', 31), + ('2021-06-22', 'Moscow', 34), + ('2021-06-23', 'Moscow', 34), + ('2021-06-24', 'Moscow', 34), + ('2021-06-25', 'Moscow', 32), + ('2021-06-26', 'Moscow', 32), + ('2021-06-27', 'Moscow', 31); + +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('7 days', day) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket; + +SELECT to_char(bucket, 'YYYY-MM-DD'), city, min, max +FROM conditions_summary_weekly +ORDER BY bucket; + + to_char | city | min | max +------------+--------+-----+----- + 2021-06-12 | Moscow | 22 | 27 + 2021-06-19 | Moscow | 28 | 34 + 2021-06-26 | Moscow | 31 | 32 +(3 rows) +``` + + + +The `by_range` dimension builder is an addition to TimescaleDB +2.13. For simpler cases, like this one, you can also create the +hypertable using the old syntax: + +```sql +SELECT create_hypertable('', '
', '