Merge branch 'master' into feat/cubestore-driver-rust-parsing

cube-js · Dec 16, 2024 · d891663 · d891663
2 parents 4fcba9e + b8afc84
commit d891663
Show file tree

Hide file tree

Showing 229 changed files with 6,041 additions and 2,122 deletions.
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
@@ -396,6 +396,7 @@ jobs:
     strategy:
       matrix:
         node-version: [ 20.x ]
+        python-version: [ 3.11 ]
       fail-fast: false
 
     steps:
@@ -421,6 +422,10 @@ jobs:
         uses: actions/setup-node@v4
         with:
           node-version: ${{ matrix.node-version }}
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
       - name: Get yarn cache directory path
         id: yarn-cache-dir-path
         run: echo "dir=$(yarn cache dir)" >> "$GITHUB_OUTPUT"
@@ -448,6 +453,11 @@ jobs:
         uses: GoodManWEN/oracle-client-action@main
       - name: Build client
         run: yarn build
+      - name: Build cubejs-backend-native (with Python)
+        run: yarn run native:build-release-python
+        working-directory: ./packages/cubejs-backend-native
+        env:
+          PYO3_PYTHON: python${{ matrix.python-version }}
       - name: Lerna tsc
         run: yarn tsc
       - name: Download cubestored-x86_64-unknown-linux-gnu-release artifact

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,39 @@
 All notable changes to this project will be documented in this file.
 See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
 
+## [1.1.11](https://github.com/cube-js/cube/compare/v1.1.10...v1.1.11) (2024-12-16)
+
+
+### Bug Fixes
+
+* TypeError: Cannot read properties of undefined (reading 'joins') ([14adaeb](https://github.com/cube-js/cube/commit/14adaebdd1c3d398bcd2997012da070999e47d9d))
+
+
+
+
+
+## [1.1.10](https://github.com/cube-js/cube/compare/v1.1.9...v1.1.10) (2024-12-16)
+
+
+### Bug Fixes
+
+* **api-gateway:** allow switch sql user when the new user is the same ([#9037](https://github.com/cube-js/cube/issues/9037)) ([a69c28f](https://github.com/cube-js/cube/commit/a69c28f524fa0625b825b98a38e7f5a211a98f74))
+* **api-gateway:** make sure DAP works sql pushdown ([#9021](https://github.com/cube-js/cube/issues/9021)) ([23695b2](https://github.com/cube-js/cube/commit/23695b2b5e886b5b7daf8b3f74003bb04e5b2e0b))
+* **cubestore:** Allow create an index from expressions ([#9006](https://github.com/cube-js/cube/issues/9006)) ([222cab8](https://github.com/cube-js/cube/commit/222cab897c289bfc929f217483e4905204bac12f))
+* **schema-compiler:** fix DAP with query_rewrite and python config ([#9033](https://github.com/cube-js/cube/issues/9033)) ([849790f](https://github.com/cube-js/cube/commit/849790f965dd0d9fddba11e3d8d124b84397ca9b))
+* **schema-compiler:** join relationship aliases ([ad4e8e3](https://github.com/cube-js/cube/commit/ad4e8e3872307ab77e035709e5208b0191f87f5b))
+
+
+### Features
+
+* **cubesql:** Basic VALUES support in rewrite engine ([#9041](https://github.com/cube-js/cube/issues/9041)) ([368671f](https://github.com/cube-js/cube/commit/368671fd1b53b2ed5ad8df6af113492982f23c0c))
+* **dremio-driver:** Add Dremio Cloud Support ([#8956](https://github.com/cube-js/cube/issues/8956)) ([d2c2fcd](https://github.com/cube-js/cube/commit/d2c2fcdaf8944ea7dd27e73b63c0b151c317022e))
+* **tesseract:** Support multiple join paths within single query ([#9047](https://github.com/cube-js/cube/issues/9047)) ([b62446e](https://github.com/cube-js/cube/commit/b62446e3c3893068f8dd8aa32d7204ea06a16f98))
+
+
+
+
+
 ## [1.1.9](https://github.com/cube-js/cube/compare/v1.1.8...v1.1.9) (2024-12-08)
 
 

diff --git a/docs/pages/product/auth/context.mdx b/docs/pages/product/auth/context.mdx
@@ -220,7 +220,7 @@ def masked(sql, security_context):
   if is_trusted_team:
     return sql
   else:
-    return "\"'--- masked ---'\""
+    return "'--- masked ---'"
 ```
 
 

diff --git a/docs/pages/product/caching.mdx b/docs/pages/product/caching.mdx
@@ -257,21 +257,26 @@ versions.
 
 Any query that is fulfilled by Cube will use one of the following cache types:
 
-- **[Pre-aggregations](#pre-aggregations) in Cube Store.** This is the most
-advantageous and performant option.
+- **[Pre-aggregations](#pre-aggregations) in Cube Store.** This cache type 
+indicates that the query utilized existing pre-aggregations in Cube Store, 
+so it did not need to go to the database for processing.
 - **Pre-aggregations in Cube Store with a suboptimal query plan.** This cache
-type indicates that queries still benefit from pre-aggregations in Cube Store
-but it's possible to get a performance boost by [using indexes][ref-indexes].
+type indicates that the query utilized pre-aggregations in Cube Store,
+but that it's possible to get a performance boost by [using indexes][ref-indexes].
 - **Pre-aggregations in the data source.** This cache type indicates that
-queries don't benefit from pre-aggregations in Cube Store and it's possible
-to get a massive performance boost by using Cube Store as [pre-aggregation
+the query utilized pre-aggregations in the upstream data source.
+These queries could gain a performance boost by using Cube Store as [pre-aggregation
 storage][ref-storage].
-- **[In-memory cache.](#in-memory-cache)** This cache type indicates that
-queries don't benefit from pre-aggregations at all. Queries directly hit the
-upstream data source and in-memory cache is used to speed up the execution of
-identical queries that arrive within a short period of time.
-- **No cache.** This cache type indicates queries that directly hit the
-upstream data source and have the worst performance possible.
+- **[In-memory cache.](#in-memory-cache)** This cache type indicates that the 
+results were retrieved from Cube's in-memory cache. All query results 
+are stored in Cube's in-memory cache, and if the same query is 
+run within a certain time frame, the results will be retrieved from in-memory 
+cache instead of being processed on the database or in Cube Store. This is the 
+fastest query retrieval method, but it requires that the exact same query was 
+run very recently.
+- **No cache.** This cache type indicates that the query was processed in the upstream 
+data source and was not accelerated using pre-aggregations. These queries could have 
+a significant performance boost if pre-aggregations and Cube Store were utilized.
 
 In [Query History][ref-query-history] and throughout Cube Cloud, colored bolt
 icons are used to indicate the cache type. Also, [Performance

diff --git a/docs/pages/reference/configuration/environment-variables.mdx b/docs/pages/reference/configuration/environment-variables.mdx
@@ -560,6 +560,14 @@ The timeout value for any queries made to the database by Cube.
 | ---------------------------------------- | ---------------------- | --------------------- |
 | A number in seconds or a duration string | `10m`                  | `10m`                 |
 
+<InfoBox>
+
+There's a hard limit of 20 minutes for queries that ingest data into Cube Store
+when pre-aggregations are built. If you bump into this limit, consider using
+an export bucket and splitting pre-aggregations into partitions.
+
+</InfoBox>
+
 ## `CUBEJS_DB_FETCH_COLUMNS_BY_ORDINAL_POSITION`
 
 Force fetching of columns by ordinal positions. Certain data-providers (e.g., Redshift) do not guarantee columns in the

diff --git a/docs/pages/reference/data-model/joins.mdx b/docs/pages/reference/data-model/joins.mdx
@@ -404,6 +404,119 @@ cubes:
 
 </CodeTabs>
 
+## Chasm and fan traps
+
+Cube automatically detects chasm and fan traps based on the `many_to_one` and `one_to_many` relationships defined in joins.
+When detected, Cube generates a deduplication query that evaluates all distinct primary keys within the multiplied measure's cube and then joins distinct primary keys to this cube on itself to calculate the aggregation result.
+If there's more than one multiplied measure in a query, then such query is generated for every such multiplied measure, and results are joined.
+Cube solves for chasm and fan traps during query time.
+If there's a pre-aggregation that fits the measure multiplication requirements, it will be leveraged to serve such a query.
+Such pre-aggregations and queries are always considered non-additive for the purpose of pre-aggregation matching.
+
+Let's consider an example data model:
+
+<CodeTabs>
+
+```javascript
+cube(`orders`, {
+  sql_table: `orders`,
+
+  dimensions: {
+    id: {
+      sql: `id`,
+      type: `number`,
+      primary_key: true
+    },
+    city: {
+      sql: `city`,
+      type: `string`
+    }
+  },
+
+  joins: {
+    customers: {
+      relationship: `many_to_one`,
+      sql: `${CUBE}.customer_id = ${customers.id}`,
+    },
+  },
+});
+
+cube(`customers`, {
+  sql_table: `customers`,
+
+  measures: {
+    average_age: {
+      sql: `age`,
+      type: `avg`,
+    }
+  },
+
+  dimensions: {
+    id: {
+      sql: `id`,
+      type: `number`,
+      primary_key: true
+    }
+  }
+});
+```
+
+```yaml
+cubes:
+  - name: orders
+    sql_table: orders
+
+    dimensions:
+      - name: id
+        sql: id
+        type: number
+        primary_key: true
+      - name: city
+        sql: city
+        type: string
+
+    joins:
+      - name: customers
+        relationship: many_to_one
+        sql: "{orders}.customer_id = {customers.id}"
+
+  - name: customers
+    sql_table: customers
+
+    dimensions:
+      - name: id
+        sql: id
+        type: number
+        primary_key: true
+
+    measures:
+      - name: average_age
+        sql: age
+        type: avg
+
+```
+
+</CodeTabs>
+
+If we try to query `customers.average_age` by `orders.city`, Cube detects that the `average_age` measure in the `customers` cube would be multiplied by the join from `orders` to `customers` and generates SQL similar to:
+
+```sql
+SELECT
+  "keys"."orders__city",
+  avg("customers_key__customers".age) "customers__average_age"
+FROM
+  (
+    SELECT
+      DISTINCT "customers_key__orders".city "orders__city",
+      "customers_key__customers".id "customers__id"
+    FROM
+      orders AS "customers_key__orders"
+      LEFT JOIN customers AS "customers_key__customers" ON "customers_key__orders".customer_id = "customers_key__customers".id
+  ) AS "keys"
+  LEFT JOIN customers AS "customers_key__customers" ON "keys"."customers__id" = "customers_key__customers".id
+GROUP BY
+  1
+```
+
 ## CUBE reference
 
 When you have several joined cubes, you should accurately use columns’ names to

diff --git a/docs/pages/reference/data-model/pre-aggregations.mdx b/docs/pages/reference/data-model/pre-aggregations.mdx
@@ -921,7 +921,7 @@ cubes:
 
 </CodeTabs>
 
-For possible `every` parameter values please refer to
+To have a pre-aggregation rebuild at a specific time of day, you can use a CRON string with some limitations. For more details about values that can be used with the `every` parameter, please refer to the
 [`refreshKey`][ref-cube-refreshkey] documentation.
 
 You can also use `every` with `sql`:

diff --git a/lerna.json b/lerna.json
@@ -1,5 +1,5 @@
 {
-  "version": "1.1.9",
+  "version": "1.1.11",
   "npmClient": "yarn",
   "useWorkspaces": true,
   "packages": [

diff --git a/packages/cubejs-api-gateway/CHANGELOG.md b/packages/cubejs-api-gateway/CHANGELOG.md
@@ -3,6 +3,19 @@
 All notable changes to this project will be documented in this file.
 See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
 
+## [1.1.10](https://github.com/cube-js/cube/compare/v1.1.9...v1.1.10) (2024-12-16)
+
+
+### Bug Fixes
+
+* **api-gateway:** allow switch sql user when the new user is the same ([#9037](https://github.com/cube-js/cube/issues/9037)) ([a69c28f](https://github.com/cube-js/cube/commit/a69c28f524fa0625b825b98a38e7f5a211a98f74))
+* **api-gateway:** make sure DAP works sql pushdown ([#9021](https://github.com/cube-js/cube/issues/9021)) ([23695b2](https://github.com/cube-js/cube/commit/23695b2b5e886b5b7daf8b3f74003bb04e5b2e0b))
+* **schema-compiler:** fix DAP with query_rewrite and python config ([#9033](https://github.com/cube-js/cube/issues/9033)) ([849790f](https://github.com/cube-js/cube/commit/849790f965dd0d9fddba11e3d8d124b84397ca9b))
+
+
+
+
+
 ## [1.1.9](https://github.com/cube-js/cube/compare/v1.1.8...v1.1.9) (2024-12-08)
 
 **Note:** Version bump only for package @cubejs-backend/api-gateway

diff --git a/packages/cubejs-api-gateway/package.json b/packages/cubejs-api-gateway/package.json
@@ -2,7 +2,7 @@
   "name": "@cubejs-backend/api-gateway",
   "description": "Cube.js API Gateway",
   "author": "Cube Dev, Inc.",
-  "version": "1.1.9",
+  "version": "1.1.10",
   "repository": {
     "type": "git",
     "url": "https://github.com/cube-js/cube.git",
@@ -27,8 +27,8 @@
     "dist/src/*"
   ],
   "dependencies": {
-    "@cubejs-backend/native": "1.1.9",
-    "@cubejs-backend/shared": "1.1.8",
+    "@cubejs-backend/native": "1.1.10",
+    "@cubejs-backend/shared": "1.1.10",
     "@ungap/structured-clone": "^0.3.4",
     "body-parser": "^1.19.0",
     "chrono-node": "^2.6.2",

diff --git a/packages/cubejs-api-gateway/src/gateway.ts b/packages/cubejs-api-gateway/src/gateway.ts
@@ -1214,18 +1214,20 @@ class ApiGateway {
             currentQuery = this.parseMemberExpressionsInQuery(currentQuery);
           }
 
-          let normalizedQuery = normalizeQuery(currentQuery, persistent);
+          const normalizedQuery = normalizeQuery(currentQuery, persistent);
+          let evaluatedQuery = normalizedQuery;
 
           if (hasExpressionsInQuery) {
             // We need to parse/eval all member expressions early as applyRowLevelSecurity
             // needs to access the full SQL query in order to evaluate rules
-            normalizedQuery =
+            evaluatedQuery =
               this.evalMemberExpressionsInQuery(normalizedQuery);
           }
 
           // First apply cube/view level security policies
           const queryWithRlsFilters = await compilerApi.applyRowLevelSecurity(
             normalizedQuery,
+            evaluatedQuery,
             context
           );
           // Then apply user-supplied queryRewrite
@@ -1237,7 +1239,7 @@ class ApiGateway {
           // applyRowLevelSecurity may add new filters which may contain raw member expressions
           // if that's the case, we should run an extra pass of parsing here to make sure
           // nothing breaks down the road
-          if (this.hasExpressionsInQuery(rewrittenQuery)) {
+          if (hasExpressionsInQuery || this.hasExpressionsInQuery(rewrittenQuery)) {
             rewrittenQuery = this.parseMemberExpressionsInQuery(rewrittenQuery);
             rewrittenQuery = this.evalMemberExpressionsInQuery(rewrittenQuery);
           }

diff --git a/packages/cubejs-api-gateway/src/sql-server.ts b/packages/cubejs-api-gateway/src/sql-server.ts
@@ -306,7 +306,11 @@ export class SQLServer {
   protected createDefaultCanSwitchSqlUserFn(options: SQLServerOptions): CanSwitchSQLUserFn {
     const superUser = options.sqlSuperUser || getEnv('sqlSuperUser');
 
-    return async (current: string | null, _user: string) => {
+    return async (current: string | null, newUser: string) => {
+      if (current === newUser) {
+        return true;
+      }
+
       if (superUser) {
         return current === superUser;
       }

diff --git a/packages/cubejs-athena-driver/CHANGELOG.md b/packages/cubejs-athena-driver/CHANGELOG.md
@@ -3,6 +3,22 @@
 All notable changes to this project will be documented in this file.
 See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
 
+## [1.1.11](https://github.com/cube-js/cube/compare/v1.1.10...v1.1.11) (2024-12-16)
+
+**Note:** Version bump only for package @cubejs-backend/athena-driver
+
+
+
+
+
+## [1.1.10](https://github.com/cube-js/cube/compare/v1.1.9...v1.1.10) (2024-12-16)
+
+**Note:** Version bump only for package @cubejs-backend/athena-driver
+
+
+
+
+
 ## [1.1.9](https://github.com/cube-js/cube/compare/v1.1.8...v1.1.9) (2024-12-08)
 
 **Note:** Version bump only for package @cubejs-backend/athena-driver