Pull 2.0.latest back into 1.8.x branch (#659)
benc-db authored May 10, 2024
1 parent bcea09d commit b8acdbf
Showing 159 changed files with 2,958 additions and 1,688 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/dependabot.yml
@@ -5,4 +5,4 @@ updates:
directory: "/"
schedule:
interval: "daily"
-rebase-strategy: "disabled"
+rebase-strategy: "disabled"
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/release.md
@@ -7,4 +7,4 @@ assignees: ''

---

-### TBD
+### TBD
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
@@ -4,7 +4,7 @@ Resolves #

<!---
Include the number of the issue addressed by this PR above if applicable.
Example:
resolves #1234
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -209,7 +209,7 @@ jobs:
find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
- name: Install dbt-core
run: |
-python -m pip install dbt-core==1.8.0-b1
+python -m pip install dbt-core==1.8.0rc2
- name: Check wheel distributions
run: |
dbt --version
2 changes: 1 addition & 1 deletion .gitignore
@@ -18,4 +18,4 @@ test.env
logs/
.venv*
*.sublime*
-.python-version
+.python-version
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,20 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 22.12.0
hooks:
- id: black
language_version: python3.11
args: [--config, black.ini]
- repo: https://github.com/asottile/reorder-python-imports
rev: v3.12.0
hooks:
- id: reorder-python-imports
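
These hooks can be activated locally with the standard `pre-commit` workflow; a sketch of typical usage (not part of this diff):

```sh
pip install pre-commit      # install the tool (pipx also works)
pre-commit install          # register the git hook in this clone
pre-commit run --all-files  # run every configured hook against the repo once
```

Once installed, the hooks run automatically on each `git commit`, as the CONTRIBUTING.MD change below notes.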
29 changes: 27 additions & 2 deletions CHANGELOG.md
@@ -1,18 +1,43 @@
-## dbt-databricks 1.8.0 (TBD)
+## dbt-databricks 2.0.0 (TBD)

### Features

- Support `on_config_change` for materialized views, expand the supported config options ([536](https://github.com/databricks/dbt-databricks/pull/536))
- Support `on_config_change` for streaming tables, expand the supported config options ([569](https://github.com/databricks/dbt-databricks/pull/569))
- Support insert overwrite on SQL Warehouses ([623](https://github.com/databricks/dbt-databricks/pull/623))
- Support Databricks tags for tables/views/incrementals ([631](https://github.com/databricks/dbt-databricks/pull/631))

### Under the Hood

- Upgrade databricks-sql-connector to 3.1.0 ([593](https://github.com/databricks/dbt-databricks/pull/593))
- Migrate to decoupled architecture ([596](https://github.com/databricks/dbt-databricks/pull/596))
- Finish migrating integration tests ([623](https://github.com/databricks/dbt-databricks/pull/623))
- Streamline the process of determining materialization types ([655](https://github.com/databricks/dbt-databricks/pull/655))
- Improve catalog performance by getting column description from project for UC ([658](https://github.com/databricks/dbt-databricks/pull/658))

-## dbt-databricks 1.7.11 (TBD)
+## dbt-databricks 1.7.14 (May 1, 2024)

### Fixes

- Auth headers should now evaluate at call time ([648](https://github.com/databricks/dbt-databricks/pull/648))
- User-configurable OAuth Scopes (currently limited to AWS) (thanks @stevenayers!) ([641](https://github.com/databricks/dbt-databricks/pull/641))

### Under the hood

- Reduce default idle limit for connection reuse to 60s and start organizing event logging ([648](https://github.com/databricks/dbt-databricks/pull/648))

## dbt-databricks 1.7.13 (April 8, 2024)

### Features

- Apply tblproperties to python models (using alter table) ([633](https://github.com/databricks/dbt-databricks/pull/633))
- Make OAuth redirect url configurable (thanks @johnsequeira-paradigm for the inspiration!) ([635](https://github.com/databricks/dbt-databricks/pull/635))

### Fixes

- Up default socket timeout to 10 minutes

## dbt-databricks 1.7.11 (Mar 26, 2024)

### Fixes

41 changes: 21 additions & 20 deletions CONTRIBUTING.MD
@@ -5,6 +5,7 @@ We happily welcome contributions to the `dbt-databricks` package. We use [GitHub
Contributions are licensed on a license-in/license-out basis.

## Communication

Before starting work on a major feature, please reach out to us via GitHub, Slack, email, etc. We will make sure no one else is already working on it and ask you to open a GitHub issue. A "major feature" is defined as any change that is > 100 LOC altered (not including tests), or changes any user-facing behavior.

We will use the GitHub issue to discuss the feature and come to agreement. This is to prevent your time being wasted, as well as ours. The GitHub review process for major features is also important so that organizations with commit access can come to agreement on design.
@@ -18,21 +19,20 @@ If it is appropriate to write a design document, the document must be hosted eit
1. [Run the unit tests](#unit-tests) (and the [integration tests](#functional--integration-tests) if you [can](#please-test-what-you-can))
1. [Sign your commits](#sign-your-work)
1. [Open a pull request](#pull-request-review-process)
-   - Answer the PR template questions as best as you can
-   - _Recommended: [Allow edits from Maintainers]_

+   - Answer the PR template questions as best as you can
+   - _Recommended: [Allow edits from Maintainers]_

## Pull request review process

dbt-databricks uses a **two-step review process** to merge PRs to `main`. We first squash the patch onto a staging branch so that we can securely run our full matrix of integration tests against a real Databricks workspace. Then we merge the staging branch to `main`.

> **Note:** When you create a pull request we recommend that you _[Allow Edits from Maintainers]_. This smooths our two-step process and also lets your reviewer easily commit minor fixes or changes.
-A dbt-databricks maintainer will review your PR and may suggest changes for style and clarity, or they may request that you add unit or integration tests.
+A dbt-databricks maintainer will review your PR and may suggest changes for style and clarity, or they may request that you add unit or integration tests.

Once your patch is approved, a maintainer will create a staging branch and either you or the maintainer (if you allowed edits from maintainers) will change the base branch of your PR to the staging branch. Then a maintainer will squash and merge the PR into the staging branch.

-dbt-databricks uses staging branches to run our full matrix of functional and integration tests via Github Actions. This extra step is required for security because GH Action workflows that run on pull requests from forks can't access our testing Databricks workspace.
+dbt-databricks uses staging branches to run our full matrix of functional and integration tests via Github Actions. This extra step is required for security because GH Action workflows that run on pull requests from forks can't access our testing Databricks workspace.

If the functional or integration tests fail as a result of your change, a maintainer will work with you to fix it _on your fork_ and then repeat this step.

@@ -58,7 +58,10 @@ tox -e linter

To simplify reviews you can commit any format changes in a separate commit.

Alternatively, [install pre-commit hooks](https://pre-commit.com/#3-install-the-git-hook-scripts) and the linting will be run automatically prior to accepting your commit.

## Sign your work

The sign-off is a simple line at the end of the explanation for the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify the below (from developercertificate.org):

```
@@ -110,7 +113,6 @@ Use your real name (sorry, no pseudonyms or anonymous contributions.)

If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`.
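
For reference (not part of this diff), `git commit -s` simply appends a `Signed-off-by` trailer to the commit message; the name and email below are placeholders:

```
Signed-off-by: Jane Doe <jane.doe@example.com>
```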


## Unit tests

Unit tests do not require a Databricks account. Please confirm that your pull request passes our unit test suite before opening a pull request.
@@ -119,31 +121,30 @@ Unit tests do not require a Databricks account. Please confirm that your pull re
tox -e unit
```

-## Functional & Integration Tests
+## Functional Tests

-Functional and integration tests require a Databricks account with access to a workspace containing four compute resources. These four comprise a matrix of multi-purpose cluster vs SQL warehouse with and without Unity Catalog enabled. The `tox` commands to run each set of these tests appear below:
+Functional tests require a Databricks account with access to a workspace containing three specific compute resources as detailed below.
+The `tox` commands to run each set of these tests appear below:

-|Compute Type |Unity Catalog |Command|
-|-|-|-|
-|SQL warehouse| Yes | `tox -e integration-databricks-uc-sql-endpoint` |
-|SQL warehouse| No | `tox -e integration-databricks-sql-endpoint` |
-|Multi-purpose| Yes | `tox -e integration-databricks-uc-cluster` |
-|Multi-Purpose| No | `tox -e integration-databricks-cluster` |
+| Compute Type        | Unity Catalog | Command                                          |
+| ------------------- | ------------- | ------------------------------------------------ |
+| SQL Warehouse       | Yes           | `tox -e integration-databricks-uc-sql-endpoint`  |
+| All Purpose Cluster | Yes           | `tox -e integration-databricks-uc-cluster`       |
+| All Purpose         | No            | `tox -e integration-databricks-cluster`          |

These tests are configured with environment variables that `tox` reads from a file called [test.env](/test.env.example) which you can copy from the example:

```sh
cp test.env.example test.env
```

-Update `test.env` with the relevant HTTP paths and tokens.
+Update `test.env` with the relevant HTTP paths and tokens.
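
As an illustration only, a filled-in `test.env` might look like the sketch below; the variable names here are hypothetical placeholders, and the authoritative list lives in [test.env.example](/test.env.example):

```sh
# Hypothetical example -- variable names and values are placeholders,
# not the canonical ones from test.env.example.
DBT_DATABRICKS_HOST_NAME=<workspace>.cloud.databricks.com
DBT_DATABRICKS_TOKEN=<personal-access-token>
DBT_DATABRICKS_CLUSTER_HTTP_PATH=<http-path-of-all-purpose-cluster>
DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH=<http-path-of-uc-enabled-cluster>
DBT_DATABRICKS_UC_ENDPOINT_HTTP_PATH=<http-path-of-sql-warehouse>
```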

### Please test what you can
-We understand that not every contributor will have all four types of compute resources in their Databricks workspace. For this reason, once a change has been reviewed and merged into a staging branch, we will run the full matrix of tests against our testing workspace at our expense (see the [pull request review process](#pull-request-review-process) for more detail).

-That said, we ask that you include integration tests where relevant and that you indicate in your pull request description the environment type(s) you tested the change against.

+We understand that not every contributor will have all three types of compute resources in their Databricks workspace.
+For this reason, once a change has been reviewed and merged into a staging branch, we will run the full matrix of tests against our testing workspace at our expense (see the [pull request review process](#pull-request-review-process) for more detail).

+That said, we ask that you include integration tests where relevant and that you indicate in your pull request description the environment type(s) you tested the change against.

-[Allow Edits from Maintainers]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork
+[Allow Edits from Maintainers]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork
2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -1 +1 @@
-recursive-include dbt/include *.sql *.yml *.md
+recursive-include dbt/include *.sql *.yml *.md
9 changes: 4 additions & 5 deletions dbt/adapters/databricks/__init__.py
@@ -1,10 +1,9 @@
-from dbt.adapters.databricks.connections import DatabricksConnectionManager  # noqa
-from dbt.adapters.databricks.connections import DatabricksCredentials
-from dbt.adapters.databricks.relation import DatabricksRelation  # noqa
+from dbt.adapters.base import AdapterPlugin
from dbt.adapters.databricks.column import DatabricksColumn  # noqa
+from dbt.adapters.databricks.connections import DatabricksConnectionManager  # noqa
+from dbt.adapters.databricks.credentials import DatabricksCredentials
from dbt.adapters.databricks.impl import DatabricksAdapter
-
-from dbt.adapters.base import AdapterPlugin
+from dbt.adapters.databricks.relation import DatabricksRelation  # noqa
from dbt.include import databricks

Plugin = AdapterPlugin(
2 changes: 1 addition & 1 deletion dbt/adapters/databricks/__version__.py
@@ -1 +1 @@
version: str = "1.8.0b2"
version: str = "2.0.0rc1"
33 changes: 30 additions & 3 deletions dbt/adapters/databricks/auth.py
@@ -1,7 +1,16 @@
-from typing import Any, Dict, Optional
-from databricks.sdk.oauth import ClientCredentials, Token
-from databricks.sdk.core import CredentialsProvider, HeaderFactory, Config, credentials_provider
+from typing import Any
+from typing import Dict
+from typing import Optional
+
+from databricks.sdk.core import Config
+from databricks.sdk.core import credentials_provider
+from databricks.sdk.core import CredentialsProvider
+from databricks.sdk.core import HeaderFactory
+from databricks.sdk.oauth import ClientCredentials
+from databricks.sdk.oauth import Token
+from databricks.sdk.oauth import TokenSource
+from requests import PreparedRequest
+from requests.auth import AuthBase


class token_auth(CredentialsProvider):
@@ -77,3 +86,21 @@ def inner() -> Dict[str, str]:
return {"Authorization": f"{token.token_type} {token.access_token}"}

return inner


class BearerAuth(AuthBase):
"""This mix-in is passed to our requests Session to explicitly
use the bearer authentication method.
Without this, a local .netrc file in the user's home directory
will override the auth headers provided by our header_factory.
More details in issue #337.
"""

def __init__(self, header_factory: HeaderFactory):
self.header_factory = header_factory

def __call__(self, r: PreparedRequest) -> PreparedRequest:
r.headers.update(**self.header_factory())
return r
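
As a usage sketch (not part of this diff), `BearerAuth` can be attached to a `requests` session so the factory-produced headers win over any `~/.netrc` credentials. `make_headers` below is a hypothetical stand-in for a `HeaderFactory` obtained from a real `CredentialsProvider`, and the URL is a placeholder:

```python
import requests


def make_headers() -> dict:
    # Stand-in for a HeaderFactory; a real one would mint or refresh a token.
    return {"Authorization": "Bearer <access-token>"}


session = requests.Session()
session.auth = BearerAuth(make_headers)  # BearerAuth as defined above
# Every request sent through this session now carries the bearer header,
# even if a ~/.netrc file defines credentials for the host.
response = session.get("https://<workspace-host>/api/2.0/clusters/list")
```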
4 changes: 3 additions & 1 deletion dbt/adapters/databricks/column.py
@@ -1,5 +1,7 @@
from dataclasses import dataclass
-from typing import ClassVar, Dict, Optional
+from typing import ClassVar
+from typing import Dict
+from typing import Optional

from dbt.adapters.spark.column import SparkColumn
