Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: anelendata/tap-bigquery
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: harrystech/tap-bigquery
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Able to merge. These branches can be automatically merged.
  • 8 commits
  • 3 files changed
  • 3 contributors

Commits on Jan 10, 2024

  1. Copy the full SHA
    201b644 View commit details
  2. Use default get

    jwisdom-harrys committed Jan 10, 2024
    Copy the full SHA
    bea1d59 View commit details

Commits on Jan 12, 2024

  1. Copy the full SHA
    d34b8b5 View commit details
  2. Copy the full SHA
    448a70c View commit details

Commits on Jan 29, 2024

  1. Merge pull request #1 from harrystech/add-static-schemas

    Add capability to load static schemas for catalog
    jwisdom-harrys authored Jan 29, 2024
    Copy the full SHA
    f5ec2cd View commit details

Commits on Jul 16, 2024

  1. Copy the full SHA
    e43a7ec View commit details
  2. Copy the full SHA
    0ad6853 View commit details
  3. Merge pull request #2 from harrystech/jwisdom/refresh-upstream

    Jwisdom/refresh upstream
    rishi-gajula-harrys authored Jul 16, 2024
    Copy the full SHA
    776a5e1 View commit details
Showing with 23 additions and 4 deletions.
  1. +2 −1 README.md
  2. +1 −0 setup.py
  3. +20 −3 tap_bigquery/sync_bigquery.py
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -80,7 +80,8 @@ config.sample.json:
"start_datetime": "2017-01-01T00:00:00Z", // This can be set at the command line argument
"end_datetime": "2017-02-01T00:00:00Z", // end_datetime is optional
"limit": 100,
"start_always_inclusive": false // default is false, optional
"start_always_inclusive": false, // default is false, optional
"schema_path": "/absolute/path/to/schemas/directory" // default None, will attempt to infer schema
}
```

1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.10",
],

install_requires=[
23 changes: 20 additions & 3 deletions tap_bigquery/sync_bigquery.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import copy, datetime, json, time
import os.path

import dateutil.parser
from decimal import Decimal

@@ -76,8 +78,7 @@ def _build_query(keys, filters=[], inclusive_start=True, limit=None):
return query


def do_discover(config, stream, output_schema_file=None,
add_timestamp=True):
def infer_schema(config, stream):
client = get_bigquery_client()

start_datetime = dateutil.parser.parse(
@@ -113,7 +114,23 @@ def do_discover(config, stream, output_schema_file=None,
if not data:
raise Exception("Cannot infer schema: No record returned.")

schema = getschema.infer_schema(data)
return getschema.infer_schema(data)


def do_discover(config, stream, output_schema_file=None,
add_timestamp=True):
schema_file_path = os.path.join(config.get("schema_path", ""), f"{stream['table']}.json")
if not config.get("schema_path"):
LOGGER.info("No schema folder specified, attempting to infer schema")
schema = infer_schema(config, stream)
elif not os.path.exists(schema_file_path):
LOGGER.info(f"No schema file found at {schema_file_path}, attempting to infer schema")
schema = infer_schema(config, stream)
else:
with open(schema_file_path, "r") as fd:
schema = json.loads(fd.read())
LOGGER.info(f"Using static schema defined at {schema_file_path}")

if add_timestamp:
timestamp_format = {"type": ["null", "string"],
"format": "date-time"}