From 721385ecdd336a7fcc72f9c0a914b528c53b3898 Mon Sep 17 00:00:00 2001 From: Albert DeFusco Date: Thu, 14 Oct 2021 13:10:24 -0500 Subject: [PATCH 1/3] allow queries in get_table --- intake_metabase/source.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/intake_metabase/source.py b/intake_metabase/source.py index 13c5d46..93c3115 100644 --- a/intake_metabase/source.py +++ b/intake_metabase/source.py @@ -120,13 +120,14 @@ class MetabaseTableSource(DataSource): version = __version__ partition_access = True - def __init__(self, domain, database, table, username=None, password=None, token=None, metadata=None): + def __init__(self, domain, database, table=None, query=None, username=None, password=None, token=None, metadata=None): self.domain = domain self.username = username self.password = password self.database = database self.token = token self.table = table + self.query = query self._df = None self._metabase = MetabaseAPI(self.domain, self.username, self.password, self.token) @@ -135,7 +136,7 @@ def __init__(self, domain, database, table, username=None, password=None, token= def _get_schema(self): if self._df is None: - self._df = self._metabase.get_table(self.database, self.table) + self._df = self._metabase.get_table(self.database, self.table, self.query) return Schema(datashape=None, dtype=self._df.dtypes, @@ -246,23 +247,38 @@ def get_card(self, question): return pd.read_csv(StringIO(csv.encode(res.encoding).decode('utf-8')), parse_dates=date_fields, infer_datetime_format=True) - def get_table(self, database, table): + def get_table(self, database, table=None, query=None): from io import StringIO import pandas as pd self._create_or_refresh_token() - table_metadata = self.get_metadata(table) - date_fields = [f['display_name'] for f in table_metadata['fields'] - if 'date' in f['base_type'].lower()] + if (table is not None) and (query is not None): + raise ValueError('Please set only one of table or query') + + 
kwargs = {} + if table is not None: + table_metadata = self.get_metadata(table) + key = 'name' if query is not None else 'display_name' + date_fields = [f[key] for f in table_metadata['fields'] + if 'date' in f['base_type'].lower()] + kwargs = { + 'parse_dates': date_fields, + 'infer_datetime_format': True + } body = { "database": database, - "query": {"source-table": table}, - "type": "query", } + if query is None: + body['type'] = 'query' + body['query'] = {'source-table': table} + else: + body['type'] = 'native' + body['native'] = {'query': query} + headers = { 'Content-Type': 'application/x-www-form-urlencoded', 'X-Metabase-Session': self._token @@ -278,4 +294,4 @@ def get_table(self, database, table): csv = res.text return pd.read_csv(StringIO(csv.encode(res.encoding).decode('utf-8')), - parse_dates=date_fields, infer_datetime_format=True) + **kwargs) From 42114ad7faa6308d30635caf209510e144b1d326 Mon Sep 17 00:00:00 2001 From: Albert DeFusco Date: Fri, 22 Oct 2021 15:06:46 -0500 Subject: [PATCH 2/3] new intake version wants to pass name --- intake_metabase/source.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/intake_metabase/source.py b/intake_metabase/source.py index 93c3115..0608380 100644 --- a/intake_metabase/source.py +++ b/intake_metabase/source.py @@ -15,7 +15,8 @@ class MetabaseCatalog(Catalog): version = __version__ # partition_access = False - def __init__(self, domain, username=None, password=None, token=None, metadata=None): + def __init__(self, domain, username=None, password=None, token=None, metadata=None, name=None): + self.name = name self.domain = domain self.username = username self.password = password From 4ff6eecb4d1fd99bbf083e1fb49c0af7a5adaf52 Mon Sep 17 00:00:00 2001 From: Albert DeFusco Date: Mon, 25 Oct 2021 14:58:58 -0500 Subject: [PATCH 3/3] trying to fix some serialization issues when using a remote catalog. I'm getting weird errors like KeyError on 'dtype' and "cannot serialize a Series". --- 
intake_metabase/source.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intake_metabase/source.py b/intake_metabase/source.py index 0608380..28ddf30 100644 --- a/intake_metabase/source.py +++ b/intake_metabase/source.py @@ -95,7 +95,7 @@ def _get_schema(self): self._df = self._metabase.get_card(self.question) return Schema(datashape=None, - dtype=self._df.dtypes, + dtype={n: str(t) for (n, t) in self._df.dtypes.items()}, shape=(None, len(self._df.columns)), npartitions=1, extra_metadata={}) @@ -112,7 +112,7 @@ def to_dask(self): raise NotImplementedError() def _close(self): - self._dataframe = None + self._df = None class MetabaseTableSource(DataSource): @@ -140,7 +140,7 @@ def _get_schema(self): self._df = self._metabase.get_table(self.database, self.table, self.query) return Schema(datashape=None, - dtype=self._df.dtypes, + dtype={n: str(t) for (n, t) in self._df.dtypes.items()}, shape=(None, len(self._df.columns)), npartitions=1, extra_metadata={}) @@ -157,7 +157,7 @@ def to_dask(self): raise NotImplementedError() def _close(self): - self._dataframe = None + self._df = None class MetabaseAPI():