Skip to content

Commit

Permalink
Merge pull request #3 from ContinuumIO/cards
Browse files Browse the repository at this point in the history
add Cards
  • Loading branch information
AlbertDeFusco authored Feb 11, 2021
2 parents 0187629 + ab3d3cc commit 94e0d45
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 29 deletions.
36 changes: 33 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,19 @@ list(catalog)
This will produce output like

```
[table1, table2, table3]
['first-db.table_a', 'first-db.table_b', 'questions.3']
```

To load a table as a Pandas DataFrame

```
df = catalog.<table>.read()
df = catalog['<table>'].read()
```

Replace `<table>` with the name of the table from the list.

This driver supports multiple databases and saved questions.

## Load a single table
To load a table as a Pandas DataFrame, you will need to know the following information:

Expand All @@ -66,12 +68,40 @@ ds = intake.open_metabase_table(domain, username, password,
database, table)
df = ds.read()
```
## Load a single question

To load a saved question as a Pandas DataFrame, you will need to know the following information:

* `domain`: The URL where Metabase is running
* `username`: Your username, typically an email address
* `password`: Your password (Google Auth is not yet supported)
* `question`: The numeric id of the question

You can generally determine the numeric id of the question you are interested in by

1. Visit `<domain>/collection/root?type=card`
1. Click on the question
* The question id appears in the URL as `<domain>/question/<question_id>`

```python
import intake
ds = intake.open_metabase_question(domain, username, password,
question)
df = ds.read()
```


## Constructing catalogs
This repository provides three drivers

* `metabase_catalog`: Catalog entry to retrieve all tables and questions
* `metabase_table`: Catalog entry for a single table in a database
* `metabase_question`: Catalog entry for a single saved question

To build a catalog containing a Metabase table it can be useful to use the
[Catalog Templating](https://intake.readthedocs.io/en/latest/catalog.html#templating) features
to avoid writing usernames and passwords into the catalog
to avoid writing usernames and passwords into the catalog. For example this catalog
provides a single table.

```yaml
metadata:
Expand Down
136 changes: 111 additions & 25 deletions intake_metabase/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,85 @@ def _load(self):

self._entries = {}
for db in databases:
for table in db['tables']:
e = LocalCatalogEntry(
name=table['name'],
description=table['description'],
driver=MetabaseTableSource,
catalog=self,
args={
'domain': self.domain,
'username': self.username,
'password': self.password,
'database': db['id'],
'table': table['id']
}
)
e._plugin = [MetabaseTableSource]
# self._entries[db['name']][table['name']] = e
self._entries[table['name']] = e
if db.get('is_saved_questions', False):
for card in db['tables']:
question = card['id'].split('__')[-1]
question_name = f"questions.{question}"
description = card['display_name'] if card['description'] is None else card['description']
e = LocalCatalogEntry(
name=question_name,
description=description,
driver=MetabaseQuestionSource,
catalog=self,
args={
'domain': self.domain,
'username': self.username,
'password': self.password,
'question': question
}
)
e._plugin = [MetabaseQuestionSource]
self._entries[question_name] = e
else:
for table in db['tables']:
table_name = f"{db['name']}.{table['name']}"
e = LocalCatalogEntry(
name=table_name,
description=table['description'],
driver=MetabaseTableSource,
catalog=self,
args={
'domain': self.domain,
'username': self.username,
'password': self.password,
'database': db['id'],
'table': table['id']
}
)
e._plugin = [MetabaseTableSource]
self._entries[table_name] = e


class MetabaseQuestionSource(DataSource):
    """Intake data source that loads one saved Metabase question.

    The question's result is fetched through ``MetabaseAPI.get_card`` the
    first time the schema or the data is requested, and is then cached on
    the instance until the source is closed.

    Parameters
    ----------
    domain : str
        The URL where Metabase is running.
    username : str
        Metabase user name.
    password : str
        Metabase password.
    question : int or str
        The numeric id of the saved question.
    metadata : dict, optional
        Forwarded unchanged to ``DataSource.__init__``.
    """
    name = 'metabase_question'
    container = 'dataframe'
    version = __version__
    partition_access = True

    def __init__(self, domain, username, password, question, metadata=None):
        self.domain = domain
        self.username = username
        self.password = password
        self.question = question
        # Cache for the downloaded result; None means "not fetched yet".
        self._df = None

        self._metabase = MetabaseAPI(self.domain, self.username, self.password)

        super(MetabaseQuestionSource, self).__init__(metadata=metadata)

    def _get_schema(self):
        """Fetch the question result if needed and describe it as a Schema."""
        if self._df is None:
            self._df = self._metabase.get_card(self.question)

        # The whole result is exposed as a single partition; the row count
        # is reported as unknown (None).
        return Schema(datashape=None,
                      dtype=self._df.dtypes,
                      shape=(None, len(self._df.columns)),
                      npartitions=1,
                      extra_metadata={})

    def _get_partition(self, i):
        """Return the cached result; ``i`` is ignored (single partition)."""
        self._get_schema()
        return self._df

    def read(self):
        """Return the question result, fetching it first if necessary."""
        self._get_schema()
        return self._df

    def to_dask(self):
        """Dask output is not implemented for this source."""
        raise NotImplementedError()

    def _close(self):
        """Drop the cached result so the next access fetches it again."""
        # The cache lives in ``_df``; the previous code cleared a
        # non-existent ``_dataframe`` attribute, so the cached data was
        # never released on close.
        self._df = None


class MetabaseTableSource(DataSource):
Expand All @@ -54,7 +116,7 @@ class MetabaseTableSource(DataSource):
version = __version__
partition_access = True

def __init__(self, domain, username, password, database, table, *kwargs, metadata=None):
def __init__(self, domain, username, password, database, table, metadata=None):
self.domain = domain
self.username = username
self.password = password
Expand All @@ -71,7 +133,7 @@ def _get_schema(self):
self._df = self._metabase.get_table(self.database, self.table)

return Schema(datashape=None,
dtype=self._df,
dtype=self._df.dtypes,
shape=(None, len(self._df.columns)),
npartitions=1,
extra_metadata={})
Expand Down Expand Up @@ -122,7 +184,7 @@ def get_databases(self):
headers = {
'X-Metabase-Session': self._token
}
params = {'include': 'tables'}
params = {'include': 'tables', 'saved': True}

res = requests.get(
urljoin(self.domain, '/api/database'),
Expand All @@ -145,6 +207,33 @@ def get_metadata(self, table):

return res.json()

def get_card(self, question):
    """Run a saved question and return its result as a pandas DataFrame.

    Parameters
    ----------
    question : int or str
        The numeric id of the saved question (card).

    Returns
    -------
    pandas.DataFrame
        The CSV export of the question. Columns whose Metabase
        ``base_type`` contains "date" are parsed as dates.

    Raises
    ------
    requests.HTTPError
        If the CSV export request returns an error status.
    """
    from io import StringIO

    import pandas as pd

    self._create_or_refresh_token()

    # Saved questions are addressed as the virtual table "card__<id>" when
    # asking for field metadata.
    card_metadata = self.get_metadata(f'card__{question}')
    date_fields = [f['display_name'] for f in card_metadata['fields']
                   if 'date' in f['base_type'].lower()]

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'X-Metabase-Session': self._token
    }

    res = requests.post(
        urljoin(self.domain, f'/api/card/{question}/query/csv'),
        headers=headers
    )

    res.raise_for_status()

    # Decode the raw payload as UTF-8 directly. The previous
    # ``res.text.encode(res.encoding).decode('utf-8')`` round trip raised
    # TypeError whenever the response declared no charset
    # (``res.encoding is None``). ``errors='replace'`` matches the
    # leniency ``res.text`` itself applies when decoding.
    csv = res.content.decode('utf-8', errors='replace')

    return pd.read_csv(StringIO(csv),
                       parse_dates=date_fields, infer_datetime_format=True)

def get_table(self, database, table):
from io import StringIO

Expand All @@ -160,10 +249,6 @@ def get_table(self, database, table):
"database": database,
"query": {"source-table": table},
"type": "query",
"middleware": {
"js-int-to-string?": True,
"add-default-userland-constraints?": True
}
}

headers = {
Expand All @@ -180,4 +265,5 @@ def get_table(self, database, table):
res.raise_for_status()
csv = res.text

return pd.read_csv(StringIO(csv), parse_dates=date_fields, infer_datetime_format=True)
return pd.read_csv(StringIO(csv.encode(res.encoding).decode('utf-8')),
parse_dates=date_fields, infer_datetime_format=True)
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
'intake.drivers': [
'metabase_catalog = intake_metabase.source:MetabaseCatalog',
'metabase_table = intake_metabase.source:MetabaseTableSource',
]
'metabase_question = intake_metabase.source:MetabaseQuestionSource'
]
},
install_requires=requirements,
keywords='intake-metabase',
Expand Down

0 comments on commit 94e0d45

Please sign in to comment.