Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement source_col param in Isolines service #1326

Merged
merged 2 commits into from
Dec 10, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 26 additions & 31 deletions cartoframes/data/services/isolines.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
QUOTA_SERVICE = 'isolines'
DATA_RANGE_KEY = 'data_range'
RANGE_LABEL_KEY = 'range_label'
CARTO_INDEX_KEY = 'cartodb_id'


class Isolines(Service):
Expand Down Expand Up @@ -50,6 +51,9 @@ def isochrones(self, source, ranges, **args):
quality (int, optional): Allows you to reduce the quality of the polygons in favor of the response time.
Accepted values: 1, 2 or 3.
geom_col (str, optional): string indicating the geometry column name in the source `DataFrame`.
source_col (str, optional): string indicating the source column name. This column will be used to link
the generated isolines to their original geometries. By default it uses the `cartodb_id` column if it exists,
or the index of the source `DataFrame` otherwise.

Returns:
A named-tuple ``(data, metadata)`` containing a ``data`` :py:class:`CartoDataFrame
Expand Down Expand Up @@ -96,6 +100,9 @@ def isodistances(self, source, ranges, **args):
quality (int, optional): Allows you to reduce the quality of the polygons in favor of the response time.
Accepted values: 1, 2 or 3.
geom_col (str, optional): string indicating the geometry column name in the source `DataFrame`.
source_col (str, optional): string indicating the source column name. This column will be used to link
the generated isolines to their original geometries. By default it uses the `cartodb_id` column if it exists,
or the index of the source `DataFrame` otherwise.

Returns:
A named-tuple ``(data, metadata)`` containing a ``data`` :py:class:`CartoDataFrame
Expand All @@ -122,7 +129,8 @@ def _iso_areas(self,
quality=None,
exclusive=True,
function=None,
geom_col=None):
geom_col=None,
source_col=None):
metadata = {}

source_manager = SourceManager(source, self._credentials)
Expand All @@ -148,14 +156,16 @@ def _iso_areas(self,
raise Exception('No valid geometry found. Please provide an input source with ' +
'a valid geometry or specify the "geom_col" param with a geometry column.')

to_carto(source_cdf, temporary_table_name, self._credentials, log_enabled=False)
index_as_cartodbid = CARTO_INDEX_KEY not in source_cdf.columns

to_carto(source_cdf, temporary_table_name, self._credentials, index=index_as_cartodbid,
index_col=CARTO_INDEX_KEY, log_enabled=False)
source_query = 'SELECT * FROM {table}'.format(table=temporary_table_name)

source_columns = source_manager.get_column_names()
source_has_id = 'cartodb_id' in source_columns
if source_col is None:
source_col = CARTO_INDEX_KEY
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is the default value, we could set it in the method signature and avoid this conditional.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't want to add this default to the method signature because of the documentation (reference). The index is set as `cartodb_id` when there is no `cartodb_id` column in the dataframe, but this is something I want to keep internal. Does that make sense to you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, makes sense then.


iso_function = '_cdb_{function}_exception_safe'.format(function=function)
# TODO: use **options argument?
options = {
'is_destination': is_destination,
'mode_type': mode_type,
Expand All @@ -168,10 +178,10 @@ def _iso_areas(self,
iso_options = "ARRAY[{opts}]".format(opts=','.join(iso_options))
iso_ranges = 'ARRAY[{ranges}]'.format(ranges=','.join([str(r) for r in ranges]))

sql = _areas_query(
source_query, source_columns, iso_function, mode, iso_ranges, iso_options, source_has_id or exclusive)
sql = _areas_query(source_query, source_col, iso_function, mode, iso_ranges, iso_options)

if exclusive:
sql = _rings_query(sql, source_has_id)
sql = _rings_query(sql)

# Execute and download the query to generate the isolines
cdf = read_carto(sql, self._credentials)
Expand All @@ -185,7 +195,7 @@ def _iso_areas(self,
to_carto(cdf, table_name, self._credentials, if_exists, log_enabled=dry_run)

if source_manager.is_dataframe():
del cdf['cartodb_id']
del cdf[CARTO_INDEX_KEY]

if temporary_table_name:
delete_table(temporary_table_name, self._credentials, log_enabled=False)
Expand All @@ -197,20 +207,12 @@ def _iso_areas(self,
return result


def _areas_query(source_query, source_columns, iso_function, mode, iso_ranges, iso_options, with_source_id):
select_source_id = 'source_id,' if with_source_id else ''
source_id = ''
if with_source_id:
if 'cartodb_id' in source_columns:
source_id = '_source.cartodb_id AS source_id,'
else:
source_id = 'row_number() over () AS source_id,'

def _areas_query(source_query, source_col, iso_function, mode, iso_ranges, iso_options):
return """
WITH _source AS ({source_query}),
_iso_areas AS (
SELECT
{source_id}
_source.{source_col} AS source_id,
{iso_function}(
_source.the_geom,
'{mode}',
Expand All @@ -221,31 +223,25 @@ def _areas_query(source_query, source_columns, iso_function, mode, iso_ranges, i
)
SELECT
row_number() OVER () AS cartodb_id,
{select_source_id}
source_id,
(_area).data_range,
(_area).the_geom
FROM _iso_areas
""".format(
iso_function=iso_function,
source_query=source_query,
source_id=source_id,
select_source_id=select_source_id,
source_col=source_col,
iso_function=iso_function,
mode=mode,
iso_ranges=iso_ranges,
iso_options=iso_options
)


def _rings_query(areas_query, with_source_id):
if with_source_id:
select_source_id = 'source_id,'
else:
select_source_id = 'row_number() OVER () AS source_id,'

def _rings_query(areas_query):
return """
SELECT
cartodb_id,
{select_source_id}
source_id,
data_range,
COALESCE(
LAG(data_range, 1) OVER (PARTITION BY source_id ORDER BY data_range),
Expand All @@ -257,6 +253,5 @@ def _rings_query(areas_query, with_source_id):
) AS the_geom
FROM ({areas_query}) _areas_query
""".format(
select_source_id=select_source_id,
areas_query=areas_query
)