From 50080ea14d725385bd25cbb2ff395b249f6599d0 Mon Sep 17 00:00:00 2001 From: Jesse Vickery Date: Tue, 14 May 2024 16:52:23 +0000 Subject: [PATCH] feat(logic): added `strip_extra_white` field; - Added `strip_extra_white` field and form fields. - Used `strip_extra_white` to control stripping white space. --- ckanext/xloader/loader.py | 26 ++++++++++++------- ckanext/xloader/parser.py | 8 +++--- ckanext/xloader/plugin.py | 19 +++++++------- .../datastore/snippets/dictionary_form.html | 4 +-- ckanext/xloader/validators.py | 12 --------- 5 files changed, 33 insertions(+), 36 deletions(-) delete mode 100644 ckanext/xloader/validators.py diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index abe03f41..817b55c1 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -177,10 +177,13 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None): existing = datastore_resource_exists(resource_id) existing_info = {} if existing: - existing_fields = existing.get('fields', []) + ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id}) + existing_fields = ds_info.get('fields', []) existing_info = dict((f['id'], f['info']) for f in existing_fields if 'info' in f) + existing_fields_by_headers = dict((f['id'], f) + for f in existing_fields) # Column types are either set (overridden) in the Data Dictionary page # or default to text type (which is robust) @@ -195,6 +198,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None): for f in fields: if f['id'] in existing_info: f['info'] = existing_info[f['id']] + f['strip_extra_white'] = existing_fields_by_headers[f['id']].get('strip_extra_white', True) ''' Delete or truncate existing datastore table before proceeding, @@ -211,7 +215,8 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None): else: fields = [ {'id': header_name, - 'type': 'text',} + 'type': 'text', + 'strip_extra_white': True,} for header_name in headers] logger.info('Fields: %s', fields) @@ -225,7 +230,7 @@ def strip_white_space_iter(): for row in super_iter(): for _index, _cell in enumerate(row): # only strip white space if strip_extra_white is True - if fields[_index].get('info', {}).get('strip_extra_white', True) and isinstance(_cell, str): + if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str): row[_index] = _cell.strip() yield row stream.iter = strip_white_space_iter @@ -238,7 +243,7 @@ def strip_white_space_iter(): for row in super_iter(): for _index, _cell in enumerate(row): # only strip white space if strip_extra_white is True - if fields[_index].get('info', {}).get('strip_extra_white', True) and isinstance(_cell, str): + if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str): row[_index] = _cell.strip() yield row stream.iter = strip_white_space_iter @@ -388,10 +393,13 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None): existing = datastore_resource_exists(resource_id) existing_info = None if existing: - existing_fields = existing.get('fields', []) + ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id}) + existing_fields = ds_info.get('fields', []) existing_info = dict( (f['id'], f['info']) for f in existing_fields if 'info' in f) + existing_fields_by_headers = dict((f['id'], f) + for f in existing_fields) # Some headers might have been converted from strings to floats and such. headers = encode_headers(headers) @@ -403,7 +411,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None): TYPES, TYPE_MAPPING = get_types() types = type_guess(stream.sample[1:], types=TYPES, strict=True) - info = [] + fields = [] # override with types user requested if existing_info: @@ -415,11 +423,10 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None): }.get(existing_info.get(h, {}).get('type_override'), t) for t, h in zip(types, headers)] for h in headers: - info.append(existing_info.get(h, {})) - + fields.append(existing_fields_by_headers.get(h, {})) headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()] - type_converter = TypeConverter(types=types, info=info) + type_converter = TypeConverter(types=types, fields=fields) with UnknownEncodingStream(table_filepath, file_format, decoding_result, skip_rows=skip_rows, @@ -440,6 +447,7 @@ def row_iterator(): for h in headers_dicts: if h['id'] in existing_info: h['info'] = existing_info[h['id']] + h['strip_extra_white'] = existing_fields_by_headers[h['id']].get('strip_extra_white', True) # create columns with types user requested type_override = existing_info[h['id']].get('type_override') if type_override in list(_TYPE_MAPPING.values()): diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py index d27cd0ce..c587f187 100644 --- a/ckanext/xloader/parser.py +++ b/ckanext/xloader/parser.py @@ -18,9 +18,9 @@ class TypeConverter: as desired. """ - def __init__(self, types=None, info=None): + def __init__(self, types=None, fields=None): self.types = types - self.info = info + self.fields = fields def convert_types(self, extended_rows): """ Try converting cells to numbers or timestamps if applicable. @@ -32,9 +32,9 @@ def convert_types(self, extended_rows): for cell_index, cell_value in enumerate(row): if cell_value is None: row[cell_index] = '' - if self.info: + if self.fields: # only strip white space if strip_extra_white is True - if self.info[cell_index].get('strip_extra_white', True) and isinstance(cell_value, str): + if self.fields[cell_index].get('strip_extra_white', True) and isinstance(cell_value, str): cell_value = cell_value.strip() row[cell_index] = cell_value.strip() if not cell_value: diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py index 6b22d8d8..051185e6 100644 --- a/ckanext/xloader/plugin.py +++ b/ckanext/xloader/plugin.py @@ -10,7 +10,7 @@ from ckan.model.resource import Resource from ckan.model.package import Package -from . import action, auth, helpers as xloader_helpers, utils, validators +from . import action, auth, helpers as xloader_helpers, utils from ckanext.xloader.utils import XLoaderFormats try: @@ -35,7 +35,6 @@ class xloaderPlugin(plugins.SingletonPlugin): plugins.implements(plugins.IResourceController, inherit=True) plugins.implements(plugins.IClick) plugins.implements(plugins.IBlueprint) - plugins.implements(plugins.IValidators) plugins.implements(IDataDictionaryForm, inherit=True) # IClick @@ -210,18 +209,20 @@ def get_helpers(self): "is_resource_supported_by_xloader": xloader_helpers.is_resource_supported_by_xloader, } - # IValidators - - def get_validators(self): - return {'xloader_datastore_fields_validator': validators.datastore_fields_validator} - # IDataDictionaryForm def update_datastore_create_schema(self, schema): - info_validator = toolkit.get_validator('xloader_datastore_fields_validator') - schema['fields']['info'] = [info_validator] + schema['fields']['info'] + default = toolkit.get_validator('default') + boolean_validator = toolkit.get_validator('boolean_validator') + to_datastore_plugin_data = toolkit.get_validator('to_datastore_plugin_data') + schema['fields']['strip_extra_white'] = [default(True), boolean_validator, to_datastore_plugin_data('xloader')] return schema + def update_datastore_info_field(self, field, plugin_data): + # expose all our non-secret plugin data in the field + field.update(plugin_data.get('xloader', {})) + return field + def _should_remove_unsupported_resource_from_datastore(res_dict): if not toolkit.asbool(toolkit.config.get('ckanext.xloader.clean_datastore_tables', False)): diff --git a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html index 1a91b00f..afdf80ff 100644 --- a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html +++ b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html @@ -3,9 +3,9 @@ {% block additional_fields %} {{ super() }} - {{ form.select('info__' ~ position ~ '__strip_extra_white', + {{ form.select('fields__' ~ position ~ '__strip_extra_white', label=_('Strip Extra Leading and Trailing White Space'), options=[ {'text': 'Yes', 'value': true}, {'text': 'No', 'value': false}, - ], selected=field.get('info', {}).get('strip_extra_white')) }} + ], selected=field.get('strip_extra_white')) }} {% endblock %} diff --git a/ckanext/xloader/validators.py b/ckanext/xloader/validators.py deleted file mode 100644 index a14f71f3..00000000 --- a/ckanext/xloader/validators.py +++ /dev/null @@ -1,12 +0,0 @@ -from ckan.plugins.toolkit import asbool - - -def datastore_fields_validator(value, context): - if 'strip_extra_white' not in value: - # default to True - value['strip_extra_white'] = True - - # bool value for strip_extra_white - value['strip_extra_white'] = asbool(value['strip_extra_white']) - - return value