feat(logic): added strip_extra_white field
- Added `strip_extra_white` field and form fields.
- Used `strip_extra_white` to control whitespace stripping.
JVickery-TBS committed May 14, 2024
1 parent 54f87e0 commit 50080ea
Showing 5 changed files with 33 additions and 36 deletions.
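
In short: every datastore field can now carry a boolean strip_extra_white key (defaulting to True). It can be set from the Data Dictionary form or passed to datastore_create, is stored as xloader plugin data, and is read back by the loader through datastore_info to decide, column by column, whether to strip leading and trailing whitespace. A hedged sketch of that round trip (the resource id and field names are placeholders; exact call context depends on the CKAN site):

# Illustrative only: how the new per-field flag flows through the API,
# based on the schema and loader changes in this commit.
from ckan.plugins import toolkit

# Creating (or updating) the table: the flag is accepted on each field
# because update_datastore_create_schema() below adds it to the fields schema.
toolkit.get_action('datastore_create')({'ignore_auth': True}, {
    'resource_id': 'RESOURCE-ID',  # placeholder
    'force': True,
    'fields': [
        {'id': 'name', 'type': 'text', 'strip_extra_white': True},
        {'id': 'code', 'type': 'text', 'strip_extra_white': False},
    ],
})

# Loading: the flag comes back from datastore_info and gates stripping per column.
ds_info = toolkit.get_action('datastore_info')({'ignore_auth': True},
                                               {'id': 'RESOURCE-ID'})
for f in ds_info.get('fields', []):
    print(f['id'], f.get('strip_extra_white', True))
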
26 changes: 17 additions & 9 deletions ckanext/xloader/loader.py
@@ -177,10 +177,13 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
     existing = datastore_resource_exists(resource_id)
     existing_info = {}
     if existing:
-        existing_fields = existing.get('fields', [])
+        ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id})
+        existing_fields = ds_info.get('fields', [])
         existing_info = dict((f['id'], f['info'])
                              for f in existing_fields
                              if 'info' in f)
+        existing_fields_by_headers = dict((f['id'], f)
+                                          for f in existing_fields)
 
         # Column types are either set (overridden) in the Data Dictionary page
         # or default to text type (which is robust)
@@ -195,6 +198,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         for f in fields:
             if f['id'] in existing_info:
                 f['info'] = existing_info[f['id']]
+                f['strip_extra_white'] = existing_fields_by_headers[f['id']].get('strip_extra_white', True)
 
         '''
         Delete or truncate existing datastore table before proceeding,
@@ -211,7 +215,8 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
     else:
         fields = [
             {'id': header_name,
-             'type': 'text',}
+             'type': 'text',
+             'strip_extra_white': True,}
             for header_name in headers]
 
     logger.info('Fields: %s', fields)
@@ -225,7 +230,7 @@ def strip_white_space_iter():
             for row in super_iter():
                 for _index, _cell in enumerate(row):
                     # only strip white space if strip_extra_white is True
-                    if fields[_index].get('info', {}).get('strip_extra_white', True) and isinstance(_cell, str):
+                    if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
                         row[_index] = _cell.strip()
                 yield row
         stream.iter = strip_white_space_iter
@@ -238,7 +243,7 @@ def strip_white_space_iter():
             for row in super_iter():
                 for _index, _cell in enumerate(row):
                     # only strip white space if strip_extra_white is True
-                    if fields[_index].get('info', {}).get('strip_extra_white', True) and isinstance(_cell, str):
+                    if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
                         row[_index] = _cell.strip()
                 yield row
         stream.iter = strip_white_space_iter
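
Both load_csv hunks above wrap the stream's iterator so stripping happens row by row as the data is read, with the per-column flag deciding which cells get trimmed. A standalone sketch of that wrapping pattern, with a stub standing in for the real stream object (illustrative, not the loader itself):

# Stand-alone sketch of the iterator wrapping used above; FakeStream is a stub,
# not the tabulator stream the loader actually works with.
class FakeStream:
    def __init__(self, rows):
        self._rows = rows
        self.iter = lambda: iter(self._rows)

fields = [{'id': 'name', 'strip_extra_white': True},
          {'id': 'code', 'strip_extra_white': False}]
stream = FakeStream([['  Alice  ', '  007  '], [' Bob', '008 ']])

super_iter = stream.iter

def strip_white_space_iter():
    for row in super_iter():
        for _index, _cell in enumerate(row):
            # strip only the columns that opted in (the default is to strip)
            if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
                row[_index] = _cell.strip()
        yield row

stream.iter = strip_white_space_iter
print(list(stream.iter()))  # [['Alice', '  007  '], ['Bob', '008 ']]
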
@@ -388,10 +393,13 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
     existing = datastore_resource_exists(resource_id)
     existing_info = None
     if existing:
-        existing_fields = existing.get('fields', [])
+        ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id})
+        existing_fields = ds_info.get('fields', [])
         existing_info = dict(
             (f['id'], f['info'])
             for f in existing_fields if 'info' in f)
+        existing_fields_by_headers = dict((f['id'], f)
+                                          for f in existing_fields)
 
     # Some headers might have been converted from strings to floats and such.
     headers = encode_headers(headers)
@@ -403,7 +411,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
 
     TYPES, TYPE_MAPPING = get_types()
     types = type_guess(stream.sample[1:], types=TYPES, strict=True)
-    info = []
+    fields = []
 
     # override with types user requested
     if existing_info:
@@ -415,11 +423,10 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
             }.get(existing_info.get(h, {}).get('type_override'), t)
             for t, h in zip(types, headers)]
         for h in headers:
-            info.append(existing_info.get(h, {}))
-
+            fields.append(existing_fields_by_headers.get(h, {}))
 
     headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
-    type_converter = TypeConverter(types=types, info=info)
+    type_converter = TypeConverter(types=types, fields=fields)
 
     with UnknownEncodingStream(table_filepath, file_format, decoding_result,
                                skip_rows=skip_rows,
@@ -440,6 +447,7 @@ def row_iterator():
         for h in headers_dicts:
             if h['id'] in existing_info:
                 h['info'] = existing_info[h['id']]
+                h['strip_extra_white'] = existing_fields_by_headers[h['id']].get('strip_extra_white', True)
                 # create columns with types user requested
                 type_override = existing_info[h['id']].get('type_override')
                 if type_override in list(_TYPE_MAPPING.values()):
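
In load_table the per-column flags travel through the new fields list, which is built in header order so that TypeConverter can look a cell's column up by position; columns with no existing datastore field fall back to an empty dict and therefore to the default of stripping. A small hedged sketch of that alignment (header names are illustrative):

# Illustrative alignment of headers -> fields, mirroring the load_table hunks above.
headers = ['name', 'code', 'brand_new_column']
existing_fields_by_headers = {
    'name': {'id': 'name', 'type': 'text', 'strip_extra_white': True},
    'code': {'id': 'code', 'type': 'text', 'strip_extra_white': False},
}

# New columns get an empty dict, so .get('strip_extra_white', True) defaults to stripping.
fields = [existing_fields_by_headers.get(h, {}) for h in headers]
print([f.get('strip_extra_white', True) for f in fields])  # [True, False, True]
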
8 changes: 4 additions & 4 deletions ckanext/xloader/parser.py
@@ -18,9 +18,9 @@ class TypeConverter:
     as desired.
     """
 
-    def __init__(self, types=None, info=None):
+    def __init__(self, types=None, fields=None):
         self.types = types
-        self.info = info
+        self.fields = fields
 
     def convert_types(self, extended_rows):
         """ Try converting cells to numbers or timestamps if applicable.
@@ -32,9 +32,9 @@ def convert_types(self, extended_rows):
             for cell_index, cell_value in enumerate(row):
                 if cell_value is None:
                     row[cell_index] = ''
-                if self.info:
+                if self.fields:
                     # only strip white space if strip_extra_white is True
-                    if self.info[cell_index].get('strip_extra_white', True) and isinstance(cell_value, str):
+                    if self.fields[cell_index].get('strip_extra_white', True) and isinstance(cell_value, str):
                         cell_value = cell_value.strip()
                         row[cell_index] = cell_value.strip()
                 if not cell_value:
19 changes: 10 additions & 9 deletions ckanext/xloader/plugin.py
@@ -10,7 +10,7 @@
 from ckan.model.resource import Resource
 from ckan.model.package import Package
 
-from . import action, auth, helpers as xloader_helpers, utils, validators
+from . import action, auth, helpers as xloader_helpers, utils
 from ckanext.xloader.utils import XLoaderFormats
 
 try:
@@ -35,7 +35,6 @@ class xloaderPlugin(plugins.SingletonPlugin):
     plugins.implements(plugins.IResourceController, inherit=True)
     plugins.implements(plugins.IClick)
     plugins.implements(plugins.IBlueprint)
-    plugins.implements(plugins.IValidators)
     plugins.implements(IDataDictionaryForm, inherit=True)
 
     # IClick
@@ -210,18 +209,20 @@ def get_helpers(self):
             "is_resource_supported_by_xloader": xloader_helpers.is_resource_supported_by_xloader,
         }
 
-    # IValidators
-
-    def get_validators(self):
-        return {'xloader_datastore_fields_validator': validators.datastore_fields_validator}
-
     # IDataDictionaryForm
 
     def update_datastore_create_schema(self, schema):
-        info_validator = toolkit.get_validator('xloader_datastore_fields_validator')
-        schema['fields']['info'] = [info_validator] + schema['fields']['info']
+        default = toolkit.get_validator('default')
+        boolean_validator = toolkit.get_validator('boolean_validator')
+        to_datastore_plugin_data = toolkit.get_validator('to_datastore_plugin_data')
+        schema['fields']['strip_extra_white'] = [default(True), boolean_validator, to_datastore_plugin_data('xloader')]
         return schema
+
+    def update_datastore_info_field(self, field, plugin_data):
+        # expose all our non-secret plugin data in the field
+        field.update(plugin_data.get('xloader', {}))
+        return field
 
 
 def _should_remove_unsupported_resource_from_datastore(res_dict):
     if not toolkit.asbool(toolkit.config.get('ckanext.xloader.clean_datastore_tables', False)):
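
The plugin now registers strip_extra_white directly in the datastore_create fields schema instead of validating it inside each field's info dict: default(True) fills the value in when it is omitted, boolean_validator coerces submitted values, and to_datastore_plugin_data('xloader') stores it as datastore plugin data so datastore_info can hand it back. A tiny sketch of what update_datastore_info_field does with that stored data (values are illustrative):

# Illustrative: merging stored xloader plugin data back into a field record,
# as update_datastore_info_field() above does for datastore_info responses.
field = {'id': 'code', 'type': 'text'}
plugin_data = {'xloader': {'strip_extra_white': False}}

field.update(plugin_data.get('xloader', {}))
print(field)  # {'id': 'code', 'type': 'text', 'strip_extra_white': False}
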
4 changes: 2 additions & 2 deletions (data dictionary form template snippet)
@@ -3,9 +3,9 @@
 
 {% block additional_fields %}
   {{ super() }}
-  {{ form.select('info__' ~ position ~ '__strip_extra_white',
+  {{ form.select('fields__' ~ position ~ '__strip_extra_white',
     label=_('Strip Extra Leading and Trailing White Space'), options=[
     {'text': 'Yes', 'value': true},
     {'text': 'No', 'value': false},
-    ], selected=field.get('info', {}).get('strip_extra_white')) }}
+    ], selected=field.get('strip_extra_white')) }}
 {% endblock %}
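
The form field is renamed from info__<position>__strip_extra_white to fields__<position>__strip_extra_white so the submitted value lands on the field itself rather than inside its info dict. A hypothetical illustration of how such flattened form names map onto per-field keys (the real unflattening is done by CKAN's data dictionary form handling, not by this snippet):

# Hypothetical illustration only: flattened form names -> per-field keys.
posted = {
    'fields__0__strip_extra_white': 'true',
    'fields__1__strip_extra_white': 'false',
}
fields = [{}, {}]
for key, value in sorted(posted.items()):
    _prefix, position, attr = key.split('__', 2)
    fields[int(position)][attr] = value
print(fields)  # [{'strip_extra_white': 'true'}, {'strip_extra_white': 'false'}]

On the server side, the boolean_validator added in the schema change above then coerces those posted strings to booleans.
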
12 changes: 0 additions & 12 deletions ckanext/xloader/validators.py

This file was deleted.
