From 7fb1ad995a82b9c400bd3a7963d7aa728fdd1855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 13 Dec 2018 15:10:53 -0500 Subject: [PATCH 01/14] Creates a unique_name when field is added to FormVersion --- src/formpack/schema/datadef.py | 4 ++++ src/formpack/schema/fields.py | 13 +++++++++++++ src/formpack/version.py | 3 ++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/formpack/schema/datadef.py b/src/formpack/schema/datadef.py index 421ea6d3..9480d53f 100644 --- a/src/formpack/schema/datadef.py +++ b/src/formpack/schema/datadef.py @@ -23,6 +23,7 @@ class FormDataDef(object): def __init__(self, name, labels=None, has_stats=False, *args, **kwargs): self.name = name + self.unique_name = name self.labels = labels or {} self.value_names = self.get_value_names() self.has_stats = has_stats @@ -48,6 +49,9 @@ def _extract_json_labels(cls, definition, translations): labels = {} return labels + def create_unique_name(self, suffix): + pass + class FormGroup(FormDataDef): # useful to get __repr__ pass diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index 637b3f4f..9a0f7333 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -55,6 +55,19 @@ def __init__(self, name, labels, data_type, hierarchy=None, # do not include the root section in the path self.path = '/'.join(info.name for info in self.hierarchy[1:]) + def create_unique_name(self, suffix): + self.unique_name = "{signature}_{suffix}".format( + signature=self.signature, + suffix=suffix + ) + + @property + def signature(self): + return "{name}_{type}".format( + name=self.name, + type=self.data_type, + ) + def get_labels(self, lang=UNSPECIFIED_TRANSLATION, group_sep="/", hierarchy_in_labels=False, multiple_select="both"): """ Return a list of labels for this field. diff --git a/src/formpack/version.py b/src/formpack/version.py index baed6785..d010d538 100644 --- a/src/formpack/version.py +++ b/src/formpack/version.py @@ -127,7 +127,7 @@ def __init__(self, form_pack, schema): for data_definition in survey: data_type = data_definition.get('type') - if not data_type: # handle broken data type definition + if not data_type: # handle broken data type definition continue data_type = normalize_data_type(data_type) @@ -191,6 +191,7 @@ def __init__(self, form_pack, schema): hierarchy, section, field_choices, translations=self.translations) + field.create_unique_name(self.id) section.fields[field.name] = field _f = fields_by_name[field.name] From 5af7218dca99d3cc98cfb260c2e3b08072f52c3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 13 Dec 2018 18:13:58 -0500 Subject: [PATCH 02/14] Exports supports data for same questions with different types --- src/formpack/pack.py | 41 ++++++++++++++++++---------- src/formpack/reporting/autoreport.py | 16 +++++------ src/formpack/reporting/export.py | 5 ++-- src/formpack/schema/datadef.py | 20 ++++++++++---- src/formpack/schema/fields.py | 34 +++++++++++------------ 5 files changed, 68 insertions(+), 48 deletions(-) diff --git a/src/formpack/pack.py b/src/formpack/pack.py index 275ad0fa..5e73c212 100644 --- a/src/formpack/pack.py +++ b/src/formpack/pack.py @@ -77,7 +77,6 @@ def version_id_keys(self, _versions=None): _id_keys.append(_id_key) return _id_keys - @property def available_translations(self): translations = set() @@ -191,25 +190,30 @@ def summr(v): return ''.join(out) @staticmethod - def _combine_field_choices(old_field, new_field): + def _combine_field_choices(older_version_field, current_field): """ - Update `new_field.choice` so that it contains everything from - `old_field.choice`. In the event of a conflict, `new_field.choice` + Updates `current_field.choice` so that it contains everything from + `older_version_field.choice`. In the event of a conflict, `current_field.choice` wins. If either field does not have a `choice` attribute, do nothing - :param old_field: FormField - :param new_field: FormField + :param older_version_field: FormField + :param current_field: FormField :return: FormField. Updated new_field """ + try: - old_choice = old_field.choice - new_choice = new_field.choice - new_field.merge_choice(old_choice) + if hasattr(current_field, "merge_choice"): + older_version_choice = older_version_field.choice + current_field.merge_choice(older_version_choice) except AttributeError: pass - return new_field + return current_field + + @staticmethod + def _do_fields_match(older_versioned_field, current_field): + return older_versioned_field.signature == current_field.signature def get_fields_for_versions(self, versions=-1, data_types=None): @@ -271,16 +275,23 @@ def get_fields_for_versions(self, versions=-1, data_types=None): for section_name, section in version.sections.items(): for field_name, field_object in section.fields.items(): if not isinstance(field_object, CopyField): + add_field = True if field_name in positions: position = positions[field_name] latest_field_object = tmp2d[position[0]][position[1]] # Because versions_desc are ordered from latest to oldest, # we use current field object as the old one and the one already # in position as the latest one. - new_object = self._combine_field_choices( - field_object, latest_field_object) - tmp2d[position[0]][position[1]] = new_object - else: + + if self._do_fields_match(field_object, latest_field_object): + new_object = self._combine_field_choices( + field_object, latest_field_object) + tmp2d[position[0]][position[1]] = new_object + add_field = False + else: + field_object.use_unique_name = True + + if add_field: try: current_index_list = tmp2d[index] current_index_list.append(field_object) @@ -291,7 +302,7 @@ def get_fields_for_versions(self, versions=-1, data_types=None): # it can happen when current version has more items than newest one. index = len(tmp2d) - 1 - positions[field_name] = (index, len(tmp2d[index]) - 1) + positions[field_object.contextual_name] = (index, len(tmp2d[index]) - 1) index += 1 diff --git a/src/formpack/reporting/autoreport.py b/src/formpack/reporting/autoreport.py index b2e9a9b9..4ae7b216 100644 --- a/src/formpack/reporting/autoreport.py +++ b/src/formpack/reporting/autoreport.py @@ -49,7 +49,7 @@ def _get_version_id_from_submission(self, submission): def _calculate_stats(self, submissions, fields, versions, lang): - metrics = {field.name: Counter() for field in fields} + metrics = {field.contextual_name: Counter() for field in fields} submissions_count = 0 submission_counts_by_version = Counter() @@ -67,7 +67,7 @@ def _calculate_stats(self, submissions, fields, versions, lang): entry = FormSubmission(entry).data for field in fields: if field.has_stats: - counter = metrics[field.name] + counter = metrics[field.contextual_name] raw_value = entry.get(field.path) if raw_value is not None: values = list(field.parse_values(raw_value)) @@ -80,7 +80,7 @@ def stats_generator(): for field in fields: yield (field, field.get_labels(lang)[0], - field.get_stats(metrics[field.name], lang=lang)) + field.get_stats(metrics[field.contextual_name], lang=lang)) return AutoReportStats(self, stats_generator(), submissions_count, submission_counts_by_version) @@ -110,7 +110,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel # field_name2...}, # ...} # - metrics = {f.name: defaultdict(Counter) for f in fields} + metrics = {f.contextual_name: defaultdict(Counter) for f in fields} for sbmssn in submissions: @@ -142,7 +142,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel else: values = (None,) - value_metrics = metrics[field.name] + value_metrics = metrics[field.contextual_name] for value in values: counters = value_metrics[value] @@ -177,7 +177,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel def stats_generator(): for field in fields: - stats = field.get_disaggregated_stats(metrics[field.name], lang=lang, + stats = field.get_disaggregated_stats(metrics[field.contextual_name], lang=lang, top_splitters=top_splitters) yield (field, field.get_labels(lang)[0], stats) @@ -194,11 +194,11 @@ def get_stats(self, submissions, fields=(), lang=UNSPECIFIED_TRANSLATION, split_ fields = all_fields else: fields.add(split_by) - fields = [field for field in all_fields if field.name in fields] + fields = [field for field in all_fields if field.contextual_name in fields] if split_by: try: - split_by_field = next(f for f in fields if f.name == split_by) + split_by_field = next(f for f in fields if f.contextual_name == split_by) except StopIteration: raise ValueError('No field matching name "%s" ' 'for split_by' % split_by) diff --git a/src/formpack/reporting/export.py b/src/formpack/reporting/export.py index c8ab7752..16d864d3 100644 --- a/src/formpack/reporting/export.py +++ b/src/formpack/reporting/export.py @@ -170,7 +170,7 @@ def get_fields_labels_tags_for_all_versions(self, for field in all_fields: section_fields.setdefault(field.section.name, []).append( - (field.name, field) + (field.contextual_name, field) ) section_labels.setdefault(field.section.name, []).append( field.get_labels(lang, group_sep, @@ -197,7 +197,6 @@ def get_fields_labels_tags_for_all_versions(self, auto_field_names.append( "_submission_{}".format(copy_field)) - # Flatten field labels and names. Indeed, field.get_labels() # and self.names return a list because a multiple select field can # have several values. We needed them grouped to insert them at the @@ -218,7 +217,7 @@ def get_fields_labels_tags_for_all_versions(self, # labels, add the tags once for each label tags.extend( [flatten_tag_list(field.tags, tag_cols_and_seps)] * - len(field.value_names) + len(field.value_names) ) names = [name for name_list in name_lists for name in name_list] diff --git a/src/formpack/schema/datadef.py b/src/formpack/schema/datadef.py index 9480d53f..e12f2e06 100644 --- a/src/formpack/schema/datadef.py +++ b/src/formpack/schema/datadef.py @@ -24,15 +24,25 @@ class FormDataDef(object): def __init__(self, name, labels=None, has_stats=False, *args, **kwargs): self.name = name self.unique_name = name + self.use_unique_name = False self.labels = labels or {} - self.value_names = self.get_value_names() self.has_stats = has_stats def __repr__(self): - return "<%s name='%s'>" % (self.__class__.__name__, self.name) + return "<%s name='%s'>" % (self.__class__.__name__, self.contextual_name) + + @property + def contextual_name(self): + if self.use_unique_name: + return self.unique_name + return self.name + + @property + def value_names(self): + return self.get_value_names() def get_value_names(self): - return [self.name] + return [self.contextual_name] @classmethod def from_json_definition(cls, definition, translations=None): @@ -83,11 +93,11 @@ def from_json_definition(cls, definition, hierarchy=(None,), parent=None, return cls(definition['name'], labels, hierarchy=hierarchy, parent=parent) def get_label(self, lang=UNSPECIFIED_TRANSLATION): - return [self.labels.get(lang) or self.name] + return [self.labels.get(lang) or self.contextual_name] def __repr__(self): parent_name = getattr(self.parent, 'name', None) - return "" % (self.name, parent_name) + return "" % (self.contextual_name, parent_name) class FormChoice(FormDataDef): diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index 4e818333..7ae32377 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -136,12 +136,12 @@ def _get_label(self, lang=UNSPECIFIED_TRANSLATION, group_sep='/', return group_sep.join(path) # even if `lang` can be None, we don't want the `label` to be None. - label = self.labels.get(lang, self.name) + label = self.labels.get(lang, self.contextual_name) # If `label` is None, no matches are found, so return `field` name. - return self.name if label is None else label + return self.contextual_name if label is None else label def __repr__(self): - args = (self.__class__.__name__, self.name, self.data_type) + args = (self.__class__.__name__, self.contextual_name, self.data_type) return "<%s name='%s' type='%s'>" % args @classmethod @@ -218,7 +218,7 @@ def from_json_definition(cls, definition, hierarchy=None, return data_type_classes.get(data_type, cls)(**args) def format(self, val, lang=UNSPECIFIED_TRANSLATION, context=None): - return {self.name: val} + return {self.contextual_name: val} def get_stats(self, metrics, lang=UNSPECIFIED_TRANSLATION, limit=100): @@ -548,7 +548,7 @@ def __init__(self, name, hierarchy=(None,), section=None, *args, **kwargs): def get_labels(self, *args, **kwargs): """ Labels are the just the value name. Groups are ignored """ - return [self.name] + return [self.contextual_name] class ValidationStatusCopyField(CopyField): @@ -567,9 +567,9 @@ def format(self, val, lang=UNSPECIFIED_TRANSLATION, context=None): if isinstance(val, dict): if lang == UNSPECIFIED_TRANSLATION: - value = {self.name: val.get("uid", "")} + value = {self.contextual_name: val.get("uid", "")} else: - value = {self.name: val.get("label", "")} + value = {self.contextual_name: val.get("label", "")} else: value = super(CopyField, self).format(val=val, lang=lang, context=context) @@ -617,10 +617,10 @@ def get_labels(self, lang=UNSPECIFIED_TRANSLATION, group_sep='/', def get_value_names(self, multiple_select="both"): """ Return the list of field identifiers used by this field""" names = [] - names.append(self.name) + names.append(self.contextual_name) for data_type in ('latitude', 'longitude', 'altitude', 'precision'): - names.append('_%s_%s' % (self.name, data_type)) + names.append('_%s_%s' % (self.contextual_name, data_type)) return names @@ -678,7 +678,7 @@ def get_translation(self, val, lang=UNSPECIFIED_TRANSLATION): def format(self, val, lang=UNSPECIFIED_TRANSLATION, multiple_select="both"): val = self.get_translation(val, lang) - return {self.name: val} + return {self.contextual_name: val} def get_stats(self, metrics, lang=UNSPECIFIED_TRANSLATION, limit=100): @@ -792,15 +792,15 @@ def get_value_names(self, multiple_select="both"): """ Return the list of field identifiers used by this field""" names = [] if multiple_select in ("both", "summary"): - names.append(self.name) + names.append(self.contextual_name) if multiple_select in ("both", "details"): for option_name in self.choice.options.keys(): - names.append(self.name + '/' + option_name) + names.append(self.contextual_name + '/' + option_name) return names def __repr__(self): - data = (self.name, self.data_type) + data = (self.contextual_name, self.data_type) return "" % data # maybe try to cache those @@ -823,11 +823,11 @@ def format(self, val, lang=UNSPECIFIED_TRANSLATION, res.append(label) else: res.append(v) - cells[self.name] = " ".join(res) + cells[self.contextual_name] = " ".join(res) if multiple_select in ("both", "details"): for choice in val.split(): - cells[self.name + "/" + choice] = "1" + cells[self.contextual_name + "/" + choice] = "1" return cells def parse_values(self, raw_values): @@ -875,8 +875,8 @@ def __init__(self, *args, **kwargs): def parameter_value_names(self): # Value names must be unique across the entire form! return [ - self.name + '/' + name - for name, label in self.parameters_in_use + self.contextual_name + '/' + name + for name, label in self.parameters_in_use ] def get_labels(self, lang=UNSPECIFIED_TRANSLATION, group_sep='/', From 965245502fe86554502e15fe3be33134e7e3c2c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 14 Dec 2018 10:03:19 -0500 Subject: [PATCH 03/14] Changed unittest to match new representation of FormField --- tests/test_autoreport.py | 245 +++++++++++++++++++-------------------- 1 file changed, 122 insertions(+), 123 deletions(-) diff --git a/tests/test_autoreport.py b/tests/test_autoreport.py index b3381ffa..67c74d16 100644 --- a/tests/test_autoreport.py +++ b/tests/test_autoreport.py @@ -10,12 +10,12 @@ class TestAutoReport(unittest.TestCase): - maxDiff = None def assertDictEquals(self, arg1, arg2): def _j(arg): return json.dumps(arg, indent=4, sort_keys=True) + assert _j(arg1) == _j(arg2) def test_list_fields_on_packs(self): @@ -40,7 +40,7 @@ def test_list_fields_from_many_versions_on_packs(self): fields = { field.name: field for field in fp.get_fields_for_versions( - fp.versions.keys()) + fp.versions.keys()) } field_names = sorted(fields.keys()) self.assertListEqual(field_names, [ @@ -67,7 +67,6 @@ def test_list_fields_from_many_versions_on_packs(self): 'FormChoiceField', ]) - def test_simple_report(self): title, schemas, submissions = build_fixture('restaurant_profile') @@ -82,7 +81,7 @@ def test_simple_report(self): expected = [ ( - "", + "", 'nom du restaurant', {'frequency': [('Taco Truck', 1), ('Harvest', 1), @@ -98,7 +97,7 @@ def test_simple_report(self): 'total_count': 4} ), ( - "", + "", 'lieu', {'not_provided': 0, 'provided': 4, @@ -106,7 +105,7 @@ def test_simple_report(self): 'total_count': 4} ), ( - "", + "", 'type de restaurant', {'frequency': [('traditionnel', 2), ('avec vente \xe0 emporter', 1)], 'not_provided': 1, @@ -134,103 +133,103 @@ def test_simple_multi_version_report(self): self.assertListEqual(stats, [ ( - "", + "", u'inspector', - {u'frequency': [(u'burger', 5), (u'clouseau', 5)], - u'not_provided': 0, - u'percentage': [(u'burger', 50.0), (u'clouseau', 50.0)], - u'provided': 10, - u'show_graph': False, - u'total_count': 10} + {u'frequency': [(u'burger', 5), (u'clouseau', 5)], + u'not_provided': 0, + u'percentage': [(u'burger', 50.0), (u'clouseau', 50.0)], + u'provided': 10, + u'show_graph': False, + u'total_count': 10} ), ( - "", + "", u'did_you_find_the_site', - {u'frequency': [(0, 4), (1, 4), (u'yes', 1), (u'no', 1)], - u'not_provided': 0, - u'percentage': [(0, 40.0), - (1, 40.0), - (u'yes', 10.0), - (u'no', 10.0)], - u'provided': 10, - u'show_graph': True, - u'total_count': 10} + {u'frequency': [(0, 4), (1, 4), (u'yes', 1), (u'no', 1)], + u'not_provided': 0, + u'percentage': [(0, 40.0), + (1, 40.0), + (u'yes', 10.0), + (u'no', 10.0)], + u'provided': 10, + u'show_graph': True, + u'total_count': 10} ), ( - "", + "", u'was_there_damage_to_the_site', - {u'frequency': [(0, 2), (1, 2), (u'yes', 1)], - u'not_provided': 5, - u'percentage': [(0, 40.0), (1, 40.0), (u'yes', 20.0)], - u'provided': 5, - u'show_graph': True, - u'total_count': 10} + {u'frequency': [(0, 2), (1, 2), (u'yes', 1)], + u'not_provided': 5, + u'percentage': [(0, 40.0), (1, 40.0), (u'yes', 20.0)], + u'provided': 5, + u'show_graph': True, + u'total_count': 10} ), ( - "", + "", u'was_there_damage_to_the_site_dupe', - {u'frequency': [(1, 1), (u'yes', 1)], - u'not_provided': 8, - u'percentage': [(1, 50.0), (u'yes', 50.0)], - u'provided': 2, - u'show_graph': True, - u'total_count': 10} + {u'frequency': [(1, 1), (u'yes', 1)], + u'not_provided': 8, + u'percentage': [(1, 50.0), (u'yes', 50.0)], + u'provided': 2, + u'show_graph': True, + u'total_count': 10} ), ( - "", + "", u'ping', - {u'mean': 238.4, - u'median': 123, - u'mode': u'*', - u'not_provided': 5, - u'provided': 5, - u'show_graph': False, - u'stdev': 255.77392361224003, - u'total_count': 10} + {u'mean': 238.4, + u'median': 123, + u'mode': u'*', + u'not_provided': 5, + u'provided': 5, + u'show_graph': False, + u'stdev': 255.77392361224003, + u'total_count': 10} ), ( - "", + "", u'rssi', - {u'mean': 63.8, - u'median': u'65', - u'mode': u'*', - u'not_provided': 5, - u'provided': 5, - u'show_graph': False, - u'stdev': 35.22357165308481, - u'total_count': 10} + {u'mean': 63.8, + u'median': u'65', + u'mode': u'*', + u'not_provided': 5, + u'provided': 5, + u'show_graph': False, + u'stdev': 35.22357165308481, + u'total_count': 10} ), ( - "", + "", u'is_the_gate_secure', - {u'frequency': [(0, 2), (1, 2), (u'no', 1)], - u'not_provided': 5, - u'percentage': [(0, 40.0), (1, 40.0), (u'no', 20.0)], - u'provided': 5, - u'show_graph': True, - u'total_count': 10} + {u'frequency': [(0, 2), (1, 2), (u'no', 1)], + u'not_provided': 5, + u'percentage': [(0, 40.0), (1, 40.0), (u'no', 20.0)], + u'provided': 5, + u'show_graph': True, + u'total_count': 10} ), ( - "", + "", u'is_plant_life_encroaching', - {u'frequency': [(0, 1), (1, 3), (u'yes', 1)], - u'not_provided': 5, - u'percentage': [(0, 20.0), (1, 60.0), (u'yes', 20.0)], - u'provided': 5, - u'show_graph': True, - u'total_count': 10} + {u'frequency': [(0, 1), (1, 3), (u'yes', 1)], + u'not_provided': 5, + u'percentage': [(0, 20.0), (1, 60.0), (u'yes', 20.0)], + u'provided': 5, + u'show_graph': True, + u'total_count': 10} ), ( - "", + "", u'please_rate_the_impact_of_any_defects_observed', - {u'frequency': [(u'moderate', 4), (u'severe', 3), (u'low', 3)], - u'not_provided': 0, - u'percentage': [(u'moderate', 40.0), - (u'severe', 30.0), - (u'low', 30.0)], - u'provided': 10, - u'show_graph': True, - u'total_count': 10} + {u'frequency': [(u'moderate', 4), (u'severe', 3), (u'low', 3)], + u'not_provided': 0, + u'percentage': [(u'moderate', 40.0), + (u'severe', 30.0), + (u'low', 30.0)], + u'provided': 10, + u'show_graph': True, + u'total_count': 10} ) ]) @@ -247,47 +246,47 @@ def test_rich_report(self): stats = [(unicode(repr(f)), n, d) for f, n, d in stats] expected = [ - ("", - 'restaurant_name', - {'frequency': [('Felipes', 2), - ('The other one', 2), - ('That one', 1)], - 'not_provided': 1, - 'percentage': [('Felipes', 33.33), - ('The other one', 33.33), - ('That one', 16.67)], - 'provided': 5, - 'show_graph': False, - 'total_count': 6}), - ("", - 'location', - {'not_provided': 1, - 'provided': 5, - 'show_graph': False, - 'total_count': 6}), - ("", - 'when', - {'frequency': [('2001-01-01', 2), - ('2002-01-01', 2), - ('2003-01-01', 1)], - 'not_provided': 1, - 'percentage': [('2001-01-01', 33.33), - ('2002-01-01', 33.33), - ('2003-01-01', 16.67)], + ("", + 'restaurant_name', + {'frequency': [('Felipes', 2), + ('The other one', 2), + ('That one', 1)], + 'not_provided': 1, + 'percentage': [('Felipes', 33.33), + ('The other one', 33.33), + ('That one', 16.67)], + 'provided': 5, + 'show_graph': False, + 'total_count': 6}), + ("", + 'location', + {'not_provided': 1, + 'provided': 5, + 'show_graph': False, + 'total_count': 6}), + ("", + 'when', + {'frequency': [('2001-01-01', 2), + ('2002-01-01', 2), + ('2003-01-01', 1)], + 'not_provided': 1, + 'percentage': [('2001-01-01', 33.33), + ('2002-01-01', 33.33), + ('2003-01-01', 16.67)], - 'provided': 5, - 'show_graph': True, - 'total_count': 6}), - ("", - 'howmany', - {'mean': 1.6, - 'median': 2, - 'mode': 2, - 'not_provided': 1, - 'provided': 5, - 'show_graph': False, - 'stdev': 0.5477225575051661, - 'total_count': 6} + 'provided': 5, + 'show_graph': True, + 'total_count': 6}), + ("", + 'howmany', + {'mean': 1.6, + 'median': 2, + 'mode': 2, + 'not_provided': 1, + 'provided': 5, + 'show_graph': False, + 'stdev': 0.5477225575051661, + 'total_count': 6} ) ] for (i, stat) in enumerate(stats): @@ -307,7 +306,7 @@ def test_disaggregate(self): stats = [(unicode(repr(f)), n, d) for f, n, d in stats] expected = [ - ("", + ("", 'restaurant_name', {'not_provided': 1, 'provided': 5, @@ -334,13 +333,13 @@ def test_disaggregate(self): 'percentage': [('2001-01-01', 0.00), ('2002-01-01', 0.00), ('2003-01-01', 16.67)]})]}), - ("", + ("", 'location', {'not_provided': 1, 'provided': 5, 'show_graph': False, 'total_count': 6}), - ("", + ("", 'howmany', {'not_provided': 1, 'provided': 5, @@ -350,14 +349,14 @@ def test_disaggregate(self): {'mean': 1.5, 'median': 1.5, 'mode': '*', - 'stdev': 0.7071067811865476}), + 'stdev': 0.7071067811865476}), ('2002-01-01', {'mean': 2.0, 'median': 2.0, 'mode': 2, 'stdev': 0.0}), ('2003-01-01', {'mean': 1.0, 'median': 1, 'mode': '*', - 'stdev': u'*'}))})] + 'stdev': u'*'}))})] for (i, stat) in enumerate(stats): assert stat == expected[i] From 5953cde0952a900c60f8675992a052c7c9d8190f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 14 Dec 2018 10:04:26 -0500 Subject: [PATCH 04/14] Fixed stats calculation when multiple versions contains same question with different types --- src/formpack/reporting/autoreport.py | 39 ++++++++++++++++++++++++++-- src/formpack/schema/datadef.py | 2 +- src/formpack/schema/fields.py | 4 +-- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/formpack/reporting/autoreport.py b/src/formpack/reporting/autoreport.py index 4ae7b216..7fa40dd6 100644 --- a/src/formpack/reporting/autoreport.py +++ b/src/formpack/reporting/autoreport.py @@ -54,6 +54,16 @@ def _calculate_stats(self, submissions, fields, versions, lang): submissions_count = 0 submission_counts_by_version = Counter() + # When form contains questions with the same name with different types, + # Older found versions are pushed at the end of the list `fields` + # Because we want to match submission values with fields, we need to try + # to match with older version first. + # For example: Form contains two versions with one question. + # `reversed_fields` look this: + # [, + # ] + reversed_fields = list(reversed(fields)) + for entry in submissions: version_id = self._get_version_id_from_submission(entry) @@ -62,14 +72,39 @@ def _calculate_stats(self, submissions, fields, versions, lang): submissions_count += 1 submission_counts_by_version[version_id] += 1 + fields_to_skip = [] # TODO: do we really need FormSubmission ? entry = FormSubmission(entry).data - for field in fields: - if field.has_stats: + + for field in reversed_fields: + if field.has_stats and field.name not in fields_to_skip: counter = metrics[field.contextual_name] raw_value = entry.get(field.path) + field_contextual_name = "{}_{}_{}".format( + field.name, + field.data_type, + version_id + ) + if raw_value is not None: + # Matches with question with same name but different type + # takes place here. + # Reminder `field.contextual_name` equals `__`, + # If `field.contextual_name` matchs this pattern, it's only valid for submissions of + # the same ``. + # Thanks to `reversed_fields`, we compared those fields before the ones of the latest version + if field.contextual_name.startswith("{}_{}_v".format( + field.name, + field.data_type + )): + # We have a match, we won't evaluate other fields with same name + # for this submission. + if field.contextual_name == field_contextual_name: + fields_to_skip.append(field.name) + else: + continue + values = list(field.parse_values(raw_value)) counter.update(values) counter['__submissions__'] += 1 diff --git a/src/formpack/schema/datadef.py b/src/formpack/schema/datadef.py index e12f2e06..88124b85 100644 --- a/src/formpack/schema/datadef.py +++ b/src/formpack/schema/datadef.py @@ -97,7 +97,7 @@ def get_label(self, lang=UNSPECIFIED_TRANSLATION): def __repr__(self): parent_name = getattr(self.parent, 'name', None) - return "" % (self.contextual_name, parent_name) + return "" % (self.name, parent_name) class FormChoice(FormDataDef): diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index 7ae32377..8c81484b 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -142,7 +142,7 @@ def _get_label(self, lang=UNSPECIFIED_TRANSLATION, group_sep='/', def __repr__(self): args = (self.__class__.__name__, self.contextual_name, self.data_type) - return "<%s name='%s' type='%s'>" % args + return "<%s contextual_name='%s' type='%s'>" % args @classmethod def from_json_definition(cls, definition, hierarchy=None, @@ -801,7 +801,7 @@ def get_value_names(self, multiple_select="both"): def __repr__(self): data = (self.contextual_name, self.data_type) - return "" % data + return "" % data # maybe try to cache those def format(self, val, lang=UNSPECIFIED_TRANSLATION, From cc58e06a9b829e1bf6de9539184ecb1520ebe288 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 14 Dec 2018 10:23:59 -0500 Subject: [PATCH 05/14] Fixed typo --- src/formpack/reporting/autoreport.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/formpack/reporting/autoreport.py b/src/formpack/reporting/autoreport.py index 7fa40dd6..df62ddb9 100644 --- a/src/formpack/reporting/autoreport.py +++ b/src/formpack/reporting/autoreport.py @@ -88,10 +88,8 @@ def _calculate_stats(self, submissions, fields, versions, lang): ) if raw_value is not None: - # Matches with question with same name but different type - # takes place here. # Reminder `field.contextual_name` equals `__`, - # If `field.contextual_name` matchs this pattern, it's only valid for submissions of + # If `field.contextual_name` matches this pattern, it's only valid for submissions of # the same ``. # Thanks to `reversed_fields`, we compared those fields before the ones of the latest version if field.contextual_name.startswith("{}_{}_v".format( From f4809a4e89fdc838a237b86f0fc98b9935402990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 14 Dec 2018 11:06:22 -0500 Subject: [PATCH 06/14] Used property 'contextual_name' instead of 'name' --- tests/test_autoreport.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_autoreport.py b/tests/test_autoreport.py index 67c74d16..6432f506 100644 --- a/tests/test_autoreport.py +++ b/tests/test_autoreport.py @@ -25,7 +25,7 @@ def test_list_fields_on_packs(self): fields = fp.get_fields_for_versions() - field_names = [field.name for field in fields] + field_names = [field.contextual_name for field in fields] assert field_names == ['restaurant_name', 'location', 'eatery_type'] field_types = [field.__class__.__name__ for field in fields] @@ -39,7 +39,7 @@ def test_list_fields_from_many_versions_on_packs(self): self.assertEqual(len(fp.versions), 5) fields = { - field.name: field for field in fp.get_fields_for_versions( + field.contextual_name: field for field in fp.get_fields_for_versions( fp.versions.keys()) } field_names = sorted(fields.keys()) From dc693133429f1566ff62e440734e6021b5ae3d32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 14 Dec 2018 11:17:42 -0500 Subject: [PATCH 07/14] Changed unittests for multiple versions with questions with same name but different type --- tests/test_exports.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/test_exports.py b/tests/test_exports.py index 4ca6213f..55ad1b46 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -1427,7 +1427,8 @@ def test_copy_fields_multiple_versions(self): 'fields': ['restaurant_name', 'location', '_location_latitude', '_location_longitude', '_location_altitude', '_location_precision', 'eatery_type', - 'eatery_type/sit_down', 'eatery_type/takeaway', '_uuid'], + 'eatery_type/sit_down', 'eatery_type/takeaway', + 'eatery_type_select_one_rpV3', '_uuid'], 'data': [ [ 'Felipes', @@ -1439,6 +1440,7 @@ def test_copy_fields_multiple_versions(self): '', '', '', + '', '5dd6ecda-b993-42fc-95c2-7856a8940acf', ], @@ -1452,6 +1454,7 @@ def test_copy_fields_multiple_versions(self): '', '', '', + '', 'd6dee2e1-e0e6-4d08-9ad4-d78d77079f85', ], @@ -1462,9 +1465,10 @@ def test_copy_fields_multiple_versions(self): '-25.43', '', '', - 'takeaway', '', '', + '', + 'takeaway', '3f2ac742-305a-4b0d-b7ef-f7f57fcd14dc', ], @@ -1475,9 +1479,10 @@ def test_copy_fields_multiple_versions(self): '-24.53', '', '', - 'sit_down', '', '', + '', + 'sit_down', '3195b926-1578-4bac-80fc-735129a34090', ], @@ -1491,6 +1496,7 @@ def test_copy_fields_multiple_versions(self): 'takeaway sit_down', '1', '1', + '', '04cbcf32-ecbd-4801-829b-299463dcd125', ], @@ -1504,6 +1510,7 @@ def test_copy_fields_multiple_versions(self): 'sit_down', '1', '0', + '', '1f21b881-db1d-4629-9b82-f4111630187d', ], @@ -1517,6 +1524,7 @@ def test_copy_fields_multiple_versions(self): '', '0', '0', + '', 'fda7e49b-6c84-4cfe-b1a8-3de997ac0880', ], @@ -1530,6 +1538,7 @@ def test_copy_fields_multiple_versions(self): '', '', '', + '', 'a4277940-c8f3-4564-ad3b-14e28532a976', ] ] From 3985489bbba3b03e285ce1080b2ef6da6b3702b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 14 Dec 2018 15:35:44 -0500 Subject: [PATCH 08/14] Removed useless try/except when trying parsing values of NumField --- src/formpack/schema/fields.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index 8c81484b..452015d8 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -525,14 +525,10 @@ def get_disaggregated_stats(self, metrics, top_splitters, return stats def parse_values(self, raw_values): - try: - if self.data_type == "integer": - yield int(raw_values) - else: - yield float(raw_values) - except ValueError as e: - # TODO Remove try/except when https://github.com/kobotoolbox/formpack/issues/151 is fixed - logging.warning(str(e), exc_info=True) + if self.data_type == "integer": + yield int(raw_values) + else: + yield float(raw_values) class CopyField(FormField): From dc05c1de4b9a98027a218ef61ef8ad665f14636e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 18 Dec 2018 17:58:49 -0500 Subject: [PATCH 09/14] Applied requested changes for PR #187 --- src/formpack/pack.py | 5 ++--- src/formpack/reporting/autoreport.py | 30 +++++++++++++--------------- src/formpack/reporting/export.py | 5 ++--- src/formpack/schema/fields.py | 22 +++++++++++++++++--- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/formpack/pack.py b/src/formpack/pack.py index 5e73c212..2aed1973 100644 --- a/src/formpack/pack.py +++ b/src/formpack/pack.py @@ -203,9 +203,8 @@ def _combine_field_choices(older_version_field, current_field): """ try: - if hasattr(current_field, "merge_choice"): - older_version_choice = older_version_field.choice - current_field.merge_choice(older_version_choice) + older_version_choice = older_version_field.choice + current_field.merge_choice(older_version_choice) except AttributeError: pass diff --git a/src/formpack/reporting/autoreport.py b/src/formpack/reporting/autoreport.py index df62ddb9..e31aaa07 100644 --- a/src/formpack/reporting/autoreport.py +++ b/src/formpack/reporting/autoreport.py @@ -81,26 +81,24 @@ def _calculate_stats(self, submissions, fields, versions, lang): if field.has_stats and field.name not in fields_to_skip: counter = metrics[field.contextual_name] raw_value = entry.get(field.path) - field_contextual_name = "{}_{}_{}".format( - field.name, - field.data_type, - version_id - ) if raw_value is not None: - # Reminder `field.contextual_name` equals `__`, - # If `field.contextual_name` matches this pattern, it's only valid for submissions of - # the same ``. - # Thanks to `reversed_fields`, we compared those fields before the ones of the latest version - if field.contextual_name.startswith("{}_{}_v".format( - field.name, - field.data_type - )): - # We have a match, we won't evaluate other fields with same name - # for this submission. - if field.contextual_name == field_contextual_name: + # Because `field.path` is the same for all fields which + # have the same name, we want to be sure we don't append + # data multiple times. + + # If `field.use_unique_name` is `True`, `data` could be + # mapped to it depending on entry's version ID. + if field.use_unique_name: + if field.contextual_name == field.get_unique_name(version_id): + # We have a match. Skip other fields with the same name + # for this submission fields_to_skip.append(field.name) else: + # If we reach this line, it's because user has changed + # the type of question more than once and + # version is not the correct one yet. + # We need to keep looking for the good one. continue values = list(field.parse_values(raw_value)) diff --git a/src/formpack/reporting/export.py b/src/formpack/reporting/export.py index 16d864d3..200ac076 100644 --- a/src/formpack/reporting/export.py +++ b/src/formpack/reporting/export.py @@ -215,10 +215,9 @@ def get_fields_labels_tags_for_all_versions(self, # Add the tags for this field. If the field has multiple # labels, add the tags once for each label - tags.extend( - [flatten_tag_list(field.tags, tag_cols_and_seps)] * + tags_list = [flatten_tag_list(field.tags, tag_cols_and_seps)] * \ len(field.value_names) - ) + tags.extend(tags_list) names = [name for name_list in name_lists for name in name_list] diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index 452015d8..f8cdcf59 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -56,13 +56,31 @@ def __init__(self, name, labels, data_type, hierarchy=None, self.path = '/'.join(info.name for info in self.hierarchy[1:]) def create_unique_name(self, suffix): - self.unique_name = "{signature}_{suffix}".format( + self.unique_name = self.get_unique_name(suffix) + + def get_unique_name(self, suffix): + """ + Returns a unique name based on `self.signature` and `suffix`. + + :param suffix: str + :return: str + """ + return "{signature}_{suffix}".format( signature=self.signature, suffix=suffix ) @property def signature(self): + """ + Returns a string signature based `self.name` and `self.data_type`. + + Useful to compare two fields to each other to determine whether they + are the same. (same name, same type) + + :param suffix: str + :return: str + """ return "{name}_{type}".format( name=self.name, type=self.data_type, @@ -730,9 +748,7 @@ def merge_choice(self, choice): combined_options = choice.options.copy() combined_options.update(self.choice.options) self.choice.options = combined_options - self._empty_result() - self.value_names = self.get_value_names() def _empty_result(self): """ From 50b5a2e82aa98078986e265c5f351e2ee5db5d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 18 Dec 2018 18:06:04 -0500 Subject: [PATCH 10/14] Changed FormDataDef representation string --- src/formpack/schema/datadef.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/formpack/schema/datadef.py b/src/formpack/schema/datadef.py index 88124b85..6dd4e9b3 100644 --- a/src/formpack/schema/datadef.py +++ b/src/formpack/schema/datadef.py @@ -29,7 +29,7 @@ def __init__(self, name, labels=None, has_stats=False, *args, **kwargs): self.has_stats = has_stats def __repr__(self): - return "<%s name='%s'>" % (self.__class__.__name__, self.contextual_name) + return "<%s contextual_name='%s'>" % (self.__class__.__name__, self.contextual_name) @property def contextual_name(self): @@ -134,7 +134,6 @@ def all_from_json_definition(cls, definition, translation_list): option['name'] = choice_name return all_choices - @property def translations(self): for option in self.options.values(): From 16a896a28a258db9a21b69fc4d6e4b856ed8057a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 19 Dec 2018 12:15:04 -0500 Subject: [PATCH 11/14] Applied PEP-8 guidance --- src/formpack/reporting/export.py | 7 ++++--- tests/test_autoreport.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/formpack/reporting/export.py b/src/formpack/reporting/export.py index 200ac076..967b07d3 100644 --- a/src/formpack/reporting/export.py +++ b/src/formpack/reporting/export.py @@ -215,9 +215,10 @@ def get_fields_labels_tags_for_all_versions(self, # Add the tags for this field. If the field has multiple # labels, add the tags once for each label - tags_list = [flatten_tag_list(field.tags, tag_cols_and_seps)] * \ - len(field.value_names) - tags.extend(tags_list) + tags.extend( + [flatten_tag_list(field.tags, tag_cols_and_seps)] + * len(field.value_names) + ) names = [name for name_list in name_lists for name in name_list] diff --git a/tests/test_autoreport.py b/tests/test_autoreport.py index 6432f506..ccdbf64d 100644 --- a/tests/test_autoreport.py +++ b/tests/test_autoreport.py @@ -40,7 +40,7 @@ def test_list_fields_from_many_versions_on_packs(self): fields = { field.contextual_name: field for field in fp.get_fields_for_versions( - fp.versions.keys()) + fp.versions.keys()) } field_names = sorted(fields.keys()) self.assertListEqual(field_names, [ From d1c54293d1bf1021b23bcd27fba0673098a16bb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 19 Dec 2018 12:16:42 -0500 Subject: [PATCH 12/14] Removed useless import --- src/formpack/schema/fields.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index f8cdcf59..cda87438 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -6,7 +6,6 @@ from operator import itemgetter from functools import partial -import logging try: xrange = xrange From f0ddf7d68b792cdf7c59101ee79dc631bbcd3d0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 20 Dec 2018 10:01:51 -0500 Subject: [PATCH 13/14] Applied logic of '_calculate_stats()' to '_disaggregate_stats()' in order to match data with correct field --- src/formpack/reporting/autoreport.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/formpack/reporting/autoreport.py b/src/formpack/reporting/autoreport.py index e31aaa07..c5e161c9 100644 --- a/src/formpack/reporting/autoreport.py +++ b/src/formpack/reporting/autoreport.py @@ -128,6 +128,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel submission_counts_by_version = Counter() fields = [f for f in fields if f != split_by_field] + reversed_fields = list(reversed(fields)) # Then we map fields, values and splitters: # {field_name1: { @@ -143,10 +144,10 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel # metrics = {f.contextual_name: defaultdict(Counter) for f in fields} - for sbmssn in submissions: + for submission in submissions: # Skip unrequested versions - version_id = self._get_version_id_from_submission(sbmssn) + version_id = self._get_version_id_from_submission(submission) if version_id not in versions: continue @@ -159,16 +160,35 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel # since we are going to pop one entry, we make a copy # of it to avoid side effect - entry = dict(FormSubmission(sbmssn).data) + entry = dict(FormSubmission(submission).data) splitter = entry.pop(split_by_field.path, None) + fields_to_skip = [] - for field in fields: + for field in reversed_fields: if field.has_stats: raw_value = entry.get(field.path) if raw_value is not None: + # Because `field.path` is the same for all fields which + # have the same name, we want to be sure we don't append + # data multiple times. + + # If `field.use_unique_name` is `True`, `data` could be + # mapped to it depending on entry's version ID. + if field.use_unique_name: + if field.contextual_name == field.get_unique_name(version_id): + # We have a match. Skip other fields with the same name + # for this submission + fields_to_skip.append(field.name) + else: + # If we reach this line, it's because user has changed + # the type of question more than once and + # version is not the correct one yet. + # We need to keep looking for the good one. + continue + values = field.parse_values(raw_value) else: values = (None,) From 785ebdabdf8bcea602dfab441d71a966f5f23d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Mon, 8 Jul 2019 10:52:28 -0400 Subject: [PATCH 14/14] Restored try/except to handle unexpected values as blank when calculating stats --- src/formpack/reporting/autoreport.py | 12 +++++++++--- tests/test_autoreport.py | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/formpack/reporting/autoreport.py b/src/formpack/reporting/autoreport.py index d70a0cba..3c14a4e8 100644 --- a/src/formpack/reporting/autoreport.py +++ b/src/formpack/reporting/autoreport.py @@ -102,9 +102,15 @@ def _calculate_stats(self, submissions, fields, versions, lang): # We need to keep looking for the good one. continue - values = list(field.parse_values(raw_value)) - counter.update(values) - counter['__submissions__'] += 1 + try: + values = list(field.parse_values(raw_value)) + except ValueError as e: + logging.warning(str(e), exc_info=True) + # Treat the bad value as a blank response + counter[None] += 1 + else: + counter.update(values) + counter['__submissions__'] += 1 else: counter[None] += 1 diff --git a/tests/test_autoreport.py b/tests/test_autoreport.py index 66e28df5..d53d6a51 100644 --- a/tests/test_autoreport.py +++ b/tests/test_autoreport.py @@ -382,10 +382,10 @@ def test_disaggregate_extended_fields(self): assert percentage_responses[-1] == "..." def test_stats_with_non_numeric_value_for_numeric_field(self): - ''' + """ A string response to an integer question, for example, should not cause a crash; it should be treated as if no response was provided - ''' + """ title = 'Just one number' schemas = [{ @@ -414,7 +414,7 @@ def test_stats_with_non_numeric_value_for_numeric_field(self): stats = [(unicode(repr(f)), n, d) for f, n, d in stats] expected = [( - "", 'the_number', + "", 'the_number', { 'mean': 20.0, 'median': 20,