kobotoolbox · noliveleger · Dec 13, 2018 · Dec 13, 2018 · Dec 13, 2018 · Dec 14, 2018
diff --git a/src/formpack/pack.py b/src/formpack/pack.py
@@ -77,7 +77,6 @@ def version_id_keys(self, _versions=None):
                 _id_keys.append(_id_key)
         return _id_keys
 
-
     @property
     def available_translations(self):
         translations = set()
@@ -191,25 +190,30 @@ def summr(v):
         return ''.join(out)
 
     @staticmethod
-    def _combine_field_choices(old_field, new_field):
+    def _combine_field_choices(older_version_field, current_field):
         """
-        Update `new_field.choice` so that it contains everything from
-        `old_field.choice`. In the event of a conflict, `new_field.choice`
+        Updates `current_field.choice` so that it contains everything from
+        `older_version_field.choice`. In the event of a conflict, `current_field.choice`
         wins. If either field does not have a `choice` attribute, do
         nothing
 
-        :param old_field: FormField
-        :param new_field: FormField
+        :param older_version_field: FormField
+        :param current_field: FormField
         :return: FormField. Updated new_field
         """
+
         try:
-            old_choice = old_field.choice
-            new_choice = new_field.choice
-            new_field.merge_choice(old_choice)
+            if hasattr(current_field, "merge_choice"):
+                older_version_choice = older_version_field.choice
+                current_field.merge_choice(older_version_choice)
         except AttributeError:
             pass
 
-        return new_field
+        return current_field
+
+    @staticmethod
+    def _do_fields_match(older_versioned_field, current_field):
+        return older_versioned_field.signature == current_field.signature
 
     def get_fields_for_versions(self, versions=-1, data_types=None):
 
@@ -233,7 +237,6 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
             if isinstance(data_types, str_types):
                 data_types = [data_types]
 
-
         # tmp2 is a 2 dimensions list of `field`.
         # First dimension is the position of fields where they should be in the latest version
         # Second dimension is their position in the stack at the same position.
@@ -272,16 +275,23 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
             for section_name, section in version.sections.items():
                 for field_name, field_object in section.fields.items():
                     if not isinstance(field_object, CopyField):
+                        add_field = True
                         if field_name in positions:
                             position = positions[field_name]
                             latest_field_object = tmp2d[position[0]][position[1]]
                             # Because versions_desc are ordered from latest to oldest,
                             # we use current field object as the old one and the one already
                             # in position as the latest one.
-                            new_object = self._combine_field_choices(
-                                field_object, latest_field_object)
-                            tmp2d[position[0]][position[1]] = new_object
-                        else:
+
+                            if self._do_fields_match(field_object, latest_field_object):
+                                new_object = self._combine_field_choices(
+                                    field_object, latest_field_object)
+                                tmp2d[position[0]][position[1]] = new_object
+                                add_field = False
+                            else:
+                                field_object.use_unique_name = True
+
+                        if add_field:
                             try:
                                 current_index_list = tmp2d[index]
                                 current_index_list.append(field_object)
@@ -292,7 +302,7 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
                                 # it can happen when current version has more items than newest one.
                                 index = len(tmp2d) - 1
 
-                            positions[field_name] = (index, len(tmp2d[index]) - 1)
+                            positions[field_object.contextual_name] = (index, len(tmp2d[index]) - 1)
 
                         index += 1
 

diff --git a/src/formpack/reporting/autoreport.py b/src/formpack/reporting/autoreport.py
@@ -49,11 +49,21 @@ def _get_version_id_from_submission(self, submission):
 
     def _calculate_stats(self, submissions, fields, versions, lang):
 
-        metrics = {field.name: Counter() for field in fields}
+        metrics = {field.contextual_name: Counter() for field in fields}
 
         submissions_count = 0
         submission_counts_by_version = Counter()
 
+        # When form contains questions with the same name with different types,
+        # Older found versions are pushed at the end of the list `fields`
+        # Because we want to match submission values with fields, we need to try
+        # to match with older version first.
+        # For example: Form contains two versions with one question.
+        # `reversed_fields` look this:
+        # [<FormField type="text" contextual_name="question_text_v123456">,
+        #  <FormField type="integer" contextual_name="question">]
+        reversed_fields = list(reversed(fields))
+
         for entry in submissions:
 
             version_id = self._get_version_id_from_submission(entry)
@@ -62,14 +72,37 @@ def _calculate_stats(self, submissions, fields, versions, lang):
 
             submissions_count += 1
             submission_counts_by_version[version_id] += 1
+            fields_to_skip = []
 
             # TODO: do we really need FormSubmission ?
             entry = FormSubmission(entry).data
-            for field in fields:
-                if field.has_stats:
-                    counter = metrics[field.name]
+
+            for field in reversed_fields:
+                if field.has_stats and field.name not in fields_to_skip:
+                    counter = metrics[field.contextual_name]
                     raw_value = entry.get(field.path)
+                    field_contextual_name = "{}_{}_{}".format(
+                        field.name,
+                        field.data_type,
+                        version_id
+                    )
+
                     if raw_value is not None:
+                        # Reminder `field.contextual_name` equals `<name>_<type>_<version_uid>`,
+                        # If `field.contextual_name` matches this pattern, it's only valid for submissions of
+                        # the same `<version_uid>`.
+                        # Thanks to `reversed_fields`, we compared those fields before the ones of the latest version
+                        if field.contextual_name.startswith("{}_{}_v".format(
+                            field.name,
+                            field.data_type
+                        )):
+                            # We have a match, we won't evaluate other fields with same name
+                            # for this submission.
+                            if field.contextual_name == field_contextual_name:
+                                fields_to_skip.append(field.name)
+                            else:
+                                continue
+
                         values = list(field.parse_values(raw_value))
                         counter.update(values)
                         counter['__submissions__'] += 1
@@ -80,7 +113,7 @@ def stats_generator():
             for field in fields:
                 yield (field,
                        field.get_labels(lang)[0],
-                       field.get_stats(metrics[field.name], lang=lang))
+                       field.get_stats(metrics[field.contextual_name], lang=lang))
 
         return AutoReportStats(self, stats_generator(), submissions_count,
                                submission_counts_by_version)
@@ -110,7 +143,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
         #              field_name2...},
         #         ...}
         #
-        metrics = {f.name: defaultdict(Counter) for f in fields}
+        metrics = {f.contextual_name: defaultdict(Counter) for f in fields}
 
         for sbmssn in submissions:
 
@@ -142,7 +175,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
                     else:
                         values = (None,)
 
-                    value_metrics = metrics[field.name]
+                    value_metrics = metrics[field.contextual_name]
 
                     for value in values:
                         counters = value_metrics[value]
@@ -177,7 +210,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
 
         def stats_generator():
             for field in fields:
-                stats = field.get_disaggregated_stats(metrics[field.name], lang=lang,
+                stats = field.get_disaggregated_stats(metrics[field.contextual_name], lang=lang,
                                                       top_splitters=top_splitters)
                 yield (field, field.get_labels(lang)[0], stats)
 
@@ -194,11 +227,11 @@ def get_stats(self, submissions, fields=(), lang=UNSPECIFIED_TRANSLATION, split_
             fields = all_fields
         else:
             fields.add(split_by)
-            fields = [field for field in all_fields if field.name in fields]
+            fields = [field for field in all_fields if field.contextual_name in fields]
 
         if split_by:
             try:
-                split_by_field = next(f for f in fields if f.name == split_by)
+                split_by_field = next(f for f in fields if f.contextual_name == split_by)
             except StopIteration:
                 raise ValueError('No field matching name "%s" '
                                  'for split_by' % split_by)

diff --git a/src/formpack/reporting/export.py b/src/formpack/reporting/export.py
@@ -170,7 +170,7 @@ def get_fields_labels_tags_for_all_versions(self,
 
         for field in all_fields:
             section_fields.setdefault(field.section.name, []).append(
-                (field.name, field)
+                (field.contextual_name, field)
             )
             section_labels.setdefault(field.section.name, []).append(
                 field.get_labels(lang, group_sep,
@@ -197,7 +197,6 @@ def get_fields_labels_tags_for_all_versions(self,
                         auto_field_names.append(
                             "_submission_{}".format(copy_field))
 
-
         # Flatten field labels and names. Indeed, field.get_labels()
         # and self.names return a list because a multiple select field can
         # have several values. We needed them grouped to insert them at the
@@ -218,7 +217,7 @@ def get_fields_labels_tags_for_all_versions(self,
                 # labels, add the tags once for each label
                 tags.extend(
                     [flatten_tag_list(field.tags, tag_cols_and_seps)] *
-                        len(field.value_names)
+                    len(field.value_names)
                 )
 
             names = [name for name_list in name_lists for name in name_list]

diff --git a/src/formpack/schema/datadef.py b/src/formpack/schema/datadef.py
@@ -23,15 +23,26 @@ class FormDataDef(object):
 
     def __init__(self, name, labels=None, has_stats=False, *args, **kwargs):
         self.name = name
+        self.unique_name = name
+        self.use_unique_name = False
         self.labels = labels or {}
-        self.value_names = self.get_value_names()
         self.has_stats = has_stats
 
     def __repr__(self):
-        return "<%s name='%s'>" % (self.__class__.__name__, self.name)
+        return "<%s name='%s'>" % (self.__class__.__name__, self.contextual_name)
+
+    @property
+    def contextual_name(self):
+        if self.use_unique_name:
+            return self.unique_name
+        return self.name
+
+    @property
+    def value_names(self):
+        return self.get_value_names()
 
     def get_value_names(self):
-        return [self.name]
+        return [self.contextual_name]
 
     @classmethod
     def from_json_definition(cls, definition, translations=None):
@@ -48,6 +59,9 @@ def _extract_json_labels(cls, definition, translations):
             labels = {}
         return labels
 
+    def create_unique_name(self, suffix):
+        pass
+
 
 class FormGroup(FormDataDef):  # useful to get __repr__
     pass
@@ -79,7 +93,7 @@ def from_json_definition(cls, definition, hierarchy=(None,), parent=None,
         return cls(definition['name'], labels, hierarchy=hierarchy, parent=parent)
 
     def get_label(self, lang=UNSPECIFIED_TRANSLATION):
-        return [self.labels.get(lang) or self.name]
+        return [self.labels.get(lang) or self.contextual_name]
 
     def __repr__(self):
         parent_name = getattr(self.parent, 'name', None)