diff --git a/src/azul/service/avro_pfb.py b/src/azul/service/avro_pfb.py
index 3f689ec9a..06561c0ef 100644
--- a/src/azul/service/avro_pfb.py
+++ b/src/azul/service/avro_pfb.py
@@ -60,6 +60,7 @@
     value_and_unit,
 )
 from azul.types import (
+    AnyJSON,
     AnyMutableJSON,
     JSON,
     MutableJSON,
@@ -643,6 +644,22 @@ def _update_replica_schema(*,
                            path: Sequence[str],
                            key: str,
                            value: AnyMutableJSON):
+    """
+    Update in place a (part of an) existing PFB schema to ensure that it
+    accommodates a given (part of a) JSON document. The schema will only ever
+    expand, so after updating it will describe a superset of the documents that
+    it described pre-update. Starting from an empty schema, repeatedly calling
+    this function allows us to discover a general schema for a series of
+    documents of unknown shape.
+    :param schema: a part of a PFB schema. It may be empty.
+    :param path: the series of field names that locate `schema` within its
+                 top-level parent schema. The first entry should be the name of
+                 the underlying PFB entity's record type.
+    :param key: the key within `schema` whose associated value will be updated
+                to describe `value`. This is the only part of `schema` that may
+                be mutated.
+    :param value: a part of a PFB entity.
+    """
     try:
         old_type = schema[key]
     except KeyError:
@@ -700,11 +717,20 @@ def _update_replica_schema(*,
 
 def _new_replica_schema(*,
                         path: Sequence[str],
-                        value: AnyMutableJSON,
+                        value: AnyJSON,
                         ) -> AnyMutableJSON:
+    """
+    Create a part of a PFB schema to describe a part of a PFB entity represented
+    as a JSON document.
+    :param path: the location of `value` within the root document as a series
+                 of keys. The first key should be the name of the underlying PFB
+                 entity's type within the schema.
+    :param value: a part of a PFB entity.
+    :return: JSON describing the contents of `value` as a part of PFB schema.
+    """
     if value is None:
         result = 'null'
-    elif isinstance(value, list):
+    elif isinstance(value, (tuple, list)):
         # Empty list indicates "no type" (emtpy union). This will be replaced
         # with an actual type unless we never encounter a non-empty array.
         result = {'type': 'array', 'items': []}
diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py
index 82d2af11d..0b256528b 100644
--- a/test/service/test_manifest.py
+++ b/test/service/test_manifest.py
@@ -78,6 +78,7 @@
 )
 from azul.json import (
     copy_json,
+    json_hash,
 )
 from azul.logging import (
     configure_test_logging,
@@ -1298,12 +1299,12 @@ def test_manifest_content_disposition_header(self):
                      'The format is replica-based')
     @manifest_test
     def test_verbatim_jsonl_manifest(self):
-        bundle = self._load_canned_bundle(one(self.bundles()))
         expected = [
             {
                 'type': replica_type,
                 'contents': bundle.metadata_files[key],
             }
+            for bundle in map(self._load_canned_bundle, self.bundles())
             for replica_type, key in [
                 ('links', 'links.json'),
                 ('cell_suspension', 'cell_suspension_0.json'),
@@ -1320,12 +1321,8 @@ def test_verbatim_jsonl_manifest(self):
             for row in response.content.decode().splitlines()
         ]
 
-        def sort_key(hca_doc: JSON) -> str:
-            try:
-                return hca_doc['contents']['provenance']['document_id']
-            except KeyError:
-                assert hca_doc['contents']['schema_type'] == 'link_bundle'
-                return ''
+        def sort_key(row: JSON) -> str:
+            return json_hash(row).digest()
 
         expected.sort(key=sort_key)
         response.sort(key=sort_key)