Skip to content

Commit

Permalink
Merge pull request #2032 from microbiomedata/2031-migrations-restore-…
Browse files Browse the repository at this point in the history
…adapter-method-that-was-discarded-during-merge

Restore adapter method lost during PR #2009 merge: `do_for_each_document`
  • Loading branch information
turbomam authored Jun 5, 2024
2 parents 076d7b3 + 19f0f4f commit c2692d9
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 0 deletions.
10 changes: 10 additions & 0 deletions nmdc_schema/migrators/adapters/adapter_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,13 @@ def set_field_of_each_document(
This method is a specialized alternative to the `process_each_document` method.
"""
pass

@abstractmethod
def do_for_each_document(
    self,
    collection_name: str,
    action: Callable[[dict], None],
) -> None:
    r"""
    Invokes `action` once for each document in the collection named `collection_name`.

    This method exists so that callers can walk over all documents in a collection
    without actually modifying them. Being abstract, it carries no behavior here;
    each concrete adapter provides its own implementation.
    """
32 changes: 32 additions & 0 deletions nmdc_schema/migrators/adapters/dictionary_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,3 +332,35 @@ def set_field_of_each_document(
if collection_name in self._db:
for document in self._db[collection_name]:
document[field_name] = value

def do_for_each_document(
    self,
    collection_name: str,
    action: Callable[[dict], None],
) -> None:
    r"""
    Calls `action` on every document in the named collection, one document at a time.

    This method is meant for walking over a collection's documents without actually
    modifying them. If the database has no collection with the given name, `action`
    is never called.

    >>> total = 0
    >>> def add_to_total(payment: dict) -> None:
    ...     global total
    ...     total += payment["amount"]
    >>>
    >>> database = {
    ...   "payment_set": [
    ...     {"id": "111", "amount": 100},
    ...     {"id": "222", "amount": 200},
    ...     {"id": "333", "amount": 300}
    ...   ]
    ... }
    >>> da = DictionaryAdapter(database)
    >>> total
    0
    >>> da.do_for_each_document("payment_set", add_to_total)
    >>> total
    600
    """

    # A collection that is absent from the database has no documents to visit.
    if collection_name not in self._db:
        return

    documents = self._db[collection_name]
    for doc in documents:
        action(doc)
13 changes: 13 additions & 0 deletions nmdc_schema/migrators/adapters/mongo_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,16 @@ def set_field_of_each_document(
collection = self._db.get_collection(name=collection_name)
collection.update_many({}, {"$set": {field_name: value}})

def do_for_each_document(
    self,
    collection_name: str,
    action: Callable[[dict], None],
) -> None:
    r"""
    Calls `action` on every document in the named collection, one document at a time.

    This method is meant for walking over a collection's documents without actually
    modifying them. If the database reports no collection with the given name,
    `action` is never called.
    """

    # A collection the database does not list has no documents to visit.
    existing_names = self._db.list_collection_names()
    if collection_name not in existing_names:
        return

    # An unfiltered `find()` yields every document in the collection.
    cursor = self._db.get_collection(name=collection_name).find()
    for doc in cursor:
        action(doc)
36 changes: 36 additions & 0 deletions nmdc_schema/migrators/adapters/test_dictionary_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,42 @@ def test_set_field_of_each_document(self):
assert len([doc for doc in collection if doc["_id"] == 2 and doc["id"] == 2 and doc["x"] == "new"]) == 1
assert len([doc for doc in collection if doc["_id"] == 3 and doc["id"] == 3 and doc["x"] == "new"]) == 1

def test_do_for_each_document(self):
    r"""
    Checks that `DictionaryAdapter.do_for_each_document` passes every document in a
    collection to the supplied function, in list order, and leaves the collection
    unchanged.
    """

    # Set up:
    collection_name = "my_collection"
    document_1 = dict(_id=1, id=1, x="a")
    document_2 = dict(_id=2, id=2, x="b")
    document_3 = dict(_id=3, id=3, x="c")
    self.db[collection_name] = [document_1, document_2, document_3]
    assert len(self.db[collection_name]) == 3

    # Collect values in a local list that the callback closes over. Unlike a
    # temporary attribute on the test instance, this needs no clean-up step, so
    # nothing is left behind when an assertion below fails.
    characters = []

    def append_x_to_sequence(doc: dict) -> None:
        r"""Example action that appends the document's `x` value to the list."""
        characters.append(doc["x"])

    # Invoke function-under-test:
    adapter = DictionaryAdapter(database=self.db)
    adapter.do_for_each_document(collection_name, append_x_to_sequence)

    # Validate result:
    # - The list consists of the `x` values from the documents in the collection.
    assert characters == ["a", "b", "c"]
    # - The collection was not modified.
    collection = self.db[collection_name]
    for expected_id, expected_x in [(1, "a"), (2, "b"), (3, "c")]:
        matching_documents = [
            doc
            for doc in collection
            if doc["_id"] == expected_id
            and doc["id"] == expected_id
            and doc["x"] == expected_x
        ]
        assert len(matching_documents) == 1

def test_callbacks(self):
# Set up:
Expand Down
38 changes: 38 additions & 0 deletions nmdc_schema/migrators/adapters/test_mongo_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,44 @@ def test_set_field_of_each_document(self):
assert collection.count_documents({"_id": 2, "id": 2, "x": "new"}) == 1
assert collection.count_documents({"_id": 3, "id": 3, "x": "new"}) == 1

def test_do_for_each_document(self):
    r"""
    Checks that `MongoAdapter.do_for_each_document` passes every document in a
    collection to the supplied function and leaves the collection unchanged.
    """

    # Set up:
    collection_name = "my_collection"
    document_1 = dict(_id=1, id=1, x="a")
    document_2 = dict(_id=2, id=2, x="b")
    document_3 = dict(_id=3, id=3, x="c")
    self.db.create_collection(collection_name)
    self.db.get_collection(collection_name).insert_many(
        [document_1, document_2, document_3]
    )

    # Collect values in a local list that the callback closes over. Unlike a
    # temporary attribute on the test instance, this needs no clean-up step, so
    # nothing is left behind when an assertion below fails.
    characters = []

    def append_x_to_sequence(doc: dict) -> None:
        r"""Example action that appends the document's `x` value to the list."""
        characters.append(doc["x"])

    # Invoke function-under-test:
    adapter = MongoAdapter(database=self.db)
    adapter.do_for_each_document(collection_name, append_x_to_sequence)

    # Validate result:
    # - The list consists of the `x` values from the documents in the collection.
    # NOTE(review): this relies on `find()` returning documents in insertion
    #               order, as the original assertions did; confirm that is intended.
    assert characters == ["a", "b", "c"]
    # - The collection was not modified.
    collection = self.db.get_collection(collection_name)
    for expected_id, expected_x in [(1, "a"), (2, "b"), (3, "c")]:
        query = {"_id": expected_id, "id": expected_id, "x": expected_x}
        assert collection.count_documents(query) == 1

def test_callbacks(self):
# Set up:
Expand Down

0 comments on commit c2692d9

Please sign in to comment.