generated from linkml/linkml-template
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2013 from microbiomedata/2011-queries-to-delete-l…
…egacy-orphan-data-objects-and-functional-annotation-agg-records Delete queries for orphan functional_annotation_agg records and id non-conforming Data Objects
- Loading branch information
Showing
5 changed files
with
81 additions
and
1 deletion.
There are no files selected for viewing
2 changes: 1 addition & 1 deletion
2
...ts/mongodb_queries/data_qc/metabolomics_analysis_acrtivity_has_calibration_data_object.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
12 changes: 12 additions & 0 deletions
12
assets/mongodb_queries/find_id_nonconforming_data_objects.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// Find data_object_set documents whose `id` does not conform to the
// `nmdc:dobj-` prefix.
//
// The pattern is anchored with `^` so that only ids that START with
// `nmdc:dobj-` count as conforming; an unanchored pattern would also accept
// ids that merely contain that text somewhere in the middle.
db.getCollection('data_object_set').aggregate(
  [
    {
      $match: {
        id: {
          $not: { $regex: /^nmdc:dobj-/ }
        }
      }
    }
  ],
  // Limit execution to 60 s; let the server use temporary files if needed.
  { maxTimeMS: 60000, allowDiskUse: true }
);
26 changes: 26 additions & 0 deletions
26
assets/mongodb_queries/find_orphan_legacy_id_functional_annotation_agg.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Report orphan functional_annotation_agg records: rows whose
// `metagenome_annotation_id` matches no `id` in
// metagenome_annotation_activity_set, counted per annotation id.
db.functional_annotation_agg.aggregate(
  [
    // Attach the matching annotation activities (if any) to each row.
    {
      $lookup: {
        from: "metagenome_annotation_activity_set",
        localField: "metagenome_annotation_id",
        foreignField: "id",
        as: "annotation_activities"
      }
    },
    // Keep only the rows for which the lookup found nothing.
    { $match: { annotation_activities: { $size: 0 } } },
    // Emit one document per orphaned annotation id with its row count.
    { $group: { _id: "$metagenome_annotation_id", count: { $sum: 1 } } }
  ],
  { allowDiskUse: true, maxTimeMS: 90000 }
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import csv
import json
import re
|
||
|
||
def gen_delete(delete_list: list, key: str, limit: int, del_coll: str):
    """
    Generate an HTTP request body for deleting documents and save it as JSON.

    The body can be sent to the Runtime API's `/queries:run` endpoint in order
    to delete documents from the database. It is written to
    `<del_coll>_request_body.json` in the current working directory.

    Args:
        delete_list: Values to match; one delete statement is produced per
            value. Each value is converted to a string.
        key: Name of the document field the values are matched against.
        limit: Per-statement delete limit; must be 0 or 1.
        del_coll: Name of the collection to delete from. Also used as the
            prefix of the output file name.

    Returns:
        The request body (a dict) that was written to the file.

    Raises:
        ValueError: If `limit` is neither 0 nor 1.
    """
    if limit not in (0, 1):
        raise ValueError("limit must be 0 or 1.")
    # str() instead of "%s" % record: the latter raises TypeError when the
    # record is a tuple with more than one element.
    delete_statements = [
        {"q": {key: str(record)}, "limit": limit} for record in delete_list
    ]
    request_body_json = {"delete": del_coll, "deletes": delete_statements}
    request_body_file = del_coll + "_request_body.json"
    with open(request_body_file, "w", encoding="utf-8") as f:
        json.dump(request_body_json, f)
    return request_body_json
|
||
|
||
# Build the delete request for legacy orphan functional_annotation_agg
# records. The first column of each CSV row is used as the
# metagenome_annotation_id to delete.
with open("to_delete_nmdc.functional_annotation_agg.csv", newline="") as agg_csv:
    delete_agg_list = [fields[0] for fields in csv.reader(agg_csv, delimiter=",")]
del_coll = "functional_annotation_agg"
gen_delete(delete_agg_list, "metagenome_annotation_id", 0, del_coll)
|
||
# Delete legacy orphan data_object_set records.
# Ids are matched with `$regex` instead of by equality as a workaround for
# newline / double-backslash issues in the ids. The second CSV column holds
# the id; literal "\n" sequences are removed from it before matching.
delete_do_list = []
with open("legacy_orphan.nmdc.data_object_set.csv", newline="") as csvfile:
    do_records = csv.reader(csvfile, delimiter=",")
    for row in do_records:
        if not row:
            # csv.reader yields [] for blank lines; nothing to delete.
            continue
        strip_id = row[1].replace("\\n", "")
        if not strip_id:
            # An empty pattern matches every document, so with limit 1 it
            # would delete an arbitrary data object. Never emit it.
            continue
        print(strip_id)
        # Escape the id so characters such as "." or "+" are matched
        # literally rather than interpreted as regex metacharacters.
        delete_do_list.append(
            {"q": {"id": {"$regex": re.escape(strip_id)}}, "limit": 1}
        )
request_body_json = {"delete": "data_object_set", "deletes": delete_do_list}
request_body_file = "data_object_set_request_body.json"
with open(request_body_file, "w", encoding="utf-8") as f:
    json.dump(request_body_json, f)