Skip to content

Commit e773ca5

Browse files
committed
Add create_new_database_for_flows_with_missing_top_level_context
1 parent 7364f91 commit e773ca5

File tree

2 files changed

+166
-15
lines changed

2 files changed

+166
-15
lines changed

bw2io/importers/base_lci.py

Lines changed: 94 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
ParameterizedExchange,
1414
ProjectParameter,
1515
)
16-
import randonneur as rd
16+
import randonneur as rn
1717

1818
from ..errors import NonuniqueCode, StrategyError, WrongDatabase
1919
from ..export.excel import write_lci_matching
@@ -25,6 +25,7 @@
2525
link_iterable_by_fields,
2626
link_technosphere_based_on_name_unit_location,
2727
link_technosphere_by_activity_hash,
28+
match_against_only_available_in_given_context_tree,
2829
match_against_top_level_context,
2930
normalize_units,
3031
strip_biosphere_exc_locations,
@@ -33,6 +34,30 @@
3334
from .base import ImportBase
3435

3536

37+
# Keys that are stripped from an exchange dictionary by
# `_reformat_biosphere_exc_as_new_node` before the remaining attributes are
# reused as the attributes of a new biosphere node. Both spellings of the
# uncertainty type key are listed.
EXCHANGE_SPECIFIC_KEYS = (
    "amount",
    "functional",
    "loc",
    "maximum",
    "minimum",
    "output",
    "scale",
    "shape",
    "temporal_distribution",
    "uncertainty type",
    "uncertainty_type",
)
50+
51+
52+
def _reformat_biosphere_exc_as_new_node(exc: dict, db_name: str) -> dict:
    """Build the attribute dictionary of a new biosphere node from an exchange.

    Every key of ``exc`` that is not listed in ``EXCHANGE_SPECIFIC_KEYS`` is
    copied over; ``type``, ``exchanges``, ``database`` and ``code`` are then
    set (overwriting any value copied from ``exc``). ``exc`` itself is not
    modified.

    Args:
        exc: Exchange dictionary to convert.
        db_name: Name stored under the ``database`` key of the result.

    Returns:
        A new dictionary describing the node; its ``code`` is
        ``activity_hash(exc)``, computed from the unfiltered exchange.
    """
    node_attributes = {
        key: value
        for key, value in exc.items()
        if key not in EXCHANGE_SPECIFIC_KEYS
    }
    node_attributes.update(
        {
            "type": labels.biosphere_node_default,
            "exchanges": [],
            "database": db_name,
            "code": activity_hash(exc),
        }
    )
    return node_attributes
59+
60+
3661
class LCIImporter(ImportBase):
3762
"""Base class for format-specific importers.
3863
@@ -399,8 +424,8 @@ def match_database(
399424
def match_database_against_top_level_context(
400425
self,
401426
other_db_name: str,
402-
fields: Optional[List[str]] = None,
403-
kinds: Optional[List[str]] = None,
427+
fields: List[str] = ["name", "unit", "categories"],
428+
kinds: List[str] = labels.biosphere_edge_types,
404429
# randonneur_transformations: Optional[list] = None
405430
) -> None:
406431
"""
@@ -461,23 +486,79 @@ def match_database_against_only_available_in_given_context_tree(
461486
)
462487
)
463488

489+
def create_new_database_for_flows_with_missing_top_level_context(
    self,
    target_db_name: str,
    placeholder_db_name: str,
    fields: List[str] = ["name", "unit", "categories"],
    kinds: List[str] = labels.biosphere_edge_types,
) -> None:
    """
    Create proxy datasets for flows that have corresponding flows in another database, but not
    with the given top-level context.

    In other words, if we are trying to match `{'name': 'foo', 'categories': ['foo']}`, and
    our corresponding database only has `{'name': 'foo', 'categories': ['bar']}`, then we can
    create a placeholder dataset in a new database, as no amount of category manipulation will
    result in a match in the given target database.

    Only exchanges in ``self.data`` which have no ``input`` key, whose ``type`` is in
    ``kinds``, and which have a non-empty ``categories`` value are considered. Each such
    exchange is linked (its ``input`` is set) to a node in ``placeholder_db_name``. One node
    is created per distinct ``activity_hash`` code; exchanges describing the same flow share
    that node, and nodes already present in the placeholder database are reused.

    Args:
        target_db_name: Name of the existing database to compare against.
        placeholder_db_name: Name of the database receiving the placeholder nodes. It is
            registered if it does not exist yet.
        fields: Attributes used for the comparison. Must include ``categories``, of which
            only the first element (the top-level context) is compared. Never mutated here.
        kinds: Exchange ``type`` values to consider. Never mutated here.

    Raises:
        StrategyError: If ``target_db_name`` is not a known database, or if ``fields`` does
            not include ``categories``.
    """
    if target_db_name not in databases:
        raise StrategyError(f"Can't find target database {target_db_name}")
    if "categories" not in fields:
        raise StrategyError("`fields` must include `categories`")

    placeholder = Database(placeholder_db_name)
    if placeholder_db_name not in databases:
        placeholder.register(
            format=self.format,
            comment=(
                "Database for unlinked biosphere flows with wrong top-level context "
                f"from {self.db_name}. Generated by `bw2io` method "
                "`create_new_database_for_flows_with_missing_top_level_context`"
            ),
        )

    other_fields = [field for field in fields if field != "categories"]

    def base_key(obj) -> tuple:
        """Identity of a flow ignoring its context."""
        return tuple(obj.get(field) for field in other_fields)

    def top_level_context(categories):
        """First element of a non-empty ``categories`` value."""
        return tuple(categories)[0]

    # Single pass over the target database builds both lookup sets.
    known_flows = set()  # flow identity, any context
    known_flows_in_context = set()  # flow identity + top-level context
    for obj in Database(target_db_name):
        categories = obj.get("categories")
        if not categories:
            continue
        key = base_key(obj)
        known_flows.add(key)
        known_flows_in_context.add(key + (top_level_context(categories),))

    # Codes already stored in the placeholder database (e.g. from an earlier run), so
    # that the same flow is never written twice.
    placeholder_keys = {node["code"]: node.key for node in placeholder}

    for ds in self.data:
        for exc in ds.get("exchanges", []):
            if "input" in exc or exc.get("type") not in kinds:
                continue
            categories = exc.get("categories")
            if not categories:
                # Without a context there is no top-level context to compare.
                continue
            key = base_key(exc)
            if key not in known_flows:
                continue
            if key + (top_level_context(categories),) in known_flows_in_context:
                # Matchable by ordinary context manipulation; not our job.
                continue

            node_data = _reformat_biosphere_exc_as_new_node(exc, placeholder_db_name)
            code = node_data["code"]
            if code not in placeholder_keys:
                node = placeholder.new_node(**node_data)
                node.save()
                placeholder_keys[code] = node.key
            exc["input"] = placeholder_keys[code]
552+
464553
def create_new_biosphere(self, biosphere_name: str):
465554
"""Create new biosphere database from unlinked biosphere flows in ``self.data``"""
466555
if biosphere_name in databases:
467556
raise ValueError(f"{biosphere_name} database already exists")
468557

469-
def reformat(exc):
470-
return exc | {
471-
"type": labels.biosphere_node_default,
472-
"exchanges": [],
473-
"database": biosphere_name,
474-
"code": activity_hash(exc),
475-
}
476-
477558
bio_data = {
478559
(flow["database"], flow["code"]): flow
479560
for flow in [
480-
reformat(exc)
561+
_reformat_biosphere_exc_as_new_node(exc, biosphere_name)
481562
for ds in self.data
482563
for exc in ds.get("exchanges", [])
483564
if exc["type"] in labels.biosphere_edge_types and not exc.get("input")
@@ -579,7 +660,7 @@ def randonneur(
579660
edges_label="exchanges",
580661
verbose=verbose,
581662
case_sensitive=case_sensitive,
582-
)
663+
),
583664
)
584665

585666
def migrate(self, migration_name):

tests/base_lci_importer.py

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
import numpy as np
44
import pytest
5-
from bw2data import Database
5+
from bw2data import Database, databases
66
from bw2data.parameters import *
77
from bw2data.tests import bw2test
88

9-
from bw2io import ExcelImporter
9+
from bw2io.errors import StrategyError
1010
from bw2io.errors import NonuniqueCode, WrongDatabase
1111
from bw2io.importers.base_lci import LCIImporter
1212

@@ -695,3 +695,73 @@ def test_delete_pe_update_still_deletes():
695695
== 1
696696
)
697697
assert ParameterizedExchange.get(group="h").formula == "6 + 7"
698+
699+
700+
@bw2test
def test_create_new_database_for_flows_with_missing_top_level_context_new_database():
    """Only the flow known to the target database under a different top-level
    context gets a placeholder node; the unknown flow and the flow whose
    top-level context already matches stay unlinked."""

    def make_exchange(name, categories):
        return {
            "type": "custom",
            "name": name,
            "extra": True,
            "unit": "b",
            "categories": categories,
        }

    importer = LCIImporter("testcase")
    importer.data = [
        {
            "exchanges": [
                make_exchange("a", ("c", "d")),
                make_exchange("wrong", ("c", "d")),
                make_exchange("a", ("e", "c")),
            ]
        }
    ]

    # Target database does not exist
    with pytest.raises(StrategyError):
        importer.create_new_database_for_flows_with_missing_top_level_context(
            "missing",
            "placeholder",
        )

    matchable_data = {
        ("matchable", "a"): {
            "name": "a",
            "unit": "b",
            "extra": True,
            "categories": ("e", "f"),
        }
    }
    Database("matchable").write(matchable_data)

    # `fields` lacks `categories`
    with pytest.raises(StrategyError):
        importer.create_new_database_for_flows_with_missing_top_level_context(
            "matchable", "placeholder", fields=["name", "unit"]
        )

    importer.create_new_database_for_flows_with_missing_top_level_context(
        "matchable",
        "placeholder",
        fields=["categories", "name", "unit", "extra"],
        kinds=["custom"],
    )

    assert "placeholder" in databases
    placeholder = Database("placeholder")
    assert len(placeholder) == 1

    placeholder_node = list(placeholder)[0]
    assert placeholder_node["name"] == "a"
    assert placeholder_node["unit"] == "b"
    assert placeholder_node["categories"] == ("c", "d")

    first, *others = importer.data[0]["exchanges"]
    assert first["input"] == placeholder_node.key
    assert not any("input" in exc for exc in others)

0 commit comments

Comments
 (0)