|
13 | 13 | ParameterizedExchange,
|
14 | 14 | ProjectParameter,
|
15 | 15 | )
|
16 |
| -import randonneur as rd |
| 16 | +import randonneur as rn |
17 | 17 |
|
18 | 18 | from ..errors import NonuniqueCode, StrategyError, WrongDatabase
|
19 | 19 | from ..export.excel import write_lci_matching
|
|
25 | 25 | link_iterable_by_fields,
|
26 | 26 | link_technosphere_based_on_name_unit_location,
|
27 | 27 | link_technosphere_by_activity_hash,
|
| 28 | + match_against_only_available_in_given_context_tree, |
28 | 29 | match_against_top_level_context,
|
29 | 30 | normalize_units,
|
30 | 31 | strip_biosphere_exc_locations,
|
|
33 | 34 | from .base import ImportBase
|
34 | 35 |
|
35 | 36 |
|
# Keys that `_reformat_biosphere_exc_as_new_node` drops from an exchange
# dictionary before the remaining data is reused as a node.
EXCHANGE_SPECIFIC_KEYS = (
    "amount",
    "functional",
    "loc",
    "maximum",
    "minimum",
    "output",
    "scale",
    "shape",
    "temporal_distribution",
    # Both spellings are listed, so either form is filtered out.
    "uncertainty type",
    "uncertainty_type",
)
| 50 | + |
| 51 | + |
def _reformat_biosphere_exc_as_new_node(exc: dict, db_name: str) -> dict:
    """Return a new node dictionary derived from the exchange ``exc``.

    Every key listed in ``EXCHANGE_SPECIFIC_KEYS`` is left out of the copy.
    The ``type``, ``exchanges``, ``database`` and ``code`` entries are then
    set (overriding any value copied from ``exc``); ``code`` is
    ``activity_hash(exc)`` computed on the unmodified exchange. ``exc`` itself
    is not changed.
    """
    node = {
        key: value
        for key, value in exc.items()
        if key not in EXCHANGE_SPECIFIC_KEYS
    }
    node.update(
        {
            "type": labels.biosphere_node_default,
            "exchanges": [],
            "database": db_name,
            "code": activity_hash(exc),
        }
    )
    return node
| 59 | + |
| 60 | + |
36 | 61 | class LCIImporter(ImportBase):
|
37 | 62 | """Base class for format-specific importers.
|
38 | 63 |
|
@@ -399,8 +424,8 @@ def match_database(
|
399 | 424 | def match_database_against_top_level_context(
|
400 | 425 | self,
|
401 | 426 | other_db_name: str,
|
402 |
| - fields: Optional[List[str]] = None, |
403 |
| - kinds: Optional[List[str]] = None, |
| 427 | + fields: List[str] = ["name", "unit", "categories"], |
| 428 | + kinds: List[str] = labels.biosphere_edge_types, |
404 | 429 | # randonneur_transformations: Optional[list] = None
|
405 | 430 | ) -> None:
|
406 | 431 | """
|
@@ -461,23 +486,79 @@ def match_database_against_only_available_in_given_context_tree(
|
461 | 486 | )
|
462 | 487 | )
|
463 | 488 |
|
| 489 | + def create_new_database_for_flows_with_missing_top_level_context( |
| 490 | + self, |
| 491 | + target_db_name: str, |
| 492 | + placeholder_db_name: str, |
| 493 | + fields: List[str] = ["name", "unit", "categories"], |
| 494 | + kinds: List[str] = labels.biosphere_edge_types, |
| 495 | + ) -> None: |
| 496 | + """ |
| 497 | + Create proxy datasets for flows who have corresponding flows in another database, but not |
| 498 | + with the given top-level context. |
| 499 | +
|
| 500 | + In other words, if we are trying to match `{'name': 'foo', 'categories': ['foo']}`, and |
| 501 | + our corresponding database only has `{'name': 'foo', 'categories': ['bar']}`, then we can |
| 502 | + create a placeholder dataset in a new database, as no amount of category manipulation will |
| 503 | + result in a match in the given target database. |
| 504 | + """ |
| 505 | + |
| 506 | + def get_key( |
| 507 | + obj: dict, fields: List[str], include_categories: bool = True |
| 508 | + ) -> tuple: |
| 509 | + return tuple( |
| 510 | + [obj.get(field) for field in fields] |
| 511 | + + ([tuple(obj["categories"])[0]] if include_categories else []) |
| 512 | + ) |
| 513 | + |
| 514 | + if target_db_name not in databases: |
| 515 | + raise StrategyError(f"Can't find target database {target_db_name}") |
| 516 | + if "categories" not in fields: |
| 517 | + raise StrategyError("`fields` must include `categories`") |
| 518 | + |
| 519 | + placeholder = Database(placeholder_db_name) |
| 520 | + if placeholder_db_name not in databases: |
| 521 | + placeholder.register( |
| 522 | + format=self.format, |
| 523 | + comment=f"Database for unlinked biosphere flows with wrong top-level context from {self.db_name}. Generated by `bw2io` method `create_new_database_for_flows_with_missing_top_level_context`", |
| 524 | + ) |
| 525 | + |
| 526 | + ffields = [field for field in fields if field != "categories"] |
| 527 | + mapping = { |
| 528 | + get_key(obj, ffields): obj.key |
| 529 | + for obj in Database(target_db_name) |
| 530 | + if obj.get("categories") |
| 531 | + } |
| 532 | + existence = { |
| 533 | + get_key(obj, ffields, False) |
| 534 | + for obj in Database(target_db_name) |
| 535 | + if obj.get("categories") |
| 536 | + } |
| 537 | + |
| 538 | + for ds in self.data: |
| 539 | + for exc in filter( |
| 540 | + lambda x: "input" not in x and x.get("type") in kinds, |
| 541 | + ds.get("exchanges", []), |
| 542 | + ): |
| 543 | + if ( |
| 544 | + get_key(exc, ffields) not in mapping |
| 545 | + and get_key(exc, ffields, False) in existence |
| 546 | + ): |
| 547 | + node = placeholder.new_node( |
| 548 | + **_reformat_biosphere_exc_as_new_node(exc, placeholder_db_name) |
| 549 | + ) |
| 550 | + node.save() |
| 551 | + exc["input"] = node.key |
| 552 | + |
464 | 553 | def create_new_biosphere(self, biosphere_name: str):
|
465 | 554 | """Create new biosphere database from unlinked biosphere flows in ``self.data``"""
|
466 | 555 | if biosphere_name in databases:
|
467 | 556 | raise ValueError(f"{biosphere_name} database already exists")
|
468 | 557 |
|
469 |
| - def reformat(exc): |
470 |
| - return exc | { |
471 |
| - "type": labels.biosphere_node_default, |
472 |
| - "exchanges": [], |
473 |
| - "database": biosphere_name, |
474 |
| - "code": activity_hash(exc), |
475 |
| - } |
476 |
| - |
477 | 558 | bio_data = {
|
478 | 559 | (flow["database"], flow["code"]): flow
|
479 | 560 | for flow in [
|
480 |
| - reformat(exc) |
| 561 | + _reformat_biosphere_exc_as_new_node(exc, biosphere_name) |
481 | 562 | for ds in self.data
|
482 | 563 | for exc in ds.get("exchanges", [])
|
483 | 564 | if exc["type"] in labels.biosphere_edge_types and not exc.get("input")
|
@@ -579,7 +660,7 @@ def randonneur(
|
579 | 660 | edges_label="exchanges",
|
580 | 661 | verbose=verbose,
|
581 | 662 | case_sensitive=case_sensitive,
|
582 |
| - ) |
| 663 | + ), |
583 | 664 | )
|
584 | 665 |
|
585 | 666 | def migrate(self, migration_name):
|
|
0 commit comments