Skip to content

Commit

Permalink
Merge pull request #203 from investigativedata/develop
Browse files (browse the repository at this point in the history)
v0.5.1
  • Loading branch information
simonwoerpel authored Jan 3, 2024
2 parents 87042e7 + 1a5b7d6 commit 9fb0530
Show file tree
Hide file tree
Showing 14 changed files with 3,447 additions and 2,781 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.0
current_version = 0.5.1
commit = True
tag = True
sign_tags = True
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install poetry
Expand Down
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# * Run "pre-commit install".
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
Expand All @@ -31,13 +31,13 @@ repos:
- id: absolufy-imports

- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black"]

- repo: https://github.com/psf/black
rev: 23.9.1
rev: 23.12.1
hooks:
- id: black

Expand Down Expand Up @@ -66,7 +66,7 @@ repos:
- id: rst-inline-touching-normal

- repo: https://github.com/python-poetry/poetry
rev: 1.6.0
rev: 1.7.0
hooks:
- id: poetry-check
- id: poetry-lock
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.0
0.5.1
2 changes: 1 addition & 1 deletion investigraph/logic/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
if TYPE_CHECKING:
from investigraph.model import Context

from ftmq.io import make_proxy
from ftmq.util import make_proxy

from investigraph.types import CEGenerator, SDict

Expand Down
2 changes: 1 addition & 1 deletion investigraph/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def get_env(env: str, default: Any | None = None) -> Any | None:
return os.environ.get(env, default)


VERSION = "0.5.0"
VERSION = "0.5.1"

DEBUG = as_bool(get_env("DEBUG", 0))
DATA_ROOT = Path(get_env("DATA_ROOT", Path.cwd() / "data")).absolute()
Expand Down
18 changes: 11 additions & 7 deletions investigraph/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
import orjson
from banal import clean_dict, ensure_dict, ensure_list, is_listish, is_mapping
from followthemoney.util import join_text as _join_text
from ftmq.util import clean_name, make_dataset
from ftmq.util import clean_name
from ftmq.util import make_proxy as _make_proxy
from nomenklatura.dataset import DefaultDataset
from nomenklatura.entity import CE, CompositeEntity
from nomenklatura.entity import CE
from normality import slugify
from pydantic import BaseModel
from runpandarun.util import PathLike
Expand All @@ -26,11 +27,14 @@ def slugified_dict(data: dict[Any, Any]) -> SDict:
return {slugify(k, "_"): v for k, v in ensure_dict(data).items()}


def make_proxy(schema: str, dataset: str | None = DefaultDataset, **properties) -> CE:
data = {"schema": schema, "properties": properties}
if isinstance(dataset, str):
dataset = make_dataset(dataset)
return CompositeEntity.from_data(dataset, data)
def make_proxy(
schema: str,
id: str,
dataset: str | None = DefaultDataset,
**properties,
) -> CE:
data = {"id": id, "schema": schema, "properties": properties}
return _make_proxy(data, dataset)


@cache
Expand Down
3,189 changes: 1,795 additions & 1,394 deletions poetry.lock

Large diffs are not rendered by default.

22 changes: 9 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "investigraph"
version = "0.5.0"
version = "0.5.1"
description = "etl pipeline for investigations with follow the money data"
authors = ["Simon Wörpel <simon@investigativedata.org>"]
license = "MIT"
Expand All @@ -24,30 +24,26 @@ investigraph = "investigraph.cli:cli"

[tool.poetry.dependencies]
python = ">=3.11,<3.12"
banal = "^1.0.6"
cachelib = "^0.10.2"
dateparser = "^1.1.8"
fakeredis = "^2.20.0"
followthemoney = "^3.5.5"
followthemoney-store = "^3.0.6"
html2text = "^2020.1.16"
pandas = "^2.1.3"
pandas = "^2.1.4"
pantomime = "^0.6.1"
prefect = "^2.14.4"
prefect-dask = "^0.2.4"
prefect = "^2.14.12"
prefect-dask = "^0.2.6"
redis = "^5.0.1"
requests = "^2.31.0"
sqlalchemy = "^2.0.23"
tabulate = "^0.9.0"
typer = "^0.9.0"
ftmq = "^0.5.0"
prefect-ray = ">=0.3.1"
ray = "^2.8.0"
ftmq = ">=0.5.1"
prefect-ray = "^0.3.3"
ray = "^2.9.0"
runpandarun = "^0.4.0"
cryptography = ">=41.0.5"
cryptography = "^41.0.7"
xlrd = "^2.0.1"
normality = "^2.5.0"
pydantic = "^2.5.0"
pydantic = "^2.5.3"

[tool.poetry.group.dev.dependencies]
pytest-env = ">=1.1.1"
Expand Down
2,936 changes: 1,600 additions & 1,336 deletions requirements.txt

Large diffs are not rendered by default.

34 changes: 18 additions & 16 deletions tests/fixtures/ec_meetings/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,24 @@


def make_address(ctx: Context, data: SDict) -> CE:
proxy = ctx.make_proxy("Address")
proxy.id = ctx.make_id(proxy.caption, prefix="addr")
proxy.add("full", data.pop("Location"))
location = data.pop("Location")
id_ = ctx.make_id(location, prefix="addr")
proxy = ctx.make_proxy("Address", id_)
proxy.add("full", location)
return proxy


def make_person(ctx: Context, name: str, role: str, body: CE) -> CE:
proxy = ctx.make_proxy("Person")
proxy.id = ctx.make_slug("person", make_entity_id(body.id, fp(name)))
id_ = ctx.make_slug("person", make_entity_id(body.id, fp(name)))
proxy = ctx.make_proxy("Person", id_)
proxy.add("name", name)
proxy.add("description", role)
return proxy


def make_organization(ctx: Context, regId: str, name: str | None = None) -> CE:
proxy = ctx.make_proxy("Organization")
proxy.id = ctx.make_slug(regId, prefix="eu-tr")
id_ = ctx.make_slug(regId, prefix="eu-tr")
proxy = ctx.make_proxy("Organization", id_)
if fp(name):
proxy.add("name", name)
proxy.add("idNumber", regId)
Expand Down Expand Up @@ -81,12 +82,12 @@ def make_event(
) -> Generator[CE, None, None]:
date = data.pop("Date of meeting")
participants = [o for o in make_organizations(ctx, data)]
proxy = ctx.make_proxy("Event")
proxy.id = ctx.make_slug(
id_ = ctx.make_slug(
"meeting",
date,
make_entity_id(organizer.id, *sorted([p.id for p in participants])),
)
proxy = ctx.make_proxy("Event", id_)
label = join_text(*[p.first("name") for p in participants])
name = f"{date} - {organizer.caption} x {label}"
proxy.add("name", name)
Expand Down Expand Up @@ -114,8 +115,8 @@ def parse_record(ctx: Context, data: SDict, body: CE):
yield from involved

for member in involved:
rel = ctx.make_proxy("Membership")
rel.id = ctx.make_slug("membership", make_entity_id(body.id, member.id))
id_ = ctx.make_slug("membership", make_entity_id(body.id, member.id))
rel = ctx.make_proxy("Membership", id_)
rel.add("organization", body)
rel.add("member", member)
rel.add("role", member.get("description"))
Expand All @@ -124,9 +125,10 @@ def parse_record(ctx: Context, data: SDict, body: CE):

def parse_record_ec(ctx: Context, data: SDict):
# meetings of EC representatives
body = ctx.make_proxy("PublicBody")
body.id = ctx.make_slug(fp(body.caption))
body.add("name", data.pop("Name of cabinet"))
name = data.pop("Name of cabinet")
id_ = ctx.make_slug(fp(name))
body = ctx.make_proxy("PublicBody", id_)
body.add("name", name)
body.add("jurisdiction", "eu")

yield body
Expand All @@ -136,8 +138,8 @@ def parse_record_ec(ctx: Context, data: SDict):
def parse_record_dg(ctx: Context, data: SDict):
# meetings of EC Directors-General
acronym = data.pop("Name of DG - acronym")
body = ctx.make_proxy("PublicBody")
body.id = ctx.make_slug("dg", acronym)
id_ = ctx.make_slug("dg", acronym)
body = ctx.make_proxy("PublicBody", id_)
body.add("name", data.pop("Name of DG - full name"))
body.add("weakAlias", acronym)
body.add("jurisdiction", "eu")
Expand Down
4 changes: 2 additions & 2 deletions tests/fixtures/eu_authorities/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

def parse_record(ctx: Context, data: SDict, ix: int):
slug = data.pop("URL name")
body = ctx.make_proxy("PublicBody")
body.id = ctx.make_slug(slug)
id_ = ctx.make_slug(slug)
body = ctx.make_proxy("PublicBody", id_)
body.add("name", data.pop("Name"))
body.add("weakAlias", data.pop("Short name"))
tags = data.pop("Tags").split()
Expand Down
4 changes: 2 additions & 2 deletions tests/fixtures/eu_authorities/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@


def handle(ctx: Context, data: dict[str, Any], ix: int):
body = make_proxy("PublicBody")
slug = data.pop("URL name")
body.id = join_slug(ctx.prefix, slug)
id_ = join_slug(ctx.prefix, slug)
body = make_proxy("PublicBody", id_)
body.add("name", data.pop("Name"))
body.add("weakAlias", data.pop("Short name"))
tags = data.pop("Tags").split()
Expand Down
3 changes: 1 addition & 2 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ def test_cache():
key = cache.set(data)
assert cache.get(key) == data

proxy = make_proxy("Person")
proxy.id = "id-alice"
proxy = make_proxy("Person", "id-alice")
proxy.add("name", "Alice")
key = cache.set(proxy.to_dict())
assert cache.get(key) == proxy.to_dict()
Expand Down

0 comments on commit 9fb0530

Please sign in to comment.