Skip to content

Commit 52c7017

Browse files
committed
move registry files back
1 parent 944c9e4 commit 52c7017

File tree

4 files changed

+109
-46
lines changed

4 files changed

+109
-46
lines changed

pyproject.toml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,18 @@ dreambank = "dreambank.curation:curate"
5353
[tool.setuptools.dynamic]
5454
version = {attr = "dreambank.__version__"}
5555

56-
# [tool.setuptools]
57-
# include-package-data = true
56+
[tool.setuptools]
57+
include-package-data = true
5858

59-
# [tool.setuptools.packages.find]
60-
# namespaces = false
61-
# where = ["src"]
59+
[tool.setuptools.packages.find]
60+
namespaces = false
61+
where = ["src"]
6262

63-
# [tool.setuptools.package-data]
64-
# dreambank = [
65-
# "dreambank/data/registry.txt",
66-
# "dreambank/data/registry-source.txt",
67-
# ]
63+
[tool.setuptools.package-data]
64+
dreambank = [
65+
"data/registry.txt",  # patterns are resolved relative to the package directory (src/dreambank/)
66+
"data/registry-source.txt",
67+
]
6868

6969
[tool.ruff]
7070
line-length = 100
File renamed without changes.
File renamed without changes.

src/dreambank/fetchers.py

Lines changed: 99 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,60 +5,123 @@
55
import pandas as pd
66
import pooch
77

8+
from importlib.metadata import version as installed_version
89
from importlib.resources import files
910

10-
1111
__all__ = [
1212
"available_datasets",
13-
"get_registry_filepath",
1413
"fetch",
1514
"read_dreams",
1615
"read_info",
1716
]
1817

1918

19+
repository = pooch.create(
20+
base_url="https://github.com/dxelab/dreambank/raw/{version}/data/",
21+
path=pooch.os_cache("dreambank"),
22+
version=f"v{installed_version('dreambank')}",  # inner single quotes: valid before Python 3.12 too
23+
version_dev="dev",
24+
)
25+
repository.load_registry(files("dreambank.data").joinpath("registry.txt"))
2026

21-
registry_hashes = {
22-
"main": "sha256:c2307b5f93ec13883b472d4764488b2386acf604aca7b78ad760656898247bcb",
23-
}
24-
25-
def get_registry_filepath(version_str):
26-
version_str = pooch.check_version(version_str, fallback="main")
27-
url = f"https://github.com/dxelab/dreambank/raw/{version_str}/registry.txt"
28-
known_hash = registry_hashes[version_str]
29-
fp = pooch.retrieve(url, known_hash=known_hash, path=pooch.os_cache("dreambank"))
30-
return fp
3127

32-
def create_pup(version):
33-
# Offers version control.
34-
version_str = f"v{version}"
35-
pup = pooch.create(
36-
path=pooch.os_cache("dreambank"),
37-
base_url="https://github.com/dxelab/dreambank/raw/{version}/data/",
38-
version=version_str,
39-
version_dev="main",
40-
)
41-
registry_filepath = get_registry_filepath(version_str)
42-
pup.load_registry(registry_filepath)
43-
return pup
4428

29+
def available_datasets():
30+
"""Return a list of all unique dataset IDs available in `dreambank`.
4531
46-
def available_datasets(version=1):
47-
return sorted(set(x.split(".")[0] for x in create_pup(version).registry_files))
32+
Returns
33+
-------
34+
dataset_ids : list
35+
A sorted list of strings, each a unique dataset ID.
4836
49-
def fetch(fname, version=1):
37+
Examples
38+
--------
39+
>>> import dreambank
40+
>>> dataset_ids = dreambank.available_datasets()
41+
>>> print(dataset_ids[:5])
42+
['alta', 'angie', 'arlie', 'b', 'b-baseline']
43+
>>> print(dataset_ids[-5:])
44+
['vonuslar', 'wedding', 'west_coast_teens', 'zurich-f', 'zurich-m']
5045
"""
51-
If you just want filepath to load manually
46+
return sorted(set(fn.split(".")[0] for fn in repository.registry_files))
47+
48+
49+
def fetch(fname):
50+
"""Fetch a single `dreambank` file and return the filepath.
51+
52+
The main use case of this would be if a user wants to read the file with custom code.
53+
54+
Parameters
55+
----------
56+
fname : str
57+
Dataset ID and extension (e.g., ``'alta.tsv'``, ``'alta.json'``).
58+
59+
Returns
60+
-------
61+
fp : str
62+
Full filepath of local file.
63+
64+
Examples
65+
--------
66+
>>> import dreambank
67+
>>> import pandas as pd
68+
>>>
69+
>>> fp = dreambank.fetch("bosnak.tsv")
70+
>>> bosnak = pd.read_table(fp, index_col="n")
5271
"""
53-
fp = create_pup(version).fetch(fname)
72+
fp = repository.fetch(fname)
5473
return fp
5574

56-
def read_dreams(dataset_id, version=1):
57-
fp = create_pup(version).fetch(f"{dataset_id}.tsv")
58-
return pd.read_table(fp)
75+
def read_dreams(dataset_id):
76+
"""Return a :class:`pandas.DataFrame` of dreams.
5977
60-
def read_info(fname, version=1):
61-
fp = create_pup(version).fetch(f"{dataset_id}.json")
78+
Parameters
79+
----------
80+
dataset_id : str
81+
The dataset to read in.
82+
83+
Returns
84+
-------
85+
dreams : :class:`pandas.DataFrame`
86+
A :class:`~pandas.DataFrame` with 2 or 3 columns.
87+
88+
Examples
89+
--------
90+
>>> import dreambank
91+
>>> dreams = dreambank.read_dreams("izzy22_25")
92+
>>> dreams.head(3)
93+
"""
94+
fp = fetch(f"{dataset_id}.tsv")
95+
dreams = pd.read_table(fp, dtype="string")
96+
return dreams
97+
98+
def read_info(dataset_id):
99+
"""Read info (i.e., metadata) for a given dataset.
100+
101+
Parameters
102+
----------
103+
dataset_id : str
104+
The dataset to read in.
105+
106+
Returns
107+
-------
108+
info : dict
109+
A dictionary with metadata for the given dataset.
110+
111+
* ``short_name``: dataset_id
112+
* ``long_name``: long_name
113+
* ``n_dreams``: n_dreams
114+
* ``timeframe``: timeframe
115+
* ``sex``: sex
116+
* ``description``: description
117+
118+
Examples
119+
--------
120+
>>> import dreambank
121+
>>> info = dreambank.read_info("izzy22_25")
122+
>>> info
123+
"""
124+
fp = fetch(f"{dataset_id}.json")
62125
with open(fp, "rt", encoding="utf-8") as f:
63-
data = json.load(f)
64-
return data
126+
info = json.load(f)
127+
return info

0 commit comments

Comments
 (0)