Skip to content

Commit 12b3b60

Browse files
authored
Merge pull request #16 from csdms/mcflugen/update-list
Update standard names list
2 parents 52cc7a3 + f391272 commit 12b3b60

25 files changed

+7406
-1150
lines changed

README.md

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,67 @@
1-
[![Test](https://github.com/csdms/standard_names/actions/workflows/test.yml/badge.svg)](https://github.com/csdms/standard_names/actions/workflows/test.yml)
2-
[![Documentation Status](https://readthedocs.org/projects/standard-names/badge/?version=latest)](http://standard-names.readthedocs.io/en/latest/?badge=latest)
3-
[![Coverage Status](https://coveralls.io/repos/github/csdms/standard_names/badge.svg?branch=master)](https://coveralls.io/github/csdms/standard_names?branch=master)
4-
[![Conda Version](https://img.shields.io/conda/vn/conda-forge/standard_names.svg)](https://anaconda.org/conda-forge/standard_names)
5-
[![PyPI](https://img.shields.io/pypi/v/standard_names)](https://pypi.org/project/standard_names)
1+
[![Python][python-badge]][pypi-link]
2+
[![Build Status][build-badge]][build-link]
3+
[![PyPI][pypi-badge]][pypi-link]
4+
[![Conda Version][anaconda-badge]][anaconda-link]
65

76

8-
standard_names
9-
==============
7+
[anaconda-badge]: https://anaconda.org/conda-forge/standard_names/badges/version.svg
8+
[anaconda-link]: https://anaconda.org/conda-forge/standard_names
9+
[build-badge]: https://github.com/csdms/standard_names/actions/workflows/test.yml/badge.svg
10+
[build-link]: https://github.com/csdms/standard_names/actions/workflows/test.yml
11+
[csdms-workbench]: https://csdms.colorado.edu/wiki/Workbench
12+
[pypi-badge]: https://badge.fury.io/py/standard_names.svg
13+
[pypi-link]: https://pypi.org/project/standard_names/
14+
[python-badge]: https://img.shields.io/pypi/pyversions/standard_names.svg
15+
16+
# standard_names
1017

1118
Python utilities for working with CSDMS Standard Names.
1219

13-
CSDMS Standard Names is an element of the [CSDMS Workbench](https://csdms.colorado.edu/wiki/Workbench),
20+
CSDMS Standard Names is an element of the [CSDMS Workbench][csdms-workbench],
1421
an integrated system of software tools, technologies, and standards
1522
for building and coupling models.
1623

24+
## As Regular Expression
25+
26+
```
27+
^ # Start of the object name
28+
[a-z]+ # Starts with one or more lowercase letters
29+
(?: # Start of a non-capturing group for subsequent parts
30+
[-~_]? # Optional separator: hyphen, tilde, or underscore
31+
[a-zA-Z0-9]+ # One or more alphanumeric characters
32+
)* # Zero or more repetitions of the group
33+
__ # Double underscore separator
34+
[a-z]+ # Start of the quantity
35+
(?: # Start of a non-capturing group for subsequent parts
36+
[-~_]? # Optional separator: hyphen, tilde, or underscore
37+
[a-zA-Z0-9]+ # One or more alphanumeric characters
38+
)* # Zero or more repetitions of the group
39+
$ # End of the name
40+
```
41+
42+
## As Parsing Expression Grammar
43+
44+
```peg
45+
Start
46+
= LowercaseWord UnderscoreSeparator LowercaseWord
47+
48+
LowercaseWord
49+
= [a-z] AdditionalCharacters*
50+
51+
AdditionalCharacters
52+
= Separator? Alphanumeric+
53+
54+
Separator
55+
= "-" / "~" / "_"
56+
57+
Alphanumeric
58+
= [a-zA-Z0-9]
59+
60+
UnderscoreSeparator
61+
= "__"
62+
```
1763

18-
Links
19-
-----
64+
## Links
2065

2166
* [Source code](http://github.com/csdms/standard_names): The
2267
*standard_names* source code repository.

noxfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
@nox.session(python=PYTHON_VERSION, venv_backend="conda")
1515
def test(session: nox.Session) -> None:
1616
"""Run the tests."""
17-
session.install(".[testing]")
17+
session.install(".[peg,testing]")
1818

1919
args = ["--cov", PROJECT, "-vvv"] + session.posargs
2020

pyproject.toml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ dynamic = [
3838
]
3939
dependencies = [
4040
"packaging",
41-
"pyyaml",
4241
]
4342

4443
[project.license]
@@ -51,6 +50,9 @@ Issues = "https://github.com/csdms/standard_names/issues"
5150
Repository = "https://github.com/csdms/standard_names"
5251

5352
[project.optional-dependencies]
53+
peg = [
54+
"pyparsing",
55+
]
5456
dev = [
5557
"nox",
5658
]
@@ -64,11 +66,7 @@ docs = [
6466
]
6567

6668
[project.scripts]
67-
snbuild = "standard_names.cmd.snbuild:run"
68-
sndump = "standard_names.cmd.sndump:run"
69-
snscrape = "standard_names.cmd.snscrape:run"
70-
snsql = "standard_names.cmd.snsql:run"
71-
snvalidate = "standard_names.cmd.snvalidate:run"
69+
"standard-names" = "standard_names.cmd.main:main"
7270

7371
[build-system]
7472
requires = [

src/standard_names/__main__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from __future__ import annotations
2+
3+
from standard_names.cli.main import main
4+
5+
# Entry point for ``python -m standard_names``: run the command-line
# interface and exit with whatever ``main`` returns.
if __name__ == "__main__":
    raise SystemExit(main())

src/standard_names/_format.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
from collections.abc import Iterable
2+
3+
4+
def as_wiki_list(
    items: Iterable[str], heading: str | None = None, level: int = 1
) -> str:
    """Format items as a wiki-markup list wrapped in ``<tt>`` tags.

    Parameters
    ----------
    items : iterable of str
        Items to list. Each item is stripped of surrounding whitespace
        and followed by ``<br/>``.
    heading : str, optional
        If not ``None``, a heading line is placed before the list.
    level : int, optional
        Number of ``=`` characters on each side of the heading.

    Returns
    -------
    str
        The formatted lines joined with newlines.

    Examples
    --------
    >>> from standard_names._format import as_wiki_list

    >>> print(as_wiki_list(["line 1", "line 2"], heading="Lines"))
    = Lines =
    <tt>
    line 1<br/>
    line 2<br/>
    </tt>
    """
    lines = []

    if heading is not None:
        marker = "=" * level
        lines.append(f"{marker} {heading} {marker}")

    lines.append("<tt>")
    lines.extend(f"{item.strip()}<br/>" for item in items)
    lines.append("</tt>")

    return "\n".join(lines)
29+
30+
31+
def as_yaml_list(
    items: Iterable[str], heading: str | None = None, level: int = 1
) -> str:
    """Format items as a YAML sequence, optionally nested under a key.

    Parameters
    ----------
    items : iterable of str
        Items to list. Each item is stripped of surrounding whitespace
        and items that are empty after stripping are dropped.
    heading : str, optional
        If not ``None``, written as a ``heading:`` key with the list
        indented two spaces beneath it.
    level : int, optional
        Unused; present for signature parity with the other formatters
        in this module.

    Returns
    -------
    str
        The formatted lines joined with newlines. When no items remain
        after stripping, the list is written as ``[]``.

    Examples
    --------
    >>> from standard_names._format import as_yaml_list

    >>> print(as_yaml_list(["line 1", "line 2"], heading="Lines"))
    Lines:
      - line 1
      - line 2

    >>> print(as_yaml_list([], heading="Lines"))
    Lines:
      []
    """
    if heading is None:
        formatted_lines = []
        indent = ""
    else:
        formatted_lines = [f"{heading}:"]
        indent = "  "

    stripped_items = [stripped for item in items if (stripped := item.strip())]

    if stripped_items:
        formatted_lines += [f"{indent}- {item}" for item in stripped_items]
    else:
        # An empty sequence still has to be valid YAML under the key.
        formatted_lines.append(f"{indent}[]")

    return "\n".join(formatted_lines)
64+
65+
66+
def as_myst_list(
    items: Iterable[str], heading: str | None = None, level: int = 1
) -> str:
    """Format items as a MyST/Markdown bullet list.

    Parameters
    ----------
    items : iterable of str
        Items to list. Each item is stripped of surrounding whitespace
        and items that are empty after stripping are dropped.
    heading : str, optional
        If non-empty, written as a ``# heading`` line before the list.
    level : int, optional
        Unused by this formatter; the heading is always written with a
        single ``#``.

    Returns
    -------
    str
        The formatted lines joined with newlines.

    Examples
    --------
    >>> from standard_names._format import as_myst_list

    >>> print(as_myst_list(["line 1", "line 2"], heading="Lines"))
    # Lines
    * line 1
    * line 2
    """
    bullets = []

    if heading:
        bullets.append(f"# {heading}")

    for item in items:
        text = item.strip()
        if text:
            bullets.append(f"* {text}")

    return "\n".join(bullets)
87+
88+
89+
def as_text_list(
    items: Iterable[str], heading: str | None = None, level: int = 1
) -> str:
    """Format items as plain text, one item per line.

    Parameters
    ----------
    items : iterable of str
        Items to list. Each item is stripped of surrounding whitespace
        and items that are empty after stripping are dropped.
    heading : str, optional
        If non-empty, written verbatim as the first line.
    level : int, optional
        Unused by this formatter.

    Returns
    -------
    str
        The formatted lines joined with newlines.

    Examples
    --------
    >>> from standard_names._format import as_text_list

    >>> print(as_text_list(["line 1", "line 2"], heading="# Lines"))
    # Lines
    line 1
    line 2
    """
    output = [heading] if heading else []

    trimmed = (item.strip() for item in items)
    output.extend(text for text in trimmed if text)

    return "\n".join(output)
110+
111+
112+
# Maps a format name to the function that renders a list in that format.
FORMATTERS = {
    "wiki": as_wiki_list,
    "yaml": as_yaml_list,
    "text": as_text_list,
    "myst": as_myst_list,
}

src/standard_names/cli/_scrape.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#! /usr/bin/env python
2+
"""
3+
Example usage:
4+
5+
```bash
6+
snscrape http://csdms.colorado.edu/wiki/CSN_Quantity_Templates \
7+
http://csdms.colorado.edu/wiki/CSN_Object_Templates \
8+
http://csdms.colorado.edu/wiki/CSN_Operation_Templates \
9+
> data/scraped.yaml
10+
```
11+
"""
12+
from __future__ import annotations
13+
14+
from collections.abc import Iterable
15+
from urllib.request import urlopen
16+
17+
from standard_names.registry import NamesRegistry
18+
19+
20+
def scrape_names(files: Iterable[str]) -> NamesRegistry:
    """Collect the standard names found in a set of files or URLs.

    Parameters
    ----------
    files : iterable of str
        Paths or URLs to search for names.

    Returns
    -------
    NamesRegistry
        A registry combining the names found in every entry of *files*.
    """
    found = NamesRegistry([])

    for source in files:
        names_in_source = search_file_for_names(source)
        found |= NamesRegistry(names_in_source)

    return found
37+
38+
39+
def find_all_names(lines: Iterable[str], engine: str = "regex") -> set[str]:
    """Find standard names.

    Parameters
    ----------
    lines : iterable of str
        Lines of text to search. Each line is stripped before searching.
    engine : {"regex", "peg"}, optional
        Which matcher to use: ``standard_names.regex.findall`` or
        ``standard_names.peg.findall``.

    Returns
    -------
    set of str
        Every name the selected ``findall`` reports across all lines.

    Raises
    ------
    ValueError
        If *engine* is neither ``"regex"`` nor ``"peg"``.

    Examples
    --------
    >>> from standard_names.cli._scrape import find_all_names

    >>> contents = '''
    ... A file with text and names (air__temperature) mixed in. Some names
    ... have double underscores (like, Water__Temperature) but are not
    ... valid names. Others, like water__temperature, or "wind__speed" are good.
    ... '''
    >>> sorted(find_all_names(contents.splitlines(), engine="regex"))
    ['air__temperature', 'water__temperature', 'wind__speed']

    >>> sorted(find_all_names(contents.splitlines(), engine="peg"))
    ['air__temperature', 'water__temperature', 'wind__speed']
    """
    # The engine module is imported here so only the selected one is loaded.
    if engine == "regex":
        from standard_names.regex import findall
    elif engine == "peg":
        from standard_names.peg import findall
    else:
        raise ValueError(
            f"engine not understood: {engine!r} is not one of 'regex', 'peg'"
        )

    names = set()
    for line in lines:
        names |= set(findall(line.strip()))

    return names
71+
72+
73+
def search_file_for_names(path: str) -> set[str]:
    """Search a local file or an HTTP(S) URL for standard names.

    Parameters
    ----------
    path : str
        A URL beginning with ``http://`` or ``https://``, or the path to
        a local text file.

    Returns
    -------
    set of str
        The names that ``find_all_names`` finds in the contents.
    """
    if path.startswith(("http://", "https://")):
        with urlopen(path) as response:
            return find_all_names(line.decode("utf-8") for line in response)

    # Read local files as UTF-8 too, so results match the URL branch and do
    # not depend on the platform's default encoding.
    with open(path, encoding="utf-8") as fp:
        return find_all_names(fp)

src/standard_names/cmd/snsql.py renamed to src/standard_names/cli/_sql.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#! /usr/bin/env python
1+
from __future__ import annotations
22

33
import os
44

@@ -46,7 +46,7 @@ def as_sql_commands(names: NamesRegistry, newline: str = os.linesep) -> str:
4646
Examples
4747
--------
4848
>>> from standard_names.registry import NamesRegistry
49-
>>> from standard_names.cmd.snsql import as_sql_commands
49+
>>> from standard_names.cli._sql import as_sql_commands
5050
5151
>>> names = NamesRegistry()
5252
>>> names.add("air__temperature")
@@ -99,25 +99,3 @@ def as_sql_commands(names: NamesRegistry, newline: str = os.linesep) -> str:
9999
commands = newline.join(db.iterdump())
100100

101101
return commands
102-
103-
104-
def main() -> str:
    """
    Build a database of CSDMS standard names from a list.

    One or more files of names are read from the command line.

    Returns
    -------
    str
        The result of ``as_sql_commands`` for the names in those files.
    """
    from argparse import ArgumentParser, FileType

    cli = ArgumentParser(description="Build an sqlite database from a list of names")
    cli.add_argument("file", nargs="+", type=FileType("r"), help="List of names")
    name_files = cli.parse_args().file

    return as_sql_commands(NamesRegistry(name_files))
120-
121-
122-
def run() -> None:
    """Print the string returned by ``main`` to standard output."""
    print(main())

0 commit comments

Comments
 (0)