Skip to content

Commit d8a8a16

Browse files
authored
Added toml generalizer (#90)
* Added toml generalizer * modifying test
1 parent 835fc96 commit d8a8a16

File tree

6 files changed

+156
-22
lines changed

6 files changed

+156
-22
lines changed

poetry.lock

Lines changed: 31 additions & 22 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ pypandoc = "^1.7.5"
3636
twine = "^4.0.0"
3737
oaklib = "^0.1.17"
3838
pandera = "^0.11.0"
39+
tomlkit = "^0.11.4"
3940

4041

4142
[tool.poetry.dev-dependencies]

schema_automator/cli.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,31 @@ def generalize_json(input, output, schema_name, format, omit_null, **kwargs):
237237
write_schema(schema, output)
238238

239239

240+
@main.command()
@click.argument('input')
@output_option
@schema_name_option
@click.option('--container-class-name', default='Container', help="name of root class")
@click.option('--enum-columns', '-E', multiple=True, help='column(s) that is forced to be an enum')
@click.option('--enum-mask-columns', multiple=True, help='column(s) that are excluded from being enums')
@click.option('--max-enum-size', default=50, help='do not create an enum if more than max distinct members')
@click.option('--enum-threshold', default=0.1, help='if the number of distinct values / rows is less than this, do not make an enum')
@click.option('--omit-null/--no-omit-null', default=False, help="if true, ignore null values")
def generalize_toml(input, output, schema_name, omit_null, **kwargs):
    """
    Generalizes from a TOML file to a schema

    See :ref:`generalizers` for more on the generalization framework

    Example:

        schemauto generalize-toml my/data/conf.toml -o my.yaml
    """
    # TOML input is handled by the JSON generalizer; the format keyword
    # selects its TOML branch. The remaining CLI options (container class
    # name, enum tuning) arrive in kwargs and are forwarded untouched.
    # NOTE(review): schema_name is accepted from the CLI but is not used in
    # this body -- confirm whether it should be forwarded.
    toml_schema = JsonDataGeneralizer(omit_null=omit_null).convert(
        input, format='toml', **kwargs
    )
    write_schema(toml_schema, output)
263+
264+
240265
@main.command()
241266
@click.argument('input')
242267
@output_option

schema_automator/generalizers/json_instance_generalizer.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from typing import Union, Dict, List, Any
33
from collections import defaultdict
44
import json
5+
6+
import tomlkit
57
import yaml
68
import gzip
79

@@ -53,6 +55,11 @@ def convert(self, input: Union[str, Dict], format: str = 'json',
5355
obj = json.load(stream)
5456
elif format == 'yaml':
5557
obj = yaml.safe_load(stream)
58+
elif format == 'toml':
59+
obj_str = "".join(stream.readlines())
60+
toml_obj = tomlkit.parse(obj_str)
61+
json_str = json.dumps(toml_obj)
62+
obj = json.loads(json_str)
5663
else:
5764
raise Exception(f'bad format {format}')
5865
rows_by_table = defaultdict(list)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import click
2+
from typing import Union, Dict, List, Any
3+
from collections import defaultdict
4+
import json
5+
import yaml
6+
import gzip
7+
8+
from dataclasses import dataclass
9+
10+
from linkml_runtime import SchemaView
11+
from linkml_runtime.linkml_model import SchemaDefinition
12+
13+
from schema_automator import JsonDataGeneralizer
14+
from schema_automator.generalizers.generalizer import Generalizer
15+
from schema_automator.generalizers.csv_data_generalizer import CsvDataGeneralizer
16+
from linkml_runtime.utils.formatutils import camelcase
17+
18+
from schema_automator.utils.schemautils import write_schema
19+
20+
21+
@dataclass
class TomlDataGeneralizer(Generalizer):
    """
    A generalizer that abstracts from TOML instance data

    All work is delegated to ``JsonDataGeneralizer``, which is invoked with
    ``format='toml'``.
    """
    mappings: dict = None
    omit_null: bool = None

    def convert(self, input: str, **kwargs) -> SchemaDefinition:
        """
        Generalizes from a TOML file

        :param input: TOML input, handed unchanged to
            ``JsonDataGeneralizer.convert`` (presumably a file path -- confirm)
        :param kwargs: forwarded both to the ``JsonDataGeneralizer``
            constructor and to its ``convert`` call
        :return: whatever ``JsonDataGeneralizer.convert`` returns for the input
        """
        # A stray bare name previously sat here and raised NameError on every
        # call; it has been removed.
        # NOTE(review): ``mappings`` and ``omit_null`` set on this instance are
        # not passed to the delegate; only ``kwargs`` are -- confirm intended.
        json_engine = JsonDataGeneralizer(**kwargs)
        return json_engine.convert(input, format='toml', **kwargs)

tests/resources/MPLEx.toml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Chemical derivatization method for global GC-MS metabolomics
2+
title = 'MPLEx extraction (Grass) '
3+
author = 'First Last'
4+
section = 'Metabolomics'
5+
updated = '08/10/2022'
6+
protocol_url = ''
7+
protocol_doi = ''
8+
description = 'Procedure for chloroform/methanol Folch-type extraction applicable for soil and environmental samples.'
9+
10+
[sample-storage-activity]
11+
id = 'mplex:1'
12+
organic_matter = { numeric_value = '1', unit = 'g' }
13+
container = { type = 'Olympus', numeric_value = '50', unit = 'mL'}
14+
temperature = { numeric_value = '-70', unit = 'C' }
15+
16+
[homogenization-activity]
17+
id = 'mplex:2'
18+
source_material = 'mplex:1'
19+
solvent = 'MeOH/Water'
20+
concentration = { numeric_value = '4/3', unit = 'ratio' }
21+
volume = { numeric_value = '15', unit = 'mL' }
22+
shaker = {type = 'vortex'}
23+
instrument = {type = 'Omni Homogenizer', details = "disposable probes"}
24+
25+
[solubilization-activity]
26+
id = 'mplex:3'
27+
source_material = 'mplex:2'
28+
solvent = {type = 'Chloroform', temperature = "ice-cold"}
29+
volume = { numeric_value = '18.5', unit = 'mL' }
30+
shaker = {type = 'vortex', numeric_value = '1', unit = 'minutes'}
31+
32+
[sample-handling-activity]
33+
id = 'mplex:4'
34+
source_material = 'mplex:3'
35+
activity = {type = 'ice', numeric_value = '5', unit = 'minutes'}
36+
37+
[mixing-activity]
38+
id = 'mplex:5'
39+
source_material = 'mplex:4'
40+
shaker = {type = 'vortex', numeric_value = '1', unit = 'minutes'}
41+
42+
[centrifugation-activity]
43+
id = 'mplex:6'
44+
source_material = 'mplex:5'
45+
centrifuge = {speed = '5000', numeric_value = '10', unit = 'minutes'}
46+
temperature = { numeric_value = '4', unit = 'C' }
47+
48+
[transfer-activity]
49+
id = 'derive:12'
50+
source_material = 'derive:11'
51+
type = 'supernatant'
52+
53+
# To derivatization (Optional)

0 commit comments

Comments
 (0)