Skip to content

Commit

Permalink
Cleaned up iteration over instances in a JSON file.
Browse files Browse the repository at this point in the history
  • Loading branch information
jesper-friis committed Oct 26, 2024
1 parent d906b7d commit 9f9b999
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 27 deletions.
31 changes: 30 additions & 1 deletion bindings/python/dlite-entity-python.i
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ def get_instance(
# Allow metaid to be an Instance
if isinstance(metaid, Instance):
metaid = metaid.uri
errclr() # Clear internal error before calling Instance()
inst = Instance(
metaid=metaid, dims=dimensions, id=id,
dimensions=(), properties=() # arrays must not be None
Expand Down Expand Up @@ -488,6 +489,7 @@ def get_instance(
metaid=metaid
)
else:
errclr() # Clear internal error before calling Instance()
inst = Instance(
url=url, metaid=metaid,
dims=(), dimensions=(), properties=() # arrays
Expand All @@ -502,6 +504,7 @@ def get_instance(
If `metaid` is provided, an attempt is made to map the instance to
this metadata before it is returned.
"""
errclr() # Clear internal error before calling Instance()
inst = Instance(
storage=storage, id=id, metaid=metaid,
dims=(), dimensions=(), properties=() # arrays
Expand All @@ -519,6 +522,7 @@ def get_instance(
from dlite.options import make_query
if options and not isinstance(options, str):
options = make_query(options)
errclr() # Clear internal error before calling Instance()
inst = Instance(
driver=driver, location=str(location), options=options, id=id,
metaid=metaid,
Expand All @@ -529,6 +533,7 @@ def get_instance(
@classmethod
def from_json(cls, jsoninput, id=None, metaid=None):
"""Load the instance from json input."""
errclr() # Clear internal error before calling Instance()
inst = Instance(
jsoninput=jsoninput, id=id, metaid=metaid,
dims=(), dimensions=(), properties=() # arrays
Expand All @@ -538,6 +543,7 @@ def get_instance(
@classmethod
def from_bson(cls, bsoninput):
"""Load the instance from bson input."""
errclr() # Clear internal error before calling Instance()
inst = Instance(
bsoninput=bsoninput,
dims=(), dimensions=(), properties=() # arrays
Expand Down Expand Up @@ -593,6 +599,7 @@ def get_instance(
"""Create a new metadata entity (instance of entity schema) casted
to an instance.
"""
errclr() # Clear internal error before calling Instance()
inst = Instance(
uri=uri, dimensions=dimensions, properties=properties,
description=description,
Expand All @@ -614,6 +621,7 @@ def get_instance(
meta = get_instance(metaid)
dimensions = [dimensions[dim.name]
for dim in meta.properties['dimensions']]
errclr() # Clear internal error before calling Instance()
inst = Instance(
metaid=metaid, dims=dimensions, id=id,
dimensions=(), properties=() # arrays must not be None
Expand All @@ -630,10 +638,12 @@ def get_instance(
warnings.warn(
"create_from_url() is deprecated, use from_url() instead.",
DeprecationWarning, stacklevel=2)
return Instance(
errclr() # Clear internal error before calling Instance()
inst = Instance(
url=url, metaid=metaid,
dims=(), dimensions=(), properties=() # arrays
)
return instance_cast(inst)

@classmethod
def create_from_storage(cls, storage, id=None, metaid=None):
Expand All @@ -646,6 +656,7 @@ def get_instance(
warnings.warn(
"create_from_storage() is deprecated, use from_storage() instead.",
DeprecationWarning, stacklevel=2)
errclr() # Clear internal error before calling Instance()
inst = Instance(
storage=storage, id=id, metaid=metaid,
dims=(), dimensions=(), properties=() # arrays
Expand All @@ -664,12 +675,30 @@ def get_instance(
from dlite.options import make_query
if options and not isinstance(options, str):
options = make_query(options)
errclr() # Clear internal error before calling Instance()
inst = Instance(
driver=driver, location=str(location), options=options, id=id,
dims=(), dimensions=(), properties=() # arrays
)
return instance_cast(inst)

@classmethod
def get_uuids(cls, driver, location, options=None, pattern=None):
    """Returns the matching UUIDs in a storage.

    The storage is opened only for the duration of this call.

    Arguments:
        driver: Name of storage plugin for data parsing.
        location: Location of resource.  Typically a URL or file path.
        options: Options passed to the protocol and driver plugins.
        pattern: A glob pattern matching metadata UUIDs.  If given,
            only matching UUIDs will be returned.

    Returns:
        List of all matching UUIDs in storage.
    """
    with Storage(driver, location, options=options) as s:
        # Materialise the result while the storage is still open, so the
        # caller never receives a lazy iterator over a closed storage.
        return list(s.get_uuids(pattern=pattern))

def save(self, *dest, location=None, options=None):
"""Saves this instance to url or storage.

Expand Down
2 changes: 1 addition & 1 deletion bindings/python/scripts/dlite-validate
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ from pathlib import Path
import dlite


def parse(url, driver=None, options=None, id=None):
def parse(url, driver=None, options="mode=r", id=None):
"""Loads an instance from storage.
Arguments:
Expand Down
29 changes: 28 additions & 1 deletion bindings/python/tests/test_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,10 +343,37 @@
assert item.q.f.m.tolist() == [0., 1., 0.001]
assert item.q.f.to("1/hour").m.tolist() == [0, 3600, 3.6]


# For issue #750 - test instance_cast()
with raises(dlite.DLiteTypeError):
dlite.instance_cast(inst, dlite.Metadata)
castinst = dlite.instance_cast(inst.meta, dlite.Instance)
assert type(castinst) == dlite.Instance
assert type(dlite.instance_cast(castinst)) == dlite.Metadata


# Test storage query
uuids = {
'850637b9-1d21-573c-91b6-477530e4bf58',
'020e411b-f349-5689-8657-f82b709369c3',
'570611f5-96b3-5b0d-90ad-f3a4c19a78b2',
'5e378ac7-83c9-5d77-ab20-b5bb32c695da',
'e5efe084-27f2-5fec-9b1c-fa1a692e1434',
}
with dlite.Storage("json", indir / "test_ref_type.json") as s:
assert set(s.get_uuids()) == uuids
assert set(s.get_uuids("http://onto-ns.com/meta/0.3/EntitySchema")) == uuids
assert s.get_uuids("xxx") == []
assert set(
dlite.Instance.get_uuids("json", indir / "test_ref_type.json")
) == uuids
assert set(
dlite.Instance.get_uuids(
"json", indir / "test_ref_type.json",
pattern="http://onto-ns.com/meta/0.3/EntitySchema",
)
) == uuids
assert dlite.Instance.get_uuids(
"json", indir / "test_ref_type.json",
pattern="xxx",
) == []
7 changes: 4 additions & 3 deletions bindings/python/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@


# Test plugin that only defines to_bytes() and from_bytes()
#print("===================================")
#dlite.Storage.plugin_help("testbuff")
#buf = inst.to_bytes("bufftest")
txt = dlite.Storage.plugin_help("bufftest")
assert txt == "Test plugin that represents instances as byte-encoded json."
buf = inst.to_bytes("bufftest")
assert buf == str(inst).encode()
19 changes: 12 additions & 7 deletions src/dlite-json.c
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,7 @@ struct _DLiteJsonIter {
const jsmntok_t *t; /*!< Pointer to current token */
unsigned int n; /*!< Current token number */
unsigned int size; /*!< Size of the root object */
//char *metaid; /*!< Metadata id to match */
char metauuid[DLITE_UUID_LENGTH+1]; /*!< UUID of metadata */
};

Expand Down Expand Up @@ -1099,7 +1100,7 @@ static const jsmntok_t *nexttok(DLiteJsonIter *iter, int *length)
DLiteJsonIter *dlite_json_iter_create(const char *src, int length,
const char *metaid)
{
int r, ok=0;
int r;
DLiteJsonIter *iter=NULL;
jsmn_parser parser;

Expand All @@ -1116,10 +1117,10 @@ DLiteJsonIter *dlite_json_iter_create(const char *src, int length,
iter->size = iter->tokens->size;
if (metaid && dlite_get_uuid(iter->metauuid, metaid) < 0) goto fail;

ok=1;
fail:
if (!ok) dlite_json_iter_free(iter);
return iter;
fail:
if (iter) dlite_json_iter_free(iter);
return NULL;
}

/*
Expand Down Expand Up @@ -1308,11 +1309,15 @@ const char *dlite_jstore_iter_next(DLiteJStoreIter *iter)
if (iter->metauuid[0]) {
char metauuid[DLITE_UUID_LENGTH+1];
const char *val = jstore_get(js, iid);
int r;

jsmn_init(&parser);
if (jsmn_parse_alloc(&parser, val, strlen(val),
&iter->tokens, &iter->ntokens) < 0) {
err(dliteParseError, "invalid json input: \"%s\"", val);
if ((r = jsmn_parse_alloc(&parser, val, strlen(val),
&iter->tokens, &iter->ntokens)) < 0) {
if (r == JSMN_ERROR_INVAL)
err(dliteParseError, "invalid json input: \"%s\"", val);
else
err(dliteParseError, "json parse error: \"%s\"", jsmn_strerror(r));
continue;
}
if (get_meta_uuid(metauuid, val, iter->tokens)) {
Expand Down
2 changes: 1 addition & 1 deletion src/utils/jsmn.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ typedef struct jsmn_parser {
} jsmn_parser;

/**
* Create JSON parser over an array of tokens
* Initializes a JSON parser.
*/
JSMN_API void jsmn_init(jsmn_parser *parser);

Expand Down
22 changes: 13 additions & 9 deletions src/utils/jsmnx.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,42 +24,46 @@
if it is too small. `num_tokens_ptr` should point to the number of
allocated tokens.
Returns JSMN_ERROR_NOMEM on allocation error.
Returns number of tokens used by the parser or one of the following error
codes on error:
- JSMN_ERROR_NOMEM on allocation error.
- JSMN_ERROR_INVAL on invalid character inside json string.
*/
int jsmn_parse_alloc(jsmn_parser *parser, const char *js, const size_t len,
jsmntok_t **tokens_ptr, unsigned int *num_tokens_ptr)
{
int n, n_save;
unsigned int saved_pos;
jsmntok_t *t=NULL;
(void) n_save; // avoid unused variable error when assert is turned off
assert(tokens_ptr);
assert(num_tokens_ptr);
if (!*num_tokens_ptr) *tokens_ptr = NULL;
if (!*tokens_ptr) *num_tokens_ptr = 0;

saved_pos = parser->pos;

if (!*tokens_ptr) {
if ((n = jsmn_parse(parser, js, len, NULL, 0)) < 0) goto fail;
jsmn_parser tmp_parser;
jsmn_init(&tmp_parser);
if ((n = jsmn_parse(&tmp_parser, js, len, NULL, 0)) < 0) goto fail;
/* FIXME: there seems to be an issue in dlite_json_check(), which
reads past the last allocated token. Allocating `n+1` tokens is a
workaround to avoid memory issues. */
if (!(t = calloc(n+1, sizeof(jsmntok_t)))) return JSMN_ERROR_NOMEM;
} else {
jsmn_parser tmp_parser, saved_parser;
memcpy(&saved_parser, parser, sizeof(saved_parser));
n = jsmn_parse(parser, js, len, *tokens_ptr, *num_tokens_ptr);
if (n >= 0) return n;
if (n != JSMN_ERROR_NOMEM) goto fail;
if (!(t = realloc(*tokens_ptr, n*sizeof(jsmntok_t))))
memcpy(parser, &saved_parser, sizeof(saved_parser));
jsmn_init(&tmp_parser);
if ((n = jsmn_parse(&tmp_parser, js, len, NULL, 0)) < 0) goto fail;
if (!(t = realloc(*tokens_ptr, (n+1)*sizeof(jsmntok_t))))
return JSMN_ERROR_NOMEM;
}
*tokens_ptr = t;
*num_tokens_ptr = n;
n_save = n;

/* TODO: Instead of resetting the parser, we should continue after
reallocation */
parser->pos = saved_pos;
if ((n = jsmn_parse(parser, js, len, t, n)) < 0) goto fail;
assert(n == n_save);
return n;
Expand Down
7 changes: 3 additions & 4 deletions src/utils/jsmnx.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ void jsmn_init(jsmn_parser *parser);


/**
* Run JSON parser.
*
* It parses a JSON data string into and array of tokens, each
* Parse a JSON data string into an array of tokens, each
* describing a single JSON object.
*
* Arguments
Expand All @@ -60,8 +58,9 @@ void jsmn_init(jsmn_parser *parser);
* Returns
* -------
* On success, it returns the number of tokens actually used by the parser.
* On error, one of the following (negative) codes is returned:
* If `tokens` is NULL, the number of tokens needed will be returned.
*
* On error, one of the following (negative) codes is returned:
* - JSMN_ERROR_INVAL: bad token, JSON string is corrupted
* - JSMN_ERROR_NOMEM: not enough tokens, JSON string is too large
* - JSMN_ERROR_PART: JSON string is too short, expecting more JSON data
Expand Down

0 comments on commit 9f9b999

Please sign in to comment.