From af6fe88e14968a3f6fa308059c4a3b3f82ca70a4 Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Wed, 3 Feb 2021 16:03:23 +0000 Subject: [PATCH 1/2] Implement new macros build and build-1 Main change is that the build macros fix various issues with construct. Most notably build macros: - Fixes #24, by only grouping into sets where it's needed. - Handles optionals better by hiding the presence of _0 unbound variables, and removing any keys that may have them from the results. - Closes #10 by implementing clearer grouping semantics Also there's a small construct perf improvement for non grouping queries due to a faster way to check whether we need to group. The check was previously linear in results, now constant time. --- README.md | 74 ++++++++++++++--- src/grafter/matcha/alpha.clj | 123 ++++++++++++++++++++++++----- test/grafter/matcha/alpha_test.clj | 58 ++++++++++++++ 3 files changed, 222 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index a24e049..9138681 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,8 @@ use Matcha to query this graph locally. dispose of the index. This can lead to poor performance when you want to query the same set of data multiple times. - Construct graph query results directly into clojure datastructures. -- Support for `VALUES` clauses (unlike in SPARQL we do not yet support - binding arbitrary tuples/tables). So we only support the +- Support for `VALUES` clauses (unlike in SPARQL we do not yet support + binding arbitrary tuples/tables). So we only support the `VALUES ?x { ... }` form. - Support for `OPTIONAL`s with SPARQL-like semantics. @@ -48,7 +48,7 @@ Currently there is no support for the following SPARQL-like features: ## Usage Matcha defines some primary query functions `select`, `select-1`, -`construct`, `construct-1` and `ask`. +`build`, `build-1`, `construct`, `construct-1` and `ask`. 
First lets define an in memory database of triples, in reality this could come from a SPARQL query `CONSTRUCT`, but here we'll just define @@ -69,7 +69,7 @@ URI's, or clojure keywords. [:rick :foaf/knows :martin] [:rick :foaf/knows :katie] [:katie :foaf/knows :julie] - + [:rick :a :foaf/Person] [:katie :a :foaf/Person] [:martin :a :foaf/Person]]) @@ -92,6 +92,52 @@ BGPs have some semantics you need to be aware of: variables. - Other symbols are resolved to their values. +### `build` + +`build` always groups returned solutions into a sequence of clojure +maps, where the subjects are grouped into maps, and the maps are +grouped by their properties. If a property has multiple values they +will be rolled up into a set, otherwise they will be a scalar value. + +Each map returned by `build` typically represents a resource in the +built graph, which is projected into a sequence of maps, with +potentially multi-valued keys. + +It takes a binding for `?subject` of the map, a map form specifying +the projection of other property/value bindings, a `bgp` and a +database. + +``` clojure +(build ?person + {:foaf/knows ?friends} + [[?person :foaf/knows ?friends]] + friends-db) + +;; => ({:grafter.rdf/uri :rick, :foaf/knows #{:martin :katie}} +;; {:grafter.rdf/uri :katie, :foaf/knows :julie}) +``` + +NOTE: `:foaf/knows` is projected into a set of values for `:rick`, but +a single scalar value for `:katie`. + +The `?subject` is by default associated with the key +`:grafter.rdf/uri`. If you wish to specify this key yourself you can do so +by providing a key/value pair as the subject: e.g. 
substituting +`?person` for `[:id ?person]` changes the return values like so: + +``` clojure +(build [:id ?person] + {:foaf/knows ?friends} + [[?person :foaf/knows ?friends]] + friends-db) +;; => ({:id :rick, :foaf/knows #{:martin :katie}} +;; {:id :katie, :foaf/knows :julie}) +``` + +Because `build` knows it is always returning a sequence of maps, it +will remove any keys corresponding to unbound variables introduced +through optionals. This is unlike `construct`. + ### `select` `select` compiles a query from your arguments, that returns results as a @@ -130,10 +176,14 @@ the first solution. ### `construct` -`CONSTRUCT`s are the most powerful query type, as they allow you to -construct arbitrary clojure data structures directly from your query -results, and position the projected query variables where ever you -want within the projected datastructure template. +NOTE: if you're using `construct` to return maps, you should first +consider using `build` which fixes some issues present in common +`construct` usage. + +`CONSTRUCT`s allow you to construct arbitrary clojure data structures +directly from your query results, and position the projected query +variables wherever you want within the projected data structure +template. Args: * `construct-pattern`: an arbitrary clojure data structure. 
Results @@ -213,7 +263,7 @@ You can parameterise Matcha queries simply by adding a lexical binding or wrappi [[person-id :foaf/knows ?friend] [?friend :rdfs/label ?name]])) -(lookup-friends :rick friends-db) +(lookup-friends :rick friends-db) ;; => [{:grafter.rdf/uri :martin, :name "Martin"} ;; {:grafter.rdf/uri :katie, :name "Katie"}] @@ -240,8 +290,8 @@ We support dynamic VALUEs clauses in all query types like so: (select [?name] [(values ?person-id person-ids) [?person-id :rdfs/label ?name]])) - -(lookup-names [:rick :katie] friends-db) ;; => ["Rick", "Katie"] + +(lookup-names [:rick :katie] friends-db) ;; => ["Rick", "Katie"] ``` You can also hardcode the values into the query: @@ -253,7 +303,7 @@ You can also hardcode the values into the query: [?person-id :rdfs/label ?name]])) ``` -Any "flat collection" (i.e. a `sequential?` or a `set?`) is valid +Any "flat collection" (i.e. a `sequential?` or a `set?`) is valid on the right hand side of a `values` binding. ## Performance diff --git a/src/grafter/matcha/alpha.clj b/src/grafter/matcha/alpha.clj index e7eda15..2dfddbd 100644 --- a/src/grafter/matcha/alpha.clj +++ b/src/grafter/matcha/alpha.clj @@ -344,31 +344,112 @@ `(quote ~qv)) query-vars))] (walk/postwalk-replace replacements construct-pattern))) -(defn group-subjects [solutions] - (if-let [subj-maps (seq (filter :grafter.rdf/uri solutions))] +(def ^:private group-predicates-xf + (map (fn [v] + (apply merge-with + (fn [a b] + (cond + (set? a) + (conj a b) + :else + (set [a b]))) + v)))) + +(def ^:private unsetify-grafter-uri + (map (fn [m] + (let [vs (:grafter.rdf/uri m) + v (if (set? vs) + (first vs) + vs)] + (assoc m :grafter.rdf/uri v))))) + +(defn group-subjects-for-construct [construct-pattern solutions] + (if (and (map? construct-pattern) (:grafter.rdf/uri construct-pattern)) (into [] (comp - (map (fn [v] - (apply merge-with - (fn [a b] - (cond - (set? 
a) - (conj a b) - :else - (set [a b]))) - v))) - (map (fn [m] - (let [vs (:grafter.rdf/uri m) - v (if (set? vs) - (first vs) - vs)] - (assoc m :grafter.rdf/uri v))))) - (vals (group-by :grafter.rdf/uri subj-maps))) + group-predicates-xf + unsetify-grafter-uri) + (vals (group-by :grafter.rdf/uri solutions))) solutions)) +(def ^:private clean-up-subject-map + "Removes any keys with unbound vars as values and flattens any sets + that have just one value into scalars." + (map (fn [e] + (reduce-kv (fn [m k v] + (-> m + (cond-> + (symbol? v) + (dissoc k) + + (and (set? v) (= 1 (count v))) + (assoc k (first v))))) + e + e)))) + +(defn group-subjects-for-build [subject-k solutions] + (into [] + (comp + group-predicates-xf + clean-up-subject-map) + (vals (group-by subject-k solutions)))) + +(defmacro build + "Query a `db-or-idx` with `bgps` patterns, and return data grouped by + subject and predicates into resource object maps. + + `subject` can be either a `?query-var` symbol used in the `bgps` or + a 2-tuple key value pair of `[:keyword ?query-var]`, in which case + `:keyword` will be the key used to identify the subject of the maps + in the response. If only `?query-var` and no `:keyword` is specified + then the default keyword of `:grafter.rdf/uri` is used. + + NOTE: unlike `construct`, `build` will eliminate any unbound + variables from the response maps that may arrise from using an + optional. + + If called with 3 arguments, returns a function of 1 argument: the `db-or-idx`, + which returns a sequence of results in the form of the `construct-pattern`. + + If called with 4 arguments, queries the `db-or-idx` directly, returning a + sequence of results in the form of the `construct-pattern`." + ([subject construct-pattern bgps] + `(fn [db-or-idx#] + (build ~subject + ~construct-pattern ~bgps db-or-idx#))) + ([subject construct-pattern bgps db-or-idx] + (let [[subject-k subject-var] (if (symbol? 
subject) + [:grafter.rdf/uri subject] + subject) + pvars (cons subject-var (find-vars-in-tree construct-pattern)) + pvarvec (vec pvars)] + `(->> ~(solve* 'build &env pvars bgps db-or-idx) + ;; create a sequence of {?var :value} binding maps for + ;; each solution. + (unify-solutions (quote ~pvarvec)) + (replace-vars-with-vals ~(quote-query-vars pvarvec (merge {subject-k subject-var} + construct-pattern))) + (group-subjects-for-build ~subject-k) + seq)))) + +(defmacro build-1 + "Like `build` but returns only the first resource object. + + NOTE: it is not lazy, so to make this efficient you should be + selective in your `bgps`. If called with 3 arguments, returns a function of 1 argument (the `db-or-idx`) which returns the first resource object." + ([subject-kv construct-pattern bgps] + `(fn [db-or-idx#] + (build-1 ~subject-kv + ~construct-pattern ~bgps db-or-idx#))) + ([subject construct-pattern bgps db-or-idx] + `(first (build ~subject ~construct-pattern ~bgps ~db-or-idx)))) + (defmacro construct - "Query a `db-or-idx` with `bgps` patterns, and return data in the form of the - `construct-pattern`. + "NOTE: If you want to construct maps, you will likely be better + using `build` instead. + + Query a `db-or-idx` with `bgps` patterns, and return data in the + form of the `construct-pattern`. If called with 2 arguments, returns a function of 1 argument: the `db-or-idx`, which returns a sequence of results in the form of the `construct-pattern`. @@ -386,7 +467,7 @@ ;; each solution. (unify-solutions (quote ~pvarvec)) (replace-vars-with-vals ~(quote-query-vars pvarvec construct-pattern)) - (group-subjects) + (group-subjects-for-construct (quote ~construct-pattern)) seq)))) (s/def ::construct-pattern any?) diff --git a/test/grafter/matcha/alpha_test.clj b/test/grafter/matcha/alpha_test.clj index 9745217..f7021d5 100644 --- a/test/grafter/matcha/alpha_test.clj +++ b/test/grafter/matcha/alpha_test.clj @@ -579,3 +579,61 @@ (is (valid-syntax? 
(construct ?s [[nil nil nil]]))))) + +(deftest build-test + (testing "Sugared build" + (let [db [[:s :p :o] + [:s :p2 :o2] + + [:s2 :p :o3]] + + ret (build ?s + {?p ?o} + [[?s ?p ?o]] + db)] + + (is (= #{{:grafter.rdf/uri :s + :p :o + :p2 :o2} + + {:grafter.rdf/uri :s2 + :p :o3}} + (set ret)))) + + (testing "Optionals and predicate grouping" + (let [db [[:s :label "s"] + [:s :label "s another"] + [:s :p2 :o2] + [:s :optional "optional"] + [:s2 :label "s2"] + [:s2 :p2 :o2]] + + ret (build ?s + {:label ?label + :optional ?opt} + + [[?s :label ?label] + (grafter.matcha.alpha/optional [[?s :optional ?opt]])] + db)] + + (is (= #{{:grafter.rdf/uri :s, + :label #{"s" "s another"}, + :optional "optional"} + {:grafter.rdf/uri :s2, :label "s2"}} + + (set ret))))))) + +(deftest build-1-test + (let [db [[:s :p :o] + [:s :p2 :o2] + [:s :p2 :o3] + [:s2 :p :o] + [:s2 :p2 :o2]] + ret (build-1 ?s + {?p ?o} + [(values ?s [:s]) + [?s ?p ?o]] + db)] + (is (= {:grafter.rdf/uri :s, :p2 #{:o3 :o2}, :p :o} + ret)) + )) From a26c3fb82ba2b42e9b2cfecf5634ad76139b714f Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Wed, 10 Mar 2021 12:37:59 +0000 Subject: [PATCH 2/2] Tweak build and build-1 macros to support concrete/ground bindings --- src/grafter/matcha/alpha.clj | 23 ++++++++------ test/grafter/matcha/alpha_test.clj | 51 +++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/src/grafter/matcha/alpha.clj b/src/grafter/matcha/alpha.clj index 2dfddbd..1ea77a5 100644 --- a/src/grafter/matcha/alpha.clj +++ b/src/grafter/matcha/alpha.clj @@ -398,11 +398,13 @@ "Query a `db-or-idx` with `bgps` patterns, and return data grouped by subject and predicates into resource object maps. - `subject` can be either a `?query-var` symbol used in the `bgps` or - a 2-tuple key value pair of `[:keyword ?query-var]`, in which case - `:keyword` will be the key used to identify the subject of the maps - in the response. 
If only `?query-var` and no `:keyword` is specified - then the default keyword of `:grafter.rdf/uri` is used. + `subject` can be a concrete value, a value bound in lexical scope, a + `?query-var` symbol used in the `bgps` or a 2-tuple key value pair + of `[:keyword ?query-var]` or `[:keyword :concrete-value]`. If the + vector form is used, `:keyword` will be the key used to identify + the subject of the maps in the response. If only `?query-var` or a + concrete value and no `:keyword` is specified then the default + keyword of `:grafter.rdf/uri` is used. NOTE: unlike `construct`, `build` will eliminate any unbound variables from the response maps that may arrise from using an @@ -418,11 +420,14 @@ (build ~subject ~construct-pattern ~bgps db-or-idx#))) ([subject construct-pattern bgps db-or-idx] - (let [[subject-k subject-var] (if (symbol? subject) - [:grafter.rdf/uri subject] - subject) - pvars (cons subject-var (find-vars-in-tree construct-pattern)) + (let [[subject-k subject-var] (if (vector? subject) + subject + [:grafter.rdf/uri subject]) + pvars (if (query-var? subject-var) + (cons subject-var (find-vars-in-tree construct-pattern)) + (find-vars-in-tree construct-pattern)) pvarvec (vec pvars)] + `(->> ~(solve* 'build &env pvars bgps db-or-idx) ;; create a sequence of {?var :value} binding maps for ;; each solution. 
diff --git a/test/grafter/matcha/alpha_test.clj b/test/grafter/matcha/alpha_test.clj index f7021d5..5839fde 100644 --- a/test/grafter/matcha/alpha_test.clj +++ b/test/grafter/matcha/alpha_test.clj @@ -581,24 +581,45 @@ [[nil nil nil]]))))) (deftest build-test - (testing "Sugared build" + (testing "build" (let [db [[:s :p :o] [:s :p2 :o2] - [:s2 :p :o3]] - - ret (build ?s - {?p ?o} - [[?s ?p ?o]] - db)] - - (is (= #{{:grafter.rdf/uri :s - :p :o - :p2 :o2} - - {:grafter.rdf/uri :s2 - :p :o3}} - (set ret)))) + [:s2 :p :o3]]] + + (testing "with unbound subject" + (let [ret (build ?s + {?p ?o} + [[?s ?p ?o]] + db)] + (is (= #{{:grafter.rdf/uri :s + :p :o + :p2 :o2} + + {:grafter.rdf/uri :s2 + :p :o3}} + (set ret))))) + + (testing "with bound subject" + (let [subject :s + ret (build subject + {?p ?o} + [[subject ?p ?o]] + db)] + (is (= #{{:grafter.rdf/uri :s + :p :o + :p2 :o2}} + (set ret))))) + + (testing "with hardcoded subject value" + (let [ret (build :s + {?p ?o} + [[:s ?p ?o]] + db)] + (is (= #{{:grafter.rdf/uri :s + :p :o + :p2 :o2}} + (set ret)))))) (testing "Optionals and predicate grouping" (let [db [[:s :label "s"]