-
Notifications
You must be signed in to change notification settings - Fork 5
Semgrex DSL
Simon Gray edited this page Oct 24, 2017
·
1 revision
Experimental Clojure DSL for Semgrex
;; identifiers for nodes are marked with the :id key, in this way
{:text "xyz", :pos "NN", :id 3}
;; alternative syntax
[{} :< :nsubj {}]
[{} :<nsubj {}]
[{} :< {}]
[:< :nsubj {} {}]
(< (! {}) {})
;; identifiers for relations are set in this way (:? is the identifier)
(se [x {:lemma "eat"}
y :?]
(< y {} x))
;; naming of relations and nodes is done inside a vector preceding the pattern definition
;; as a side-effect, the pattern definitions are also simplified
;; so it might even make sense in cases where the named nodes need to be accessed
(se-pattern [x-rel :unknown
see-node {:lemma "see"}
eat-node {:lemma "eat"}]
(> x-rel {} see-node eat-node))
;; {} >=x-rel {lemma:see}=see-node >=x-rel {lemma:eat}=eat-node
;; hmm, maybe not good, as x-rel then appears twice in the resulting pattern when using (> ...)
(se-pattern [eat {:lemma "eat"}
rel #"nsubj|dobj"
x {}]
"{eat} >rel {x}")
;; base relation function
(relation :dep :nsubj {} {} {})
;; the subject of the phrase (dep of nsubj rel)
(< :nsubj {} {} {})
;; the root of the phrase (gov of nsubj rel)
(> :nsubj {} {} {})
(> :nsubj {} {} {})
;; any dep relation
(< {} {} {})
;; number of nodes can vary (must have at least 2)
(> {} {})
(< {} {} {} {} {} {})
;; nodes can be strings (this allows for the output of other functions)
(< "!{lemma:have}" {})
(< (not {:lemma "have"}) {})
;; also works with nesting
(< (not (or {:lemma "have"}
{:lemma "eat"}))
{})
(< "!({lemma:have}|{lemma:eat})" {})
;; use vectors to group
(> [{:lemma "thing"} {:lemma "thingy"}] {})
;; optional relations
(?> {} {})
(?< {} {})