|
| 1 | +grammar BELScript; /* Combined grammar: parser rules (names starting with a lowercase letter) and lexer rules (names starting with an uppercase letter) are declared together in this file. */ |
| 2 | + |
| 3 | +options { |
| 4 | + language = Java; /* target language of the generated recognizers */ |
| 5 | + output = AST; /* the generated parser builds an AST */ |
| 6 | +} |
| 7 | + |
| 8 | +@header { |
| 9 | + package org.openbel.framework.common.bel.parser; |
| 10 | + |
| 11 | + import java.util.List; |
| 12 | + import java.util.ArrayList; |
| 13 | + import java.util.Stack; |
| 14 | + |
| 15 | + import org.openbel.bel.model.BELParseErrorException; |
| 16 | +} |
| 17 | + |
| 18 | +@lexer::header { |
| 19 | + package org.openbel.framework.common.bel.parser; |
| 20 | +} |
| 21 | + |
| 22 | +@members { |
| 23 | + private final List<BELParseErrorException> syntaxErrors = new ArrayList<BELParseErrorException>(); /* errors collected so far, in reporting order */ |
| 24 | + private final Stack<String> paraphrases = new Stack<String>(); /* descriptions pushed by the rules currently being parsed */ |
| 25 | + /* Returns the list that displayRecognitionError appends to (the list itself, not a copy). */ |
| 26 | + public List<BELParseErrorException> getSyntaxErrors() { |
| 27 | + return syntaxErrors; |
| 28 | + } |
| 29 | + /* Deliberately empty: messages handed to this hook are discarded. Presumably this keeps the parser from printing errors - confirm against the runtime base class. */ |
| 30 | + @Override |
| 31 | + public void emitErrorMessage(String msg) { |
| 32 | + } |
| 33 | + /* Records a recognition error, with its line, column and current rule description, on the syntaxErrors list. */ |
| 34 | + @Override |
| 35 | + public void displayRecognitionError(String[] tokenNames, RecognitionException e) { |
| 36 | + // Innermost active rule description, or an empty string when no described rule is active. |
| 37 | + final String context = paraphrases.isEmpty() ? "" : paraphrases.peek(); |
| 38 | + // tokenNames is not used; only the exception position and the context are kept. |
| 39 | + final BELParseErrorException error = new BELParseErrorException.SyntaxException(e.line, e.charPositionInLine, context, e); |
| 40 | + syntaxErrors.add(error); |
| 41 | + } |
| 42 | +} |
| 43 | + |
| 44 | +document: /* Top-level rule (no other rule in this grammar references it): one or more newlines, document comments or records, then end of input. */ |
| 45 | + (NEWLINE | DOCUMENT_COMMENT | record)+ EOF |
| 46 | + ; |
| 47 | + |
| 48 | +record: /* Exactly one of the record kinds: a DEFINE, SET or UNSET form, or a statement. */ |
| 49 | + (define_namespace | define_annotation | set_annotation | set_document | set_statement_group | unset_statement_group | unset | statement) |
| 50 | + ; |
| 51 | + |
| 52 | +set_document /* Matches: SET DOCUMENT <document property> = <identifier, value list or quoted value>. */ |
| 53 | + @init { paraphrases.push("in set document."); } /* error context used while this rule is active */ |
| 54 | + /* The matching pop is in the finally clause at the end of the rule. An @after action is skipped when the rule exits through a RecognitionException, which would leave a stale context on the stack. */ |
| 55 | + : |
| 56 | + ('SET' DOCUMENT_KEYWORD) document_property '=' (OBJECT_IDENT | vl=VALUE_LIST | quoted_value) |
| 57 | + { |
| 58 | + // https://github.com/OpenBEL/openbel-framework/issues/14 |
| 59 | + if ($vl != null) $vl.setText($vl.getText().replace("\\\\", "\\")); /* only when the value list alternative matched: doubled backslashes become single ones */ |
| 60 | + } |
| 61 | + ; finally { paraphrases.pop(); } |
| 62 | + |
| 63 | +set_statement_group /* Matches: SET STATEMENT_GROUP = <quoted value or identifier>. */ |
| 64 | + @init { paraphrases.push("in set statement group."); } /* error context used while this rule is active */ |
| 65 | + /* The matching pop is in the finally clause at the end of the rule. An @after action is skipped when the rule exits through a RecognitionException, which would leave a stale context on the stack. */ |
| 66 | + : |
| 67 | + 'SET' STATEMENT_GROUP_KEYWORD '=' (quoted_value | OBJECT_IDENT) |
| 68 | + ; finally { paraphrases.pop(); } |
| 69 | + |
| 70 | +set_annotation /* Matches: SET <annotation identifier> = <quoted value, value list or identifier>. */ |
| 71 | + @init { paraphrases.push("in set annotation."); } /* error context used while this rule is active */ |
| 72 | + /* The matching pop is in the finally clause at the end of the rule. An @after action is skipped when the rule exits through a RecognitionException, which would leave a stale context on the stack. */ |
| 73 | + : |
| 74 | + 'SET' OBJECT_IDENT '=' (quoted_value | vl=VALUE_LIST | OBJECT_IDENT) |
| 75 | + { |
| 76 | + // https://github.com/OpenBEL/openbel-framework/issues/14 |
| 77 | + if ($vl != null) $vl.setText($vl.getText().replace("\\\\", "\\")); /* only when the value list alternative matched: doubled backslashes become single ones */ |
| 78 | + } |
| 79 | + ; finally { paraphrases.pop(); } |
| 80 | + |
| 81 | +unset_statement_group /* Matches: UNSET STATEMENT_GROUP. */ |
| 82 | + @init { paraphrases.push("in unset statement group."); } /* error context used while this rule is active */ |
| 83 | + /* The matching pop is in the finally clause at the end of the rule. An @after action is skipped when the rule exits through a RecognitionException, which would leave a stale context on the stack. */ |
| 84 | + : |
| 85 | + 'UNSET' STATEMENT_GROUP_KEYWORD |
| 86 | + ; finally { paraphrases.pop(); } |
| 87 | + |
| 88 | +unset /* Matches: UNSET followed by a single identifier or a braced identifier list. */ |
| 89 | + @init { paraphrases.push("in unset."); } /* error context used while this rule is active */ |
| 90 | + /* The matching pop is in the finally clause at the end of the rule. An @after action is skipped when the rule exits through a RecognitionException, which would leave a stale context on the stack. */ |
| 91 | + : |
| 92 | + 'UNSET' (OBJECT_IDENT | IDENT_LIST) |
| 93 | + ; finally { paraphrases.pop(); } |
| 94 | + |
| 95 | +define_namespace /* Matches: DEFINE [DEFAULT] NAMESPACE <identifier> AS URL <quoted value>. */ |
| 96 | + @init { paraphrases.push("in define namespace."); } /* error context used while this rule is active */ |
| 97 | + /* The matching pop is in the finally clause at the end of the rule. An @after action is skipped when the rule exits through a RecognitionException, which would leave a stale context on the stack. */ |
| 98 | + : |
| 99 | + ('DEFINE' (('DEFAULT')? 'NAMESPACE')) OBJECT_IDENT 'AS' 'URL' quoted_value |
| 100 | + ; finally { paraphrases.pop(); } |
| 101 | + |
| 102 | +define_annotation /* Matches: DEFINE ANNOTATION <identifier> AS, then either URL or PATTERN with a quoted value, or LIST with a value list. */ |
| 103 | + @init { paraphrases.push("in define annotation."); } /* error context used while this rule is active */ |
| 104 | + /* The matching pop is in the finally clause at the end of the rule. An @after action is skipped when the rule exits through a RecognitionException, which would leave a stale context on the stack. */ |
| 105 | + : |
| 106 | + ('DEFINE' 'ANNOTATION') OBJECT_IDENT 'AS' ((('URL' | 'PATTERN') quoted_value) | ('LIST' vl=VALUE_LIST)) |
| 107 | + { |
| 108 | + // https://github.com/OpenBEL/openbel-framework/issues/14 |
| 109 | + if ($vl != null) $vl.setText($vl.getText().replace("\\\\", "\\")); /* only when the LIST alternative matched: doubled backslashes become single ones */ |
| 110 | + } |
| 111 | + ; finally { paraphrases.pop(); } |
| 112 | + |
| 113 | +quoted_value /* A single QUOTED_VALUE token; the action below rewrites the token text in place. */ |
| 114 | + : qv=QUOTED_VALUE |
| 115 | + { |
| 116 | + // https://github.com/OpenBEL/openbel-framework/issues/14 |
| 117 | + $qv.setText($qv.getText().replace("\\\\", "\\")); /* every doubled backslash in the token text becomes a single backslash */ |
| 118 | + } |
| 119 | + ; |
| 120 | + |
| 121 | +document_property: /* The fixed set of property names accepted after SET DOCUMENT; each is matched as a literal keyword. */ |
| 122 | + 'Authors' | |
| 123 | + 'ContactInfo' | |
| 124 | + 'Copyright' | |
| 125 | + 'Description' | |
| 126 | + 'Disclaimer' | |
| 127 | + 'Licenses' | |
| 128 | + 'Name' | |
| 129 | + 'Version' |
| 130 | + ; |
| 131 | + |
| 132 | +statement /* A term on its own, or a subject term, a relationship and an object that is either a term or a parenthesised nested statement (term relationship term). An optional STATEMENT_COMMENT may follow. */ |
| 133 | + @init { paraphrases.push("in statement."); } /* error context used while this rule is active */ |
| 134 | + /* The matching pop is in the finally clause at the end of the rule. An @after action is skipped when the rule exits through a RecognitionException, which would leave a stale context on the stack. */ |
| 135 | + : |
| 136 | + outer_term (relationship ((OPEN_PAREN outer_term relationship outer_term CLOSE_PAREN) | outer_term))? STATEMENT_COMMENT? |
| 137 | + ; finally { paraphrases.pop(); } |
| 138 | + |
| 139 | +outer_term: /* A function keyword followed by a parenthesised list of zero or more arguments. The comma before each argument is optional, so arguments may be separated by whitespace alone and a leading comma is accepted. */ |
| 140 | + function OPEN_PAREN (','? argument)* CLOSE_PAREN |
| 141 | + ; |
| 142 | + |
| 143 | +argument: /* One argument of an outer term: either a parameter or a nested term. */ |
| 144 | + param | term |
| 145 | + ; |
| 146 | + |
| 147 | +term: /* A nested term: a function keyword and a parenthesised list of zero or more nested terms or parameters, each optionally preceded by a comma. Recursive through the term alternative. */ |
| 148 | + function OPEN_PAREN (','? (term | param))* CLOSE_PAREN |
| 149 | + ; |
| 150 | + |
| 151 | +/* XXX OBJECT_IDENT is used for namespace value because otherwise parsing will fail using a token like (LETTER | DIGIT)+ */ |
| 152 | +fragment param: /* A parameter: an optional namespace prefix followed by an identifier or a quoted value. NOTE(review): the fragment modifier is normally applied to lexer rules; confirm what effect, if any, it has on this parser rule. */ |
| 153 | + NS_PREFIX? (OBJECT_IDENT | quoted_value) |
| 154 | + ; |
| 155 | + |
| 156 | +function returns [String r]: /* Matches one function keyword, in long or abbreviated form, and returns the matched text unchanged in r. Every alternative has the same shape: label the literal as fv, then copy its text. */ |
| 157 | + ( |
| 158 | + fv='proteinAbundance' {$r = $fv.getText();} | |
| 159 | + fv='p' {$r = $fv.getText();} | |
| 160 | + fv='rnaAbundance' {$r = $fv.getText();} | |
| 161 | + fv='r' {$r = $fv.getText();} | |
| 162 | + fv='abundance' {$r = $fv.getText();} | |
| 163 | + fv='a' {$r = $fv.getText();} | |
| 164 | + fv='microRNAAbundance' {$r = $fv.getText();} | |
| 165 | + fv='m' {$r = $fv.getText();} | |
| 166 | + fv='geneAbundance' {$r = $fv.getText();} | |
| 167 | + fv='g' {$r = $fv.getText();} | |
| 168 | + fv='biologicalProcess' {$r = $fv.getText();} | |
| 169 | + fv='bp' {$r = $fv.getText();} | |
| 170 | + fv='pathology' {$r = $fv.getText();} | |
| 171 | + fv='path' {$r = $fv.getText();} | |
| 172 | + fv='complexAbundance' {$r = $fv.getText();} | |
| 173 | + fv='complex' {$r = $fv.getText();} | |
| 174 | + fv='translocation' {$r = $fv.getText();} | |
| 175 | + fv='tloc' {$r = $fv.getText();} | |
| 176 | + fv='cellSecretion' {$r = $fv.getText();} | |
| 177 | + fv='sec' {$r = $fv.getText();} | |
| 178 | + fv='cellSurfaceExpression' {$r = $fv.getText();} | |
| 179 | + fv='surf' {$r = $fv.getText();} | |
| 180 | + fv='reaction' {$r = $fv.getText();} | |
| 181 | + fv='rxn' {$r = $fv.getText();} | |
| 182 | + fv='compositeAbundance' {$r = $fv.getText();} | |
| 183 | + fv='composite' {$r = $fv.getText();} | |
| 184 | + fv='fusion' {$r = $fv.getText();} | |
| 185 | + fv='fus' {$r = $fv.getText();} | |
| 186 | + fv='degradation' {$r = $fv.getText();} | |
| 187 | + fv='deg' {$r = $fv.getText();} | |
| 188 | + fv='molecularActivity' {$r = $fv.getText();} | |
| 189 | + fv='act' {$r = $fv.getText();} | |
| 190 | + fv='catalyticActivity' {$r = $fv.getText();} | |
| 191 | + fv='cat' {$r = $fv.getText();} | |
| 192 | + fv='kinaseActivity' {$r = $fv.getText();} | |
| 193 | + fv='kin' {$r = $fv.getText();} | |
| 194 | + fv='phosphataseActivity' {$r = $fv.getText();} | |
| 195 | + fv='phos' {$r = $fv.getText();} | |
| 196 | + fv='peptidaseActivity' {$r = $fv.getText();} | |
| 197 | + fv='pep' {$r = $fv.getText();} | |
| 198 | + fv='ribosylationActivity' {$r = $fv.getText();} | |
| 199 | + fv='ribo' {$r = $fv.getText();} | |
| 200 | + fv='transcriptionalActivity' {$r = $fv.getText();} | |
| 201 | + fv='tscript' {$r = $fv.getText();} | |
| 202 | + fv='transportActivity' {$r = $fv.getText();} | |
| 203 | + fv='tport' {$r = $fv.getText();} | |
| 204 | + fv='gtpBoundActivity' {$r = $fv.getText();} | |
| 205 | + fv='gtp' {$r = $fv.getText();} | |
| 206 | + fv='chaperoneActivity' {$r = $fv.getText();} | |
| 207 | + fv='chap' {$r = $fv.getText();} | |
| 208 | + fv='proteinModification' {$r = $fv.getText();} | |
| 209 | + fv='pmod' {$r = $fv.getText();} | |
| 210 | + fv='substitution' {$r = $fv.getText();} | |
| 211 | + fv='sub' {$r = $fv.getText();} | |
| 212 | + fv='truncation' {$r = $fv.getText();} | |
| 213 | + fv='trunc' {$r = $fv.getText();} | |
| 214 | + fv='reactants' {$r = $fv.getText();} | |
| 215 | + fv='products' {$r = $fv.getText();} | |
| 216 | + fv='list' {$r = $fv.getText();} |
| 217 | + ) |
| 218 | + ; /* NOTE(review): each literal above becomes its own keyword token, so short forms such as p, a or list presumably cannot also be lexed as OBJECT_IDENT - confirm. */ |
| 219 | + |
| 220 | +relationship returns [String r]: /* Matches one relationship keyword or its symbolic form and returns the matched text unchanged in r. Every alternative labels the literal as rv and copies its text. */ |
| 221 | + ( |
| 222 | + rv='increases' { $r = $rv.getText(); } | |
| 223 | + rv='->' { $r = $rv.getText(); } | |
| 224 | + rv='decreases' { $r = $rv.getText(); } | |
| 225 | + rv='-|' { $r = $rv.getText(); } | |
| 226 | + rv='directlyIncreases' { $r = $rv.getText(); } | |
| 227 | + rv='=>' { $r = $rv.getText(); } | |
| 228 | + rv='directlyDecreases' { $r = $rv.getText(); } | |
| 229 | + rv='=|' { $r = $rv.getText(); } | |
| 230 | + rv='causesNoChange' { $r = $rv.getText(); } | |
| 231 | + rv='positiveCorrelation' { $r = $rv.getText(); } | |
| 232 | + rv='negativeCorrelation' { $r = $rv.getText(); } | |
| 233 | + rv='translatedTo' { $r = $rv.getText(); } | |
| 234 | + rv='>>' { $r = $rv.getText(); } | |
| 235 | + rv='transcribedTo' { $r = $rv.getText(); } | |
| 236 | + rv=':>' { $r = $rv.getText(); } | |
| 237 | + rv='isA' { $r = $rv.getText(); } | |
| 238 | + rv='subProcessOf' { $r = $rv.getText(); } | |
| 239 | + rv='rateLimitingStepOf' { $r = $rv.getText(); } | |
| 240 | + rv='biomarkerFor' { $r = $rv.getText(); } | |
| 241 | + rv='prognosticBiomarkerFor' { $r = $rv.getText(); } | |
| 242 | + rv='orthologous' { $r = $rv.getText(); } | |
| 243 | + rv='analogous' { $r = $rv.getText(); } | |
| 244 | + rv='association' { $r = $rv.getText(); } | |
| 245 | + rv='--' { $r = $rv.getText(); } | |
| 246 | + rv='hasMembers' { $r = $rv.getText(); } | |
| 247 | + rv='hasComponents' { $r = $rv.getText(); } | |
| 248 | + rv='hasMember' { $r = $rv.getText(); } | |
| 249 | + rv='hasComponent' { $r = $rv.getText(); } |
| 250 | + ) |
| 251 | + ; |
| 252 | + |
| 253 | +DOCUMENT_COMMENT: /* A hash sign and everything after it up to, but not including, the end of the line. */ |
| 254 | + '#' ~('\n' | '\r')* {$channel=HIDDEN;} /* routed to the hidden channel */ |
| 255 | + ; |
| 256 | + |
| 257 | +STATEMENT_COMMENT: /* Two slashes and the rest of the line. A backslash directly followed by a line break (LF or CRLF) is consumed as part of the comment, so the comment continues on the next line. No channel is set here, and the statement rule accepts this token explicitly. */ |
| 258 | + '//' (('\\\n') | ('\\\r\n') | ~('\n' | '\r'))* |
| 259 | + ; |
| 260 | + |
| 261 | +DOCUMENT_KEYWORD: /* The literal word DOCUMENT, used by set_document. */ |
| 262 | + 'DOCUMENT' |
| 263 | + ; |
| 264 | + |
| 265 | +STATEMENT_GROUP_KEYWORD: /* The literal word STATEMENT_GROUP, used by the set and unset statement group rules. */ |
| 266 | + 'STATEMENT_GROUP' |
| 267 | + ; |
| 268 | + |
| 269 | +IDENT_LIST: /* A braced, comma-separated list of one or more identifiers, lexed as a single token. */ |
| 270 | + '{' OBJECT_IDENT (COMMA OBJECT_IDENT)* '}' |
| 271 | + ; |
| 272 | + |
| 273 | +VALUE_LIST: /* A braced, comma-separated list lexed as a single token. Items are identifiers, quoted values or nested value lists, and every item is optional, so an empty list and empty slots between commas are accepted. NOTE(review): a braced list of plain identifiers also fits IDENT_LIST; confirm which token the generated lexer produces for it. */ |
| 274 | + '{' (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)? (COMMA (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)?)* '}' |
| 275 | + ; |
| 276 | + |
| 277 | +OBJECT_IDENT: /* One or more underscores, ASCII letters or digits, in any order; a leading digit is allowed. */ |
| 278 | + ('_' | LETTER | DIGIT)+ |
| 279 | + ; |
| 280 | + |
| 281 | +QUOTED_VALUE: /* A double-quoted string. The body may contain escape sequences, a backslash directly followed by a line break (LF or CRLF), or any character other than a backslash or a double quote. */ |
| 282 | + '"' ( EscapeSequence | '\\\n' | '\\\r\n' | ~('\\'|'"') )* '"' |
| 283 | + ; |
| 284 | + |
| 285 | +OPEN_PAREN: /* Opening parenthesis of a term argument list or of a nested statement. */ |
| 286 | + '(' |
| 287 | + ; |
| 288 | + |
| 289 | +CLOSE_PAREN: /* Closing parenthesis of a term argument list or of a nested statement. */ |
| 290 | + ')' |
| 291 | + ; |
| 292 | + |
| 293 | +NS_PREFIX: /* A namespace prefix: a letter, then any number of letters or digits, then a colon. The colon is part of the token text. */ |
| 294 | + LETTER (LETTER | DIGIT)* ':' |
| 295 | + ; |
| 296 | + |
| 297 | +NEWLINE: /* One line break: CR LF, a lone LF, or a lone CR (u000d is CR, u000a is LF). */ |
| 298 | + '\u000d'? '\u000a' | '\u000d' |
| 299 | + ; |
| 300 | + |
| 301 | +WS : (' ' | '\t' | '\n' | '\r'| '\f' | '\\\n' | '\\\r\n')+ {$channel = HIDDEN;}; /* Runs of spaces, tabs, line breaks, form feeds and backslash line continuations, routed to the hidden channel. NOTE(review): bare CR and LF also match NEWLINE; confirm which rule the generated lexer prefers. */ |
| 302 | + |
| 303 | +fragment COMMA: /* A comma with any number of spaces on either side. As a fragment it is only used inside other lexer rules, here the two list tokens. */ |
| 304 | + ' '* ',' ' '* |
| 305 | + ; |
| 306 | + |
| 307 | +fragment LETTER: /* One ASCII letter, lower or upper case. */ |
| 308 | + ('a'..'z' | 'A'..'Z') |
| 309 | + ; |
| 310 | + |
| 311 | +fragment DIGIT: /* One decimal digit. */ |
| 312 | + '0'..'9' |
| 313 | + ; |
| 314 | + |
| 315 | +fragment EscapeSequence: /* A backslash escape inside a quoted value: a single-character escape (b, t, n, f, r, double quote, single quote, backslash), a unicode escape, or an octal escape. */ |
| 316 | + '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') |
| 317 | + | UnicodeEscape |
| 318 | + | OctalEscape |
| 319 | + ; |
| 320 | + |
| 321 | +fragment OctalEscape: /* A backslash followed by one to three octal digits; in the three-digit form the first digit is limited to 0-3. */ |
| 322 | + '\\' ('0'..'3') ('0'..'7') ('0'..'7') |
| 323 | + | '\\' ('0'..'7') ('0'..'7') |
| 324 | + | '\\' ('0'..'7') |
| 325 | + ; |
| 326 | + |
| 327 | +fragment UnicodeEscape: /* A backslash, the letter u, and exactly four hexadecimal digits. */ |
| 328 | + '\\' 'u' HexDigit HexDigit HexDigit HexDigit |
| 329 | + ; |
| 330 | + |
| 331 | +fragment HexDigit: /* One hexadecimal digit, in either case. */ |
| 332 | + ('0'..'9'|'a'..'f'|'A'..'F') |
| 333 | + ; |
0 commit comments