diff --git a/version_1.0/ANTLR/BELScript.g b/version_1.0/ANTLR/BELScript.g new file mode 100644 index 0000000..1461d1e --- /dev/null +++ b/version_1.0/ANTLR/BELScript.g @@ -0,0 +1,333 @@ +grammar BELScript; + +options { + language = Java; + output = AST; +} + +@header { + package org.openbel.framework.common.bel.parser; + + import java.util.List; + import java.util.ArrayList; + import java.util.Stack; + + import org.openbel.bel.model.BELParseErrorException; +} + +@lexer::header { + package org.openbel.framework.common.bel.parser; +} + +@members { + private final List<BELParseErrorException> syntaxErrors = new ArrayList<BELParseErrorException>(); /* errors recorded by displayRecognitionError */ + private final Stack<String> paraphrases = new Stack<String>(); /* description of the rule being parsed; rules push in @init and pop in @after */ + + public List<BELParseErrorException> getSyntaxErrors() { /* returns the live list of recorded syntax errors */ + return syntaxErrors; + } + + @Override + public void emitErrorMessage(String msg) { /* intentionally empty: errors are recorded in syntaxErrors, not emitted */ + } + + @Override + public void displayRecognitionError(String[] tokenNames, RecognitionException e) { /* records the error with the paraphrase of the innermost active rule, or "" when none is active */ + String context = ""; + if (!paraphrases.isEmpty()) { + context = paraphrases.peek(); + } + syntaxErrors.add(new BELParseErrorException.SyntaxException(e.line, e.charPositionInLine, context, e)); + } +} + +document: + (NEWLINE | DOCUMENT_COMMENT | record)+ EOF + ; + +record: + (define_namespace | define_annotation | set_annotation | set_document | set_statement_group | unset_statement_group | unset | statement) + ; + +set_document + @init { paraphrases.push("in set document."); } + @after { paraphrases.pop(); } + : + ('SET' DOCUMENT_KEYWORD) document_property '=' (OBJECT_IDENT | vl=VALUE_LIST | quoted_value) + { + // https://github.com/OpenBEL/openbel-framework/issues/14 + if ($vl != null) $vl.setText($vl.getText().replace("\\\\", "\\")); + } + ; + +set_statement_group + @init { paraphrases.push("in set statement group."); } + @after { paraphrases.pop(); } + : + 'SET' STATEMENT_GROUP_KEYWORD '=' (quoted_value | OBJECT_IDENT) + ; + +set_annotation + @init { paraphrases.push("in set annotation."); } + @after { paraphrases.pop(); } + : + 'SET' OBJECT_IDENT '=' (quoted_value | vl=VALUE_LIST | 
OBJECT_IDENT) + { + // https://github.com/OpenBEL/openbel-framework/issues/14 + if ($vl != null) $vl.setText($vl.getText().replace("\\\\", "\\")); + } + ; + +unset_statement_group + @init { paraphrases.push("in unset statement group."); } + @after { paraphrases.pop(); } + : + 'UNSET' STATEMENT_GROUP_KEYWORD + ; + +unset + @init { paraphrases.push("in unset."); } + @after { paraphrases.pop(); } + : + 'UNSET' (OBJECT_IDENT | IDENT_LIST) + ; + +define_namespace + @init { paraphrases.push("in define namespace."); } + @after { paraphrases.pop(); } + : + ('DEFINE' (('DEFAULT')? 'NAMESPACE')) OBJECT_IDENT 'AS' 'URL' quoted_value + ; + +define_annotation + @init { paraphrases.push("in define annotation."); } + @after { paraphrases.pop(); } + : + ('DEFINE' 'ANNOTATION') OBJECT_IDENT 'AS' ((('URL' | 'PATTERN') quoted_value) | ('LIST' vl=VALUE_LIST)) + { + // https://github.com/OpenBEL/openbel-framework/issues/14 + if ($vl != null) $vl.setText($vl.getText().replace("\\\\", "\\")); + } + ; + +quoted_value + : qv=QUOTED_VALUE + { + // https://github.com/OpenBEL/openbel-framework/issues/14 + $qv.setText($qv.getText().replace("\\\\", "\\")); + } + ; + +document_property: + 'Authors' | + 'ContactInfo' | + 'Copyright' | + 'Description' | + 'Disclaimer' | + 'Licenses' | + 'Name' | + 'Version' + ; + +statement + @init { paraphrases.push("in statement."); } + @after { paraphrases.pop(); } + : + outer_term (relationship ((OPEN_PAREN outer_term relationship outer_term CLOSE_PAREN) | outer_term))? STATEMENT_COMMENT? + ; + +outer_term: + function OPEN_PAREN (','? argument)* CLOSE_PAREN + ; + +argument: + param | term + ; + +term: + function OPEN_PAREN (','? (term | param))* CLOSE_PAREN + ; + +/* XXX OBJECT_IDENT is used for namespace value because otherwise parsing will fail using a token like (LETTER | DIGIT)+ */ +fragment param: + NS_PREFIX? 
(OBJECT_IDENT | quoted_value) + ; + +function returns [String r]: + ( + fv='proteinAbundance' {$r = $fv.getText();} | + fv='p' {$r = $fv.getText();} | + fv='rnaAbundance' {$r = $fv.getText();} | + fv='r' {$r = $fv.getText();} | + fv='abundance' {$r = $fv.getText();} | + fv='a' {$r = $fv.getText();} | + fv='microRNAAbundance' {$r = $fv.getText();} | + fv='m' {$r = $fv.getText();} | + fv='geneAbundance' {$r = $fv.getText();} | + fv='g' {$r = $fv.getText();} | + fv='biologicalProcess' {$r = $fv.getText();} | + fv='bp' {$r = $fv.getText();} | + fv='pathology' {$r = $fv.getText();} | + fv='path' {$r = $fv.getText();} | + fv='complexAbundance' {$r = $fv.getText();} | + fv='complex' {$r = $fv.getText();} | + fv='translocation' {$r = $fv.getText();} | + fv='tloc' {$r = $fv.getText();} | + fv='cellSecretion' {$r = $fv.getText();} | + fv='sec' {$r = $fv.getText();} | + fv='cellSurfaceExpression' {$r = $fv.getText();} | + fv='surf' {$r = $fv.getText();} | + fv='reaction' {$r = $fv.getText();} | + fv='rxn' {$r = $fv.getText();} | + fv='compositeAbundance' {$r = $fv.getText();} | + fv='composite' {$r = $fv.getText();} | + fv='fusion' {$r = $fv.getText();} | + fv='fus' {$r = $fv.getText();} | + fv='degradation' {$r = $fv.getText();} | + fv='deg' {$r = $fv.getText();} | + fv='molecularActivity' {$r = $fv.getText();} | + fv='act' {$r = $fv.getText();} | + fv='catalyticActivity' {$r = $fv.getText();} | + fv='cat' {$r = $fv.getText();} | + fv='kinaseActivity' {$r = $fv.getText();} | + fv='kin' {$r = $fv.getText();} | + fv='phosphataseActivity' {$r = $fv.getText();} | + fv='phos' {$r = $fv.getText();} | + fv='peptidaseActivity' {$r = $fv.getText();} | + fv='pep' {$r = $fv.getText();} | + fv='ribosylationActivity' {$r = $fv.getText();} | + fv='ribo' {$r = $fv.getText();} | + fv='transcriptionalActivity' {$r = $fv.getText();} | + fv='tscript' {$r = $fv.getText();} | + fv='transportActivity' {$r = $fv.getText();} | + fv='tport' {$r = $fv.getText();} | + fv='gtpBoundActivity' {$r = 
$fv.getText();} | + fv='gtp' {$r = $fv.getText();} | + fv='chaperoneActivity' {$r = $fv.getText();} | + fv='chap' {$r = $fv.getText();} | + fv='proteinModification' {$r = $fv.getText();} | + fv='pmod' {$r = $fv.getText();} | + fv='substitution' {$r = $fv.getText();} | + fv='sub' {$r = $fv.getText();} | + fv='truncation' {$r = $fv.getText();} | + fv='trunc' {$r = $fv.getText();} | + fv='reactants' {$r = $fv.getText();} | + fv='products' {$r = $fv.getText();} | + fv='list' {$r = $fv.getText();} + ) + ; + +relationship returns [String r]: + ( + rv='increases' { $r = $rv.getText(); } | + rv='->' { $r = $rv.getText(); } | + rv='decreases' { $r = $rv.getText(); } | + rv='-|' { $r = $rv.getText(); } | + rv='directlyIncreases' { $r = $rv.getText(); } | + rv='=>' { $r = $rv.getText(); } | + rv='directlyDecreases' { $r = $rv.getText(); } | + rv='=|' { $r = $rv.getText(); } | + rv='causesNoChange' { $r = $rv.getText(); } | + rv='positiveCorrelation' { $r = $rv.getText(); } | + rv='negativeCorrelation' { $r = $rv.getText(); } | + rv='translatedTo' { $r = $rv.getText(); } | + rv='>>' { $r = $rv.getText(); } | + rv='transcribedTo' { $r = $rv.getText(); } | + rv=':>' { $r = $rv.getText(); } | + rv='isA' { $r = $rv.getText(); } | + rv='subProcessOf' { $r = $rv.getText(); } | + rv='rateLimitingStepOf' { $r = $rv.getText(); } | + rv='biomarkerFor' { $r = $rv.getText(); } | + rv='prognosticBiomarkerFor' { $r = $rv.getText(); } | + rv='orthologous' { $r = $rv.getText(); } | + rv='analogous' { $r = $rv.getText(); } | + rv='association' { $r = $rv.getText(); } | + rv='--' { $r = $rv.getText(); } | + rv='hasMembers' { $r = $rv.getText(); } | + rv='hasComponents' { $r = $rv.getText(); } | + rv='hasMember' { $r = $rv.getText(); } | + rv='hasComponent' { $r = $rv.getText(); } + ) + ; + +DOCUMENT_COMMENT: + '#' ~('\n' | '\r')* {$channel=HIDDEN;} + ; + +STATEMENT_COMMENT: + '//' (('\\\n') | ('\\\r\n') | ~('\n' | '\r'))* + ; + +DOCUMENT_KEYWORD: + 'DOCUMENT' + ; + +STATEMENT_GROUP_KEYWORD: + 
'STATEMENT_GROUP' + ; + +IDENT_LIST: + '{' OBJECT_IDENT (COMMA OBJECT_IDENT)* '}' + ; + +VALUE_LIST: + '{' (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)? (COMMA (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)?)* '}' + ; + +OBJECT_IDENT: + ('_' | LETTER | DIGIT)+ + ; + +QUOTED_VALUE: + '"' ( EscapeSequence | '\\\n' | '\\\r\n' | ~('\\'|'"') )* '"' + ; + +OPEN_PAREN: + '(' + ; + +CLOSE_PAREN: + ')' + ; + +NS_PREFIX: + LETTER (LETTER | DIGIT)* ':' + ; + +NEWLINE: + '\u000d'? '\u000a' | '\u000d' + ; + +WS : (' ' | '\t' | '\n' | '\r'| '\f' | '\\\n' | '\\\r\n')+ {$channel = HIDDEN;}; + +fragment COMMA: + ' '* ',' ' '* + ; + +fragment LETTER: + ('a'..'z' | 'A'..'Z') + ; + +fragment DIGIT: + '0'..'9' + ; + +fragment EscapeSequence: + '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') + | UnicodeEscape + | OctalEscape + ; + +fragment OctalEscape: + '\\' ('0'..'3') ('0'..'7') ('0'..'7') + | '\\' ('0'..'7') ('0'..'7') + | '\\' ('0'..'7') + ; + +fragment UnicodeEscape: + '\\' 'u' HexDigit HexDigit HexDigit HexDigit + ; + +fragment HexDigit: + ('0'..'9'|'a'..'f'|'A'..'F') + ; diff --git a/version_1.0/ANTLR/BELScriptWalker.g b/version_1.0/ANTLR/BELScriptWalker.g new file mode 100644 index 0000000..b201a21 --- /dev/null +++ b/version_1.0/ANTLR/BELScriptWalker.g @@ -0,0 +1,584 @@ +tree grammar BELScriptWalker; + +options { + language = Java; + tokenVocab=BELScript; + ASTLabelType=CommonTree; + output=AST; +} + +@header { + package org.openbel.framework.common.bel.parser; + + import java.text.ParseException; + + import java.text.SimpleDateFormat; + import java.util.Set; + import java.util.Map; + import java.util.HashMap; + import java.util.LinkedHashMap; + import java.util.LinkedHashSet; + import java.util.Arrays; + import java.util.List; + import java.util.ArrayList; + import java.util.Date; + + import org.openbel.bel.model.BELDocument; + import org.openbel.bel.model.BELDocumentHeader; + import org.openbel.bel.model.BELAnnotation; + import org.openbel.bel.model.BELCitation; + import 
org.openbel.bel.model.BELEvidence; + import org.openbel.bel.model.BELAnnotationDefinition; + import org.openbel.bel.model.BELAnnotationType; + import org.openbel.bel.model.BELNamespaceDefinition; + import org.openbel.bel.model.BELDocumentProperty; + import org.openbel.bel.model.BELStatement; + import org.openbel.bel.model.BELStatementGroup; + import org.openbel.bel.model.BELParseErrorException; + import org.openbel.bel.model.BELParseWarningException; + import org.openbel.bel.model.BELParseErrorException.DefineAnnotationBeforeUsageException; + import org.openbel.bel.model.BELParseErrorException.SetDocumentPropertiesFirstException; + import org.openbel.bel.model.BELParseErrorException.DocumentNameException; + import org.openbel.bel.model.BELParseErrorException.DocumentDescriptionException; + import org.openbel.bel.model.BELParseErrorException.DocumentVersionException; + import org.openbel.bel.model.BELParseErrorException.UnsetDocumentPropertiesException; + import org.openbel.bel.model.BELParseWarningException.UnsetUndefinedAnnotationException; + import org.openbel.bel.model.BELParseErrorException.NamespaceUndefinedException; + import org.openbel.bel.model.BELParseErrorException.InvalidCitationException; + import org.openbel.bel.model.BELParseErrorException.InvalidEvidenceException; + import org.openbel.framework.common.enums.FunctionEnum; +} + +@members { + private final Map<BELDocumentProperty, String> docprop = new HashMap<BELDocumentProperty, String>(); /* document properties set so far */ + private int lastDocumentPropertyLocation = 0; /* line of the most recent SET DOCUMENT record */ + private final Set<BELAnnotationDefinition> adlist = new LinkedHashSet<BELAnnotationDefinition>(); /* annotation definitions in declaration order */ + private final Map<String, BELAnnotationDefinition> definedAnnotations = new LinkedHashMap<String, BELAnnotationDefinition>(); /* annotation name to definition */ + private final Map<String, BELNamespaceDefinition> definedNamespaces = new LinkedHashMap<String, BELNamespaceDefinition>(); /* namespace prefix to definition */ + private final Set<BELNamespaceDefinition> nslist = new LinkedHashSet<BELNamespaceDefinition>(); /* namespace definitions in declaration order */ + + private BELStatementGroup activeStatementGroup; /* null while no SET STATEMENT_GROUP is in effect */ + private BELStatementGroup documentStatementGroup = new BELStatementGroup(); /* implicit group for statements outside any explicit group */ + private List<BELStatementGroup> statementGroups = new ArrayList<BELStatementGroup>(); /* explicit groups in declaration order */ + + private final Map<String, BELAnnotation> sgAnnotationContext = new LinkedHashMap<String, BELAnnotation>(); /* annotations set inside the active statement group */ + private final Map<String, BELAnnotation> annotationContext = new LinkedHashMap<String, BELAnnotation>(); /* annotations set at document scope */ + 
private BELCitation citationContext; + private BELEvidence evidenceContext; + + private final List<BELStatement> stmtlist = new ArrayList<BELStatement>(); /* every statement seen, kept for validation */ + private final List<BELParseErrorException> syntaxErrors = new ArrayList<BELParseErrorException>(); + private final List<BELParseWarningException> syntaxWarnings = new ArrayList<BELParseWarningException>(); + + private final SimpleDateFormat iso8601DateFormat = new SimpleDateFormat("yyyy-MM-dd"); /* one per walker: SimpleDateFormat is not thread-safe, so it must not be shared through a static field */ + + public List<BELParseErrorException> getSyntaxErrors() { /* returns the live list of errors recorded through addError */ + return syntaxErrors; + } + + public List<BELParseWarningException> getSyntaxWarnings() { /* returns the live list of warnings recorded through addWarning */ + return syntaxWarnings; + } + + @Override + public void emitErrorMessage(String msg) { /* intentionally empty: nothing is emitted by the walker itself */ + } + + public void addError(BELParseErrorException e) { + syntaxErrors.add(e); + } + + public void addWarning(BELParseWarningException e) { + syntaxWarnings.add(e); + } +} + +document returns [BELDocument doc]: + (NEWLINE | DOCUMENT_COMMENT | record)+ EOF { /* the document is built only when Name, Description and Version were all set; otherwise the first missing one is reported and no document is returned */ + if (!docprop.containsKey(BELDocumentProperty.NAME)) { + addError(new DocumentNameException(lastDocumentPropertyLocation, 0)); + } else if (!docprop.containsKey(BELDocumentProperty.DESCRIPTION)) { + addError(new DocumentDescriptionException(lastDocumentPropertyLocation, 0)); + } else if (!docprop.containsKey(BELDocumentProperty.VERSION)) { + addError(new DocumentVersionException(lastDocumentPropertyLocation, 0)); + } else { + if (documentStatementGroup.getStatements().isEmpty()) { + // statements are only contained in explicitly-defined statement groups + $doc = new BELDocument(BELDocumentHeader.create(docprop), adlist, nslist, statementGroups); + } else { + // statements are defined in the implicit document statement group and possibly child statement groups + documentStatementGroup.setChildStatementGroups(statementGroups); + $doc = new BELDocument(BELDocumentHeader.create(docprop), adlist, nslist, Arrays.asList(documentStatementGroup)); + } + } + } + ; + +record: + (define_namespace | define_annotation | set_annotation | set_document | set_statement_group | unset_statement_group | unset | statement) + ; + +set_document: + ('SET' dkt=DOCUMENT_KEYWORD) prop=document_property '=' (qv=QUOTED_VALUE | 
oi=OBJECT_IDENT) { + if (!annotationContext.isEmpty() || !stmtlist.isEmpty()) { + addError(new SetDocumentPropertiesFirstException(dkt.getLine(), dkt.getCharPositionInLine())); + } + + final String keywordValue; + if ($qv != null) { + keywordValue = $qv.toString(); + } else if ($oi != null) { + keywordValue = $oi.toString(); + } else { + throw new IllegalStateException("Did not understand document keyword value, expecting quoted value or object identifier."); + } + + docprop.put(prop.r, keywordValue); + lastDocumentPropertyLocation = dkt.getLine(); + } + ; + +set_statement_group: + 'SET' STATEMENT_GROUP_KEYWORD '=' (qv=QUOTED_VALUE | oi=OBJECT_IDENT) { + final String name; + if (qv != null) { + name = qv.toString(); + } else if (oi != null) { + name = oi.toString(); + } else { + throw new IllegalStateException("Did not understand statement group value, expecting quoted value or object identifier."); + } + + activeStatementGroup = new BELStatementGroup(name); + statementGroups.add(activeStatementGroup); + } + ; + +set_annotation: + 'SET' an=OBJECT_IDENT '=' (qv=QUOTED_VALUE | list=VALUE_LIST | oi=OBJECT_IDENT) { + final String name = an.getText(); + + BELAnnotationDefinition ad = definedAnnotations.get(name); + if (ad != null) { + // read annotation value + final BELAnnotation annotation; + if (qv != null) { + annotation = new BELAnnotation(ad, qv.getText()); + } else if (oi != null) { + annotation = new BELAnnotation(ad, oi.getText()); + } else { + if (list == null) { + throw new IllegalStateException("Did not understand annotation value, expecting annotation list form."); + } + + String listvalues = list.getText(); + listvalues = listvalues.substring(1, listvalues.length() - 1); + annotation = new BELAnnotation(ad, Arrays.asList(ParserUtil.parseListRecord(listvalues))); + } + + if (activeStatementGroup != null) { + // add to local statement group scope + sgAnnotationContext.put(name, annotation); + } else { + // add to main statement group scope + 
annotationContext.put(name, annotation); + } + } else if (!name.equals("Citation") && !name.equals("Evidence")) { + // report an error if the annotation is not defined and it is not one of the intrinsics: Citation or Evidence + addError(new DefineAnnotationBeforeUsageException(an.getLine(), an.getCharPositionInLine())); + } + + if (name.equals("Citation")) { + // redefinition of citation so clear out citation context + citationContext = null; + + if (list == null) { + // a citation must be given in list form + addError(new InvalidCitationException(an.getLine(), an.getCharPositionInLine())); + } else { + String listvalues = list.getText(); + String[] tokens = ParserUtil.parseListRecord(listvalues); + + String type = null; + String cname = null; + String reference = null; + Date publicationDate = null; + String[] authors = null; + String comment = null; + + // (required) parse type + if (tokens.length > 0 && tokens[0] != null) { + type = tokens[0]; + if (!("Book".equals(type) || "Journal".equals(type) || "Online Resource".equals(type) || "Other".equals(type) || "PubMed".equals(type))) { + addError(new InvalidCitationException(an.getLine(), an.getCharPositionInLine())); + } + } else { + addError(new InvalidCitationException(an.getLine(), an.getCharPositionInLine())); + } + + // (required) parse name + if (tokens.length > 1 && tokens[1] != null) { + if ("".equals(tokens[1].trim())) { + addError(new InvalidCitationException(an.getLine(), an.getCharPositionInLine())); + } else { + cname = tokens[1]; + } + } else { + addError(new InvalidCitationException(an.getLine(), an.getCharPositionInLine())); + } + + // (required) parse reference + if (tokens.length > 2 && tokens[2] != null) { + if ("".equals(tokens[2].trim())) { + addError(new InvalidCitationException(an.getLine(), an.getCharPositionInLine())); + } else { + reference = tokens[2]; + } + } else { + // a missing reference is as invalid as a blank one, matching the handling of type and name + addError(new InvalidCitationException(an.getLine(), an.getCharPositionInLine())); + } + + // (optional) parse date of publication + if (tokens.length > 3 && tokens[3] != null) { + if (!"".equals(tokens[3].trim())) { + try { + publicationDate = 
iso8601DateFormat.parse(tokens[3]); + } catch (ParseException e) { + // parse either returns a Date or throws, so this is the only failure path and it is reported once + addError(new InvalidCitationException(an.getLine(), an.getCharPositionInLine())); + } + } + } + + // (optional) parse authors + if (tokens.length > 4 && tokens[4] != null) { + authors = ParserUtil.parseValueSeparated(tokens[4]); + } + + // (optional) parse comments + if (tokens.length > 5 && tokens[5] != null) { + comment = tokens[5]; + } + + citationContext = new BELCitation(type, cname, publicationDate, reference, authors == null ? null : Arrays.asList(authors), comment); + } + } else if (name.equals("Evidence")) { + // redefinition of evidence so clear out evidence context + evidenceContext = null; + + if (qv == null || "".equals(qv.getText().trim())) { + addError(new InvalidEvidenceException(an.getLine(), an.getCharPositionInLine())); + } else { + evidenceContext = new BELEvidence(qv.getText()); + } + } + } + ; + +unset_statement_group: + 'UNSET' STATEMENT_GROUP_KEYWORD { + // leaving the group also discards the annotations that were scoped to it + activeStatementGroup = null; + sgAnnotationContext.clear(); + } + ; + +unset: + 'UNSET' (an=OBJECT_IDENT | list=IDENT_LIST) { + // collect the names to unset: the single identifier, or every entry of the brace-delimited identifier list + final CommonTree where = (an != null) ? an : list; + final String[] names; + if (an != null) { + names = new String[] { an.getText() }; + } else if (list != null) { + final String listText = list.getText(); + names = listText.substring(1, listText.length() - 1).split(","); + } else { + names = new String[0]; + } + + for (String rawName : names) { + // the lexer's COMMA fragment admits blanks around the comma, so trim each entry + final String annotationName = rawName.trim(); + if ("ALL".equals(annotationName)) { + // ALL clears the whole scope that is currently active + if (activeStatementGroup == null) + annotationContext.clear(); + else + sgAnnotationContext.clear(); + } else if (definedAnnotations.containsKey(annotationName)) { + if (activeStatementGroup == null) + annotationContext.remove(annotationName); + else + sgAnnotationContext.remove(annotationName); + } else if (docprop.containsKey(BELDocumentProperty.getDocumentProperty(annotationName))) { + // document properties that have been set cannot be unset + addError(new UnsetDocumentPropertiesException(where.getLine(), where.getCharPositionInLine())); + } else { + addWarning(new UnsetUndefinedAnnotationException(where.getLine(), where.getCharPositionInLine())); + } + } + } + ; + +define_namespace: + ('DEFINE' ((isdefault='DEFAULT')? 
'NAMESPACE')) name=OBJECT_IDENT 'AS' 'URL' rloc=QUOTED_VALUE { + final String nametext = $name.getText(); + final String rloctext = $rloc.getText(); + + BELNamespaceDefinition belnsd; + if (isdefault != null) { + belnsd = new BELNamespaceDefinition(nametext, rloctext, true); + } else { + belnsd = new BELNamespaceDefinition(nametext, rloctext, false); + } + + nslist.add(belnsd); + definedNamespaces.put(nametext, belnsd); + } + ; + +define_annotation: + ('DEFINE' 'ANNOTATION') name=OBJECT_IDENT 'AS' (((type='URL' | type='PATTERN') value=QUOTED_VALUE) | (type='LIST' value=VALUE_LIST)) { + final String nametext = $name.getText(); + + if (type != null && value != null) { + final String typetext = type.toString(); + String valuetext = value.toString(); + BELAnnotationType atype; + + BELAnnotationDefinition ad; + if ("URL".equals(typetext)) { + atype = BELAnnotationType.URL; + ad = new BELAnnotationDefinition(nametext, atype, valuetext); + } else if ("PATTERN".equals(typetext)) { + atype = BELAnnotationType.PATTERN; + ad = new BELAnnotationDefinition(nametext, atype, valuetext); + } else { + atype = BELAnnotationType.LIST; + valuetext = valuetext.substring(1, valuetext.length() - 1); + ad = new BELAnnotationDefinition(nametext, atype, Arrays.asList(ParserUtil.parseListRecord(valuetext))); + } + + adlist.add(ad); + definedAnnotations.put(nametext, ad); + } + } + ; + +document_property returns [BELDocumentProperty r]: + pv='Authors' {$r = BELDocumentProperty.getDocumentProperty($pv.getText());} | + pv='ContactInfo' {$r = BELDocumentProperty.getDocumentProperty($pv.getText());} | + pv='Copyright' {$r = BELDocumentProperty.getDocumentProperty($pv.getText());} | + pv='Description' {$r = BELDocumentProperty.getDocumentProperty($pv.getText());} | + pv='Disclaimer' {$r = BELDocumentProperty.getDocumentProperty($pv.getText());} | + pv='Licenses' {$r = BELDocumentProperty.getDocumentProperty($pv.getText());} | + pv='Name' {$r = 
BELDocumentProperty.getDocumentProperty($pv.getText());} | + pv='Version' {$r = BELDocumentProperty.getDocumentProperty($pv.getText());} + ; + +statement: + st=outer_term (rel=relationship ((OPEN_PAREN nst=outer_term nrel=relationship not=outer_term CLOSE_PAREN) | ot=outer_term))? comment=STATEMENT_COMMENT? { + final StringBuilder stmtBuilder = new StringBuilder(); + stmtBuilder.append(st.r); + + if (rel != null) { + stmtBuilder.append(" ").append(rel.r); + + if (ot != null) { + stmtBuilder.append(" ").append(ot.r); + } else { + stmtBuilder.append("("); + + if (nst != null && nrel != null && not != null) { + stmtBuilder.append(nst.r).append(" ").append(nrel.r).append(" ").append(not.r); + } + stmtBuilder.append(")"); + } + } + + String commentText = null; + if (comment != null) { + commentText = comment.getText(); + } + + // build effective annotations from main statement group context and then local statement group context, if any + final Map effectiveAnnotations = new LinkedHashMap(annotationContext); + if (activeStatementGroup != null) { + effectiveAnnotations.putAll(sgAnnotationContext); + } + + final List annotations = new ArrayList(effectiveAnnotations.values()); + + // build statement and keep track of it for validation purposes + final BELStatement stmt = new BELStatement(stmtBuilder.toString(), annotations, citationContext, evidenceContext, commentText); + stmtlist.add(stmt); + + // add statement to scoped statement group + if (activeStatementGroup != null) { + activeStatementGroup.getStatements().add(stmt); + } else { + documentStatementGroup.getStatements().add(stmt); + } + } + ; + +outer_term returns [String r] +@init { + final StringBuilder tBuilder = new StringBuilder(); +} +: + f=function { + tBuilder.append(f.r); + } op=OPEN_PAREN { + tBuilder.append(op.getText()); + } (c=','? 
{ + if (c != null) { + tBuilder.append(c.getText()); + } + } a=argument { + tBuilder.append(a.r); + })* cp=CLOSE_PAREN { + tBuilder.append(cp.getText()); + $r = tBuilder.toString(); + } + ; + +argument returns [String r]: + p=param { + $r = p.r; + } | + t=term { + $r = t.r; + } + ; + +term returns [String r] +@init { + final StringBuilder termBuilder = new StringBuilder(); +} +: + f=function { + termBuilder.append(f.r); + } op=OPEN_PAREN { + termBuilder.append(op.getText()); + }(c=','? { + if (c != null) { + termBuilder.append(c.getText()); + } + } (t=term { + termBuilder.append(t.r); + } | p=param { + termBuilder.append(p.r); + }))* cp=CLOSE_PAREN { + termBuilder.append(cp.getText()); + $r = termBuilder.toString(); + } + ; + +/* XXX OBJECT_IDENT is used for namespace value because otherwise parsing will fail using a token like (LETTER | DIGIT)+ */ +fragment param returns [String r]: + nsp=NS_PREFIX? (id=OBJECT_IDENT | quo=QUOTED_VALUE) { + final StringBuilder pBuilder = new StringBuilder(); + + if (nsp != null) { + String prefix = nsp.getText(); + if (!definedNamespaces.containsKey(prefix.substring(0, prefix.length() - 1))) { + addError(new NamespaceUndefinedException(nsp.getLine(), nsp.getCharPositionInLine())); + } + + pBuilder.append(prefix); + } + + if (id != null) { + pBuilder.append(id.getText()); + } + + if (quo != null) { + pBuilder.append(quo.getText()); + } + + $r = pBuilder.toString(); + } + ; + +function returns [String r]: + ( + fv='proteinAbundance' { $r = $fv.getText(); } | + fv='p' { $r = $fv.getText(); } | + fv='rnaAbundance' { $r = $fv.getText(); } | + fv='r' { $r = $fv.getText(); } | + fv='abundance' { $r = $fv.getText(); } | + fv='a' { $r = $fv.getText(); } | + fv='microRNAAbundance' { $r = $fv.getText(); } | + fv='m' { $r = $fv.getText(); } | + fv='geneAbundance' { $r = $fv.getText(); } | + fv='g' { $r = $fv.getText(); } | + fv='biologicalProcess' { $r = $fv.getText(); } | + fv='bp' { $r = $fv.getText(); } | + fv='pathology' { $r = 
$fv.getText(); } | + fv='path' { $r = $fv.getText(); } | + fv='complexAbundance' { $r = $fv.getText(); } | + fv='complex' { $r = $fv.getText(); } | + fv='translocation' { $r = $fv.getText(); } | + fv='tloc' { $r = $fv.getText(); } | + fv='cellSecretion' { $r = $fv.getText(); } | + fv='sec' { $r = $fv.getText(); } | + fv='cellSurfaceExpression' { $r = $fv.getText(); } | + fv='surf' { $r = $fv.getText(); } | + fv='reaction' { $r = $fv.getText(); } | + fv='rxn' { $r = $fv.getText(); } | + fv='compositeAbundance' { $r = $fv.getText(); } | + fv='composite' { $r = $fv.getText(); } | + fv='fusion' { $r = $fv.getText(); } | + fv='fus' { $r = $fv.getText(); } | + fv='degradation' { $r = $fv.getText(); } | + fv='deg' { $r = $fv.getText(); } | + fv='molecularActivity' { $r = $fv.getText(); } | + fv='act' { $r = $fv.getText(); } | + fv='catalyticActivity' { $r = $fv.getText(); } | + fv='cat' { $r = $fv.getText(); } | + fv='kinaseActivity' { $r = $fv.getText(); } | + fv='kin' { $r = $fv.getText(); } | + fv='phosphataseActivity' { $r = $fv.getText(); } | + fv='phos' { $r = $fv.getText(); } | + fv='peptidaseActivity' { $r = $fv.getText(); } | + fv='pep' { $r = $fv.getText(); } | + fv='ribosylationActivity' { $r = $fv.getText(); } | + fv='ribo' { $r = $fv.getText(); } | + fv='transcriptionalActivity' { $r = $fv.getText(); } | + fv='tscript' { $r = $fv.getText(); } | + fv='transportActivity' { $r = $fv.getText(); } | + fv='tport' { $r = $fv.getText(); } | + fv='gtpBoundActivity' { $r = $fv.getText(); } | + fv='gtp' { $r = $fv.getText(); } | + fv='chaperoneActivity' { $r = $fv.getText(); } | + fv='chap' { $r = $fv.getText(); } | + fv='proteinModification' { $r = $fv.getText(); } | + fv='pmod' { $r = $fv.getText(); } | + fv='substitution' { $r = $fv.getText(); } | + fv='sub' { $r = $fv.getText(); } | + fv='truncation' { $r = $fv.getText(); } | + fv='trunc' { $r = $fv.getText(); } | + fv='reactants' { $r = $fv.getText(); } | + fv='products' { $r = $fv.getText(); } | + fv='list' { $r = 
$fv.getText(); } + ) + ; + +relationship returns [String r]: /* matches one relationship keyword or symbolic abbreviation and returns the matched text unchanged */ + ( + rv='increases' { $r = $rv.getText(); } | + rv='->' { $r = $rv.getText(); } | + rv='decreases' { $r = $rv.getText(); } | + rv='-|' { $r = $rv.getText(); } | + rv='directlyIncreases' { $r = $rv.getText(); } | + rv='=>' { $r = $rv.getText(); } | + rv='directlyDecreases' { $r = $rv.getText(); } | + rv='=|' { $r = $rv.getText(); } | + rv='causesNoChange' { $r = $rv.getText(); } | + rv='positiveCorrelation' { $r = $rv.getText(); } | + rv='negativeCorrelation' { $r = $rv.getText(); } | + rv='translatedTo' { $r = $rv.getText(); } | + rv='>>' { $r = $rv.getText(); } | + rv='transcribedTo' { $r = $rv.getText(); } | + rv=':>' { $r = $rv.getText(); } | + rv='isA' { $r = $rv.getText(); } | + rv='subProcessOf' { $r = $rv.getText(); } | + rv='rateLimitingStepOf' { $r = $rv.getText(); } | + rv='biomarkerFor' { $r = $rv.getText(); } | + rv='prognosticBiomarkerFor' { $r = $rv.getText(); } | + rv='orthologous' { $r = $rv.getText(); } | + rv='analogous' { $r = $rv.getText(); } | + rv='association' { $r = $rv.getText(); } | + rv='--' { $r = $rv.getText(); } | + rv='hasMembers' { $r = $rv.getText(); } | + rv='hasComponents' { $r = $rv.getText(); } | + rv='hasMember' { $r = $rv.getText(); } | + rv='hasComponent' { $r = $rv.getText(); } + ) + ; diff --git a/version_1.0/ANTLR/BELScriptWalker_v1.g b/version_1.0/ANTLR/BELScriptWalker_v1.g new file mode 100644 index 0000000..a1108b9 --- /dev/null +++ b/version_1.0/ANTLR/BELScriptWalker_v1.g @@ -0,0 +1,223 @@ +tree grammar BELScriptWalker_v1; + +options { + // Target language for ANTLR to generate code in (defaults to Java). + language = Java; + + // Controls the data structure the recognizer will generate. + // (AST for abstract syntax trees or template for StringTemplate templates) + output = AST; + + // Import the vocabulary from our combined grammar. + tokenVocab=BELScript_v1; + + // Tree types. + ASTLabelType=CommonTree; +} + +// Introduce imaginary tokens. 
These tokens are not associated w/ any +// of our input but make nice root nodes for our abstract syntax tree. +tokens { + // Defines document authors. + AUTHDEF; + // Defines document contact info. + CONTACTDEF; + // Defines document copyright. + COPYDEF; + // Defines document description. + DESCDEF; + // Defines document disclaimer. + DISCDEF; + // Defines document licenses. + LICDEF; + // Defines document name. + NAMEDEF; + // Defines document version. + VERSIONDEF; +} + +document + : ^(DOCDEF record+) + ; + +record + : define_namespace + | define_annotation + | set_annotation + | set_document + | set_statement_group + | unset_statement_group + | unset + | statement + ; + +set_doc_expr + : KWRD_SET WS* KWRD_DOCUMENT WS* + ; + +set_document + : ^(DOCSET_QV document_property QUOTED_VALUE) + | ^(DOCSET_LIST document_property VALUE_LIST) + | ^(DOCSET_ID document_property OBJECT_IDENT) + ; + +set_statement_group + : ^(SG_SET_QV QUOTED_VALUE) + | ^(SG_SET_ID OBJECT_IDENT) + ; + +set_annotation + : ^(ANNO_SET_QV OBJECT_IDENT QUOTED_VALUE) + | ^(ANNO_SET_LIST OBJECT_IDENT VALUE_LIST) + | ^(ANNO_SET_ID OBJECT_IDENT OBJECT_IDENT) + ; + +unset_statement_group + : UNSET_SG + ; + +unset + : ^(UNSET_ID OBJECT_IDENT) + | ^(UNSET_ID_LIST IDENT_LIST) + ; + +define_namespace + : ^(DFLT_NSDEF OBJECT_IDENT QUOTED_VALUE) + | ^(NSDEF OBJECT_IDENT QUOTED_VALUE) + ; + +define_anno_expr + : KWRD_DEFINE WS* KWRD_ANNO WS* + ; + +define_annotation + : ^(ANNO_DEF_LIST OBJECT_IDENT VALUE_LIST) + | ^(ANNO_DEF_URL OBJECT_IDENT QUOTED_VALUE) + ; + +document_property + : KWRD_AUTHORS -> AUTHDEF + | KWRD_CONTACTINFO -> CONTACTDEF + | KWRD_COPYRIGHT -> COPYDEF + | KWRD_DESC -> DESCDEF + | KWRD_DISCLAIMER -> DISCDEF + | KWRD_LICENSES -> LICDEF + | KWRD_NAME -> NAMEDEF + | KWRD_VERSION -> VERSIONDEF + ; + +argument + : term + | param + ; + +term + : ^(TERMDEF function argument*) + ; + +statement + : ^(STMTDEF STATEMENT_COMMENT? term (relationship term (relationship term)?)?) 
+ ; + +ns_prefix + : OBJECT_IDENT + ; + +param + : ^(PARAM_DEF_ID ns_prefix? OBJECT_IDENT) + | ^(PARAM_DEF_QV ns_prefix? QUOTED_VALUE) + ; + +function returns [String r] + : fv='proteinAbundance' {$r = "p";} + | fv='p' {$r = "p";} + | fv='rnaAbundance' {$r = "r";} + | fv='r' {$r = "r";} + | fv='abundance' {$r = "a";} + | fv='a' {$r = "a";} + | fv='microRNAAbundance' {$r = "m";} + | fv='m' {$r = "m";} + | fv='geneAbundance' {$r = "g";} + | fv='g' {$r = "g";} + | fv='biologicalProcess' {$r = "bp";} + | fv='bp' {$r = "bp";} + | fv='pathology' {$r = "path";} + | fv='path' {$r = "path";} + | fv='complexAbundance' {$r = "complex";} + | fv='complex' {$r = "complex";} + | fv='translocation' {$r = "tloc";} + | fv='tloc' {$r = "tloc";} + | fv='cellSecretion' {$r = "sec";} + | fv='sec' {$r = "sec";} + | fv='cellSurfaceExpression' {$r = "surf";} + | fv='surf' {$r = "surf";} + | fv='reaction' {$r = "rxn";} + | fv='rxn' {$r = "rxn";} + | fv='compositeAbundance' {$r = "composite";} + | fv='composite' {$r = "composite";} + | fv='fusion' {$r = "fus";} + | fv='fus' {$r = "fus";} + | fv='degradation' {$r = "deg";} + | fv='deg' {$r = "deg";} + | fv='molecularActivity' {$r = "act";} + | fv='act' {$r = "act";} + | fv='catalyticActivity' {$r = "cat";} + | fv='cat' {$r = "cat";} + | fv='kinaseActivity' {$r = "kin";} + | fv='kin' {$r = "kin";} + | fv='phosphataseActivity' {$r = "phos";} + | fv='phos' {$r = "phos";} + | fv='peptidaseActivity' {$r = "pep";} + | fv='pep' {$r = "pep";} + | fv='ribosylationActivity' {$r = "ribo";} + | fv='ribo' {$r = "ribo";} + | fv='transcriptionalActivity' {$r = "tscript";} + | fv='tscript' {$r = "tscript";} + | fv='transportActivity' {$r = "tport";} + | fv='tport' {$r = "tport";} + | fv='gtpBoundActivity' {$r = "gtp";} + | fv='gtp' {$r = "gtp";} + | fv='chaperoneActivity' {$r = "chap";} + | fv='chap' {$r = "chap";} + | fv='proteinModification' {$r = "pmod";} + | fv='pmod' {$r = "pmod";} + | fv='substitution' {$r = "sub";} + | fv='sub' {$r = "sub";} + | 
fv='truncation' {$r = "trunc";} + | fv='trunc' {$r = "trunc";} + | fv='reactants' {$r = "reactants";} + | fv='products' {$r = "products";} + | fv='list' {$r = "list";} + ; + +relationship returns [String r] + : rv='increases' { $r = "increases";} + | rv='->' { $r = "increases";} + | rv='decreases' { $r = "decreases";} + | rv='-|' { $r = "decreases";} + | rv='directlyIncreases' { $r = "directlyIncreases";} + | rv='=>' { $r = "directlyIncreases";} + | rv='directlyDecreases' { $r = "directlyDecreases";} + | rv='=|' { $r = "directlyDecreases";} + | rv='causesNoChange' { $r = "causesNoChange";} + | rv='positiveCorrelation' { $r = "positiveCorrelation";} + | rv='negativeCorrelation' { $r = "negativeCorrelation";} + | rv='translatedTo' { $r = "translatedTo";} + | rv='>>' { $r = "translatedTo";} + | rv='transcribedTo' { $r = "transcribedTo";} + | rv=':>' { $r = "transcribedTo";} + | rv='isA' { $r = "isA";} + | rv='subProcessOf' { $r = "subProcessOf";} + | rv='rateLimitingStepOf' { $r = "rateLimitingStepOf";} + | rv='biomarkerFor' { $r = "biomarkerFor";} + | rv='prognosticBiomarkerFor' { $r = "prognosticBiomarkerFor";} + | rv='orthologous' { $r = "orthologous";} + | rv='analogous' { $r = "analogous";} + | rv='association' { $r = "association";} + | rv='--' { $r = "association";} + | rv='hasMembers' { $r = "hasMembers";} + | rv='hasComponents' { $r = "hasComponents";} + | rv='hasMember' { $r = "hasMember";} + | rv='hasComponent' { $r = "hasComponent";} + ; + diff --git a/version_1.0/ANTLR/BELScript_C_v1.g b/version_1.0/ANTLR/BELScript_C_v1.g new file mode 100644 index 0000000..86dd652 --- /dev/null +++ b/version_1.0/ANTLR/BELScript_C_v1.g @@ -0,0 +1,426 @@ +grammar BELScript_C_v1; + +options { + // Target language for ANTLR to generate code in (defaults to Java). + language = C; + + // Controls the data structure the recognizer will generate. + // (AST for abstract syntax trees or template for StringTemplate templates) + output = AST; +} + +// Introduce imaginary tokens. 
These tokens are not associated w/ any +// of our input but make nice root nodes for our abstract syntax tree. +tokens { + + // DOCSET a quoted value. + DOCSET_QV; + // DOCSET a list of values. + DOCSET_LIST; + // DOCSET an identifier. + DOCSET_ID; + + // SG_SET a quoted value. + SG_SET_QV; + // SG_SET an identifier. + SG_SET_ID; + + // ANNO_SET a quoted value. + ANNO_SET_QV; + // ANNO_SET a list of values. + ANNO_SET_LIST; + // ANNO_SET an identifier. + ANNO_SET_ID; + + // ANNO_DEF a list. + ANNO_DEF_LIST; + // ANNO_DEF a URL. + ANNO_DEF_URL; + // ANNO_DEF a pattern. + ANNO_DEF_PTRN; + + // PARAM_DEF a quoted value. + PARAM_DEF_QV; + // PARAM_DEF an identifier. + PARAM_DEF_ID; + + // UNSET a statement group. + UNSET_SG; + + // UNSET an identifier. + UNSET_ID; + // UNSET a list of identifiers. + UNSET_ID_LIST; + + // Defines a document. + DOCDEF; + // Defines a namespace. + NSDEF; + // Defines the default namespace. + DFLT_NSDEF; + // Defines a term. + TERMDEF; + // Defines a statement. 
+ STMTDEF; +} + +document + : (NEWLINE | DOCUMENT_COMMENT | record)+ EOF -> + ^(DOCDEF record+) + ; + +record + : define_namespace + | define_annotation + | set_annotation + | set_document + | set_statement_group + | unset_statement_group + | unset + | statement + ; + +set_doc_expr + : KWRD_SET WS* KWRD_DOCUMENT WS* + ; + +set_document + : set_doc_expr document_property eq_clause val=QUOTED_VALUE -> + ^(DOCSET_QV document_property $val) + | set_doc_expr document_property eq_clause val=VALUE_LIST -> + ^(DOCSET_LIST document_property $val) + | set_doc_expr document_property eq_clause val=OBJECT_IDENT -> + ^(DOCSET_ID document_property $val) + ; + +set_sg_expr + : KWRD_SET WS* KWRD_STMT_GROUP + ; + +set_statement_group + : set_sg_expr eq_clause val=QUOTED_VALUE -> ^(SG_SET_QV $val) + | set_sg_expr eq_clause val=OBJECT_IDENT -> ^(SG_SET_ID $val) + ; + +set_annotation + : KWRD_SET OBJECT_IDENT eq_clause val=QUOTED_VALUE -> + ^(ANNO_SET_QV OBJECT_IDENT $val) + | KWRD_SET OBJECT_IDENT eq_clause val=VALUE_LIST -> + ^(ANNO_SET_LIST OBJECT_IDENT $val) + | KWRD_SET OBJECT_IDENT eq_clause val=OBJECT_IDENT -> + ^(ANNO_SET_ID OBJECT_IDENT $val) + ; + +unset_statement_group + : KWRD_UNSET KWRD_STMT_GROUP -> ^(UNSET_SG) + ; + +unset + : KWRD_UNSET val=OBJECT_IDENT -> ^(UNSET_ID $val) + | KWRD_UNSET val=IDENT_LIST -> ^(UNSET_ID_LIST $val) + ; + +define_namespace + : KWRD_DEFINE KWRD_DFLT KWRD_NS OBJECT_IDENT KWRD_AS KWRD_URL QUOTED_VALUE -> + ^(DFLT_NSDEF OBJECT_IDENT QUOTED_VALUE) + | KWRD_DEFINE KWRD_NS OBJECT_IDENT KWRD_AS KWRD_URL QUOTED_VALUE -> + ^(NSDEF OBJECT_IDENT QUOTED_VALUE) + ; + +define_anno_expr + : KWRD_DEFINE WS* KWRD_ANNO WS* + ; + +define_annotation + : define_anno_expr OBJECT_IDENT KWRD_AS KWRD_LIST val=VALUE_LIST -> + ^(ANNO_DEF_LIST OBJECT_IDENT $val) + | define_anno_expr OBJECT_IDENT KWRD_AS KWRD_URL val=QUOTED_VALUE -> + ^(ANNO_DEF_URL OBJECT_IDENT $val) + | define_anno_expr OBJECT_IDENT KWRD_AS KWRD_PATTERN val=QUOTED_VALUE -> + ^(ANNO_DEF_PTRN OBJECT_IDENT 
$val) + ; + +document_property + : KWRD_AUTHORS + | KWRD_CONTACTINFO + | KWRD_COPYRIGHT + | KWRD_DESC + | KWRD_DISCLAIMER + | KWRD_LICENSES + | KWRD_NAME + | KWRD_VERSION + ; + +argument + : COMMA? term -> term + | COMMA? param -> param + ; + +term + : function LP (argument)* RP -> + ^(TERMDEF function argument*) + ; + +// The rewrite rules for statement could be better captured using semantic +// predicates to pick the correct rewrite rule at runtime. +statement + : subject=term (rel=relationship (LP obj_sub=term obj_rel=relationship obj_obj=term RP | obj=term))? comment=STATEMENT_COMMENT? -> + ^(STMTDEF $comment? $subject $rel? $obj? $obj_sub? $obj_rel? $obj_obj?) + ; + +ns_prefix + : OBJECT_IDENT COLON! + ; + +param + : ns_prefix? OBJECT_IDENT -> ^(PARAM_DEF_ID ns_prefix? OBJECT_IDENT) + | ns_prefix? QUOTED_VALUE -> ^(PARAM_DEF_QV ns_prefix? QUOTED_VALUE) + ; + +function returns [char *f] // Returns the short form of the matched function keyword; long and short spellings yield the same value. + : fv='proteinAbundance' { retval.f = "p"; } + | fv='p' { retval.f = "p"; } + | fv='rnaAbundance' { retval.f = "r"; } + | fv='r' { retval.f = "r"; } + | fv='abundance' { retval.f = "a"; } + | fv='a' { retval.f = "a"; } + | fv='microRNAAbundance' { retval.f = "m"; } + | fv='m' { retval.f = "m"; } + | fv='geneAbundance' { retval.f = "g"; } + | fv='g' { retval.f = "g"; } + | fv='biologicalProcess' { retval.f = "bp"; } + | fv='bp' { retval.f = "bp"; } + | fv='pathology' { retval.f = "path"; } + | fv='path' { retval.f = "path"; } + | fv='complexAbundance' { retval.f = "complex"; } + | fv='complex' { retval.f = "complex"; } + | fv='translocation' { retval.f = "tloc"; } + | fv='tloc' { retval.f = "tloc"; } + | fv='cellSecretion' { retval.f = "sec"; } + | fv='sec' { retval.f = "sec"; } + | fv='cellSurfaceExpression' { retval.f = "surf"; } + | fv='surf' { retval.f = "surf"; } + | fv='reaction' { retval.f = "rxn"; } + | fv='rxn' { retval.f = "rxn"; } + | fv='compositeAbundance' { retval.f = "composite"; } + | fv='composite' { retval.f = "composite"; } + | fv='fusion' { 
retval.f = "fus"; } + | fv='fus' { retval.f = "fus"; } + | fv='degradation' { retval.f = "deg"; } + | fv='deg' { retval.f = "deg"; } + | fv='molecularActivity' { retval.f = "act"; } + | fv='act' { retval.f = "act"; } + | fv='catalyticActivity' { retval.f = "cat"; } + | fv='cat' { retval.f = "cat"; } + | fv='kinaseActivity' { retval.f = "kin"; } + | fv='kin' { retval.f = "kin"; } + | fv='phosphataseActivity' { retval.f = "phos"; } + | fv='phos' { retval.f = "phos"; } + | fv='peptidaseActivity' { retval.f = "pep"; } + | fv='pep' { retval.f = "pep"; } + | fv='ribosylationActivity' { retval.f = "ribo"; } + | fv='ribo' { retval.f = "ribo"; } + | fv='transcriptionalActivity' { retval.f = "tscript"; } + | fv='tscript' { retval.f = "tscript"; } + | fv='transportActivity' { retval.f = "tport"; } + | fv='tport' { retval.f = "tport"; } + | fv='gtpBoundActivity' { retval.f = "gtp"; } + | fv='gtp' { retval.f = "gtp"; } + | fv='chaperoneActivity' { retval.f = "chap"; } + | fv='chap' { retval.f = "chap"; } + | fv='proteinModification' { retval.f = "pmod"; } + | fv='pmod' { retval.f = "pmod"; } + | fv='substitution' { retval.f = "sub"; } + | fv='sub' { retval.f = "sub"; } + | fv='truncation' { retval.f = "trunc"; } + | fv='trunc' { retval.f = "trunc"; } + | fv='reactants' { retval.f = "reactants"; } + | fv='products' { retval.f = "products"; } + | fv='list' { retval.f = "list"; } + ; + +relationship returns [char *r] + : rv='increases' { retval.r = "increases";} + | rv='->' { retval.r = "increases";} + | rv='decreases' { retval.r = "decreases";} + | rv='-|' { retval.r = "decreases";} + | rv='directlyIncreases' { retval.r = "directlyIncreases";} + | rv='=>' { retval.r = "directlyIncreases";} + | rv='directlyDecreases' { retval.r = "directlyDecreases";} + | rv='=|' { retval.r = "directlyDecreases";} + | rv='causesNoChange' { retval.r = "causesNoChange";} + | rv='positiveCorrelation' { retval.r = "positiveCorrelation";} + | rv='negativeCorrelation' { retval.r = 
"negativeCorrelation";} + | rv='translatedTo' { retval.r = "translatedTo";} + | rv='>>' { retval.r = "translatedTo";} + | rv='transcribedTo' { retval.r = "transcribedTo";} + | rv=':>' { retval.r = "transcribedTo";} + | rv='isA' { retval.r = "isA";} + | rv='subProcessOf' { retval.r = "subProcessOf";} + | rv='rateLimitingStepOf' { retval.r = "rateLimitingStepOf";} + | rv='biomarkerFor' { retval.r = "biomarkerFor";} + | rv='prognosticBiomarkerFor' { retval.r = "prognosticBiomarkerFor";} + | rv='orthologous' { retval.r = "orthologous";} + | rv='analogous' { retval.r = "analogous";} + | rv='association' { retval.r = "association";} + | rv='--' { retval.r = "association";} + | rv='hasMembers' { retval.r = "hasMembers";} + | rv='hasComponents' { retval.r = "hasComponents";} + | rv='hasMember' { retval.r = "hasMember";} + | rv='hasComponent' { retval.r = "hasComponent";} + ; + +eq_clause + : WS* EQ WS* + ; + +DOCUMENT_COMMENT + : '#' ~('\n' | '\r')* + ; + +STATEMENT_COMMENT + : '//' (('\\\n') | ('\\\r\n') | ~('\n' | '\r'))* + ; + +IDENT_LIST + : '{' OBJECT_IDENT (COMMA OBJECT_IDENT)* '}' + ; + +VALUE_LIST + : '{' (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)? (COMMA (' ')* (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)?)* '}' + ; + +QUOTED_VALUE + : '"' ( ESCAPE_SEQUENCE | '\\\n' | '\\\r\n' | ~('\\'|'"') )* '"' + ; + +LP: '('; +RP: ')'; +EQ: '='; +COLON: ':'; +COMMA: ','; + +NEWLINE + : '\u000d'? '\u000a' | '\u000d' + ; + +WS: (' ' | '\t' | '\n' | '\r'| '\f' | '\\\n' | '\\\r\n')+ { skip(); }; + +// Start of BELScript keywords - case insensitive tokens. 
+ +KWRD_ANNO + : ('A'|'a')('N'|'n')('N'|'n')('O'|'o')('T'|'t')('A'|'a')('T'|'t')('I'|'i')('O'|'o')('N'|'n') + ; + +KWRD_AS + : ('A'|'a')('S'|'s') + ; + +KWRD_AUTHORS + : ('A'|'a')('U'|'u')('T'|'t')('H'|'h')('O'|'o')('R'|'r')('S'|'s') + ; + +KWRD_CONTACTINFO + : ('C'|'c')('O'|'o')('N'|'n')('T'|'t')('A'|'a')('C'|'c')('T'|'t')('I'|'i')('N'|'n')('F'|'f')('O'|'o') + ; + +KWRD_COPYRIGHT + : ('C'|'c')('O'|'o')('P'|'p')('Y'|'y')('R'|'r')('I'|'i')('G'|'g')('H'|'h')('T'|'t') + ; + +KWRD_DFLT + : ('D'|'d')('E'|'e')('F'|'f')('A'|'a')('U'|'u')('L'|'l')('T'|'t') + ; + +KWRD_DEFINE + : ('D'|'d')('E'|'e')('F'|'f')('I'|'i')('N'|'n')('E'|'e') + ; + +KWRD_DESC + : ('D'|'d')('E'|'e')('S'|'s')('C'|'c')('R'|'r')('I'|'i')('P'|'p')('T'|'t')('I'|'i')('O'|'o')('N'|'n') + ; + +KWRD_DISCLAIMER + : ('D'|'d')('I'|'i')('S'|'s')('C'|'c')('L'|'l')('A'|'a')('I'|'i')('M'|'m')('E'|'e')('R'|'r') + ; + +KWRD_DOCUMENT + : ('D'|'d')('O'|'o')('C'|'c')('U'|'u')('M'|'m')('E'|'e')('N'|'n')('T'|'t') + ; + +KWRD_LICENSES + : ('L'|'l')('I'|'i')('C'|'c')('E'|'e')('N'|'n')('S'|'s')('E'|'e')('S'|'s') + ; + +KWRD_LIST + : ('L'|'l')('I'|'i')('S'|'s')('T'|'t') + ; + +KWRD_NAME + : ('N'|'n')('A'|'a')('M'|'m')('E'|'e') + ; + +KWRD_NS + : ('N'|'n')('A'|'a')('M'|'m')('E'|'e')('S'|'s')('P'|'p')('A'|'a')('C'|'c')('E'|'e') + ; + +KWRD_PATTERN + : ('P'|'p')('A'|'a')('T'|'t')('T'|'t')('E'|'e')('R'|'r')('N'|'n') + ; + +KWRD_SET + : ('S'|'s')('E'|'e')('T'|'t') + ; + +KWRD_STMT_GROUP + : ('S'|'s')('T'|'t')('A'|'a')('T'|'t')('E'|'e')('M'|'m')('E'|'e')('N'|'n')('T'|'t')('_')('G'|'g')('R'|'r')('O'|'o')('U'|'u')('P'|'p') + ; + +KWRD_UNSET + : ('U'|'u')('N'|'n')('S'|'s')('E'|'e')('T'|'t') + ; + +KWRD_URL + : ('U'|'u')('R'|'r')('L'|'l') + ; + +KWRD_VERSION + : ('V'|'v')('E'|'e')('R'|'r')('S'|'s')('I'|'i')('O'|'o')('N'|'n') + ; + +OBJECT_IDENT + : ('_' | LETTER | DIGIT)+ + ; + +// A select few fragment rules. If used in excess, fragment rules may impact +// performance. 
Fragment lexer rules tell ANTLR that a rule will only be called +// by other rules, and not to yield a token to the parser. + +fragment LETTER + : ('a'..'z' | 'A'..'Z') + ; + +fragment DIGIT + : '0'..'9' + ; + +fragment ESCAPE_SEQUENCE + : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') + | UNICODE_ESCAPE + | OCTAL_ESCAPE + ; + +fragment OCTAL_ESCAPE + : '\\' ('0'..'3') ('0'..'7') ('0'..'7') + | '\\' ('0'..'7') ('0'..'7') + | '\\' ('0'..'7') + ; + +fragment UNICODE_ESCAPE + : '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT + ; + +fragment HEX_DIGIT + : ('0'..'9'|'a'..'f'|'A'..'F') + ; + diff --git a/version_1.0/ANTLR/BELScript_Python_v1.g b/version_1.0/ANTLR/BELScript_Python_v1.g new file mode 100644 index 0000000..3776753 --- /dev/null +++ b/version_1.0/ANTLR/BELScript_Python_v1.g @@ -0,0 +1,426 @@ +grammar BELScript_Python_v1; + +options { + // Target language for ANTLR to generate code in (defaults to Java). + language = Python; + + // Controls the data structure the recognizer will generate. + // (AST for abstract syntax trees or template for StringTemplate templates) + output = AST; +} + +// Introduce imaginary tokens. These tokens are not associated w/ any +// of our input but make nice root nodes for our abstract syntax tree. +tokens { + + // DOCSET a quoted value. + DOCSET_QV; + // DOCSET a list of values. + DOCSET_LIST; + // DOCSET an identifier. + DOCSET_ID; + + // SG_SET a quoted value. + SG_SET_QV; + // SG_SET an identifier. + SG_SET_ID; + + // ANNO_SET a quoted value. + ANNO_SET_QV; + // ANNO_SET a list of values. + ANNO_SET_LIST; + // ANNO_SET an identifier. + ANNO_SET_ID; + + // ANNO_DEF a list. + ANNO_DEF_LIST; + // ANNO_DEF a URL. + ANNO_DEF_URL; + // ANNO_DEF a pattern. + ANNO_DEF_PTRN; + + // PARAM_DEF a quoted value. + PARAM_DEF_QV; + // PARAM_DEF an identifier. + PARAM_DEF_ID; + + // UNSET a statement group. + UNSET_SG; + + // UNSET an identifier. + UNSET_ID; + // UNSET a list of identifiers. + UNSET_ID_LIST; + + // Defines a document. 
+ DOCDEF; + // Defines a namespace. + NSDEF; + // Defines the default namespace. + DFLT_NSDEF; + // Defines a term. + TERMDEF; + // Defines a statement. + STMTDEF; +} + +document + : (NEWLINE | DOCUMENT_COMMENT | record)+ EOF -> + ^(DOCDEF record+) + ; + +record + : define_namespace + | define_annotation + | set_annotation + | set_document + | set_statement_group + | unset_statement_group + | unset + | statement + ; + +set_doc_expr + : KWRD_SET WS* KWRD_DOCUMENT WS* + ; + +set_document + : set_doc_expr document_property eq_clause val=QUOTED_VALUE -> + ^(DOCSET_QV document_property $val) + | set_doc_expr document_property eq_clause val=VALUE_LIST -> + ^(DOCSET_LIST document_property $val) + | set_doc_expr document_property eq_clause val=OBJECT_IDENT -> + ^(DOCSET_ID document_property $val) + ; + +set_sg_expr + : KWRD_SET WS* KWRD_STMT_GROUP + ; + +set_statement_group + : set_sg_expr eq_clause val=QUOTED_VALUE -> ^(SG_SET_QV $val) + | set_sg_expr eq_clause val=OBJECT_IDENT -> ^(SG_SET_ID $val) + ; + +set_annotation + : KWRD_SET OBJECT_IDENT eq_clause val=QUOTED_VALUE -> + ^(ANNO_SET_QV OBJECT_IDENT $val) + | KWRD_SET OBJECT_IDENT eq_clause val=VALUE_LIST -> + ^(ANNO_SET_LIST OBJECT_IDENT $val) + | KWRD_SET OBJECT_IDENT eq_clause val=OBJECT_IDENT -> + ^(ANNO_SET_ID OBJECT_IDENT $val) + ; + +unset_statement_group + : KWRD_UNSET KWRD_STMT_GROUP -> ^(UNSET_SG) + ; + +unset + : KWRD_UNSET val=OBJECT_IDENT -> ^(UNSET_ID $val) + | KWRD_UNSET val=IDENT_LIST -> ^(UNSET_ID_LIST $val) + ; + +define_namespace + : KWRD_DEFINE KWRD_DFLT KWRD_NS OBJECT_IDENT KWRD_AS KWRD_URL QUOTED_VALUE -> + ^(DFLT_NSDEF OBJECT_IDENT QUOTED_VALUE) + | KWRD_DEFINE KWRD_NS OBJECT_IDENT KWRD_AS KWRD_URL QUOTED_VALUE -> + ^(NSDEF OBJECT_IDENT QUOTED_VALUE) + ; + +define_anno_expr + : KWRD_DEFINE WS* KWRD_ANNO WS* + ; + +define_annotation + : define_anno_expr OBJECT_IDENT KWRD_AS KWRD_LIST val=VALUE_LIST -> + ^(ANNO_DEF_LIST OBJECT_IDENT $val) + | define_anno_expr OBJECT_IDENT KWRD_AS KWRD_URL 
val=QUOTED_VALUE -> + ^(ANNO_DEF_URL OBJECT_IDENT $val) + | define_anno_expr OBJECT_IDENT KWRD_AS KWRD_PATTERN val=QUOTED_VALUE -> + ^(ANNO_DEF_PTRN OBJECT_IDENT $val) + ; + +document_property + : KWRD_AUTHORS + | KWRD_CONTACTINFO + | KWRD_COPYRIGHT + | KWRD_DESC + | KWRD_DISCLAIMER + | KWRD_LICENSES + | KWRD_NAME + | KWRD_VERSION + ; + +argument + : COMMA? term -> term + | COMMA? param -> param + ; + +term + : function LP (argument)* RP -> + ^(TERMDEF function argument*) + ; + +// The rewrite rules for statement could be better captured using semantic +// predicates to pick the correct rewrite rule at runtime. +statement + : subject=term (rel=relationship (LP obj_sub=term obj_rel=relationship obj_obj=term RP | obj=term))? comment=STATEMENT_COMMENT? -> + ^(STMTDEF $comment? $subject $rel? $obj? $obj_sub? $obj_rel? $obj_obj?) + ; + +ns_prefix + : OBJECT_IDENT COLON! + ; + +param + : ns_prefix? OBJECT_IDENT -> ^(PARAM_DEF_ID ns_prefix? OBJECT_IDENT) + | ns_prefix? QUOTED_VALUE -> ^(PARAM_DEF_QV ns_prefix? 
QUOTED_VALUE) + ; + +function returns [r] + : fv='proteinAbundance' { $r = "p"; } + | fv='p' { $r = "p"; } + | fv='rnaAbundance' { $r = "r"; } + | fv='r' { $r = "r"; } + | fv='abundance' { $r = "a"; } + | fv='a' { $r = "a"; } + | fv='microRNAAbundance' { $r = "m"; } + | fv='m' { $r = "m"; } + | fv='geneAbundance' { $r = "g"; } + | fv='g' { $r = "g"; } + | fv='biologicalProcess' { $r = "bp"; } + | fv='bp' { $r = "bp"; } + | fv='pathology' { $r = "path"; } + | fv='path' { $r = "path"; } + | fv='complexAbundance' { $r = "complex"; } + | fv='complex' { $r = "complex"; } + | fv='translocation' { $r = "tloc"; } + | fv='tloc' { $r = "tloc"; } + | fv='cellSecretion' { $r = "sec"; } + | fv='sec' { $r = "sec"; } + | fv='cellSurfaceExpression' { $r = "surf"; } + | fv='surf' { $r = "surf"; } + | fv='reaction' { $r = "rxn"; } + | fv='rxn' { $r = "rxn"; } + | fv='compositeAbundance' { $r = "composite"; } + | fv='composite' { $r = "composite"; } + | fv='fusion' { $r = "fus"; } + | fv='fus' { $r = "fus"; } + | fv='degradation' { $r = "deg"; } + | fv='deg' { $r = "deg"; } + | fv='molecularActivity' { $r = "act"; } + | fv='act' { $r = "act"; } + | fv='catalyticActivity' { $r = "cat"; } + | fv='cat' { $r = "cat"; } + | fv='kinaseActivity' { $r = "kin"; } + | fv='kin' { $r = "kin"; } + | fv='phosphataseActivity' { $r = "phos"; } + | fv='phos' { $r = "phos"; } + | fv='peptidaseActivity' { $r = "pep"; } + | fv='pep' { $r = "pep"; } + | fv='ribosylationActivity' { $r = "ribo"; } + | fv='ribo' { $r = "ribo"; } + | fv='transcriptionalActivity' { $r = "tscript"; } + | fv='tscript' { $r = "tscript"; } + | fv='transportActivity' { $r = "tport"; } + | fv='tport' { $r = "tport"; } + | fv='gtpBoundActivity' { $r = "gtp"; } + | fv='gtp' { $r = "gtp"; } + | fv='chaperoneActivity' { $r = "chap"; } + | fv='chap' { $r = "chap"; } + | fv='proteinModification' { $r = "pmod"; } + | fv='pmod' { $r = "pmod"; } + | fv='substitution' { $r = "sub"; } + | fv='sub' { $r = "sub"; } + | fv='truncation' { $r = 
"trunc"; } + | fv='trunc' { $r = "trunc"; } + | fv='reactants' { $r = "reactants"; } + | fv='products' { $r = "products"; } + | fv='list' { $r = "list"; } + ; + +relationship returns [r] + : rv='increases' { $r = "increases"; } + | rv='->' { $r = "increases"; } + | rv='decreases' { $r = "decreases"; } + | rv='-|' { $r = "decreases"; } + | rv='directlyIncreases' { $r = "directlyIncreases"; } + | rv='=>' { $r = "directlyIncreases"; } + | rv='directlyDecreases' { $r = "directlyDecreases"; } + | rv='=|' { $r = "directlyDecreases"; } + | rv='causesNoChange' { $r = "causesNoChange"; } + | rv='positiveCorrelation' { $r = "positiveCorrelation"; } + | rv='negativeCorrelation' { $r = "negativeCorrelation"; } + | rv='translatedTo' { $r = "translatedTo"; } + | rv='>>' { $r = "translatedTo"; } + | rv='transcribedTo' { $r = "transcribedTo"; } + | rv=':>' { $r = "transcribedTo"; } + | rv='isA' { $r = "isA"; } + | rv='subProcessOf' { $r = "subProcessOf"; } + | rv='rateLimitingStepOf' { $r = "rateLimitingStepOf"; } + | rv='biomarkerFor' { $r = "biomarkerFor"; } + | rv='prognosticBiomarkerFor' { $r = "prognosticBiomarkerFor"; } + | rv='orthologous' { $r = "orthologous"; } + | rv='analogous' { $r = "analogous"; } + | rv='association' { $r = "association"; } + | rv='--' { $r = "association"; } + | rv='hasMembers' { $r = "hasMembers"; } + | rv='hasComponents' { $r = "hasComponents"; } + | rv='hasMember' { $r = "hasMember"; } + | rv='hasComponent' { $r = "hasComponent"; } + ; + +eq_clause + : WS* EQ WS* + ; + +DOCUMENT_COMMENT + : '#' ~('\n' | '\r')* + ; + +STATEMENT_COMMENT + : '//' (('\\\n') | ('\\\r\n') | ~('\n' | '\r'))* + ; + +IDENT_LIST + : '{' OBJECT_IDENT (COMMA OBJECT_IDENT)* '}' + ; + +VALUE_LIST + : '{' (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)? 
(COMMA (' ')* (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)?)* '}' + ; + +QUOTED_VALUE + : '"' ( ESCAPE_SEQUENCE | '\\\n' | '\\\r\n' | ~('\\'|'"') )* '"' + ; + +LP: '('; +RP: ')'; +EQ: '='; +COLON: ':'; +COMMA: ','; + +NEWLINE + : '\u000d'? '\u000a' | '\u000d' + ; + +WS: (' ' | '\t' | '\n' | '\r'| '\f' | '\\\n' | '\\\r\n')+ { self.skip() }; // Python target: skip() is a method of the generated lexer, so the action must call it on self. + +// Start of BELScript keywords - case insensitive tokens. + +KWRD_ANNO + : ('A'|'a')('N'|'n')('N'|'n')('O'|'o')('T'|'t')('A'|'a')('T'|'t')('I'|'i')('O'|'o')('N'|'n') + ; + +KWRD_AS + : ('A'|'a')('S'|'s') + ; + +KWRD_AUTHORS + : ('A'|'a')('U'|'u')('T'|'t')('H'|'h')('O'|'o')('R'|'r')('S'|'s') + ; + +KWRD_CONTACTINFO + : ('C'|'c')('O'|'o')('N'|'n')('T'|'t')('A'|'a')('C'|'c')('T'|'t')('I'|'i')('N'|'n')('F'|'f')('O'|'o') + ; + +KWRD_COPYRIGHT + : ('C'|'c')('O'|'o')('P'|'p')('Y'|'y')('R'|'r')('I'|'i')('G'|'g')('H'|'h')('T'|'t') + ; + +KWRD_DFLT + : ('D'|'d')('E'|'e')('F'|'f')('A'|'a')('U'|'u')('L'|'l')('T'|'t') + ; + +KWRD_DEFINE + : ('D'|'d')('E'|'e')('F'|'f')('I'|'i')('N'|'n')('E'|'e') + ; + +KWRD_DESC + : ('D'|'d')('E'|'e')('S'|'s')('C'|'c')('R'|'r')('I'|'i')('P'|'p')('T'|'t')('I'|'i')('O'|'o')('N'|'n') + ; + +KWRD_DISCLAIMER + : ('D'|'d')('I'|'i')('S'|'s')('C'|'c')('L'|'l')('A'|'a')('I'|'i')('M'|'m')('E'|'e')('R'|'r') + ; + +KWRD_DOCUMENT + : ('D'|'d')('O'|'o')('C'|'c')('U'|'u')('M'|'m')('E'|'e')('N'|'n')('T'|'t') + ; + +KWRD_LICENSES + : ('L'|'l')('I'|'i')('C'|'c')('E'|'e')('N'|'n')('S'|'s')('E'|'e')('S'|'s') + ; + +KWRD_LIST + : ('L'|'l')('I'|'i')('S'|'s')('T'|'t') + ; + +KWRD_NAME + : ('N'|'n')('A'|'a')('M'|'m')('E'|'e') + ; + +KWRD_NS + : ('N'|'n')('A'|'a')('M'|'m')('E'|'e')('S'|'s')('P'|'p')('A'|'a')('C'|'c')('E'|'e') + ; + +KWRD_PATTERN + : ('P'|'p')('A'|'a')('T'|'t')('T'|'t')('E'|'e')('R'|'r')('N'|'n') + ; + +KWRD_SET + : ('S'|'s')('E'|'e')('T'|'t') + ; + +KWRD_STMT_GROUP + : ('S'|'s')('T'|'t')('A'|'a')('T'|'t')('E'|'e')('M'|'m')('E'|'e')('N'|'n')('T'|'t')('_')('G'|'g')('R'|'r')('O'|'o')('U'|'u')('P'|'p') + ; + +KWRD_UNSET + : 
('U'|'u')('N'|'n')('S'|'s')('E'|'e')('T'|'t') + ; + +KWRD_URL + : ('U'|'u')('R'|'r')('L'|'l') + ; + +KWRD_VERSION + : ('V'|'v')('E'|'e')('R'|'r')('S'|'s')('I'|'i')('O'|'o')('N'|'n') + ; + +OBJECT_IDENT + : ('_' | LETTER | DIGIT)+ + ; + +// A select few fragment rules. If used in excess, fragment rules may impact +// performance. Fragment lexer rules tell ANTLR that a rule will only be called +// by other rules, and not to yield a token to the parser. + +fragment LETTER + : ('a'..'z' | 'A'..'Z') + ; + +fragment DIGIT + : '0'..'9' + ; + +fragment ESCAPE_SEQUENCE + : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') + | UNICODE_ESCAPE + | OCTAL_ESCAPE + ; + +fragment OCTAL_ESCAPE + : '\\' ('0'..'3') ('0'..'7') ('0'..'7') + | '\\' ('0'..'7') ('0'..'7') + | '\\' ('0'..'7') + ; + +fragment UNICODE_ESCAPE + : '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT + ; + +fragment HEX_DIGIT + : ('0'..'9'|'a'..'f'|'A'..'F') + ; + diff --git a/version_1.0/ANTLR/BELScript_v1.g b/version_1.0/ANTLR/BELScript_v1.g new file mode 100644 index 0000000..f8de213 --- /dev/null +++ b/version_1.0/ANTLR/BELScript_v1.g @@ -0,0 +1,426 @@ +grammar BELScript_v1; + +options { + // Target language for ANTLR to generate code in (defaults to Java). + language = Java; + + // Controls the data structure the recognizer will generate. + // (AST for abstract syntax trees or template for StringTemplate templates) + output = AST; +} + +// Introduce imaginary tokens. These tokens are not associated w/ any +// of our input but make nice root nodes for our abstract syntax tree. +tokens { + + // DOCSET a quoted value. + DOCSET_QV; + // DOCSET a list of values. + DOCSET_LIST; + // DOCSET an identifier. + DOCSET_ID; + + // SG_SET a quoted value. + SG_SET_QV; + // SG_SET an identifier. + SG_SET_ID; + + // ANNO_SET a quoted value. + ANNO_SET_QV; + // ANNO_SET a list of values. + ANNO_SET_LIST; + // ANNO_SET an identifier. + ANNO_SET_ID; + + // ANNO_DEF a list. + ANNO_DEF_LIST; + // ANNO_DEF a URL. 
+ ANNO_DEF_URL; + // ANNO_DEF a pattern. + ANNO_DEF_PTRN; + + // PARAM_DEF a quoted value. + PARAM_DEF_QV; + // PARAM_DEF an identifier. + PARAM_DEF_ID; + + // UNSET a statement group. + UNSET_SG; + + // UNSET an identifier. + UNSET_ID; + // UNSET a list of identifiers. + UNSET_ID_LIST; + + // Defines a document. + DOCDEF; + // Defines a namespace. + NSDEF; + // Defines the default namespace. + DFLT_NSDEF; + // Defines a term. + TERMDEF; + // Defines a statement. + STMTDEF; +} + +document + : (NEWLINE | DOCUMENT_COMMENT | record)+ EOF -> + ^(DOCDEF record+) + ; + +record + : define_namespace + | define_annotation + | set_annotation + | set_document + | set_statement_group + | unset_statement_group + | unset + | statement + ; + +set_doc_expr + : KWRD_SET WS* KWRD_DOCUMENT WS* + ; + +set_document + : set_doc_expr document_property eq_clause val=QUOTED_VALUE -> + ^(DOCSET_QV document_property $val) + | set_doc_expr document_property eq_clause val=VALUE_LIST -> + ^(DOCSET_LIST document_property $val) + | set_doc_expr document_property eq_clause val=OBJECT_IDENT -> + ^(DOCSET_ID document_property $val) + ; + +set_sg_expr + : KWRD_SET WS* KWRD_STMT_GROUP + ; + +set_statement_group + : set_sg_expr eq_clause val=QUOTED_VALUE -> ^(SG_SET_QV $val) + | set_sg_expr eq_clause val=OBJECT_IDENT -> ^(SG_SET_ID $val) + ; + +set_annotation + : KWRD_SET OBJECT_IDENT eq_clause val=QUOTED_VALUE -> + ^(ANNO_SET_QV OBJECT_IDENT $val) + | KWRD_SET OBJECT_IDENT eq_clause val=VALUE_LIST -> + ^(ANNO_SET_LIST OBJECT_IDENT $val) + | KWRD_SET OBJECT_IDENT eq_clause val=OBJECT_IDENT -> + ^(ANNO_SET_ID OBJECT_IDENT $val) + ; + +unset_statement_group + : KWRD_UNSET KWRD_STMT_GROUP -> ^(UNSET_SG) + ; + +unset + : KWRD_UNSET val=OBJECT_IDENT -> ^(UNSET_ID $val) + | KWRD_UNSET val=IDENT_LIST -> ^(UNSET_ID_LIST $val) + ; + +define_namespace + : KWRD_DEFINE KWRD_DFLT KWRD_NS OBJECT_IDENT KWRD_AS KWRD_URL QUOTED_VALUE -> + ^(DFLT_NSDEF OBJECT_IDENT QUOTED_VALUE) + | KWRD_DEFINE KWRD_NS OBJECT_IDENT 
KWRD_AS KWRD_URL QUOTED_VALUE -> + ^(NSDEF OBJECT_IDENT QUOTED_VALUE) + ; + +define_anno_expr + : KWRD_DEFINE WS* KWRD_ANNO WS* + ; + +define_annotation + : define_anno_expr OBJECT_IDENT KWRD_AS KWRD_LIST val=VALUE_LIST -> + ^(ANNO_DEF_LIST OBJECT_IDENT $val) + | define_anno_expr OBJECT_IDENT KWRD_AS KWRD_URL val=QUOTED_VALUE -> + ^(ANNO_DEF_URL OBJECT_IDENT $val) + | define_anno_expr OBJECT_IDENT KWRD_AS KWRD_PATTERN val=QUOTED_VALUE -> + ^(ANNO_DEF_PTRN OBJECT_IDENT $val) + ; + +document_property + : KWRD_AUTHORS + | KWRD_CONTACTINFO + | KWRD_COPYRIGHT + | KWRD_DESC + | KWRD_DISCLAIMER + | KWRD_LICENSES + | KWRD_NAME + | KWRD_VERSION + ; + +argument + : COMMA? term -> term + | COMMA? param -> param + ; + +term + : function LP (argument)* RP -> + ^(TERMDEF function argument*) + ; + +// The rewrite rules for statement could be better captured using semantic +// predicates to pick the correct rewrite rule at runtime. +statement + : subject=term (rel=relationship (LP obj_sub=term obj_rel=relationship obj_obj=term RP | obj=term))? comment=STATEMENT_COMMENT? -> + ^(STMTDEF $comment? $subject $rel? $obj? $obj_sub? $obj_rel? $obj_obj?) + ; + +ns_prefix + : OBJECT_IDENT COLON! + ; + +param + : ns_prefix? OBJECT_IDENT -> ^(PARAM_DEF_ID ns_prefix? OBJECT_IDENT) + | ns_prefix? QUOTED_VALUE -> ^(PARAM_DEF_QV ns_prefix? 
QUOTED_VALUE)
+    ;
+
+/*
+ * Recognises a BEL function keyword, in either its long or its abbreviated
+ * spelling, and returns the abbreviated name. 'reactants', 'products' and
+ * 'list' have a single spelling and are returned unchanged.
+ */
+function returns [String r]
+    : ('proteinAbundance' | 'p')             {$r = "p";}
+    | ('rnaAbundance' | 'r')                 {$r = "r";}
+    | ('abundance' | 'a')                    {$r = "a";}
+    | ('microRNAAbundance' | 'm')            {$r = "m";}
+    | ('geneAbundance' | 'g')                {$r = "g";}
+    | ('biologicalProcess' | 'bp')           {$r = "bp";}
+    | ('pathology' | 'path')                 {$r = "path";}
+    | ('complexAbundance' | 'complex')       {$r = "complex";}
+    | ('translocation' | 'tloc')             {$r = "tloc";}
+    | ('cellSecretion' | 'sec')              {$r = "sec";}
+    | ('cellSurfaceExpression' | 'surf')     {$r = "surf";}
+    | ('reaction' | 'rxn')                   {$r = "rxn";}
+    | ('compositeAbundance' | 'composite')   {$r = "composite";}
+    | ('fusion' | 'fus')                     {$r = "fus";}
+    | ('degradation' | 'deg')                {$r = "deg";}
+    | ('molecularActivity' | 'act')          {$r = "act";}
+    | ('catalyticActivity' | 'cat')          {$r = "cat";}
+    | ('kinaseActivity' | 'kin')             {$r = "kin";}
+    | ('phosphataseActivity' | 'phos')       {$r = "phos";}
+    | ('peptidaseActivity' | 'pep')          {$r = "pep";}
+    | ('ribosylationActivity' | 'ribo')      {$r = "ribo";}
+    | ('transcriptionalActivity' | 'tscript') {$r = "tscript";}
+    | ('transportActivity' | 'tport')        {$r = "tport";}
+    | ('gtpBoundActivity' | 'gtp')           {$r = "gtp";}
+    | ('chaperoneActivity' | 'chap')         {$r = "chap";}
+    | ('proteinModification' | 'pmod')       {$r = "pmod";}
+    | ('substitution' | 'sub')               {$r = "sub";}
+    | ('truncation' | 'trunc')               {$r = "trunc";}
+    | 'reactants'                            {$r = "reactants";}
+    | 'products'                             {$r = "products";}
+    | 'list'                                 {$r = "list";}
+    ;
+
+/*
+ * Recognises a BEL relationship and returns its long-form name. The symbolic
+ * spellings ('->', '-|', '=>', '=|', '>>', ':>', '--') map to the same name
+ * as the word they abbreviate.
+ */
+relationship returns [String r]
+    : ('increases' | '->')          { $r = "increases";}
+    | ('decreases' | '-|')          { $r = "decreases";}
+    | ('directlyIncreases' | '=>')  { $r = "directlyIncreases";}
+    | ('directlyDecreases' | '=|')  { $r = "directlyDecreases";}
+    | 'causesNoChange'              { $r = "causesNoChange";}
+    | 'positiveCorrelation'         { $r = "positiveCorrelation";}
+    | 'negativeCorrelation'         { $r = "negativeCorrelation";}
+    | ('translatedTo' | '>>')       { $r = "translatedTo";}
+    | ('transcribedTo' | ':>')      { $r = "transcribedTo";}
+    | 'isA'                         { $r = "isA";}
+    | 'subProcessOf'                { $r = "subProcessOf";}
+    | 'rateLimitingStepOf'          { $r = "rateLimitingStepOf";}
+    | 'biomarkerFor'                { $r = "biomarkerFor";}
+    | 'prognosticBiomarkerFor'      { $r = "prognosticBiomarkerFor";}
+    | 'orthologous'                 { $r = "orthologous";}
+    | 'analogous'                   { $r = "analogous";}
+    | ('association' | '--')        { $r = "association";}
+    | 'hasMembers'                  { $r = "hasMembers";}
+    | 'hasComponents'               { $r = "hasComponents";}
+    | 'hasMember'                   { $r = "hasMember";}
+    | 'hasComponent'                { $r = "hasComponent";}
+    ;
+
+// An '=' sign with optional surrounding whitespace. NOTE(review): the WS
+// lexer rule below calls skip(), so WS* presumably never matches a token here.
+eq_clause : WS* EQ WS* ;
+
+// '#' up to (not including) the end of the line.
+DOCUMENT_COMMENT : '#' ~('\n' | '\r')* ;
+
+// '//' up to the end of the line; a backslash immediately before the line
+// break continues the comment onto the next line.
+STATEMENT_COMMENT : '//' (('\\\n') | ('\\\r\n') | ~('\n' | '\r'))* ;
+
+// Brace-delimited, comma-separated list of identifiers (at least one).
+IDENT_LIST : '{' OBJECT_IDENT (COMMA OBJECT_IDENT)* '}' ;
+
+// Brace-delimited, comma-separated list whose entries may be identifiers,
+// quoted values or nested lists; any entry may be empty.
+VALUE_LIST
+    : '{' (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)? (COMMA (' ')* (OBJECT_IDENT | QUOTED_VALUE | VALUE_LIST)?)* '}'
+    ;
+
+// Double-quoted string; accepts escape sequences and backslash line continuations.
+QUOTED_VALUE
+    : '"' ( ESCAPE_SEQUENCE | '\\\n' | '\\\r\n' | ~('\\'|'"') )* '"'
+    ;
+
+LP    : '(';
+RP    : ')';
+EQ    : '=';
+COLON : ':';
+COMMA : ',';
+
+// CR LF, a lone LF, or a lone CR.
+NEWLINE : '\u000d'? '\u000a' | '\u000d' ;
+
+// Whitespace, including backslash line continuations; discarded via skip().
+WS : (' ' | '\t' | '\n' | '\r'| '\f' | '\\\n' | '\\\r\n')+ { skip(); };
+
+// BELScript keywords. Every letter is matched in either case, which makes
+// the keyword tokens case insensitive.
+
+KWRD_ANNO        : ('A'|'a')('N'|'n')('N'|'n')('O'|'o')('T'|'t')('A'|'a')('T'|'t')('I'|'i')('O'|'o')('N'|'n') ;
+
+KWRD_AS          : ('A'|'a')('S'|'s') ;
+
+KWRD_AUTHORS     : ('A'|'a')('U'|'u')('T'|'t')('H'|'h')('O'|'o')('R'|'r')('S'|'s') ;
+
+KWRD_CONTACTINFO : ('C'|'c')('O'|'o')('N'|'n')('T'|'t')('A'|'a')('C'|'c')('T'|'t')('I'|'i')('N'|'n')('F'|'f')('O'|'o') ;
+
+KWRD_COPYRIGHT   : ('C'|'c')('O'|'o')('P'|'p')('Y'|'y')('R'|'r')('I'|'i')('G'|'g')('H'|'h')('T'|'t') ;
+
+KWRD_DFLT        : ('D'|'d')('E'|'e')('F'|'f')('A'|'a')('U'|'u')('L'|'l')('T'|'t') ;
+
+KWRD_DEFINE      : ('D'|'d')('E'|'e')('F'|'f')('I'|'i')('N'|'n')('E'|'e') ;
+
+KWRD_DESC        : ('D'|'d')('E'|'e')('S'|'s')('C'|'c')('R'|'r')('I'|'i')('P'|'p')('T'|'t')('I'|'i')('O'|'o')('N'|'n') ;
+
+KWRD_DISCLAIMER  : ('D'|'d')('I'|'i')('S'|'s')('C'|'c')('L'|'l')('A'|'a')('I'|'i')('M'|'m')('E'|'e')('R'|'r') ;
+
+KWRD_DOCUMENT    : ('D'|'d')('O'|'o')('C'|'c')('U'|'u')('M'|'m')('E'|'e')('N'|'n')('T'|'t') ;
+
+KWRD_LICENSES    : ('L'|'l')('I'|'i')('C'|'c')('E'|'e')('N'|'n')('S'|'s')('E'|'e')('S'|'s') ;
+
+KWRD_LIST        : ('L'|'l')('I'|'i')('S'|'s')('T'|'t') ;
+
+KWRD_NAME        : ('N'|'n')('A'|'a')('M'|'m')('E'|'e') ;
+
+KWRD_NS          : ('N'|'n')('A'|'a')('M'|'m')('E'|'e')('S'|'s')('P'|'p')('A'|'a')('C'|'c')('E'|'e') ;
+
+KWRD_PATTERN     : ('P'|'p')('A'|'a')('T'|'t')('T'|'t')('E'|'e')('R'|'r')('N'|'n') ;
+
+KWRD_SET         : ('S'|'s')('E'|'e')('T'|'t') ;
+
+KWRD_STMT_GROUP  : ('S'|'s')('T'|'t')('A'|'a')('T'|'t')('E'|'e')('M'|'m')('E'|'e')('N'|'n')('T'|'t')('_')('G'|'g')('R'|'r')('O'|'o')('U'|'u')('P'|'p') ;
+
+KWRD_UNSET       : ('U'|'u')('N'|'n')('S'|'s')('E'|'e')('T'|'t') ;
+
+KWRD_URL         : ('U'|'u')('R'|'r')('L'|'l') ;
+
+KWRD_VERSION     : ('V'|'v')('E'|'e')('R'|'r')('S'|'s')('I'|'i')('O'|'o')('N'|'n') ;
+
+// One or more underscores, letters or digits.
+OBJECT_IDENT : ('_' | LETTER | DIGIT)+ ;
+
+// Fragment rules: lexer helpers that are only referenced from other lexer
+// rules and never yield a token to the parser on their own. Keep the set
+// small; used in excess, fragment rules may impact performance.
+
+fragment LETTER : ('a'..'z' | 'A'..'Z') ;
+
+fragment DIGIT : '0'..'9' ;
+
+// A single-character backslash escape, a Unicode escape or an octal escape.
+fragment ESCAPE_SEQUENCE
+    : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
+    | UNICODE_ESCAPE
+    | OCTAL_ESCAPE
+    ;
+
+// Backslash followed by one to three octal digits.
+fragment OCTAL_ESCAPE
+    : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
+    | '\\' ('0'..'7') ('0'..'7')
+    | '\\' ('0'..'7')
+    ;
+
+// Backslash, 'u', then exactly four hexadecimal digits.
+fragment UNICODE_ESCAPE : '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT ;
+
+fragment HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
+
diff --git a/version_1.0/ANTLR/BELStatement.g b/version_1.0/ANTLR/BELStatement.g
new file mode 100644
index 0000000..a48aa73
--- /dev/null
+++ b/version_1.0/ANTLR/BELStatement.g
@@ -0,0 +1,311 @@
+grammar BELStatement;
+
+options {
+    language = Java;
+    output = AST;
+}
+
+@lexer::header {
+    package org.openbel.framework.common.bel.parser;
+
+    import org.openbel.bel.model.BELParseErrorException;
+}
+
+@header {
+    package org.openbel.framework.common.bel.parser;
+
+    import org.openbel.bel.model.BELParseErrorException;
+    import org.openbel.framework.common.model.BELObject;
+    import org.openbel.framework.common.model.Namespace;
+    import org.openbel.framework.common.model.Parameter;
+    import org.openbel.framework.common.model.Statement;
+    import org.openbel.framework.common.model.Term;
+    import org.openbel.framework.common.enums.FunctionEnum;
+    import org.openbel.framework.common.enums.RelationshipType;
+}
+
+@lexer::members {
+    /* Syntax errors raised while tokenizing, in the order they were reported. */
+    private final List syntaxErrors = new ArrayList();
+
+    /** Returns the list that collects the lexer's syntax errors. */
+    public List getSyntaxErrors() {
+        return syntaxErrors;
+    }
+
+    @Override
+    public void emitErrorMessage(String msg) {
+        // No-op override: nothing is printed here.
+    }
+
+    /** Records the error as a SyntaxException with an empty context string. */
+    @Override
+    public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
+        syntaxErrors.add(
+            new BELParseErrorException.SyntaxException(e.line, e.charPositionInLine, "", e));
+    }
+}
+
+@members {
+    /* Syntax errors raised while parsing, in the order they were reported. */
+    private final List syntaxErrors = new ArrayList();
+
+    /** Returns the list that collects the parser's syntax errors. */
+    public List getSyntaxErrors() {
+        return syntaxErrors;
+    }
+
+    @Override
+    public void emitErrorMessage(String msg) {
+        // No-op override: nothing is printed here.
+    }
+
+    /** Records the error as a SyntaxException with an empty context string. */
+    @Override
+    public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
+        syntaxErrors.add(
+            new BELParseErrorException.SyntaxException(e.line, e.charPositionInLine, "", e));
+    }
+
+    /*
+     * Builds the namespace for an optional NS_PREFIX token. Returns null when
+     * the token is absent; otherwise drops the token's last character (the
+     * NS_PREFIX rule ends with ':') and uses the rest as the prefix.
+     */
+    private static Namespace namespaceFor(Token prefixToken) {
+        if (prefixToken == null) {
+            return null;
+        }
+        final String text = prefixToken.getText();
+        return new Namespace(text.substring(0, text.length() - 1), "FIX_ME");
+    }
+
+    /* Looks up the function enum for the text of a matched keyword token. */
+    private static FunctionEnum functionFor(Token keyword) {
+        return FunctionEnum.getFunctionEnum(keyword.getText());
+    }
+
+    /*
+     * Looks up the relationship for the text of a matched token: first via
+     * fromString, and via fromAbbreviation when that returns null.
+     */
+    private static RelationshipType relationshipFor(Token keyword) {
+        final String text = keyword.getText();
+        final RelationshipType byName = RelationshipType.fromString(text);
+        return (byName != null) ? byName : RelationshipType.fromAbbreviation(text);
+    }
+}
+
+/*
+ * A statement is a subject term, optionally followed by a relationship and an
+ * object. The object is either a plain term or a parenthesised nested
+ * statement (term relationship term).
+ */
+statement returns [Statement r]:
+    subj=outer_term {
+        $r = new Statement($subj.r);
+    }
+    (
+        rel=relationship {
+            $r.setRelationshipType($rel.r);
+        }
+        (
+            (
+                OPEN_PAREN
+                nestedSubj=outer_term {
+                    final Statement nested = new Statement($nestedSubj.r);
+                }
+                nestedRel=relationship {
+                    nested.setRelationshipType($nestedRel.r);
+                }
+                nestedObj=outer_term {
+                    nested.setObject(new Statement.Object($nestedObj.r));
+                    $r.setObject(new Statement.Object(nested));
+                }
+                CLOSE_PAREN
+            )
+            |
+            obj=outer_term {
+                $r.setObject(new Statement.Object($obj.r));
+            }
+        )
+    )?
+    ;
+
+/*
+ * A top-level term: function '(' arguments ')'. Commas between arguments are
+ * optional. The result is assigned only after the closing parenthesis.
+ */
+outer_term returns [Term r]:
+    fn=function {
+        final Term built = new Term($fn.r);
+    }
+    OPEN_PAREN
+    (','? a=argument { built.addFunctionArgument($a.r); })*
+    CLOSE_PAREN {
+        $r = built;
+    }
+    ;
+
+/* A single argument of an outer term: a parameter or a nested term. */
+argument returns [BELObject r]:
+    p=param { $r = $p.r; } |
+    t=term  { $r = $t.r; }
+    ;
+
+/*
+ * A nested term: function '(' (term | param)* ')'. Commas between arguments
+ * are optional. The result is assigned only after the closing parenthesis.
+ */
+term returns [Term r]:
+    fn=function {
+        final Term built = new Term($fn.r);
+    }
+    OPEN_PAREN
+    (
+        ','?
+        ( inner=term  { built.addFunctionArgument($inner.r); }
+        | leaf=param  { built.addFunctionArgument($leaf.r); }
+        )
+    )*
+    CLOSE_PAREN {
+        $r = built;
+    }
+    ;
+
+/*
+ * A parameter: an optional namespace prefix followed by a bare or a quoted
+ * value. The surrounding quotes of a quoted value are stripped.
+ */
+param returns [Parameter r]:
+    nsp=NS_PREFIX?
+    (
+        bare=NS_VALUE {
+            final Namespace ns = namespaceFor($nsp);
+            $r = new Parameter();
+            $r.setValue($bare.getText());
+            $r.setNamespace(ns);
+        } |
+        quoted=QUOTED_VALUE {
+            final Namespace ns = namespaceFor($nsp);
+            $r = new Parameter();
+            $r.setNamespace(ns);
+
+            // Drop the first and last character, i.e. the enclosing quotes.
+            final String raw = $quoted.getText();
+            $r.setValue(raw.substring(1, raw.length() - 1));
+        }
+    )
+    ;
+
+/* A BEL function keyword (long or abbreviated form), resolved via functionFor. */
+function returns [FunctionEnum r]:
+    (
+        fv='proteinAbundance'        { $r = functionFor($fv); } |
+        fv='p'                       { $r = functionFor($fv); } |
+        fv='rnaAbundance'            { $r = functionFor($fv); } |
+        fv='r'                       { $r = functionFor($fv); } |
+        fv='abundance'               { $r = functionFor($fv); } |
+        fv='a'                       { $r = functionFor($fv); } |
+        fv='microRNAAbundance'       { $r = functionFor($fv); } |
+        fv='m'                       { $r = functionFor($fv); } |
+        fv='geneAbundance'           { $r = functionFor($fv); } |
+        fv='g'                       { $r = functionFor($fv); } |
+        fv='biologicalProcess'       { $r = functionFor($fv); } |
+        fv='bp'                      { $r = functionFor($fv); } |
+        fv='pathology'               { $r = functionFor($fv); } |
+        fv='path'                    { $r = functionFor($fv); } |
+        fv='complexAbundance'        { $r = functionFor($fv); } |
+        fv='complex'                 { $r = functionFor($fv); } |
+        fv='translocation'           { $r = functionFor($fv); } |
+        fv='tloc'                    { $r = functionFor($fv); } |
+        fv='cellSecretion'           { $r = functionFor($fv); } |
+        fv='sec'                     { $r = functionFor($fv); } |
+        fv='cellSurfaceExpression'   { $r = functionFor($fv); } |
+        fv='surf'                    { $r = functionFor($fv); } |
+        fv='reaction'                { $r = functionFor($fv); } |
+        fv='rxn'                     { $r = functionFor($fv); } |
+        fv='compositeAbundance'      { $r = functionFor($fv); } |
+        fv='composite'               { $r = functionFor($fv); } |
+        fv='fusion'                  { $r = functionFor($fv); } |
+        fv='fus'                     { $r = functionFor($fv); } |
+        fv='degradation'             { $r = functionFor($fv); } |
+        fv='deg'                     { $r = functionFor($fv); } |
+        fv='molecularActivity'       { $r = functionFor($fv); } |
+        fv='act'                     { $r = functionFor($fv); } |
+        fv='catalyticActivity'       { $r = functionFor($fv); } |
+        fv='cat'                     { $r = functionFor($fv); } |
+        fv='kinaseActivity'          { $r = functionFor($fv); } |
+        fv='kin'                     { $r = functionFor($fv); } |
+        fv='phosphataseActivity'     { $r = functionFor($fv); } |
+        fv='phos'                    { $r = functionFor($fv); } |
+        fv='peptidaseActivity'       { $r = functionFor($fv); } |
+        fv='pep'                     { $r = functionFor($fv); } |
+        fv='ribosylationActivity'    { $r = functionFor($fv); } |
+        fv='ribo'                    { $r = functionFor($fv); } |
+        fv='transcriptionalActivity' { $r = functionFor($fv); } |
+        fv='tscript'                 { $r = functionFor($fv); } |
+        fv='transportActivity'       { $r = functionFor($fv); } |
+        fv='tport'                   { $r = functionFor($fv); } |
+        fv='gtpBoundActivity'        { $r = functionFor($fv); } |
+        fv='gtp'                     { $r = functionFor($fv); } |
+        fv='chaperoneActivity'       { $r = functionFor($fv); } |
+        fv='chap'                    { $r = functionFor($fv); } |
+        fv='proteinModification'     { $r = functionFor($fv); } |
+        fv='pmod'                    { $r = functionFor($fv); } |
+        fv='substitution'            { $r = functionFor($fv); } |
+        fv='sub'                     { $r = functionFor($fv); } |
+        fv='truncation'              { $r = functionFor($fv); } |
+        fv='trunc'                   { $r = functionFor($fv); } |
+        fv='reactants'               { $r = functionFor($fv); } |
+        fv='products'                { $r = functionFor($fv); } |
+        fv='list'                    { $r = functionFor($fv); }
+    )
+    ;
+
+/* A BEL relationship (word or symbolic form), resolved via relationshipFor. */
+relationship returns [RelationshipType r]:
+    (
+        rv='increases'              { $r = relationshipFor($rv); } |
+        rv='->'                     { $r = relationshipFor($rv); } |
+        rv='decreases'              { $r = relationshipFor($rv); } |
+        rv='-|'                     { $r = relationshipFor($rv); } |
+        rv='directlyIncreases'      { $r = relationshipFor($rv); } |
+        rv='=>'                     { $r = relationshipFor($rv); } |
+        rv='directlyDecreases'      { $r = relationshipFor($rv); } |
+        rv='=|'                     { $r = relationshipFor($rv); } |
+        rv='causesNoChange'         { $r = relationshipFor($rv); } |
+        rv='positiveCorrelation'    { $r = relationshipFor($rv); } |
+        rv='negativeCorrelation'    { $r = relationshipFor($rv); } |
+        rv='translatedTo'           { $r = relationshipFor($rv); } |
+        rv='>>'                     { $r = relationshipFor($rv); } |
+        rv='transcribedTo'          { $r = relationshipFor($rv); } |
+        rv=':>'                     { $r = relationshipFor($rv); } |
+        rv='isA'                    { $r = relationshipFor($rv); } |
+        rv='subProcessOf'           { $r = relationshipFor($rv); } |
+        rv='rateLimitingStepOf'     { $r = relationshipFor($rv); } |
+        rv='biomarkerFor'           { $r = relationshipFor($rv); } |
+        rv='prognosticBiomarkerFor' { $r = relationshipFor($rv); } |
+        rv='orthologous'            { $r = relationshipFor($rv); } |
+        rv='analogous'              { $r = relationshipFor($rv); } |
+        rv='association'            { $r = relationshipFor($rv); } |
+        rv='--'                     { $r = relationshipFor($rv); } |
+        rv='hasMembers'             { $r = relationshipFor($rv); } |
+        rv='hasComponents'          { $r = relationshipFor($rv); } |
+        rv='hasMember'              { $r = relationshipFor($rv); } |
+        rv='hasComponent'           { $r = relationshipFor($rv); }
+    )
+    ;
+
+fragment LETTER : ('a'..'z' | 'A'..'Z') ;
+
+fragment DIGIT : '0'..'9' ;
+
+OPEN_PAREN : '(' ;
+
+CLOSE_PAREN : ')' ;
+
+// A letter, then any letters or digits, then a terminating ':'.
+NS_PREFIX : LETTER (LETTER | DIGIT)* ':' ;
+
+// One or more underscores, letters or digits.
+NS_VALUE : ('_' | LETTER | DIGIT)+ ;
+
+// Double-quoted string; a backslash is only accepted as part of an escape sequence.
+QUOTED_VALUE : '"' ( EscapeSequence | ~('\\'|'"') )* '"' ;
+
+// A single-character backslash escape, a Unicode escape or an octal escape.
+fragment EscapeSequence
+    : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
+    | UnicodeEscape
+    | OctalEscape
+    ;
+
+// Backslash followed by one to three octal digits.
+fragment OctalEscape
+    : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
+    | '\\' ('0'..'7') ('0'..'7')
+    | '\\' ('0'..'7')
+    ;
+
+// Backslash, 'u', then exactly four hexadecimal digits.
+fragment UnicodeEscape : '\\' 'u' HexDigit HexDigit HexDigit HexDigit ;
+
+fragment HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
+
+// Whitespace is routed to the hidden channel rather than discarded.
+WS : (' ' | '\t' | '\n' | '\r' | '\f')+ {$channel = HIDDEN;};
diff --git a/version_1.0/ANTLR/README.adoc b/version_1.0/ANTLR/README.adoc
new file mode 100644
index 0000000..b4df8bd
--- /dev/null
+++ b/version_1.0/ANTLR/README.adoc
@@ -0,0 +1,44 @@ +language +-------- + +ANTLR +~~~~~ + +Pulled directly from the +https://github.com/OpenBEL/openbel-framework[framework]. + +BELScript.g +^^^^^^^^^^^ + +_Not used_ + +BELScriptWalker.g +^^^^^^^^^^^^^^^^^ + +Grammar used by the legacy Java BEL Framework. + +BELScriptWalker_v1.g +^^^^^^^^^^^^^^^^^^^^ + +_Not used_ + +BELScript_C_v1.g +^^^^^^^^^^^^^^^^ + +_Not used_ + +BELScript_Python_v1.g +^^^^^^^^^^^^^^^^^^^^^ + +_Not used_ + +BELScript_v1.g +^^^^^^^^^^^^^^ + +_Not used_ + +BELStatement.g +^^^^^^^^^^^^^^ + +_Not used_ + diff --git a/version_1.0/XML/README.adoc b/version_1.0/XML/README.adoc new file mode 100644 index 0000000..c70337b --- /dev/null +++ b/version_1.0/XML/README.adoc @@ -0,0 +1,20 @@ +language +-------- + +XML +~~~ + +Pulled directly from the +https://github.com/OpenBEL/openbel-framework[framework]. + +xbel.xsd +^^^^^^^^ + +XML schema definition for BEL documents. Used by the legacy Java BEL Framework. + +xbel-annotations.xsd +^^^^^^^^^^^^^^^^^^^^ + +XML schema definition for BEL annotations used by the legacy Java BEL +Framework. + diff --git a/version_1.0/XML/xbel-annotations.xsd b/version_1.0/XML/xbel-annotations.xsd new file mode 100644 index 0000000..cc26b2b --- /dev/null +++ b/version_1.0/XML/xbel-annotations.xsd @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + Specifies information about the knowledge source on which a + statement is based. + + + + + + The citation annotation type to capture the knowledge source details. + + + + + + + + + + + + Contains one or more citation authors. + + + + + + + The author type for a citation. + + + + + + + + + + + + + + + + Citation type (e.g., book, journal, online resource, other, pubmed) + + + + + + + + + + + diff --git a/version_1.0/XML/xbel.xsd b/version_1.0/XML/xbel.xsd new file mode 100644 index 0000000..9913648 --- /dev/null +++ b/version_1.0/XML/xbel.xsd @@ -0,0 +1,823 @@ + + + + + + + + + + + + + + A comment optionally used with statements. 
+ + + + + + + + Document copyright information. + + + + + + + + + Document name. + + + + + + + + + Document version information. + + + + + + + + + Document contact information. + + + + + + + + + Document author. + + + + + + + + + Document license. + + + + + + + + + Document disclaimer. + + + + + + + + + The intrinsic set of relationship types allowed in + statements. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The intrinsic set of functions allowed in BEL term expressions. + + + + + + + + The protein abundance BEL function. + + + + + + + The protein modification BEL function for a protein abundance. + + + + + + + The substitution BEL function for a protein abundance. + + + + + + + The truncation BEL function for a protein abundance. + + + + + + + The rna abundance BEL function. + + + + + + + The abundance BEL function. + + + + + + + The micro rna abundance BEL function. + + + + + + + The gene abundance BEL function. + + + + + + + The biological process BEL function. + + + + + + + The pathology BEL function. + + + + + + + The complex abundance BEL function. + + + + + + + The translocation BEL function. + + + + + + + The cell secretion BEL function. + + + + + + + The cell surface expression BEL function. + + + + + + + The reaction BEL function. + + + + + + + The composite abundance BEL function. + + + + + + + The fusion BEL function. + + + + + + + The reactants BEL function. + + + + + + + The products BEL function. + + + + + + + The degradation BEL function. + + + + + + + The molecular activity BEL function. + + + + + + + The catalytic activity BEL function. + + + + + + + The kinase activity BEL function. + + + + + + + The phosphatase activity BEL function. + + + + + + + The peptidase activity BEL function. + + + + + + + The ribosylation activity BEL function. + + + + + + + The transcriptional activity BEL function. + + + + + + + The transport activity BEL function. + + + + + + + The gtp-bound activity BEL function. 
+ + + + + + + The chaperone activity BEL function. + + + + + + + The list BEL function. + + + + + + + + + + + Contains a pattern-based value. + + + + + + + + + Contains a list-based value. + + + + + + + + + A unique identifier referencing an annotation + definition. + + + + + + + + + + + Annotation usage explaining how to filter on + annotation values. + + + + + + + + + General element description. + + + + + + + + + Defines a function used within a BEL term expression. + + + + + + + + + Resource location of the default namespace used by + the document. + + + + + + + + + Associated namespace. + + + + + + + + + Prefix uniquely identifying a namespace within the + document. + + + + + + + + + + + Namespace values. + + + + + + + + + + + + + URL values. + + + + + + + + + + + + + the URI of the resource defining a namespace. + + + + + + + + + + + + + Contains a list of values. + + + + + + + + + + + + + + + Provides ownership and legal context for the document. + + + + + + + + + + + + + + + + + + + + + + Internally defined annotations express knowledge about a + statement. The annotation values are defined in the document. + + + + + + + + + + + + + + + + + + + + + Externally defined annotations express knowledge about a + statement. The annotation values are defined by a URL. + + + + + + + + + + + + + + + An annotation applied to a BEL statement, + referencing an annotation definition. + + + + + + + + + + + + + + + + + A grouping of annotations, evidences, and citations. + + + + + + + + + + + + + + + + + + + Contains one or more document licenses. + + + + + + + + + + + + + + + Contains one or more document authors. + + + + + + + + + + + + + + + Contains one or more annotation definitions. + + + + + + + + + + + + + + + + Specifies a namespace whose values may be used in + statements. + + + + + + + + + + + + + + Contains any number of namespaces. + + + + + + + + + + + + + + + + BEL term and function parameter. 
+ + + + + + + + + + + + + + + + + BEL terms represent biological entities. + + + + + + + + + + + + + + + + + Defines the subject of a statement. + + + + + + + + + + + + + + + Defines the object of a statement. + + + + + + + + + + + + + + + + A statement is a specific assertion of a fact in + some context: it is implicitly based on some source of + knowledge, implicitly true in at least one specific situation. + + + + + + + + + + + + + + + + + + + A group of BEL statements. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +