diff --git a/.gitignore b/.gitignore
index 9f5210f6d..0e04513d3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@
 .sbt
 smithyql-log.txt
 .version
+**/.antlr
diff --git a/build.sbt b/build.sbt
index e57ec3111..2c1437d5e 100644
--- a/build.sbt
+++ b/build.sbt
@@ -101,6 +101,12 @@ lazy val parser = module("parser")
       "co.fs2" %% "fs2-io" % "3.10.2" % Test,
     )
   )
+  .enablePlugins(Antlr4Plugin)
+  .settings(
+    Antlr4 / antlr4Version := "4.13.0",
+    Antlr4 / antlr4PackageName := Some("playground.smithyql.parser.v3"),
+    Antlr4 / antlr4GenVisitor := true,
+  )
   .dependsOn(
     ast % "test->test;compile->compile",
     source % "test->test;compile->compile",
diff --git a/flake.nix b/flake.nix
index 9afa50302..4a1f2491d 100644
--- a/flake.nix
+++ b/flake.nix
@@ -13,7 +13,7 @@
           overlays = [
             (final: prev:
               let
-                jre = final.openjdk11;
+                jre = final.openjdk17;
                 jdk = jre;
               in
               { inherit jdk jre; })
diff --git a/modules/parser/src/main/antlr4/SmithyQL.g4 b/modules/parser/src/main/antlr4/SmithyQL.g4
new file mode 100644
index 000000000..865715b37
--- /dev/null
+++ b/modules/parser/src/main/antlr4/SmithyQL.g4
@@ -0,0 +1,38 @@
+parser grammar SmithyQL;
+options {
+	tokenVocab = Tokens;
+}
+
+// todos: comments, forced newlines (e.g. separate use clauses)
+
+qualified_identifier: ident ('.' ident)* '#' ident;
+
+soft_keyword: 'use' | 'service';
+
+ident: ID | soft_keyword;
+
+use_clause: 'use' 'service' qualified_identifier;
+
+prelude: use_clause*;
+
+service_reference: (qualified_identifier '.')? ident;
+
+query_operation_name: service_reference;
+
+number: NUMBER;
+bool: 'true' | 'false';
+node: number | bool | STRING | NULL | struct | listed;
+
+field: key = ident (':' | '=') value = node;
+
+struct: '{' (field (',' field)* (',')?)? '}';
+
+listed: '[' (node (',' node)* (',')?)? ']';
+
+query: query_operation_name struct;
+
+run_query: query;
+
+statement: run_query;
+
+source_file: prelude statement* EOF;
diff --git a/modules/parser/src/main/antlr4/Tokens.g4 b/modules/parser/src/main/antlr4/Tokens.g4
new file mode 100644
index 000000000..e56ed32ad
--- /dev/null
+++ b/modules/parser/src/main/antlr4/Tokens.g4
@@ -0,0 +1,42 @@
+lexer grammar Tokens;
+
+DOT: '.';
+COMMA: ',';
+USE: 'use';
+SERVICE: 'service';
+HASH: '#';
+LBRACE: '{';
+RBRACE: '}';
+LBRACKET: '[';
+RBRACKET: ']';
+EQUAL: '=';
+COLON: ':';
+TRUE: 'true';
+FALSE: 'false';
+NULL: 'null';
+
+ID: [a-zA-Z][a-zA-Z_0-9]*;
+
+// string and number tokens stolen from JSON
+// https://github.com/antlr/grammars-v4/blob/b2a35350cbce75b2d47c659ccbadba78a89310ef/json/JSON.g4
+STRING: '"' (ESC | SAFECODEPOINT)* '"';
+
+fragment ESC: '\\' (["\\/bfnrt] | UNICODE);
+
+fragment UNICODE: 'u' HEX HEX HEX HEX;
+
+fragment HEX: [0-9a-fA-F];
+
+fragment SAFECODEPOINT: ~ ["\\\u0000-\u001F];
+
+NUMBER: '-'? INT ('.' [0-9]+)? EXP?;
+
+fragment INT: // integer part forbids leading 0s (e.g. `01`)
+	'0'
+	| [1-9] [0-9]*;
+
+// no leading zeros
+fragment EXP: // exponent number permits leading 0s (e.g. `1e01`)
+	[Ee] [+-]? [0-9]+;
+
+WS: [ \t\n\r]+ -> skip;
diff --git a/modules/parser/src/main/antlr4/Yikes.g4 b/modules/parser/src/main/antlr4/Yikes.g4
new file mode 100644
index 000000000..c317fe357
--- /dev/null
+++ b/modules/parser/src/main/antlr4/Yikes.g4
@@ -0,0 +1,10 @@
+parser grammar Yikes;
+options {
+	tokenVocab = Tokens;
+}
+
+namespace: (ID ('.' ID)*);
+qualified_identifier: namespace '#' ID;
+use_clause: 'use' 'service' qualified_identifier;
+
+source_file: use_clause* EOF;
diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/RecursiveDescentDemo.worksheet.sc b/modules/parser/src/main/scala/playground/smithyql/parser/v2/RecursiveDescentDemo.worksheet.sc
new file mode 100644
index 000000000..d47bc8856
--- /dev/null
+++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/RecursiveDescentDemo.worksheet.sc
@@ -0,0 +1,169 @@
+import scala.reflect.ClassTag
+/* vocab:
+
+DOT: '.';
+USE: 'use';
+SERVICE: 'service';
+HASH: '#';
+ID: [a-zA-Z][a-zA-Z_0-9]*;
+
+ */
+
+/* grammar:
+
+namespace: (ID ('.' ID)*);
+qualified_identifier: namespace '#' ID;
+use_clause: 'use' 'service' qualified_identifier;
+
+source_file: use_clause* EOF;
+
+ */
+
+sealed trait Token extends Product with Serializable
+
+/** A token with no payload; used as a matcher it accepts only itself. */
+abstract class SimpleToken(
+  name: String
+) extends Token
+  with PartialFunction[Token, Unit] {
+
+  override def toString(
+  ): String = name
+
+  override def isDefinedAt(
+    x: Token
+  ): Boolean = x == this
+
+  override def apply(
+    v1: Token
+  ): Unit = ()
+
+}
+
+case object Dot extends SimpleToken("Dot")
+case object Use extends SimpleToken("Use")
+case object Service extends SimpleToken("Service")
+case object Hash extends SimpleToken("Hash")
+
+case class Id(
+  value: String
+) extends Token
+
+/** Splits `input` into tokens: `use`, `service`, `#`, `.`, and `Id` for anything else. */
+def tokenize(
+  input: String
+): List[Token] = {
+  // split by whitespace or punctuation, make sure punctuation is its own token
+  val tokens = input.split("\\s+|(?=[#\\.])|(?<=[#\\.])(?!\\s)").toList
+  // `split` yields empty segments (e.g. when the input starts with whitespace);
+  // drop them so they don't become `Id("")`.
+  tokens.filter(_.nonEmpty).map {
+    case "use" => Use
+    case "service" => Service
+    case "#" => Hash
+    case "." => Dot
+    case id => Id(id)
+  }
+}
+
+var tokens = List.empty[Token]
+
+def previewToken(
+) = tokens.headOption
+
+def expectTyped[T <: Token: ClassTag](
+): T = expect(TypedMatcher[T](scala.reflect.classTag[T]))
+
+/** Matcher that accepts any token that is an instance of `T`'s runtime class. */
+case class TypedMatcher[T](
+  ct: ClassTag[T]
+) extends PartialFunction[Token, T] {
+
+  override def apply(
+    v1: Token
+  ): T = ct.unapply(v1).get
+
+  override def isDefinedAt(
+    x: Token
+  ): Boolean = ct.runtimeClass.isInstance(x)
+
+}
+
+/** Consumes the next token and returns `t` applied to it.
+  *
+  * Throws if the input is exhausted or `t` is not defined at the token. The token is
+  * consumed even when it does not match.
+  */
+def expect[T](
+  t: PartialFunction[Token, T]
+): T =
+  previewToken() match {
+    case Some(tok) =>
+      nextToken(): Unit
+      // report the offending token itself rather than the `None` left over from `lift`
+      t.lift(tok).getOrElse(throw new Exception(s"expected $t, got $tok"))
+    case None => throw new Exception(s"expected $t, got end of input")
+  }
+
+/** Removes and returns the head of `tokens`; throws if there are no tokens left. */
+def nextToken(
+) = {
+  val t = tokens.head
+  tokens = tokens.tail
+  t
+}
+
+case class QualifiedIdentifier(
+  namespace: List[Id],
+  service: Id,
+)
+
+case class UseClause(
+  ident: QualifiedIdentifier
+)
+
+case class SourceFile(
+  clauses: List[UseClause]
+)
+
+// qualified_identifier: ID ('.' ID)* '#' ID
+def qualifiedIdentifier: QualifiedIdentifier = {
+  val initId = expectTyped[Id]()
+  var namespace = List(initId)
+
+  while (previewToken().contains(Dot)) {
+    expect(Dot): Unit
+    namespace = namespace :+ expectTyped[Id]()
+  }
+
+  expect(Hash): Unit
+
+  val service = expectTyped[Id]()
+
+  QualifiedIdentifier(namespace, service)
+}
+
+// use_clause: 'use' 'service' qualified_identifier
+def useClause: UseClause = {
+  expect(Use): Unit
+  expect(Service): Unit
+  UseClause(qualifiedIdentifier)
+}
+
+// source_file: use_clause* (stops at the first non-`use` token; EOF is not checked)
+def sourceFile: SourceFile = {
+  var clauses = List.empty[UseClause]
+
+  while (previewToken().contains(Use))
+    clauses = clauses :+ useClause
+
+  SourceFile(clauses)
+}
+
+val example =
+  """use service foo.bar#Baz
+    |use service foo.bar#Quux""".stripMargin
+
+tokens = tokenize(example)
+tokens
+sourceFile
diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v3/Demo.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v3/Demo.scala
new file mode 100644
index 000000000..56aef7be7
--- /dev/null
+++ b/modules/parser/src/main/scala/playground/smithyql/parser/v3/Demo.scala
@@ -0,0 +1,223 @@
+package playground.smithyql.parser.v3
+
+import cats.Monad
+import cats.Parallel
+import cats.data.EitherNel
+import cats.data.NonEmptyList
+import cats.implicits._
+import org.antlr.v4.runtime.BaseErrorListener
+import org.antlr.v4.runtime.CharStreams
+import org.antlr.v4.runtime.CommonTokenStream
+import org.antlr.v4.runtime.RecognitionException
+import org.antlr.v4.runtime.Recognizer
+import org.antlr.v4.runtime.tree.ErrorNode
+import org.antlr.v4.runtime.tree.TerminalNode
+import playground.smithyql.parser.v3.Yikes.Source_fileContext
+import playground.smithyql.parser.v3.Yikes.Use_clauseContext
+
+import scala.jdk.CollectionConverters._
+
+object Demo {
+
+  def main(
+    args: Array[String]
+  ): Unit = {
+    val input =
+      """|use
+         |use service ..#
+         |use service x#
+         |use service a#oho
+         |use service b
+         |use service #foo""".stripMargin
+
+    val l = new Tokens(CharStreams.fromString(input))
+
+    val p = new Yikes(new CommonTokenStream(l))
+
+    // Model parameterised by a wrapper F around every node; `sequence` pulls F to the outside.
+    case class SourceFile[F[_]](
+      clauses: List[F[UseClause[F]]]
+    ) {
+
+      def sequence(
+        implicit F: Parallel[F],
+        M: Monad[F],
+      ): F[SourceFile[cats.Id]] = clauses
+        .parTraverse(_.flatMap(_.sequence))
+        .map(SourceFile[cats.Id](_))
+
+    }
+
+    case class UseClause[F[_]](
+      namespace: F[NonEmptyList[F[String]]],
+      service: F[String],
+    ) {
+
+      def sequence(
+        implicit F: Parallel[F],
+        M: Monad[F],
+      ): F[UseClause[cats.Id]] = (namespace.flatMap(_.parSequence), service)
+        .parMapN(UseClause[cats.Id](_, _))
+
+    }
+
+    /** Turns a possibly-null value into an `EitherNel`, using `msg` as the error for null. */
+    implicit class NullableOps[T](
+      t: T
+    ) {
+
+      def requireOr(
+        msg: String
+      ): EitherNel[String, T] = Option(t).toRightNel(msg)
+
+    }
+
+    /** Returns the node's text, or an error if the node is an `ErrorNode`. */
+    def checkTerminal(
+      p: TerminalNode
+    ): EitherNel[String, String] = p.accept(
+      new YikesBaseVisitor[EitherNel[String, String]] {
+        override protected def defaultResult(
+        ): EitherNel[String, String] = sys.error("unsupported")
+
+        override def visitTerminal(
+          node: TerminalNode
+        ): EitherNel[String, String] = node.getText().asRight
+
+        override def visitErrorNode(
+          node: ErrorNode
+        ): EitherNel[String, String] = s"error node: ${node.getText()}".leftNel
+      }
+    )
+
+    /** Builds the model by walking the generated context accessors directly (no visitor). */
+    def parseFull(
+      p: Yikes
+    ): EitherNel[String, SourceFile[EitherNel[String, *]]] = p
+      .source_file()
+      .requireOr("no source file")
+      .map { sf =>
+        SourceFile[EitherNel[String, *]](
+          sf
+            .use_clause()
+            .asScala
+            .toList
+            .map { useClause =>
+              // the accessor may return null (e.g. after a syntax error), so guard it
+              // the same way the visitor-based variant below does
+              useClause
+                .qualified_identifier()
+                .requireOr("missing qualified identifier")
+                .map { qi =>
+                  UseClause[EitherNel[String, *]](
+                    namespace = NonEmptyList
+                      .fromList(
+                        qi.namespace()
+                          .ID()
+                          .asScala
+                          .toList
+                          .map(_.requireOr("invalid namespace segment").flatMap(checkTerminal(_)))
+                      )
+                      .toRightNel("missing namespace"),
+                    service = qi.ID().requireOr("missing ident node").flatMap {
+                      checkTerminal
+                    },
+                  )
+                }
+            }
+        )
+      }
+
+    p.removeErrorListeners()
+
+    // report every syntax error together with up to two lines of context before and after it
+    p.addErrorListener(new BaseErrorListener {
+
+      override def syntaxError(
+        recognizer: Recognizer[_ <: Object, _ <: Object],
+        offendingSymbol: Object,
+        line: Int,
+        charPositionInLine: Int,
+        msg: String,
+        e: RecognitionException,
+      ): Unit = {
+
+        val (beforeError, afterError) = input
+          .linesIterator
+          .toList(line - 1)
+          .splitAt(charPositionInLine)
+
+        val previousLinesRange: String =
+          input.linesWithSeparators.toList.slice(line - 3, line - 1).mkString
+
+        val nextLinesRange: String = input.linesWithSeparators.toList.slice(line, line + 2).mkString
+
+        println(
+          s"""ERROR $line:$charPositionInLine @ $msg
+             |${previousLinesRange}${Console.GREEN}${beforeError}${Console.RED}${afterError}${Console.RESET}
+             |${" " * charPositionInLine}^HERE${nextLinesRange}""".stripMargin
+        )
+      }
+
+    })
+
+    val r = parseFull(p)
+    // avoid `.toOption.get`: print the clauses only if parsing produced a source file
+    r.foreach { sf =>
+      println("parsed N rules: " + sf.clauses.size)
+      sf.clauses.foreach(println)
+    }
+    println("result: " + r.map(_.sequence))
+
+    // second pass over the same input, this time building the model with a visitor
+    p.reset()
+    p.removeErrorListeners()
+    val r2 =
+      new YikesBaseVisitor[EitherNel[String, Any]] {
+        override def visitSource_file(
+          ctx: Source_fileContext
+        ): EitherNel[String, SourceFile[EitherNel[String, *]]] = ctx
+          .use_clause()
+          .requireOr("no use clauses")
+          .map {
+            _.asScala.toList.map(visitUse_clause(_))
+          }
+          .map(SourceFile(_))
+
+        override def visitUse_clause(
+          ctx: Use_clauseContext
+        ): EitherNel[String, UseClause[EitherNel[String, *]]] = ctx
+          .qualified_identifier()
+          .requireOr("missing qualified identifier")
+          .flatMap { qi =>
+            UseClause[EitherNel[String, *]](
+              namespace = NonEmptyList
+                .fromList(
+                  qi.namespace()
+                    .ID()
+                    .asScala
+                    .toList
+                    .map(_.requireOr("invalid namespace segment").flatMap(checkTerminal(_)))
+                )
+                .toRightNel("missing namespace"),
+              service = qi.ID().requireOr("missing ident node").flatMap {
+                checkTerminal
+              },
+            ).asRight
+          }
+
+        override protected def defaultResult(
+        ): EitherNel[String, Nothing] = sys.error("unsupported branch")
+      }
+        .visitSource_file(
+          p.source_file()
+        )
+
+    println(r)
+    println(r2)
+    println(r == r2)
+
+    println(r2.flatMap(_.sequence))
+  }
+
+}
diff --git a/project/plugins.sbt b/project/plugins.sbt
index b619d86f5..7cf494295 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -14,4 +14,6 @@ addSbtPlugin("com.disneystreaming.smithy4s" % "smithy4s-sbt-codegen" % "0.18.23"
 addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.11.0")
 addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "1.1.1")
 
+addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3")
+
 addDependencyTreePlugin