Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a kleenex implementation #284

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 41 additions & 11 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ val shapeless2Version = "2.3.7"
val shapeless3Version = "3.0.3"
val scalaJavaTimeVersion = "2.3.0"
val diffsonVersion = "4.1.1"
val weaverVersion = "0.7.9"

val commonSettings = List(
scalaVersion := scala213,
Expand Down Expand Up @@ -56,13 +57,14 @@ val commonSettings = List(
libraryDependencies ++= List(
"co.fs2" %%% "fs2-core" % fs2Version,
"org.scala-lang.modules" %%% "scala-collection-compat" % "2.6.0",
"io.circe" %%% "circe-parser" % circeVersion % "test",
"co.fs2" %% "fs2-io" % fs2Version % "test",
"com.disneystreaming" %%% "weaver-cats" % "0.7.9" % "test",
"com.disneystreaming" %%% "weaver-cats-core" % "0.7.9" % "test",
"com.disneystreaming" %%% "weaver-core" % "0.7.9" % "test",
"com.disneystreaming" %%% "weaver-framework" % "0.7.9" % "test",
"com.eed3si9n.expecty" %%% "expecty" % "0.15.4" % "test",
"io.circe" %%% "circe-parser" % circeVersion % Test,
"co.fs2" %% "fs2-io" % fs2Version % Test,
"com.disneystreaming" %%% "weaver-cats" % weaverVersion % Test,
"com.disneystreaming" %%% "weaver-cats-core" % weaverVersion % Test,
"com.disneystreaming" %%% "weaver-core" % weaverVersion % Test,
"com.disneystreaming" %%% "weaver-framework" % weaverVersion % Test,
"com.disneystreaming" %% "weaver-scalacheck" % weaverVersion % Test,
"com.eed3si9n.expecty" %%% "expecty" % "0.15.4" % Test,
"org.portable-scala" %%% "portable-scala-reflect" % "1.1.1" cross CrossVersion.for3Use2_13
) ++ PartialFunction
.condOpt(CrossVersion.partialVersion(scalaVersion.value)) { case Some((2, _)) =>
Expand Down Expand Up @@ -116,7 +118,9 @@ val root = (project in file("."))
jsonDiffson.js,
jsonPlay.js,
text.js,
xml.js),
xml.js,
transducers.js,
kleenex.js),
ScalaUnidoc / siteSubdirName := "api",
addMappingsToSiteDir(ScalaUnidoc / packageDoc / mappings, ScalaUnidoc / siteSubdirName),
Nanoc / sourceDirectory := file("site"),
Expand All @@ -140,7 +144,11 @@ val root = (project in file("."))
xml.jvm,
xml.js,
cbor.jvm,
cbor.js
cbor.js,
transducers.jvm,
transducers.js,
kleenex.jvm,
kleenex.js
)

lazy val text = crossProject(JVMPlatform, JSPlatform)
Expand Down Expand Up @@ -225,7 +233,7 @@ lazy val jsonCirce = crossProject(JVMPlatform, JSPlatform)
description := "Streaming JSON library with support for circe ASTs",
libraryDependencies ++= List(
"io.circe" %%% "circe-core" % circeVersion,
"org.gnieh" %%% "diffson-circe" % diffsonVersion % "test"
"org.gnieh" %%% "diffson-circe" % diffsonVersion % Test
)
)
.dependsOn(json % "compile->compile;test->test", jsonDiffson % "test->test")
Expand All @@ -241,7 +249,7 @@ lazy val jsonPlay = crossProject(JVMPlatform, JSPlatform)
crossScalaVersions := Seq(scala212, scala213),
libraryDependencies ++= List(
"com.typesafe.play" %%% "play-json" % playVersion,
"org.gnieh" %%% "diffson-play-json" % diffsonVersion % "test"
"org.gnieh" %%% "diffson-play-json" % diffsonVersion % Test
)
)
.dependsOn(json % "compile->compile;test->test", jsonDiffson % "test->test")
Expand Down Expand Up @@ -304,6 +312,28 @@ lazy val cbor = crossProject(JVMPlatform, JSPlatform)
.flatten
)

// Cross-built (JVM + JS) module published under the name "fs2-data-transducers".
// It only applies the shared `commonSettings` and `publishSettings`; no
// module-specific library dependency is declared here.
lazy val transducers = crossProject(JVMPlatform, JSPlatform)
.crossType(CrossType.Full)
.in(file("transducers"))
.settings(commonSettings)
.settings(publishSettings)
.settings(
name := "fs2-data-transducers",
description := "Streaming transducers library"
)

// Cross-built (JVM + JS) module published under the name "fs2-data-kleenex".
// On top of the shared settings it adds cats-parse as a library dependency,
// and it depends on the `text` and `transducers` modules.
lazy val kleenex = crossProject(JVMPlatform, JSPlatform)
.crossType(CrossType.Full)
.in(file("kleenex"))
.settings(commonSettings)
.settings(publishSettings)
.settings(
name := "fs2-data-kleenex",
description := "Streaming text processing library",
libraryDependencies += "org.typelevel" %%% "cats-parse" % "0.3.6"
)
.dependsOn(text, transducers)

lazy val documentation = project
.in(file("documentation"))
.enablePlugins(MdocPlugin)
Expand Down
32 changes: 32 additions & 0 deletions kleenex/shared/src/main/scala/fs2/data/kleenex/Action.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright 2021 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.kleenex

import cats.Show

/** Instruction operating on a stack and named registers.
  *
  * Three variants exist: [[Action.Push]], [[Action.Pop]] and [[Action.Write]].
  * The latter two carry the name of the register they refer to.
  */
sealed trait Action

object Action {

  /** Action without any register operand. */
  case object Push extends Action

  /** Action referring to the register named `reg`. */
  case class Pop(reg: String) extends Action

  /** Action referring to the register named `reg`. */
  case class Write(reg: String) extends Action

  /** Textual rendering: `push`, `pop $reg` or `write $reg`. */
  implicit val show: Show[Action] = new Show[Action] {
    def show(action: Action): String =
      action match {
        case Write(register) => s"write $$$register"
        case Pop(register)   => s"pop $$$register"
        case Push            => "push"
      }
  }
}
216 changes: 216 additions & 0 deletions kleenex/shared/src/main/scala/fs2/data/kleenex/Check.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/*
* Copyright 2021 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.kleenex

import cats.data.NonEmptyList
import cats.parse.Caret
import cats.syntax.all._
import cats.data.StateT
import cats.MonadError
import fs2.data.kleenex.core.KleenexCompilerException
import scala.annotation.tailrec

/** Static checks performed on a kleenex [[Program]].
  *
  * The only public check verifies that recursion between productions is tail
  * recursion. Productions are grouped into strongly connected components of the
  * "references" graph, and a production is rejected when it references a member
  * of its own component from a non-tail position of a sequence.
  *
  * Failures are raised in `F` as [[fs2.data.kleenex.core.KleenexCompilerException]].
  */
class Checker[F[_]](implicit F: MonadError[F, Throwable]) {

  /** Checks the whole program.
    *
    * @param prog the program whose productions are checked
    * @return `F.unit` on success; a failed `F` if a production references an
    *         unknown identifier or contains a non tail recursive call
    */
  def check(prog: Program): F[Unit] = {
    val declMap = prog.productions.toList.map { case p @ Production(name, t) => (name, (p.pos, t)) }.toMap
    scc(declMap).flatMap { components =>
      components.traverse_ { component =>
        // productions of this component having a strict dependency on a member
        // of the very same component, i.e. a recursive call in non-tail position
        val offending = component.toList
          .flatMap { id =>
            declMap.get(id).collect {
              case (pos, t) if strictDependencies(t).keysIterator.exists(component.contains(_)) => (id, pos)
            }
          }
          // sorted so that the message does not depend on set iteration order
          .sortBy { case (id, pos) => (pos.line, id) }
        if (offending.nonEmpty) {
          val listing = offending.map { case (id, pos) => s"$id (at line ${pos.line + 1})" }.mkString("\n")
          F.raiseError[Unit](
            KleenexCompilerException(s"Following productions contain non tail recursive calls:\n$listing"))
        } else {
          F.unit
        }
      }
    }
  }

  /** Names of all the productions referenced anywhere in `term` (without duplicates). */
  private def successors(term: Term): List[String] = {
    def go(t: Term, acc: Set[String]): Set[String] =
      t match {
        case Term.Var(s)          => acc + s
        case Term.Concat(ts)      => ts.foldLeft(acc)((acc, t) => go(t, acc))
        case Term.Alternative(ts) => ts.foldLeft(acc)((acc, t) => go(t, acc))
        case Term.Star(inner)     => go(inner, acc)
        case Term.Plus(inner)     => go(inner, acc)
        case Term.Question(inner) => go(inner, acc)
        case Term.Range(inner, _, _) => go(inner, acc)
        case Term.Suppress(inner) => go(inner, acc)
        case Term.Capture(_, inner) => go(inner, acc)
        case _                    => acc
      }

    go(term, Set.empty).toList
  }

  /** All identifiers occurring in `t`, each with the positions at which it occurs. */
  private def termIdents(t: Term): Map[String, Set[Caret]] =
    t match {
      case Term.Var(name)       => Map(name -> Set(t.pos))
      case Term.Concat(ts)      => ts.toList.map(termIdents(_)).combineAll
      case Term.Alternative(ts) => ts.toList.map(termIdents(_)).combineAll
      case Term.Star(inner)     => termIdents(inner)
      case Term.Plus(inner)     => termIdents(inner)
      case Term.Question(inner) => termIdents(inner)
      // ranges are traversed like the other repetition operators, so that
      // this function agrees with `successors`
      case Term.Range(inner, _, _) => termIdents(inner)
      case Term.Suppress(inner) => termIdents(inner)
      case Term.Capture(_, inner) => termIdents(inner)
      case _                    => Map.empty
    }

  /** Strict dependencies of `t`: the identifiers occurring in a non-tail
    * position of a sequence, with the positions of these occurrences.
    *
    * In a sequence, every identifier of every element but the last one is
    * strict; the last element is inspected recursively.
    */
  def strictDependencies(t: Term): Map[String, Set[Caret]] =
    t match {
      case Term.Concat(NonEmptyList(t1, t2 :: ts)) =>
        strictDependencies(Term.Concat(NonEmptyList(t2, ts))).combine(termIdents(t1))
      case Term.Concat(NonEmptyList(single, Nil)) => strictDependencies(single)
      case Term.Alternative(ts)   => ts.toList.map(strictDependencies(_)).combineAll
      case Term.Star(inner)       => strictDependencies(inner)
      case Term.Plus(inner)       => strictDependencies(inner)
      case Term.Question(inner)   => strictDependencies(inner)
      // handled like the other repetition operators (see `successors`)
      case Term.Range(inner, _, _) => strictDependencies(inner)
      case Term.Suppress(inner)   => strictDependencies(inner)
      case Term.Capture(_, inner) => strictDependencies(inner)
      case _                      => Map.empty
    }

  // State monad used to thread the SCC computation state through `F`
  private type State[Res] = StateT[F, SCCState, Res]

  private def gets[Res](f: SCCState => Res): State[Res] =
    StateT.inspect(f)

  private def getProps(id: String): State[Option[SCCProps]] =
    StateT.inspect(_.props.get(id))

  private def nop: State[Unit] =
    StateT.empty

  private def modify(f: SCCState => SCCState): State[Unit] =
    StateT.modify(f)

  private def raiseError[Res](t: Throwable): State[Res] =
    nop.flatMapF(_ => t.raiseError)

  /** Computes the strongly connected components of the graph whose vertices
    * are the production names and whose edges are given by `successors`.
    *
    * Fails if a production references an identifier absent from `declMap`.
    */
  private def scc(declMap: Map[String, (Caret, Term)]): F[List[Set[String]]] = {
    val state = SCCState(0, Nil, Map.empty, Nil)

    // Lowers the lowlink of `v` to `bound` if it is smaller. The properties are
    // read from the *current* state: a snapshot taken when `v` was pushed might
    // overwrite a smaller lowlink recorded in the meantime by another successor.
    def lowerLowlink(v: String, bound: Int): State[Unit] =
      modify { st =>
        st.copy(props = st.props.updatedWith(v)(_.map(p => p.copy(lowlink = p.lowlink.min(bound)))))
      }

    // Pops the stack down to `v` (included) and registers the popped vertices
    // as a new component.
    def popComponent(v: String): State[Unit] =
      modify { st =>
        val (component, remaining) = spanUntilIncluding(st.stack, v)
        st.copy(
          stack = remaining,
          components = component.toSet :: st.components,
          // vertices in the component are not on the stack anymore
          props = component.foldLeft(st.props) { (props, w) =>
            props.updatedWith(w)(_.map(_.copy(onStack = false)))
          }
        )
      }

    def process(v: String, t: Term): State[Unit] =
      for {
        // first push v on the stack and assign an index
        _ <- modify { st =>
          st.copy(index = st.index + 1,
                  stack = v :: st.stack,
                  props = st.props.updated(v, SCCProps(true, st.index, st.index)))
        }
        // then for each successor compute recursively
        _ <- successors(t).traverse_ { w =>
          getProps(w).flatMap {
            case Some(wProps) =>
              // successor already processed
              if (wProps.onStack)
                // it is on stack, hence in the current SCC
                lowerLowlink(v, wProps.index)
              else
                // not on the stack, not in SCC
                nop
            case None =>
              // not processed yet, do it
              declMap.get(w) match {
                case Some((_, wt)) =>
                  for {
                    _ <- process(w, wt)
                    wProps <- gets(_.props(w))
                    _ <- lowerLowlink(v, wProps.lowlink)
                  } yield ()
                case None =>
                  raiseError[Unit](
                    KleenexCompilerException(s"Unknown identifier $w in definition of $v at line ${t.pos.line + 1}"))
              }
          }
        }
        vProps <- gets(_.props(v))
        // v is the root of a component when nothing older is reachable from it
        _ <-
          if (vProps.lowlink == vProps.index) popComponent(v)
          else nop
      } yield ()

    declMap.toList
      // traverse each node (aka production identifier)
      .traverse_ { case (id, (_, t)) =>
        getProps(id).flatMap {
          case None =>
            // if no index has been assigned yet, process it
            process(id, t)
          case Some(_) =>
            // otherwise, just continue
            nop
        }
      }
      .runS(state)
      .map(_.components)
  }

  /** Splits `l` right after the first occurrence of `v`.
    *
    * @return the prefix of `l` up to and including `v`, and the remaining
    *         elements; if `v` does not occur, the whole list and `Nil`
    */
  private def spanUntilIncluding(l: List[String], v: String): (List[String], List[String]) = {
    @tailrec
    def loop(remaining: List[String], acc: List[String]): (List[String], List[String]) =
      remaining match {
        case Nil         => (acc.reverse, Nil)
        case `v` :: rest => ((v :: acc).reverse, rest)
        case e :: rest   => loop(rest, e :: acc)
      }
    loop(l, Nil)
  }

}

/** Per-vertex bookkeeping of the strongly connected components computation.
  *
  * @param onStack whether the vertex is currently on the traversal stack
  * @param index   the index assigned to the vertex when it was first visited
  * @param lowlink the smallest index recorded so far as reachable from the vertex
  */
case class SCCProps(
    onStack: Boolean,
    index: Int,
    lowlink: Int
)
/** State threaded through the strongly connected components computation.
  *
  * @param index      the next index to assign to a newly visited vertex
  * @param stack      the vertices currently on the traversal stack, most recent first
  * @param props      the properties of the vertices visited so far
  * @param components the components found so far
  */
case class SCCState(
    index: Int,
    stack: List[String],
    props: Map[String, SCCProps],
    components: List[Set[String]]
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright 2021 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.kleenex

/** Immutable evaluation environment made of a stack of strings and named
  * string registers.
  *
  * Operations needing a value on top of the stack return `None` when the
  * stack is empty.
  */
case class Environment(stack: List[String], registers: Map[String, String]) {

  /** Concatenates `s` at the end of the value on top of the stack. */
  def append(s: String): Option[Environment] =
    stack.headOption.map { top =>
      copy(stack = (top + s) :: stack.tail)
    }

  /** Adds a new empty value on top of the stack. */
  def push: Environment =
    Environment("" :: stack, registers)

  /** Removes the value on top of the stack and saves it into register `reg`. */
  def pop(reg: String): Option[Environment] =
    stack.headOption.map { top =>
      Environment(stack.tail, registers.updated(reg, top))
    }

  /** Concatenates the content of `reg` (empty if the register is not set) at
    * the end of the value on top of the stack, then resets the register to
    * the empty string.
    */
  def write(reg: String): Option[Environment] =
    stack.headOption.map { top =>
      val content = registers.getOrElse(reg, "")
      Environment((top + content) :: stack.tail, registers.updated(reg, ""))
    }

}
Loading