Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: introduce FileSystem.unjarOnce #1250

Merged
merged 2 commits into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
scala: [3.4.2]
scala: [3.3.3]
java: [temurin@18]
runs-on: ${{ matrix.os }}
steps:
Expand Down Expand Up @@ -56,7 +56,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
scala: [3.4.2]
scala: [3.3.3]
java: [temurin@18]
runs-on: ${{ matrix.os }}
steps:
Expand Down
40 changes: 40 additions & 0 deletions common/shared/src/main/scala/org/specs2/data/LruCache.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package org.specs2.data

import org.specs2.fp.*
import org.specs2.control.*
import org.specs2.time.*

/** LRU (least recently used) cache for processing items.
  *
  * Values can be registered, and the cache is cleaned on every registration so that it never holds more than `maxSize`
  * elements. The elements with the oldest timestamps are removed first.
  *
  * The internal state is only read and written while holding the lock of this instance, so that readers observe the
  * updates made by [[register]].
  *
  * @param maxSize
  *   maximum number of elements kept in the cache
  * @param systemTime
  *   clock used to timestamp the registered values
  */
class LruCache[A](maxSize: Int, systemTime: SystemTime = JavaSystemTime):
  // value -> timestamp of its most recent registration
  private var values: Map[A, Long] = Map.empty

  /** Checks if a value has already been processed; if not immediately adds it to the cache. If it has been processed,
    * refresh its timestamp.
    * @param value
    *   the value to look up and (re-)register
    * @return
    *   the processed status
    */
  def register(value: A): Operation[ProcessedStatus] =
    Operation.delayed {
      this.synchronized:
        val alreadyProcessed = values.contains(value)
        // refresh the timestamp even if the value was already registered
        values += value -> systemTime.nanoTime
        val status = if alreadyProcessed then ProcessedStatus.AlreadyProcessed else ProcessedStatus.ToProcess
        // evict the entries with the smallest timestamps until the cache fits again
        while values.size > maxSize do values -= values.minBy(_._2)._1
        status
    }

  /** Return the number of elements in the cache */
  def size: Int =
    this.synchronized(values.size)

  /** Return the timestamp for the oldest element
    * @throws java.lang.UnsupportedOperationException
    *   if the cache is empty
    */
  def oldestTimestamp: Long =
    this.synchronized(values.minBy(_._2)._2)

/** Status of an item with respect to the LruCache:
  *   - `AlreadyProcessed`: the item was present in the cache when it was registered
  *   - `ToProcess`: the item was not present in the cache when it was registered
  */
enum ProcessedStatus:
  case AlreadyProcessed, ToProcess
27 changes: 27 additions & 0 deletions common/shared/src/main/scala/org/specs2/io/FileSystem.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package io

import control.*
import fp.syntax.*
import data.LruCache
import data.ProcessedStatus.*
import java.io.*
import java.util.regex.Pattern.*
import java.util.regex.Matcher.*
Expand Down Expand Up @@ -58,6 +60,28 @@ case class FileSystem(logger: Logger) extends FilePathReader:
def mkdirs(path: FilePath): Operation[Unit] =
mkdirs(path.dir)

/** Unjaring the same jar over and over is inefficient: this LRU cache keeps track of the (jar url, destination, filter)
  * combinations which were already handled by [[unjarOnce]]. At most 1000 combinations are remembered.
  */
private val UnjarLRUCache = LruCache[(URL, DirectoryPath, String)](maxSize = 1000)

/** Unjar the jar (or zip file) specified by "jarUrl" to the "dest" directory, but only the first time that a given
  * (jarUrl, dest, regexFilter) combination is seen. The combination is looked up in an LRU cache, so the extraction
  * can happen again once the combination has been evicted from that cache.
  *
  * NOTE(review): the combination is registered before [[unjar]] runs, so it looks like a failed extraction would not
  * be retried by a later call -- confirm that this is intended.
  *
  * @param jarUrl
  *   path of the jar file
  * @param dest
  *   destination directory path
  * @param regexFilter
  *   regular expression passed as is to [[unjar]]; the expression must capture the path of an entry as group 1 which
  *   will then be used relative to dest as target path for that entry
  * @see
  *   [[unjar]]
  */
def unjarOnce(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] =
  val key = (jarUrl, dest, regexFilter)
  UnjarLRUCache.register(key).flatMap { status =>
    // only a combination which was not in the cache triggers the extraction
    unjar(jarUrl, dest, regexFilter).when(status == ToProcess).map(_ => ())
  }

/** Unjar the jar (or zip file) specified by "jarUrl" to the "dest" directory. Filters files which shouldn't be
* extracted with a regular expression.
* @param jarUrl
Expand All @@ -67,6 +91,9 @@ case class FileSystem(logger: Logger) extends FilePathReader:
* @param regexFilter
* regular expression filtering files which shouldn't be extracted; the expression must capture the path of an
* entry as group 1 which will then be used relative to dest as target path for that entry
*
* @see
* [[unjarOnce]]
*/
def unjar(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] =
val regex = compile(regexFilter)
Expand Down
9 changes: 9 additions & 0 deletions common/shared/src/main/scala/org/specs2/time/SystemTime.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.specs2.time

/** Source of time readings, abstracted so that a different clock can be substituted for the JVM one */
trait SystemTime:
  /** @return the current reading of the clock, in nanoseconds */
  def nanoTime: Long

/** SystemTime returning the value of `System.nanoTime()` */
object JavaSystemTime extends SystemTime:
  override def nanoTime: Long = System.nanoTime()
2 changes: 1 addition & 1 deletion html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ case class HtmlPrinter(env: Env, searchPage: SearchPage, logger: Logger = Consol
case Some(url) =>
val fs = env.fileSystem
if url.getProtocol.equalsIgnoreCase("jar") then
fs.unjar(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$")
fs.unjarOnce(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$")
else fs.copyDir(DirectoryPath.unsafe(url.toURI), outputDir / src)
case _ =>
val message = s"no resource found for path ${(base / src).path}"
Expand Down
56 changes: 56 additions & 0 deletions tests/shared/src/test/scala/org/specs2/data/LruCacheSpec.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package org.specs2
package data

import org.scalacheck.*
import org.scalacheck.Arbitrary.*
import org.specs2.time.*
import org.specs2.fp.syntax.*
import ProcessedStatus.*

/** Specification for LruCache: each line of the text below is bound to one of the examples e1, e2, e3 defined in this
  * class.
  */
class LruCacheSpec extends Specification with ScalaCheck:
def is = s2"""

A LRU cache can be used to store elements and evict them when they have been unused for a long time
A status is returned to know if an element has already been seen before $e1
The cache can not contain more than a fixed number of elements $e2
The oldest elements are always evicted first $e3

"""

/** Registering 1, then 2, then 1 again: the status of the last registration must be AlreadyProcessed */
def e1 =
  val lru = LruCache[Int](maxSize = 3, systemTime = MockSystemTime())
  val lastStatus = (lru.register(1) >> lru.register(2) >> lru.register(1)).unsafeRun
  lastStatus === AlreadyProcessed

/** Registering the values 1 to n must never leave more than 3 elements in a cache of maximum size 3 */
def e2 = prop { (n: SmallInt) =>
  val lru = LruCache[Int](maxSize = 3, systemTime = MockSystemTime())
  val registrations = (1 to n.value).toList.traverse(value => lru.register(value))
  registrations.void.unsafeRun
  lru.size must be_<=(3)
}.set(minTestsOk = 10)

/** Registers the distinct values 1 to n and checks which timestamp is the oldest one left in the cache.
  *
  * MockSystemTime returns 1, 2, 3, ... on successive calls, so the value `i` is stamped with the time `i`. If the
  * oldest elements are evicted first, only the `maxSize` most recent values remain and the oldest remaining timestamp
  * is exactly `max(1, n - maxSize + 1)`. Comparing the oldest timestamp with the next tick of the clock would hold
  * for any eviction order, since the clock is strictly increasing.
  */
def e3 = prop { (n: SmallInt) =>
  val maxSize = 3
  val cache = LruCache[Int](maxSize = maxSize, systemTime = MockSystemTime())
  val operations = (1 to n.value).toList.traverse(i => cache.register(i))
  operations.void.unsafeRun
  cache.oldestTimestamp === math.max(1, n.value - maxSize + 1).toLong
}.set(minTestsOk = 10)

// HELPERS

/** Deterministic clock for the tests: successive calls to `nanoTime` return 1, 2, 3, ... */
class MockSystemTime() extends SystemTime:
  private val ticks: Iterator[Long] = Iterator.from(1).map(_.toLong)

  def nanoTime: Long =
    ticks.next()

/** Integer wrapper whose generated values are always between 1 and 10 (see the Arbitrary instance below) */
case class SmallInt(value: Int)

object SmallInt:
  /** Maps any generated Int to the range 1 to 10 */
  given Arbitrary[SmallInt] =
    Arbitrary(arbitrary[Int].map(n => SmallInt(math.abs(n % 10) + 1)))