Skip to content

Commit

Permalink
app: add --verbose and re-use chunker
Browse files: browse the repository at this point in the history
  • Loading branch information
grote committed Jul 11, 2024
1 parent 9789d29 commit dfa6241
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 16 deletions.
29 changes: 16 additions & 13 deletions app/src/main/java/org/calyxos/seedvault/chunker/Main.kt
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,34 @@ class Cli : CliktCommand() {
// Command-line parameters of Cli. This is a diff view, so a removed line and its replacement
// can both appear below without +/- markers.

// Positional arguments; at least one is required. Each entry is later wrapped in File(...) by run().
private val files by argument().multiple(required = true)

// Flag, off by default. When set, run() prints the summary (file count, total bytes, throughput,
// unique/dupe chunk numbers).
private val checkDedupRatio: Boolean by option().flag(default = false)
// Flag (-v / --verbose), off by default. Gates the per-file and per-chunk println output.
private val verbose: Boolean by option("-v", "--verbose").flag(default = false)

// Integer option limited to AVERAGE_MIN..AVERAGE_MAX; handed to Chunker as its first argument.
// Two default lines are visible here (16384 and 16 * 1024); both are the same value, presumably
// the removed and the added spelling of that line.
private val size: Int by option()
.int()
.restrictTo(AVERAGE_MIN..AVERAGE_MAX)
.default(16384)
.default(16 * 1024)

// Integer option limited to 0..3, defaulting to 1; handed to Chunker as its second argument.
private val normalization: Int by option()
.int()
.restrictTo(0..3)
.default(1)

override fun run() {
val digest = MessageDigest.getInstance("SHA-256")
val chunker = Chunker(size, normalization) { bytes ->
digest.digest(bytes).fold("") { str, it -> str + "%02x".format(it) }
}
val duration = measureTime {
files.forEach { file ->
onEachFile(File(file))
onEachFile(chunker, File(file))
}
}
println("\nTook: $duration")
if (checkDedupRatio) {
println()
val totalSize = files.sumOf { File(it).length() }
val sizePerTime = totalSize / duration.inWholeSeconds
println("Files: ${files.size} with a total of $totalSize bytes ($sizePerTime bytes/s)")
val sizePerTime = totalSize / duration.inWholeSeconds / 1024 / 1024
println("Files: ${files.size} with a total of $totalSize bytes ($sizePerTime MiB/s)")
println("Unique chunks: ${chunks.size}")
println("Dupe chunks: $reusedChunks")
println("Dupe data: $sizeDupe")
Expand All @@ -53,22 +58,20 @@ class Cli : CliktCommand() {
private var reusedChunks: Int = 0
private var sizeDupe: Long = 0L

// Handles one command-line entry: optionally prints its path, ignores it unless it is a regular
// file, then chunks it and forwards every chunk to onNewChunk.
// NOTE(review): this is a diff view without +/- markers, so two versions of the function are
// interleaved. Judging by the commit title ("add --verbose and re-use chunker"), the lines gated
// on `verbose` and the signature taking a Chunker are presumably the added ones.

// Signature that takes only the file.
private fun onEachFile(file: File) {
println()
println(file.absolutePath)
// Signature that additionally receives the Chunker to use, as passed by run().
private fun onEachFile(chunker: Chunker, file: File) {
// Blank line plus absolute path, printed only when the verbose flag is set.
if (verbose) {
println()
println(file.absolutePath)
}
// Entries that are not regular files are never chunked.
if (!file.isFile) {
println(" not a file, ignoring...")
if (verbose) println(" not a file, ignoring...")
return
}
// Per-call digest and Chunker setup; the same construction is also visible inside run() in
// this view. The lambda renders the SHA-256 digest of the bytes as lowercase two-digit hex.
val digest = MessageDigest.getInstance("SHA-256")
val chunker = Chunker(size, normalization) { bytes ->
digest.digest(bytes).fold("") { str, it -> str + "%02x".format(it) }
}
// Chunk the file; every chunk goes to onNewChunk for printing and dedup accounting.
chunker.chunk(file) { chunk -> onNewChunk(chunk) }
}

private fun onNewChunk(chunk: Chunk) {
println("hash=${chunk.hash} offset=${chunk.offset} size=${chunk.length}")
if (verbose) println("hash=${chunk.hash} offset=${chunk.offset} size=${chunk.length}")
if (checkDedupRatio) {
if (chunk.hash in chunks) {
sizeDupe += chunk.length
Expand Down
7 changes: 4 additions & 3 deletions lib/src/main/java/org/calyxos/seedvault/chunker/Chunker.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import org.calyxos.seedvault.chunker.GearTableCreator.GEAR_SIZE
import java.io.File
import java.lang.Byte.toUnsignedInt
import kotlin.math.min
import kotlin.math.roundToInt

/**
 * One chunk as handed to the callback passed to `Chunker.chunk`.
 *
 * This is a regular (non-data) class, so two instances are only equal when they are the same
 * object, even if all four properties match.
 *
 * @property offset offset reported for this chunk (presumably its start position within the
 * chunked input; confirm against `Chunker`).
 * @property length length reported for this chunk (presumably the number of bytes in [data];
 * confirm against `Chunker`).
 * @property data the bytes carried by this chunk.
 * @property hash string identifying this chunk (presumably the result of the hash function
 * given to `Chunker`; confirm there).
 */
class Chunk(
    val offset: Long,
    val length: Int,
    val data: ByteArray,
    val hash: String,
)

Expand All @@ -34,10 +35,10 @@ class Chunker(
private val blob = RingByteArray(maxSize * 2)
private var offset: Long = 0

constructor(avgSize: Int, normalization: Int = 2, hashFunction: (ByteArray) -> String) : this(
minSize = avgSize.floorDiv(4),
constructor(avgSize: Int, normalization: Int = 1, hashFunction: (ByteArray) -> String) : this(
minSize = avgSize.floorDiv(2),
avgSize = avgSize,
maxSize = avgSize * 8,
maxSize = (avgSize * 2.5).roundToInt(),
normalization = normalization,
gearTable = Const.GEAR,
hashFunction = hashFunction,
Expand Down

0 comments on commit dfa6241

Please sign in to comment.