Skip to content

Commit 32beb66

Browse files
committed
Try to fix guard again
1 parent 8c4cb84 commit 32beb66

File tree

2 files changed

+27
-34
lines changed

2 files changed

+27
-34
lines changed

tar-lz4-java/src/main/java/org/spoorn/tarlz4java/api/TarLz4Compressor.java

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,17 @@ public Path compress(String sourcePath, String destinationPath, String outputFil
131131
shouldLogProgress, logProgressPercentInterval, verbosity, excludeFiles, outputFile).run();
132132
}
133133
} else {
134+
long[] fileNumIntervals = TarLz4Util.getFileCountIntervalsFromSize(Path.of(sourcePath), numThreads);
135+
136+
// We may actually use fewer than numThreads threads if the way the files are split covers all files early,
137+
// or we have fewer files than numThreads.
138+
int actualNumThreads = (int) fileNumIntervals[fileNumIntervals.length - 2];
139+
134140
// Reuse futures array
135-
var futures = new Future[numThreads];
141+
var futures = new Future[actualNumThreads];
136142

137143
// Archive + Compression tasks
138-
submitArchiveTasks(sourcePath, destinationPath, fileCount, futures);
144+
submitArchiveTasks(sourcePath, destinationPath, fileCount, fileNumIntervals, actualNumThreads, futures);
139145

140146
// At this point, we have all our .tmp files which are standalone .tar.lz4 compressed archives for each slice
141147
// The .tmp files can't be opened themselves however, as they are a sliced part of the final output file.
@@ -149,7 +155,7 @@ public Path compress(String sourcePath, String destinationPath, String outputFil
149155
// This is made possible with the AsynchronousFileChannel API, which allows for writing bytes directly into a file
150156
// at some specified offset position.
151157

152-
mergeTmpArchives(destinationPath, futures);
158+
mergeTmpArchives(destinationPath, actualNumThreads, futures);
153159
}
154160

155161
log.debug("Finished compressing {} files from source={} to destination={}", fileCount, sourcePath, destinationPath);
@@ -160,11 +166,10 @@ public Path compress(String sourcePath, String destinationPath, String outputFil
160166
}
161167
}
162168

163-
private void submitArchiveTasks(String sourcePath, String destinationPath, long fileCount, Future<?>[] futures)
169+
private void submitArchiveTasks(String sourcePath, String destinationPath, long fileCount, long[] fileNumIntervals, int numThreads, Future<?>[] futures)
164170
throws IOException, ExecutionException, InterruptedException {
165171
// Get the file number intervals
166172
// TODO: Make it configurable to use file count vs this
167-
long[] fileNumIntervals = TarLz4Util.getFileCountIntervalsFromSize(Path.of(sourcePath), numThreads);
168173
long totalBytes = fileNumIntervals[fileNumIntervals.length - 1];
169174

170175
// In the multithreaded use case, we'll spin up `numThreads` threads, each writing to its own temporary file
@@ -211,10 +216,8 @@ private void submitArchiveTasks(String sourcePath, String destinationPath, long
211216
currPercent = 0;
212217
isDone = true;
213218
for (int i = 0; i < numThreads; i++) {
214-
if (futures[i] != null && tasks[i] != null) {
215-
currPercent += tasks[i].getBytesProcessed();
216-
isDone &= futures[i].isDone();
217-
}
219+
currPercent += tasks[i].getBytesProcessed();
220+
isDone &= futures[i].isDone();
218221
}
219222

220223
currPercent = currPercent * 100 / totalBytes;
@@ -228,56 +231,44 @@ private void submitArchiveTasks(String sourcePath, String destinationPath, long
228231

229232
// Wait for all futures to finish
230233
for (int i = 0; i < numThreads; i++) {
231-
if (futures[i] != null) {
232-
futures[i].get();
233-
tasks[i].fos.close(); // Clean up and close the .tmp file OutputStreams
234-
}
234+
futures[i].get();
235+
tasks[i].fos.close(); // Clean up and close the .tmp file OutputStreams
235236
}
236237
success = true;
237238
} finally {
238239
// Safety check
239240
if (!success) {
240241
// Wait for all futures to finish
241242
for (int i = 0; i < numThreads; i++) {
242-
if (futures[i] != null) {
243-
futures[i].get();
244-
tasks[i].fos.close(); // Clean up and close the .tmp file OutputStreams
245-
}
243+
futures[i].get();
244+
tasks[i].fos.close(); // Clean up and close the .tmp file OutputStreams
246245
}
247246
}
248247
}
249248

250249
log.debug("Finished compressing archive task for source={}, destination={}", sourcePath, destinationPath);
251250
}
252251

253-
private void mergeTmpArchives(String destinationPath, Future<?>[] futures) throws IOException, ExecutionException, InterruptedException {
252+
private void mergeTmpArchives(String destinationPath, int numThreads, Future<?>[] futures) throws IOException, ExecutionException, InterruptedException {
254253
// Pre-check which indices of futures are nonEmpty
255-
int n = 0;
256-
for (int i = 0; i < futures.length; i++) {
257-
if (futures[i] == null) {
258-
break;
259-
}
260-
n++;
261-
}
262-
263-
FileInputStream[] tmpFiles = new FileInputStream[n];
264-
FileChannel[] tmpChannels = new FileChannel[n];
265-
long[] fileChannelOffsets = new long[n];
254+
FileInputStream[] tmpFiles = new FileInputStream[numThreads];
255+
FileChannel[] tmpChannels = new FileChannel[numThreads];
256+
long[] fileChannelOffsets = new long[numThreads];
266257

267258
// This grabs a FileChannel to read for each .tmp file, and also calculates what all the fileChannel position offsets
268259
// we should use for each Thread, based on the size in bytes of each .tmp file
269-
for (int i = 0; i < n; i++) {
260+
for (int i = 0; i < numThreads; i++) {
270261
tmpFiles[i] = new FileInputStream(destinationPath + "_" + i + TMP_SUFFIX);
271262
tmpChannels[i] = tmpFiles[i].getChannel();
272-
if (i < n - 1) {
263+
if (i < numThreads - 1) {
273264
fileChannelOffsets[i + 1] = fileChannelOffsets[i] + tmpChannels[i].size();
274265
}
275266
}
276267

277268
// Create an AsynchronousFileChannel for the final output `.tar.lz4` file
278269
// This channel has the capability to WRITE to the file, or CREATE it if it doesn't yet exist
279270
AsynchronousFileChannel destChannel = AsynchronousFileChannel.open(Path.of(destinationPath), WRITE, CREATE);
280-
for (int i = 0; i < n; i++) {
271+
for (int i = 0; i < numThreads; i++) {
281272
int finalI = i;
282273
// Let's again spin up a thread for each .tmp file to write to its slice, or region in the final output file
283274
futures[i] = executorService.submit(() -> {
@@ -316,7 +307,7 @@ private void mergeTmpArchives(String destinationPath, Future<?>[] futures) throw
316307
}
317308

318309
// Wait for all futures to finish
319-
for (int i = 0; i < n; i++) {
310+
for (int i = 0; i < numThreads; i++) {
320311
futures[i].get();
321312
}
322313

tar-lz4-java/src/main/java/org/spoorn/tarlz4java/util/TarLz4Util.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
4141
* @param path Path to process
4242
* @param numIntervals Number of intervals
4343
* @return long[] that holds the file number indexes to split at. The last element will be the total size of the directory in bytes.
44+
* The second-to-last element will be the number of effective indices in the result that should be read.
4445
* @throws IOException If processing files fails
4546
*/
4647
public static long[] getFileCountIntervalsFromSize(Path path, int numIntervals) throws IOException {
47-
long[] res = new long[numIntervals + 1];
48+
long[] res = new long[numIntervals + 2];
4849
// index of res, file count, current size, previous size
4950
long[] state = {1, 0, 0, 0};
5051
long directorySize = getDirectorySize(path);
@@ -75,6 +76,7 @@ public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOExce
7576
}
7677
});
7778

79+
res[res.length - 2] = state[0] - 1; // -1 since we started at 1, and state[0] will be the "next" index
7880
res[res.length - 1] = directorySize;
7981
return res;
8082
}

0 commit comments

Comments
 (0)