From c19fbd46d6378de0814365a4e90f91bd0e7fa174 Mon Sep 17 00:00:00 2001 From: Ben Pennell Date: Wed, 13 Dec 2023 10:09:41 -0500 Subject: [PATCH 1/2] Use streaming scanning for files which are larger than the max scanning size --- .../boxc/deposit/validate/VirusScanJob.java | 38 +++++++--- .../webapp/WEB-INF/deposit-jobs-context.xml | 1 + .../deposit/validate/VirusScanJobTest.java | 75 +++++++++---------- 3 files changed, 66 insertions(+), 48 deletions(-) diff --git a/deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java b/deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java index da44308328..f4c6947181 100644 --- a/deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java +++ b/deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java @@ -42,6 +42,7 @@ public class VirusScanJob extends AbstractConcurrentDepositJob { .getLogger(VirusScanJob.class); private static final int MAX_RETRIES = 5; + private long maxStreamSize = 0; private ClamAVClient clamClient; @@ -97,18 +98,20 @@ public void runJob() { Path file = Paths.get(fileURI); ScanResult result; - // Clamd is unable to find files with unicode characters in their path - if (charactersInBoundsForClam(file)) { - result = clamClient.scanWithResult(file); - } else { - // Scan files with unicode in their paths via streaming - try { + try { + if (shouldScanByPath(file)) { + // Scan entire file by path + log.debug("Scanning file {} by path", file); + result = clamClient.scanWithResult(file); + } else { + // Scanning via InputStream up to the max number of bytes + log.debug("Scanning file {} by stream", file); result = clamClient.scanWithResult(Files.newInputStream(file)); - } catch (IOException e) { - failures.put(fileURI.toString(), "Failed to scan file"); - log.error("Unable to scan file {}", file, e); - return; } + } catch (IOException e) { + failures.put(fileURI.toString(), "Failed to scan file"); + log.error("Unable to scan file {}", file, e); + return; } switch (result.getStatus()) { @@ -180,8 +183,23 @@ private boolean charactersInBoundsForClam(Path path) { return CharMatcher.ascii().matchesAllOf(path.toString()); } + /** + * Determines if we should scan a file by its file path or use streaming. Files larger than the scanning + * limit or with characters in their path that clamd can't handle will return false. + * @param path + * @return + * @throws IOException + */ + private boolean shouldScanByPath(Path path) throws IOException { + return Files.size(path) < this.maxStreamSize && charactersInBoundsForClam(path); + } + // unused, no results to flush @Override protected void registrationAction() { } + + public void setMaxStreamSize(long maxStreamSize) { + this.maxStreamSize = maxStreamSize; + } } \ No newline at end of file diff --git a/deposit-app/src/main/webapp/WEB-INF/deposit-jobs-context.xml b/deposit-app/src/main/webapp/WEB-INF/deposit-jobs-context.xml index d553496622..98ede9e199 100644 --- a/deposit-app/src/main/webapp/WEB-INF/deposit-jobs-context.xml +++ b/deposit-app/src/main/webapp/WEB-INF/deposit-jobs-context.xml @@ -191,6 +191,7 @@ + Date: Wed, 13 Dec 2023 14:34:54 -0500 Subject: [PATCH 2/2] Remove assignment --- .../java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java b/deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java index f4c6947181..dcc3be253a 100644 --- a/deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java +++ b/deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java @@ -42,7 +42,7 @@ public class VirusScanJob extends AbstractConcurrentDepositJob { .getLogger(VirusScanJob.class); private static final int MAX_RETRIES = 5; - private long maxStreamSize = 0; + private long maxStreamSize; private ClamAVClient clamClient; @@ -202,4 +202,4 @@ protected void registrationAction() { public void setMaxStreamSize(long maxStreamSize) { this.maxStreamSize = maxStreamSize; } -} \ No newline at end of file +}