Skip to content

Commit dc7dea2

Browse files
authored
Merge pull request #1642 from UNC-Libraries/bxc-4376-virus
BXC-4376 - Virus scan for large files
2 parents 1a929e7 + b674317 commit dc7dea2

File tree

3 files changed

+67
-49
lines changed

3 files changed

+67
-49
lines changed

deposit-app/src/main/java/edu/unc/lib/boxc/deposit/validate/VirusScanJob.java

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public class VirusScanJob extends AbstractConcurrentDepositJob {
4242
.getLogger(VirusScanJob.class);
4343

4444
private static final int MAX_RETRIES = 5;
45+
private long maxStreamSize;
4546

4647
private ClamAVClient clamClient;
4748

@@ -97,18 +98,20 @@ public void runJob() {
9798
Path file = Paths.get(fileURI);
9899

99100
ScanResult result;
100-
// Clamd is unable to find files with unicode characters in their path
101-
if (charactersInBoundsForClam(file)) {
102-
result = clamClient.scanWithResult(file);
103-
} else {
104-
// Scan files with unicode in their paths via streaming
105-
try {
101+
try {
102+
if (shouldScanByPath(file)) {
103+
// Scan entire file by path
104+
log.debug("Scanning file {} by path", file);
105+
result = clamClient.scanWithResult(file);
106+
} else {
107+
// Scanning via InputStream up to the max number of bytes
108+
log.debug("Scanning file {} by stream", file);
106109
result = clamClient.scanWithResult(Files.newInputStream(file));
107-
} catch (IOException e) {
108-
failures.put(fileURI.toString(), "Failed to scan file");
109-
log.error("Unable to scan file {}", file, e);
110-
return;
111110
}
111+
} catch (IOException e) {
112+
failures.put(fileURI.toString(), "Failed to scan file");
113+
log.error("Unable to scan file {}", file, e);
114+
return;
112115
}
113116

114117
switch (result.getStatus()) {
@@ -180,8 +183,23 @@ private boolean charactersInBoundsForClam(Path path) {
180183
return CharMatcher.ascii().matchesAllOf(path.toString());
181184
}
182185

186+
/**
187+
* Determines whether a file should be scanned by its file path or by streaming. Returns false for files whose
188+
* size is at or above the max stream size, or whose path contains non-ASCII characters that clamd can't handle.
189+
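* @param path path of the file to evaluate
190+
* @return true if the file should be scanned by path, false if it should be scanned via streaming
191+
* @throws IOException if the size of the file cannot be read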
192+
*/
193+
private boolean shouldScanByPath(Path path) throws IOException {
194+
return Files.size(path) < this.maxStreamSize && charactersInBoundsForClam(path);
195+
}
196+
183197
// unused, no results to flush
184198
@Override
185199
protected void registrationAction() {
186200
}
187-
}
201+
202+
public void setMaxStreamSize(long maxStreamSize) {
203+
this.maxStreamSize = maxStreamSize;
204+
}
205+
}

deposit-app/src/main/webapp/WEB-INF/deposit-jobs-context.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@
191191
<property name="clamClient" ref="clamClient" />
192192
<property name="maxQueuedJobs" value="${job.fileValidation.maxQueuedJobs:5}" />
193193
<property name="executorService" ref="fileValidationExecutor" />
194+
<property name="maxStreamSize" value="${clamd.maxStreamSize:64000000}" />
194195
</bean>
195196

196197
<bean id="FixityCheckJob" class="edu.unc.lib.boxc.deposit.validate.FixityCheckJob"

deposit-app/src/test/java/edu/unc/lib/boxc/deposit/validate/VirusScanJobTest.java

Lines changed: 37 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,10 @@
11
package edu.unc.lib.boxc.deposit.validate;
22

3-
import static edu.unc.lib.boxc.common.test.TestHelpers.setField;
4-
import static org.junit.jupiter.api.Assertions.fail;
5-
import static org.mockito.Matchers.any;
6-
import static org.mockito.Matchers.anyBoolean;
7-
import static org.mockito.Matchers.anyInt;
8-
import static org.mockito.Matchers.anyString;
9-
import static org.mockito.Matchers.eq;
10-
import static org.mockito.Mockito.doReturn;
11-
import static org.mockito.Mockito.mock;
12-
import static org.mockito.Mockito.never;
13-
import static org.mockito.Mockito.times;
14-
import static org.mockito.Mockito.verify;
15-
import static org.mockito.Mockito.when;
16-
17-
import java.io.File;
18-
import java.io.InputStream;
19-
import java.nio.file.Path;
20-
import java.util.UUID;
21-
import java.util.concurrent.ExecutorService;
22-
23-
import org.apache.commons.io.FileUtils;
24-
import org.apache.jena.rdf.model.Bag;
25-
import org.apache.jena.rdf.model.Model;
26-
import org.apache.jena.rdf.model.Resource;
27-
import org.apache.jena.vocabulary.RDF;
28-
import org.junit.jupiter.api.AfterAll;
29-
import org.junit.jupiter.api.BeforeEach;
30-
import org.junit.jupiter.api.Test;
31-
import org.mockito.Mock;
32-
import org.mockito.invocation.InvocationOnMock;
33-
import org.mockito.stubbing.Answer;
34-
353
import com.google.common.util.concurrent.MoreExecutors;
36-
374
import edu.unc.lib.boxc.common.util.URIUtil;
385
import edu.unc.lib.boxc.deposit.api.RedisWorkerConstants.DepositState;
396
import edu.unc.lib.boxc.deposit.fcrepo4.AbstractDepositJobTest;
407
import edu.unc.lib.boxc.deposit.impl.model.DepositModelHelpers;
41-
import edu.unc.lib.boxc.deposit.validate.VirusScanJob;
428
import edu.unc.lib.boxc.deposit.work.JobFailedException;
439
import edu.unc.lib.boxc.deposit.work.JobInterruptedException;
4410
import edu.unc.lib.boxc.model.api.exceptions.RepositoryException;
@@ -51,6 +17,37 @@
5117
import fi.solita.clamav.ClamAVClient;
5218
import fi.solita.clamav.ScanResult;
5319
import fi.solita.clamav.ScanResult.Status;
20+
import org.apache.commons.io.FileUtils;
21+
import org.apache.jena.rdf.model.Bag;
22+
import org.apache.jena.rdf.model.Model;
23+
import org.apache.jena.rdf.model.Resource;
24+
import org.apache.jena.vocabulary.RDF;
25+
import org.junit.jupiter.api.AfterAll;
26+
import org.junit.jupiter.api.BeforeEach;
27+
import org.junit.jupiter.api.Test;
28+
import org.mockito.Mock;
29+
import org.mockito.invocation.InvocationOnMock;
30+
import org.mockito.stubbing.Answer;
31+
32+
import java.io.File;
33+
import java.io.InputStream;
34+
import java.nio.file.Path;
35+
import java.util.UUID;
36+
import java.util.concurrent.ExecutorService;
37+
38+
import static edu.unc.lib.boxc.common.test.TestHelpers.setField;
39+
import static org.junit.jupiter.api.Assertions.fail;
40+
import static org.mockito.Matchers.any;
41+
import static org.mockito.Matchers.anyBoolean;
42+
import static org.mockito.Matchers.anyInt;
43+
import static org.mockito.Matchers.anyString;
44+
import static org.mockito.Matchers.eq;
45+
import static org.mockito.Mockito.doReturn;
46+
import static org.mockito.Mockito.mock;
47+
import static org.mockito.Mockito.never;
48+
import static org.mockito.Mockito.times;
49+
import static org.mockito.Mockito.verify;
50+
import static org.mockito.Mockito.when;
5451

5552
/**
5653
*
@@ -92,6 +89,7 @@ public PID answer(InvocationOnMock invocation) throws Throwable {
9289
});
9390

9491
when(clamClient.scanWithResult(any(Path.class))).thenReturn(scanResult);
92+
when(clamClient.scanWithResult(any(InputStream.class))).thenReturn(scanResult);
9593

9694
File examplesFile = new File("src/test/resources/examples");
9795
FileUtils.copyDirectory(examplesFile, depositDir);
@@ -104,6 +102,7 @@ private void initializeJob() {
104102
job.setJobUUID(jobUUID);
105103
job.setDepositUUID(depositUUID);
106104
job.setDepositDirectory(depositDir);
105+
job.setMaxStreamSize(300l);
107106
setField(job, "pidMinter", pidMinter);
108107
job.setClamClient(clamClient);
109108
job.setPremisLoggerFactory(premisLoggerFactory);
@@ -142,7 +141,8 @@ public void passScanTest() throws Exception {
142141

143142
job.run();
144143

145-
verify(clamClient, times(3)).scanWithResult(any(Path.class));
144+
verify(clamClient, times(1)).scanWithResult(any(InputStream.class));
145+
verify(clamClient, times(2)).scanWithResult(any(Path.class));
146146

147147
verify(jobStatusFactory).setTotalCompletion(eq(jobUUID), eq(3));
148148
verify(jobStatusFactory, times(3)).incrCompletion(eq(jobUUID), eq(1));
@@ -189,9 +189,8 @@ public void failOneScanTest() throws Exception {
189189
when(result2.getStatus()).thenReturn(Status.FOUND);
190190
File pdfFile = new File(depositDir, "pdf.pdf");
191191
File textFile = new File(depositDir, "text.txt");
192-
when(clamClient.scanWithResult(any(Path.class)))
193-
.thenReturn(scanResult)
194-
.thenReturn(result2);
192+
when(clamClient.scanWithResult(any(InputStream.class))).thenReturn(scanResult);
193+
when(clamClient.scanWithResult(any(Path.class))).thenReturn(result2);
195194

196195
Model model = job.getWritableModel();
197196
Bag depBag = model.createBag(depositPid.getRepositoryPath());

0 commit comments

Comments
 (0)