Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BXC-4376 - Virus scan for large files #1642

Merged
merged 2 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class VirusScanJob extends AbstractConcurrentDepositJob {
.getLogger(VirusScanJob.class);

private static final int MAX_RETRIES = 5;
private long maxStreamSize = 0;
sharonluong marked this conversation as resolved.
Show resolved Hide resolved

private ClamAVClient clamClient;

Expand Down Expand Up @@ -97,18 +98,20 @@ public void runJob() {
Path file = Paths.get(fileURI);

ScanResult result;
// Clamd is unable to find files with unicode characters in their path
if (charactersInBoundsForClam(file)) {
result = clamClient.scanWithResult(file);
} else {
// Scan files with unicode in their paths via streaming
try {
try {
if (shouldScanByPath(file)) {
// Scan entire file by path
log.debug("Scanning file {} by path", file);
result = clamClient.scanWithResult(file);
} else {
// Scanning via InputStream up to the max number of bytes
log.debug("Scanning file {} by stream", file);
result = clamClient.scanWithResult(Files.newInputStream(file));
} catch (IOException e) {
failures.put(fileURI.toString(), "Failed to scan file");
log.error("Unable to scan file {}", file, e);
return;
}
} catch (IOException e) {
failures.put(fileURI.toString(), "Failed to scan file");
log.error("Unable to scan file {}", file, e);
return;
}

switch (result.getStatus()) {
Expand Down Expand Up @@ -180,8 +183,23 @@ private boolean charactersInBoundsForClam(Path path) {
return CharMatcher.ascii().matchesAllOf(path.toString());
}

/**
* Determines if we should scan a file by its file path or use streaming. Files larger than the scanning
* limit or with characters in their path that clamd can't handle will return false.
* @param path
* @return
* @throws IOException
*/
private boolean shouldScanByPath(Path path) throws IOException {
return Files.size(path) < this.maxStreamSize && charactersInBoundsForClam(path);
}

// unused, no results to flush
@Override
protected void registrationAction() {
}

public void setMaxStreamSize(long maxStreamSize) {
this.maxStreamSize = maxStreamSize;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@
<property name="clamClient" ref="clamClient" />
<property name="maxQueuedJobs" value="${job.fileValidation.maxQueuedJobs:5}" />
<property name="executorService" ref="fileValidationExecutor" />
<property name="maxStreamSize" value="${clamd.maxStreamSize:64000000}" />
sharonluong marked this conversation as resolved.
Show resolved Hide resolved
</bean>

<bean id="FixityCheckJob" class="edu.unc.lib.boxc.deposit.validate.FixityCheckJob"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,44 +1,10 @@
package edu.unc.lib.boxc.deposit.validate;

import static edu.unc.lib.boxc.common.test.TestHelpers.setField;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.File;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.UUID;
import java.util.concurrent.ExecutorService;

import org.apache.commons.io.FileUtils;
import org.apache.jena.rdf.model.Bag;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.RDF;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import com.google.common.util.concurrent.MoreExecutors;

import edu.unc.lib.boxc.common.util.URIUtil;
import edu.unc.lib.boxc.deposit.api.RedisWorkerConstants.DepositState;
import edu.unc.lib.boxc.deposit.fcrepo4.AbstractDepositJobTest;
import edu.unc.lib.boxc.deposit.impl.model.DepositModelHelpers;
import edu.unc.lib.boxc.deposit.validate.VirusScanJob;
import edu.unc.lib.boxc.deposit.work.JobFailedException;
import edu.unc.lib.boxc.deposit.work.JobInterruptedException;
import edu.unc.lib.boxc.model.api.exceptions.RepositoryException;
Expand All @@ -51,6 +17,37 @@
import fi.solita.clamav.ClamAVClient;
import fi.solita.clamav.ScanResult;
import fi.solita.clamav.ScanResult.Status;
import org.apache.commons.io.FileUtils;
import org.apache.jena.rdf.model.Bag;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.RDF;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import java.io.File;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.UUID;
import java.util.concurrent.ExecutorService;

import static edu.unc.lib.boxc.common.test.TestHelpers.setField;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
*
Expand Down Expand Up @@ -92,6 +89,7 @@ public PID answer(InvocationOnMock invocation) throws Throwable {
});

when(clamClient.scanWithResult(any(Path.class))).thenReturn(scanResult);
when(clamClient.scanWithResult(any(InputStream.class))).thenReturn(scanResult);

File examplesFile = new File("src/test/resources/examples");
FileUtils.copyDirectory(examplesFile, depositDir);
Expand All @@ -104,6 +102,7 @@ private void initializeJob() {
job.setJobUUID(jobUUID);
job.setDepositUUID(depositUUID);
job.setDepositDirectory(depositDir);
job.setMaxStreamSize(300l);
setField(job, "pidMinter", pidMinter);
job.setClamClient(clamClient);
job.setPremisLoggerFactory(premisLoggerFactory);
Expand Down Expand Up @@ -142,7 +141,8 @@ public void passScanTest() throws Exception {

job.run();

verify(clamClient, times(3)).scanWithResult(any(Path.class));
verify(clamClient, times(1)).scanWithResult(any(InputStream.class));
verify(clamClient, times(2)).scanWithResult(any(Path.class));

verify(jobStatusFactory).setTotalCompletion(eq(jobUUID), eq(3));
verify(jobStatusFactory, times(3)).incrCompletion(eq(jobUUID), eq(1));
Expand Down Expand Up @@ -189,9 +189,8 @@ public void failOneScanTest() throws Exception {
when(result2.getStatus()).thenReturn(Status.FOUND);
File pdfFile = new File(depositDir, "pdf.pdf");
File textFile = new File(depositDir, "text.txt");
when(clamClient.scanWithResult(any(Path.class)))
.thenReturn(scanResult)
.thenReturn(result2);
when(clamClient.scanWithResult(any(InputStream.class))).thenReturn(scanResult);
when(clamClient.scanWithResult(any(Path.class))).thenReturn(result2);

Model model = job.getWritableModel();
Bag depBag = model.createBag(depositPid.getRepositoryPath());
Expand Down
Loading