-
Notifications
You must be signed in to change notification settings - Fork 11
File Test Rule
The File Analyzer Tool walks a directory tree and performs a "File Test" on each file that is encountered. The application framework allows new File Tests to be quickly developed and deployed into the application. The results of each File Test are compiled into a table that summarizes the results of the analysis.
A File Test is a simple set of actions that are performed upon a single file, such as filename validation, file size statistical analysis, checksum calculation, or file type extraction. Depending on the action, the content of the file may or may not be read. Each File Test is configured with filters that determine which files will be processed by the File Test (e.g. only image files).
Each File Test will generate a table of results. The number of columns and the definition of the columns will vary from test to test. For example, a file type analysis will report the file extension and the number of files discovered with that extension. The checksum file tests will report the name of a file and the checksum string associated with that file.
The File Analyzer tool can be run as a GUI in which the results are displayed in a table. The File Analyzer can also be run in batch mode. In batch mode, the results will be written to a tab-separated file. The GUI version of the application allows the results of multiple executions to be merged. The merged information can be filtered to display matching values and mismatched values.
## Components of a File Test
public class NameChecksum extends DefaultFileTest {
public String toString() {
return "Sort By Checksum";
}
public String getShortName(){return "Checksum";}
public String getDescription() {
return "This test reports the checksum for a given filename.\n" +
"The summary report will identify files with the same checksum value.\n" +
"You may select from a number of standard checksum algorithms.";
}
}
/**
 * Sets up the file filters for this test by delegating to
 * {@code initAllFilters()}.
 */
public void initFilters() {
    this.initAllFilters();
}
/* from DefaultFileTest.java*/
/**
 * Adds one instance of each file test filter to {@code filters}, in this
 * order: default, AV, image, TIFF, JPEG.
 */
public void initAllFilters() {
    // Generic filter first, then the media-specific ones.
    this.filters.add(new DefaultFileTestFilter());
    this.filters.add(new AVFileTestFilter());

    // Image filters: the general one, followed by the format-specific ones.
    this.filters.add(new ImageFileTestFilter());
    this.filters.add(new TiffFileTestFilter());
    this.filters.add(new JpegFileTestFilter());
}
/** Name of the property through which the checksum algorithm is registered and later looked up. */
public static final String ALGORITHM = "Algorithm";
/**
 * Checksum algorithms that can be selected for this test.
 *
 * Each constant carries the algorithm name that is handed to
 * {@link MessageDigest#getInstance(String)}.
 *
 * The explicit {@code static} modifier is omitted because a nested enum is
 * implicitly static.
 */
enum Algorithm {
    MD5("MD5"),
    SHA1("SHA-1"),
    SHA256("SHA-256"),
    SHA384("SHA-384"),
    SHA512("SHA-512");

    /**
     * Algorithm name passed to {@code MessageDigest}. Final, because enum
     * constants are shared singletons and must not be reconfigured at runtime.
     */
    final String algorithm;

    /**
     * @param s the algorithm name to request from {@code MessageDigest}
     */
    Algorithm(String s) {
        algorithm = s;
    }

    /**
     * Creates a new digest for this algorithm.
     *
     * @return a fresh {@code MessageDigest} for this constant's algorithm name
     * @throws NoSuchAlgorithmException if no installed provider supports the algorithm
     */
    MessageDigest getInstance() throws NoSuchAlgorithmException {
        return MessageDigest.getInstance(algorithm);
    }
}
public NameChecksum(FTDriver dt) {
super(dt);
keymap = new HashMap<String, List<ChecksumStats>>();
this.ftprops.add(new FTPropEnum(dt, this.getClass().getName(), ALGORITHM, "algorithm",
"Checksum Algorithm", Algorithm.values(), Algorithm.MD5));
}
/**
 * Creates the statistics object for a result key.
 *
 * @param key the result key the new statistics object is created for
 * @return a new {@code ChecksumStats} produced by the shared generator
 */
public Stats createStats(String key) {
    final ChecksumStats.Generator generator = ChecksumStats.Generator.INSTANCE;
    return generator.create(key);
}
/**
 * @return the statistics configuration held in {@code ChecksumStats.details}
 */
public StatsItemConfig getStatsDetails() {
    final StatsItemConfig config = ChecksumStats.details;
    return config;
}
/*from ChecksumStats.java*/
/**
 * Statistics recorded for one result of the checksum test.
 */
public class ChecksumStats extends Stats {

    /** Duplicate classification of a result; the initial value used below is {@code Unique}. */
    public static enum DUP {Unique, FirstFound, Duplicate;}

    /**
     * The statistics items of a checksum result, in declaration order.
     * NOTE(review): the numeric arguments (400, 300) look like display widths - confirm.
     */
    public static enum ChecksumStatsItems implements StatsItemEnum {
        Key(StatsItem.makeStringStatsItem("Key", 400)),
        Data(StatsItem.makeStatsItem(Object.class, "Data", 300).setInitVal("")),
        IsDuplicate(StatsItem.makeEnumStatsItem(YN.class, "Is Duplicate").setInitVal(YN.N)),
        DuplicateStat(StatsItem.makeEnumStatsItem(DUP.class, "Duplicate Stat").setInitVal(DUP.Unique)),
        MatchCount(StatsItem.makeIntStatsItem("Num of Matches").setInitVal(1));

        /**
         * Item definition bound to this constant. Final, because enum constants
         * are shared singletons and the binding must not change after construction.
         */
        final StatsItem si;

        ChecksumStatsItems(StatsItem si) {
            this.si = si;
        }

        /** @return the item definition bound to this constant */
        public StatsItem si() {
            return si;
        }
    }

    /** Singleton factory that creates a {@code ChecksumStats} for a given key. */
    public static enum Generator implements StatsGenerator {
        INSTANCE;

        /**
         * @param key the result key
         * @return a new {@code ChecksumStats} constructed from {@code key}
         */
        public ChecksumStats create(String key) {
            return new ChecksumStats(key);
        }
    }
}
Result Key: defines the unique key value that will be saved for each file (or set of files) that is processed
/**
 * Returns the result key for a file, which is its path relative to the root
 * as computed by {@code getRelPath}.
 *
 * @param f the file being processed
 * @return the key under which the result for {@code f} is stored
 */
public String getKey(File f) {
    final String relativePath = getRelPath(f);
    return relativePath;
}
/*from DefaultFileTest.java*/
/**
 * Returns the file's absolute path with the leading characters that
 * correspond to the root's absolute path removed.
 *
 * The method only skips as many characters as the root path is long; it does
 * not verify that {@code f} actually lies beneath the root.
 *
 * @param f the file whose path is shortened
 * @return the remainder of the file's absolute path after the root prefix
 */
public String getRelPath(File f) {
    final String filePath = f.getAbsolutePath();
    final int rootLength = getRoot().getAbsolutePath().length();
    return filePath.substring(rootLength);
}
In the example below, a checksum is generated for the file using the algorithm selected by the user.
/**
 * Computes the checksum of a file with the algorithm stored under the
 * {@code ALGORITHM} property.
 *
 * @param f the file whose content is digested
 * @return the digest as a lowercase hexadecimal string (two characters per
 *         byte), or {@code null} if the algorithm is unavailable or the file
 *         cannot be read; in the failure case the stack trace is printed
 */
public String getChecksum(File f) {
    Algorithm algorithm = (Algorithm)getProperty(ALGORITHM);
    FileInputStream fis = null;
    try {
        MessageDigest md = algorithm.getInstance();
        fis = new FileInputStream(f);
        // 8 KiB read buffer; the size only affects the number of read calls,
        // not the resulting digest.
        byte[] dataBytes = new byte[8192];
        int nread;
        while ((nread = fis.read(dataBytes)) != -1) {
            md.update(dataBytes, 0, nread);
        }
        byte[] mdbytes = md.digest();
        // StringBuilder: the buffer is method-local, so no synchronization is needed.
        StringBuilder sb = new StringBuilder(mdbytes.length * 2);
        for (int i = 0; i < mdbytes.length; i++) {
            // Adding 0x100 forces three hex digits; dropping the first leaves a
            // zero-padded two-digit value for the byte.
            sb.append(Integer.toString((mdbytes[i] & 0xFF) + 0x100, 16).substring(1));
        }
        return sb.toString();
    } catch (NoSuchAlgorithmException e) {
        e.printStackTrace();
    } catch (IOException e) {
        // Also covers FileNotFoundException, which is a subclass of IOException.
        e.printStackTrace();
    } finally {
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return null;
}
/**
 * Runs the test on a single file; the result is the file's checksum as
 * returned by {@code getChecksum}.
 *
 * @param f the file to test
 * @return the checksum string, or {@code null} when {@code getChecksum} returns {@code null}
 */
public Object fileTest(File f) {
    final String checksum = getChecksum(f);
    return checksum;
}
/**
 * Reports every file as testable; the argument is not inspected.
 *
 * @param f the candidate file (ignored)
 * @return always {@code true}
 */
public boolean isTestable(File f) {
    return true;
}
/**
 * @return always {@code false}
 */
// NOTE(review): presumably tells the framework not to run this test on directories - confirm.
public boolean isTestDirectory() {
    return false;
}
/**
 * @return always {@code false}
 */
// NOTE(review): presumably controls whether the root directory itself is processed - confirm.
public boolean processRoot() {
    return false;
}
/**
 * @return always {@code true}
 */
// NOTE(review): presumably tells the framework to run this test on regular files - confirm.
public boolean isTestFiles() {
    return true;
}
/**
 * Empties {@code keymap}.
 */
@Override
public void init() {
    keymap.clear();
}
/**
 * Marks duplicates in every {@code keymap} group whose size is not exactly one.
 *
 * Within such a group every entry is flagged as a duplicate and receives the
 * group size as its match count. The first entry in iteration order is
 * classified {@code FirstFound}, all later ones {@code Duplicate}.
 * Groups with a single entry are left untouched.
 */
@Override
public void refineResults() {
    for (List<ChecksumStats> group : keymap.values()) {
        final int groupSize = group.size();
        if (groupSize != 1) {
            boolean first = true;
            for (ChecksumStats entry : group) {
                entry.setVal(ChecksumStatsItems.IsDuplicate, YN.Y);

                final ChecksumStats.DUP status = first
                    ? ChecksumStats.DUP.FirstFound
                    : ChecksumStats.DUP.Duplicate;
                entry.setVal(ChecksumStatsItems.DuplicateStat, status);
                first = false;

                entry.setVal(ChecksumStatsItems.MatchCount, groupSize);
            }
        }
    }
}
public class ActionRegistry extends Vector<FileTest> {
private static final long serialVersionUID = 1L;
boolean modifyAllowed = true;
public ActionRegistry(FTDriver dt, boolean modifyAllowed) {
this.modifyAllowed = modifyAllowed;
...
add(new NameChecksum(dt));