Skip to content

Commit bbeb4f9

Browse files
authored
Merge pull request #16 from Nuix/continued_memory_refinements
More tweaks aimed at curbing RegexScanner memory usage
2 parents 64cf2a9 + fdb488e commit bbeb4f9

File tree

3 files changed

+61
-16
lines changed

3 files changed

+61
-16
lines changed

Java/src/main/java/com/nuix/superutilities/regex/ItemRegexMatchCollection.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package com.nuix.superutilities.regex;
22

3+
import java.io.IOException;
34
import java.util.ArrayList;
45
import java.util.List;
56
import java.util.stream.Collectors;
67

8+
import nuix.Case;
79
import nuix.Item;
810

911
/***
@@ -12,23 +14,28 @@
1214
*
1315
*/
1416
public class ItemRegexMatchCollection {
15-
private Item item = null;
17+
private String itemGuid = null;
1618
private List<RegexMatch> matchData = new ArrayList<RegexMatch>();
1719

1820
/***
1921
* Creates a new empty instance against the specified item.
2022
* @param item The item to associate.
2123
*/
2224
public ItemRegexMatchCollection(Item item){
23-
this.item = item;
25+
this.itemGuid = item.getGuid();
2426
}
2527

2628
/***
2729
* Gets the associated item.
2830
* @return The associated item.
2931
*/
30-
public Item getItem(){
31-
return item;
32+
public Item getItem(Case nuixCase){
33+
try {
34+
return nuixCase.search(String.format("guid:%s", itemGuid)).get(0);
35+
} catch (IOException e) {
36+
e.printStackTrace();
37+
return null;
38+
}
3239
}
3340

3441
/***
@@ -66,7 +73,8 @@ public List<RegexMatch> getPropertyMatches(){
6673
* @param matchEnd Offset in source text where this match ends
6774
*/
6875
public void addMatch(PatternInfo patternInfo, String location, boolean isContentMatch, String value, String valueContext, int matchStart, int matchEnd){
69-
matchData.add(new RegexMatch(patternInfo,location,isContentMatch,value,valueContext,matchStart,matchEnd));
76+
// Intern location in case there is a large amount of duplication of a small set of actual values
77+
matchData.add(new RegexMatch(patternInfo,location.intern(),isContentMatch,value,valueContext,matchStart,matchEnd));
7078
}
7179

7280
/***

Java/src/main/java/com/nuix/superutilities/regex/RegexScanError.java

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package com.nuix.superutilities.regex;
22

3+
import java.io.IOException;
4+
5+
import nuix.Case;
36
import nuix.Item;
47

58
/***
@@ -8,7 +11,7 @@
811
*
912
*/
1013
public class RegexScanError {
11-
private Item item = null;
14+
private String itemGuid = null;
1215
private PatternInfo patternInfo = null;
1316
private String location = null;
1417
private Exception exception = null;
@@ -21,7 +24,7 @@ public class RegexScanError {
2124
* @param exception The exception which was thrown
2225
*/
2326
public RegexScanError(Item item, PatternInfo patternInfo, String location, Exception exception){
24-
this.item = item;
27+
this.itemGuid = item.getGuid();
2528
this.patternInfo = patternInfo;
2629
this.location = location;
2730
this.exception = exception;
@@ -31,8 +34,17 @@ public RegexScanError(Item item, PatternInfo patternInfo, String location, Excep
3134
* Gets the associated item
3235
* @return The associated item
3336
*/
34-
public Item getItem() {
35-
return item;
37+
public Item getItem(Case nuixCase) {
38+
try {
39+
return nuixCase.search(String.format("guid:%s", itemGuid)).get(0);
40+
} catch (IOException e) {
41+
e.printStackTrace();
42+
return null;
43+
}
44+
}
45+
46+
public String getItemGuid() {
47+
return this.itemGuid;
3648
}
3749

3850
/***

Java/src/main/java/com/nuix/superutilities/regex/RegexScanner.java

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import com.nuix.superutilities.misc.FormatUtility;
2222

2323
import nuix.Item;
24+
import nuix.ItemCustomMetadataMap;
2425

2526
/***
2627
* Class for scanning a series of items with a series of regular expressions.
@@ -107,7 +108,7 @@ protected void fireScanError(RegexScanError error){
107108
if(error.getLocation() != null){
108109
errorMessage.add("\tLocation: "+error.getLocation());
109110
}
110-
errorMessage.add("\tItem GUID: "+error.getItem().getGuid());
111+
errorMessage.add("\tItem GUID: "+error.getItemGuid());
111112
logger.error(errorMessage.toString());
112113
logger.error(error.getException());
113114
}
@@ -444,12 +445,25 @@ protected ItemRegexMatchCollection scanItem(Item item) {
444445
* @return Map of "stringified" metadata properties for the specified item
445446
*/
446447
public static Map<String,String> getStringProperties(Item item, Set<String> specificProperties){
448+
// Note: property names are interned below (String.intern) because they are likely to be highly repetitive across items
449+
447450
Map<String,String> result = new HashMap<String,String>();
448-
for (Entry<String, Object> entry : item.getProperties().entrySet()) {
449-
if(specificProperties == null || specificProperties.contains(entry.getKey())){
450-
result.put(entry.getKey(), FormatUtility.getInstance().convertToString(entry.getValue()));
451+
452+
if(specificProperties == null || specificProperties.size() == 0) { // short-circuit || so a null set is never dereferenced
453+
// We're scanning all the properties
454+
for (Entry<String, Object> entry : item.getProperties().entrySet()) {
455+
result.put(entry.getKey().intern(), FormatUtility.getInstance().convertToString(entry.getValue()));
456+
}
457+
} else {
458+
// We're just scanning specific properties
459+
Map<String,Object> itemProperties = item.getProperties();
460+
for(String specificProperty : specificProperties) {
461+
if(itemProperties.containsKey(specificProperty)) {
462+
result.put(specificProperty.intern(), FormatUtility.getInstance().convertToString(itemProperties.get(specificProperty)));
463+
}
451464
}
452465
}
466+
453467
return result;
454468
}
455469

@@ -462,11 +476,22 @@ public static Map<String,String> getStringProperties(Item item, Set<String> spec
462476
*/
463477
public static Map<String,String> getStringCustomMetadata(Item item, Set<String> specificFields){
464478
Map<String,String> result = new HashMap<String,String>();
465-
for (Entry<String, Object> entry : item.getCustomMetadata().entrySet()) {
466-
if(specificFields == null || specificFields.contains(entry.getKey())){
467-
result.put(entry.getKey(), FormatUtility.getInstance().convertToString(entry.getValue()));
479+
480+
if(specificFields == null || specificFields.size() == 0) {
481+
// We're scanning all the custom metadata fields
482+
for (Entry<String, Object> entry : item.getCustomMetadata().entrySet()) {
483+
result.put(entry.getKey().intern(), FormatUtility.getInstance().convertToString(entry.getValue()));
484+
}
485+
} else {
486+
ItemCustomMetadataMap itemCustomMetadata = item.getCustomMetadata();
487+
// We're scanning specific custom metadata fields
488+
for(String specificField : specificFields) {
489+
if(itemCustomMetadata.containsKey(specificField)) {
490+
result.put(specificField.intern(),FormatUtility.getInstance().convertToString(itemCustomMetadata.get(specificField)));
491+
}
468492
}
469493
}
494+
470495
return result;
471496
}
472497

0 commit comments

Comments
 (0)