diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueGranularity.java b/warehouse/query-core/src/main/java/datawave/query/attributes/TemporalGranularity.java similarity index 59% rename from warehouse/query-core/src/main/java/datawave/query/attributes/UniqueGranularity.java rename to warehouse/query-core/src/main/java/datawave/query/attributes/TemporalGranularity.java index 521e7e57484..7c3c406fab8 100644 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueGranularity.java +++ b/warehouse/query-core/src/main/java/datawave/query/attributes/TemporalGranularity.java @@ -12,61 +12,61 @@ import datawave.data.normalizer.DateNormalizer; /** - * Represents different levels of granularity supported by the {@code #unique()} function. This class is also responsible for providing the functionality to - * transform values such that they conform to the specified granularity. + * Represents different levels of granularity supported by the {@code #unique()} and {@code #groupby()} functions. This class is also responsible for providing the + * functionality to transform values such that they conform to the specified granularity. */ -public enum UniqueGranularity { +public enum TemporalGranularity { /** - * A {@link UniqueGranularity} implementation that will always return the original value. + * A {@link TemporalGranularity} implementation that will always return the original value. */ ALL("ALL", Function.identity()), /** - * A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the day. Otherwise, the original + * A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the day. Otherwise, the original * value will be returned. */ TRUNCATE_TEMPORAL_TO_DAY("DAY", new DateTimeValueFormatter("yyyy-MM-dd")), /** - * A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the hour. Otherwise, the original + * A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the hour. Otherwise, the original * value will be returned. */ TRUNCATE_TEMPORAL_TO_HOUR("HOUR", new DateTimeValueFormatter("yyyy-MM-dd'T'HH")), /** - * A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the month. Otherwise, the original + * A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the month. Otherwise, the original * value will be returned. */ TRUNCATE_TEMPORAL_TO_MONTH("MONTH", new DateTimeValueFormatter("yyyy-MM")), /** - * A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the year. Otherwise, the original + * A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the year. Otherwise, the original * value will be returned. */ TRUNCATE_TEMPORAL_TO_YEAR("YEAR", new DateTimeValueFormatter("yyyy")), /** - * A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the second. Otherwise, the original - * value will be returned. + * A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the second. Otherwise, the + * original value will be returned.
*/ TRUNCATE_TEMPORAL_TO_SECOND("SECOND", new DateTimeValueFormatter("yyyy-MM-dd'T'HH:mm:ss")), /** - * A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the millisecond. Otherwise, the + * A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the millisecond. Otherwise, the * original value will be returned. */ TRUNCATE_TEMPORAL_TO_MILLISECOND("MILLISECOND", new DateTimeValueFormatter("yyyy-MM-dd'T'HH:mm:ss.SSS")), /** - * A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the tenth of an hour. Otherwise, the - * original value will be returned. + * A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the tenth of an hour. Otherwise, + * the original value will be returned. */ TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR("TENTH_OF_HOUR", new DateTimeValueFormatter("yyyy-MM-dd'T'HH:m", true)), /** - * A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the minute. Otherwise, the original - * value will be returned. + * A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the minute. Otherwise, the + * original value will be returned. */ TRUNCATE_TEMPORAL_TO_MINUTE("MINUTE", new DateTimeValueFormatter("yyyy-MM-dd'T'HH:mm")); @@ -74,38 +74,39 @@ public enum UniqueGranularity { private final Function function; @JsonCreator - public static UniqueGranularity of(String name) { + public static TemporalGranularity of(String name) { + name = name.toUpperCase(); switch (name) { case "ALL": - return UniqueGranularity.ALL; + return TemporalGranularity.ALL; case "YEAR": - return UniqueGranularity.TRUNCATE_TEMPORAL_TO_YEAR; + return TemporalGranularity.TRUNCATE_TEMPORAL_TO_YEAR; case "MONTH": - return UniqueGranularity.TRUNCATE_TEMPORAL_TO_MONTH; + return TemporalGranularity.TRUNCATE_TEMPORAL_TO_MONTH; case "DAY": - return UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY; + return TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY; case "HOUR": - return UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR; + return TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR; case "TENTH_OF_HOUR": - return UniqueGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR; + return TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR; case "MINUTE": - return UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE; + return TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE; case "SECOND": - return UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND; + return TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND; case "MILLISECOND": - return UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND; + return TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND; default: - throw new IllegalArgumentException("No " + UniqueGranularity.class.getSimpleName() + " exists with the name " + name); + throw new IllegalArgumentException("No " + TemporalGranularity.class.getSimpleName() + " exists with the name " + name); } } - UniqueGranularity(String name, Function function) { + TemporalGranularity(String name, Function function) { this.name = name; this.function = function; } /** - * Return the unique name of this {@link UniqueGranularity}. + * Return the unique name of this {@link TemporalGranularity}. 
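For orientation, a minimal usage sketch of the renamed enum, based only on the `of()` and `transform()` behavior visible in this diff (the class name and sample values are illustrative, not from the test suite):

```java
import datawave.query.attributes.TemporalGranularity;

public class TemporalGranularityDemo {
    public static void main(String[] args) {
        // of() now upper-cases its argument, so "day" and "DAY" resolve to the same constant.
        TemporalGranularity day = TemporalGranularity.of("day");

        // A datetime value is truncated to the day: 2019-01-15T12:30:45 -> 2019-01-15
        System.out.println(day.transform("2019-01-15T12:30:45"));

        // A non-datetime value falls through unchanged, per the javadoc above.
        System.out.println(day.transform("not-a-date"));

        // ALL is the identity transform.
        System.out.println(TemporalGranularity.ALL.transform("2019-01-15T12:30:45"));
    }
}
```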
* * @return the name */ diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java b/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java index 0c861f54e6b..5b3bfe50d52 100644 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java +++ b/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java @@ -7,14 +7,12 @@ import java.util.NavigableSet; import java.util.Objects; import java.util.Set; -import java.util.SortedSet; import org.apache.commons.lang.StringUtils; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonValue; import com.google.common.collect.Multimap; -import com.google.common.collect.Multimaps; import com.google.common.collect.Sets; import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeMultimap; @@ -30,19 +28,19 @@ */ public class UniqueFields implements Serializable, Cloneable { - private final TreeMultimap fieldMap = TreeMultimap.create(); + private static final String MOST_RECENT_UNIQUE = "_MOST_RECENT_"; + private final TreeMultimap fieldMap = TreeMultimap.create(); private boolean mostRecent = false; - private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_"; /** * Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by - * {@link UniqueFields#toString()}. Any fields not specified with a {@link UniqueGranularity} name will be added with the default ALL granularity. All + * {@link UniqueFields#toString()}. Any fields not specified with a {@link TemporalGranularity} name will be added with the default ALL granularity. All * whitespace will be stripped before parsing. See below for certain edge cases: *
     * <ul>
     * <li>Given null, null will be returned.</li>
     * <li>Given an empty or blank string, an empty {@link UniqueFields} will be returned.</li>
     * <li>Given {@code field1[],field2[DAY]}, or {@code field1,field2[DAY]}, or {@code field1[ALL],field2[DAY]}, a {@link UniqueFields} will be returned where - * field1 is added with {@link UniqueGranularity#ALL}, and field2 is added with {@link UniqueGranularity#TRUNCATE_TEMPORAL_TO_DAY}. + * field1 is added with {@link TemporalGranularity#ALL}, and field2 is added with {@link TemporalGranularity#TRUNCATE_TEMPORAL_TO_DAY}.</li>
     * </ul>
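To make the edge cases above concrete, a small parsing sketch assuming the behavior documented in this javadoc (field names and date values are hypothetical):

```java
import datawave.query.attributes.UniqueFields;

public class UniqueFieldsParsingDemo {
    public static void main(String[] args) {
        // field1 receives the default ALL granularity; field2 is truncated to the day.
        UniqueFields uniqueFields = UniqueFields.from("field1,field2[DAY]");

        // Fields are upper-cased when stored, so they are looked up by their upper-cased names.
        // ALL alone leaves the value untouched.
        System.out.println(uniqueFields.transformValue("FIELD1", "2019-01-15T12:30:45"));

        // DAY truncates the datetime to yyyy-MM-dd.
        System.out.println(uniqueFields.transformValue("FIELD2", "2019-01-15T12:30:45"));

        // Empty brackets are equivalent to ALL.
        System.out.println(UniqueFields.from("field1[]").getFieldMap());
    }
}
```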
* * @param string @@ -81,7 +79,7 @@ public static UniqueFields from(String string) { uniqueFields.setMostRecent(true); } else { // Add the field only if its not blank. Ignore cases with consecutive trailing commas like field1[ALL],, - uniqueFields.put(field, UniqueGranularity.ALL); + uniqueFields.put(field, TemporalGranularity.ALL); } } break; // There are no more fields to be parsed. @@ -100,7 +98,7 @@ public static UniqueFields from(String string) { uniqueFields.setMostRecent(true); } else { // Add the field only if its not blank. Ignore cases with consecutive commas like field1,,field2[DAY] - uniqueFields.put(field, UniqueGranularity.ALL); + uniqueFields.put(field, TemporalGranularity.ALL); } } currentIndex = nextComma + 1; // Advance to the start of the next field. @@ -119,11 +117,11 @@ public static UniqueFields from(String string) { String granularityList = string.substring((nextStartBracket + 1), nextEndBracket); // An empty granularity list, e.g. field[] is equivalent to field[ALL]. if (granularityList.isEmpty()) { - uniqueFields.put(field, UniqueGranularity.ALL); + uniqueFields.put(field, TemporalGranularity.ALL); } else { String[] granularities = StringUtils.split(granularityList, Constants.COMMA); for (String granularity : granularities) { - uniqueFields.put(field, parseGranularity(granularity)); + uniqueFields.put(field, TemporalGranularity.of(granularity)); } } } @@ -135,15 +133,6 @@ public static UniqueFields from(String string) { return uniqueFields; } - // Return the parsed granularity instance, or throw an exception if one could not be parsed. - private static UniqueGranularity parseGranularity(String granularity) { - try { - return UniqueGranularity.of(granularity.toUpperCase()); - } catch (Exception e) { - throw new IllegalArgumentException("Invalid unique granularity given: " + granularity); - } - } - /** * Return a clone of this class * @@ -165,7 +154,7 @@ public UniqueFields() {} * @param fieldMap * the field map to use */ - public UniqueFields(SortedSetMultimap fieldMap) { + public UniqueFields(SortedSetMultimap fieldMap) { putAll(fieldMap); } @@ -182,20 +171,20 @@ public UniqueFields clear() { * * @param fields */ - public UniqueFields set(Multimap fields) { + public UniqueFields set(Multimap fields) { return clear().putAll(fields); } /** - * Put a field-{@link UniqueGranularity} key pair into this {@link UniqueFields}. + * Put a field-{@link TemporalGranularity} key pair into this {@link UniqueFields}. 
* * @param field * the field - * @param uniqueGranularity + * @param temporalGranularity * the granularity */ - public UniqueFields put(String field, UniqueGranularity uniqueGranularity) { - fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), uniqueGranularity); + public UniqueFields put(String field, TemporalGranularity temporalGranularity) { + fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), temporalGranularity); return this; } @@ -205,7 +194,7 @@ public UniqueFields put(String field, UniqueGranularity uniqueGranularity) { * @param fieldMap * the field map to add entries from */ - public UniqueFields putAll(Multimap fieldMap) { + public UniqueFields putAll(Multimap fieldMap) { if (fieldMap != null) { for (String field : fieldMap.keySet()) { this.fieldMap.putAll(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), fieldMap.get(field)); @@ -221,7 +210,7 @@ public UniqueFields putAll(Multimap fieldMap) { * @param replacement */ public void replace(String field, String replacement) { - Collection value = fieldMap.removeAll(field); + Collection value = fieldMap.removeAll(field); if (value != null && !value.isEmpty()) { fieldMap.putAll(replacement, value); } @@ -241,7 +230,7 @@ public NavigableSet getFields() { * * @return the field map */ - public TreeMultimap getFieldMap() { + public TreeMultimap getFieldMap() { return fieldMap; } @@ -252,9 +241,9 @@ public TreeMultimap getFieldMap() { * the model to find mappings from */ public void remapFields(Multimap model) { - Multimap newFieldMap = TreeMultimap.create(fieldMap); + Multimap newFieldMap = TreeMultimap.create(fieldMap); for (String field : fieldMap.keySet()) { - Collection granularities = fieldMap.get(field); + Collection granularities = fieldMap.get(field); if (model.containsKey(field)) { model.get(field).forEach((newField) -> newFieldMap.putAll(newField, granularities)); } @@ -282,13 +271,13 @@ public boolean isEmpty() { * @return a set containing the result of each transformation */ public Set transformValues(String field, Collection values) { - Collection granularities = fieldMap.get(field); + Collection granularities = fieldMap.get(field); // If there is no granularity, or only the ALL granularity was specified, return the original values. - if (granularities.isEmpty() || (granularities.size() == 1 && granularities.contains(UniqueGranularity.ALL))) { + if (granularities.isEmpty() || (granularities.size() == 1 && granularities.contains(TemporalGranularity.ALL))) { return Sets.newHashSet(values); } else { Set transformedValues = new HashSet<>(); - for (UniqueGranularity granularity : granularities) { + for (TemporalGranularity granularity : granularities) { values.stream().map(granularity::transform).forEach(transformedValues::add); } return transformedValues; @@ -296,14 +285,14 @@ public Set transformValues(String field, Collection values) { } public String transformValue(String field, String value) { - Collection granularities = fieldMap.get(field); + Collection granularities = fieldMap.get(field); // If there is no granularity, or only the ALL granularity was specified, return the original values. 
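As a companion to the comment above, a sketch of the multi-granularity path, in which each granularity contributes its own transformed copy of the value (the field and value are illustrative; the result is an unordered set):

```java
import java.util.Collections;

import datawave.query.attributes.UniqueFields;

public class TransformValuesDemo {
    public static void main(String[] args) {
        UniqueFields uniqueFields = UniqueFields.from("EVENT_DATE[DAY,HOUR]");

        // One input value fans out to one transformed value per granularity,
        // e.g. [2019-01-15, 2019-01-15T12] in some order.
        System.out.println(uniqueFields.transformValues("EVENT_DATE", Collections.singletonList("2019-01-15T12:30:45")));
    }
}
```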
- if (granularities.isEmpty() || (granularities.size() == 1 && granularities.contains(UniqueGranularity.ALL))) { + if (granularities.isEmpty() || (granularities.size() == 1 && granularities.contains(TemporalGranularity.ALL))) { return value; } else { StringBuilder combinedValue = new StringBuilder(); String separator = ""; - for (UniqueGranularity granularity : granularities) { + for (TemporalGranularity granularity : granularities) { combinedValue.append(separator).append(granularity.transform(value)); } return combinedValue.toString(); @@ -331,7 +320,7 @@ public String toString() { String field = fieldIterator.next(); sb.append(field).append(Constants.BRACKET_START); // Write each granularity for the field. - Iterator valueIterator = fieldMap.get(field).iterator(); + Iterator valueIterator = fieldMap.get(field).iterator(); while (valueIterator.hasNext()) { sb.append(valueIterator.next().getName()); if (valueIterator.hasNext()) { diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java index fb06973f4d9..120115620c8 100644 --- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java @@ -11,6 +11,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.NavigableSet; import java.util.Objects; import java.util.Set; import java.util.SortedSet; @@ -26,11 +27,13 @@ import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; +import com.google.common.collect.TreeMultimap; import datawave.data.type.Type; import datawave.query.attributes.Attribute; import datawave.query.attributes.Attributes; import datawave.query.attributes.Document; +import datawave.query.attributes.TemporalGranularity; import datawave.query.attributes.TypeAttribute; /** @@ -127,6 +130,7 @@ public class DocumentGrouper { public static final String FIELD_AVERAGE_DIVISOR_SUFFIX = "_AVERAGE_DIVISOR"; public static final String FIELD_AVERAGE_SUFFIX = "_AVERAGE"; public static final String FIELD_COUNT_SUFFIX = "_COUNT"; + public static final String FIELD_VALUE_OVERRIDE = "_VALUE_OVERRIDE"; /** * Groups and aggregates fields from the entries in the given document and merges the new group information into the given {@link Groups} instance. @@ -145,7 +149,7 @@ public static void group(Map.Entry entry, GroupFields groupFields, private final Key documentKey; private final Document document; - private final Set groupFields; + private final TreeMultimap groupFields; private final Map reverseModelMappings; private final FieldAggregator.Factory fieldAggregatorFactory; @@ -159,30 +163,45 @@ public static void group(Map.Entry entry, GroupFields groupFields, private DocumentGrouper(Map.Entry documentEntry, GroupFields groupFields, Groups groups) { this.documentKey = documentEntry.getKey(); this.document = documentEntry.getValue(); - this.groupFields = groupFields.getGroupByFields(); + this.groupFields = groupFields.getGroupByFieldMap(); this.fieldAggregatorFactory = groupFields.getFieldAggregatorFactory(); this.reverseModelMappings = groupFields.getReverseModelMap(); this.groups = groups; - this.maxGroupSize = this.groupFields.size(); + this.maxGroupSize = this.groupFields.keySet().size(); } /** * Identify valid groups in the given document and aggregate relevant events to those groups. 
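Before the method body, it may help to see the shape of the multimap the grouper now receives in place of the old flat field set; a sketch with hypothetical fields:

```java
import com.google.common.collect.TreeMultimap;

import datawave.query.attributes.TemporalGranularity;

public class GroupByFieldMapShapeDemo {
    public static void main(String[] args) {
        // #GROUPBY(AGE,EVENT_DATE[DAY,HOUR]) arrives as a field-to-granularity multimap.
        TreeMultimap<String,TemporalGranularity> groupFields = TreeMultimap.create();
        groupFields.put("AGE", TemporalGranularity.ALL);
        groupFields.put("EVENT_DATE", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY);
        groupFields.put("EVENT_DATE", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR);

        // maxGroupSize counts distinct fields (2 here), not field-granularity pairs (3).
        System.out.println(groupFields.keySet().size());
    }
}
```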
*/ private void group() { - log.trace("apply to {} {}", documentKey, document); + if (log.isDebugEnabled()) { + log.debug("Applying grouping {} to {} {}", groupFields, documentKey, document); + } // If the document contains entries that indicate grouping has already been performed, we are seeing a document that was generated by // GroupingIterator.flatten(). No further grouping can occur. Extract the grouping information from the document and merge them into the current groups. if (isDocumentAlreadyGrouped()) { + if (log.isTraceEnabled()) { + log.trace("Groups from previous grouping found in document {}", documentKey); + } extractGroupsFromDocument(); - } else { // Otherwise, the document contains entries that have not yet been grouped and counted. + } else { + // Otherwise, the document contains entries that have not yet been grouped and counted. + if (log.isTraceEnabled()) { + log.trace("No previous groupings in document {}", documentKey); + } + // Index the document entries. indexDocumentEntries(); // Group the document entries. groupEntries(); // Aggregate fields only if there were aggregation fields specified and if any entries for aggregation were found. if (fieldAggregatorFactory.hasFieldsToAggregate() && !aggregateFieldsIndex.isEmpty()) { + if (log.isTraceEnabled()) { + log.trace("Fields found for aggregation in document {}", documentKey); + } aggregateEntries(); + } else { + log.trace("Aggregation skipped, either no fields targeted for aggregation or no entries found for targeted fields"); } // Merge the groups and aggregations we found in this particular group-by operation into the groups passed by the user. The separation is required @@ -206,6 +225,10 @@ private boolean isDocumentAlreadyGrouped() { */ @SuppressWarnings("unchecked") private void extractGroupsFromDocument() { + if (log.isTraceEnabled()) { + log.trace("Extracting existing groups from document {}", documentKey); + } + // Parse a field from each entry and store them in instanceToFields. The id indicates which grouping, count, and aggregated values go together. Multimap idToFields = HashMultimap.create(); for (Map.Entry>> entry : document.entrySet()) { @@ -254,14 +277,31 @@ private void extractGroupsFromDocument() { } else if (field.getBase().endsWith(FIELD_MAX_SUFFIX)) { String fieldName = removeSuffix(field.getBase(), FIELD_MAX_SUFFIX); fieldAggregator.mergeAggregator(MaxAggregator.of(fieldName, field.getAttribute())); - // We found a field that is part of the grouping. - } else if (!field.getBase().endsWith(FIELD_AVERAGE_DIVISOR_SUFFIX)) { + // We found a field that is part of the grouping. Any field ending with _AVERAGE_DIVISOR should have been handled above for the + // _AVERAGE_NUMERATOR case, and any field ending with _VALUE_OVERRIDE will be handled below. + } else if (!field.getBase().endsWith(FIELD_AVERAGE_DIVISOR_SUFFIX) && !field.getBase().endsWith(FIELD_VALUE_OVERRIDE)) { + + String overridingValue = null; + // The key for the overriding value is the key of its associated attribute with the _VALUE_OVERRIDE suffix and the instance appended. + String overridingValueKey = field.getBase() + FIELD_VALUE_OVERRIDE + "." + field.getInstance(); + Attribute overridingValueAttribute = document.get(overridingValueKey); + + // If an overriding value exists for the current attribute, fetch it from the document.
+ if (overridingValueAttribute != null) { + overridingValue = ((TypeAttribute) overridingValueAttribute).getType().getDelegate(); + } + + // Create the grouping attribute. Attribute attribute = field.getAttribute(); - GroupingAttribute newAttribute = new GroupingAttribute<>((Type) attribute.getData(), new Key(field.getBase()), true); + GroupingAttribute newAttribute = new GroupingAttribute<>((Type) attribute.getData(), new Key(field.getBase()), true, overridingValue); newAttribute.setColumnVisibility(attribute.getColumnVisibility()); grouping.add(newAttribute); } } + if (log.isTraceEnabled()) { + log.trace("Extracted grouping {} with count of {} from document {}", grouping, count, documentKey); + } // Create a new group and merge it into the existing groups. Group group = new Group(grouping, count); group.setFieldAggregator(fieldAggregator); @@ -288,10 +328,13 @@ private String removeSuffix(String str, String suffix) { * Identify which events in the document are targets for grouping and/or aggregation, and index them. */ private void indexDocumentEntries() { + if (log.isTraceEnabled()) { + log.trace("Indexing document entries for document {}", documentKey); + } for (Map.Entry> entry : document.entrySet()) { Field field = parseField(entry); // The current field is a target for grouping. - if (groupFields.contains(field.getBase())) { + if (groupFields.containsKey(field.getBase())) { groupFieldsIndex.index(field); } // The current field is a target for aggregation. @@ -305,8 +348,12 @@ private void indexDocumentEntries() { * Identify valid groupings consisting of target group pairs and create/update their corresponding {@link Group} in {@link #currentGroups}. */ private void groupEntries() { - // If we found any entries for target group fields, identify all valid groupings. - if (groupEntriesFound()) { + if (log.isTraceEnabled()) { + log.trace("Grouping entries for document {}", documentKey); + } + + // If we found any entries for target group fields after indexing, identify all valid groupings. + if (!groupFieldsIndex.isEmpty()) { // The groupings combinations that we find. Each combination may only have one Field from a particular target group field, e.g. if doing // #GROUP_BY(AGE,GENDER), a combination set will have at most one AGE field and one GENDER field. List> groupings = new ArrayList<>(); @@ -314,18 +361,27 @@ private void groupEntries() { // If we only have one target grouping field, we do not need to find any group combinations. All events for the given target group field should be // tracked as individual groupings. if (maxGroupSize == 1) { + log.trace("Max group size is 1, only singleton groupings required."); groupFieldsIndex.fields.values().stream().map(Collections::singleton).forEach(groupings::add); } else { + if (log.isTraceEnabled()) { + log.trace("Max group size is {}, attempting to find matches", maxGroupSize); + } // If we have any group field events with grouping contexts and instances, e.g. GENDER.FOO.1, it's possible that we will find direct matches to // other group field events with the same grouping context and instance (a direct match). These should be found first for efficiency purposes. if (groupFieldsIndex.hasFieldsWithPossibleDirectMatch()) { + log.trace("Possible direct matches."); groupings = getGroupingsWithDirectMatches(); + } else { + log.trace("No possible direct matches."); } // If we have any group field events that do not have a grouping context and instance, e.g. 
GENDER.1 or GENDER, then each one of those events - // should - // be combined with each existing group combination, effectively creating cartesian products. + // should be combined with each existing group combination, effectively creating cartesian products. if (groupFieldsIndex.hasFieldsWithoutDirectMatch()) { + log.trace("Possible indirect matches."); groupings = getGroupingsWithoutDirectMatches(groupings); + } else { + log.trace("No possible indirect matches."); } } @@ -334,6 +390,9 @@ private void groupEntries() { } else { // If no entries were found for any of the target group fields, create a single 'empty' group that will represent this document in the final // grouping results. + if (log.isTraceEnabled()) { + log.trace("No valid groupings found in group fields index, using empty grouping for document {}", documentKey); + } trackGroup(Grouping.emptyGrouping()); } } @@ -345,14 +404,23 @@ private void groupEntries() { * @return the direct match combinations */ private List> getGroupingsWithDirectMatches() { + if (log.isTraceEnabled()) { + log.trace("Searching for groupings with direct matches for document {}", documentKey); + } List> groupings = new ArrayList<>(); Set fieldsWithGroupingContextAndInstance = groupFieldsIndex.getFieldsWithPossibleDirectMatch(); // If we only saw one field with a grouping context and instance, return a list of singletons with each field event. We cannot create any combinations // at this time. if (fieldsWithGroupingContextAndInstance.size() == 1) { - Collection fields = groupFieldsIndex.getFields(fieldsWithGroupingContextAndInstance.iterator().next()); + String field = fieldsWithGroupingContextAndInstance.iterator().next(); + if (log.isTraceEnabled()) { + log.trace("Only one field found with a grouping context and instance: {}. Returning singleton groupings for each distinct value.", field); + } + Collection fields = groupFieldsIndex.getFields(field); fields.stream().map(Collections::singleton).forEach(groupings::add); } else { + log.trace("Multiple fields found with grouping contexts and instances."); + // If we have more than one target field with a grouping context and instance, determine the correct groupings based off matching the grouping // context and instance where possible with direct 1-to-1 matches, i.e. AGE.FOO.1 is a direct match to GENDER.FOO.1. Multimap,Field> groupingContextAndInstanceToField = HashMultimap.create(); @@ -391,10 +459,16 @@ private List> getGroupingsWithDirectMatches() { // If this is the first time we are seeing this field, then we have found the largest batch size for the grouping that this field is in. // Automatically keep this grouping. 
fieldToLargestGroupingSize.put(field.getBase(), fields.size()); + if (log.isTraceEnabled()) { + log.trace("Establishing largest batch size for field {} is {}", field.getBase(), fields.size()); + } keep = true; } } if (keep) { + if (log.isTraceEnabled()) { + log.trace("Retaining entries as direct matches for fields {}", groupingFields); + } fieldsToGroupings.put(groupingFields, Sets.newHashSet(fields)); } } @@ -411,6 +485,7 @@ private List> getGroupingsWithDirectMatches() { // {"20","MALE","123","Summary","East"} // {"10","FEMALE","123","Summary","West"} // {"10","FEMALE","123","Summary","East"} + log.trace("Creating cartesian products between direct matches."); for (SortedSet fields : fieldsToGroupings.keySet()) { Collection> currentGroupings = fieldsToGroupings.get(fields); if (groupings.isEmpty()) { @@ -439,6 +514,10 @@ private List> getGroupingsWithDirectMatches() { * @return the updated grouping combinations */ private List> getGroupingsWithoutDirectMatches(List> prevGroupings) { + if (log.isTraceEnabled()) { + log.trace("Searching for groupings without direct matches for document {}", documentKey); + } + List> groupings = new ArrayList<>(prevGroupings); for (String fieldName : groupFieldsIndex.getFieldsWithoutDirectMatch()) { Collection fields = groupFieldsIndex.getFields(fieldName); @@ -479,6 +558,10 @@ private List> getGroupingsWithoutDirectMatches(List> prevG * the group combination */ private void trackGroup(Collection groupedFields) { + if (log.isTraceEnabled()) { + log.trace("Tracking group {} for document {}", groupedFields, documentKey); + } + // The grouping context-instance pairs seen for all grouping keys generated in this method. Set> groupingContextAndInstances = new HashSet<>(); // The set of 'keys' that are used to identify individual distinct groupings. @@ -490,20 +573,22 @@ private void trackGroup(Collection groupedFields) { if (field.hasGroupingContext() && field.hasInstance()) { groupingContextAndInstances.add(Pair.with(field.getGroupingContext(), field.getInstance())); } - // If we have no grouping keys yet, create keys consisting of each value of the current field. + + // Create the set of distinct grouping attributes for each value of the field in the event. + Set> groupingAttributes = createGroupingAttributes(field.getBase(), field.getAttributes()); + + // If we have no grouping keys yet, create initial groupings that consist just of each distinct field value. if (groupings.isEmpty()) { - for (Attribute attribute : field.getAttributes()) { - GroupingAttribute copy = createCopyWithKey(attribute, field.getBase()); - groupings.add(new Grouping(copy)); + for (GroupingAttribute attribute : groupingAttributes) { + groupings.add(new Grouping(attribute)); } } else { - // Otherwise, create the cartesian product between the current field's value and each existing key. + // Otherwise, create the cartesian product between the current field's value and each existing grouping. List newGroupings = new ArrayList<>(); - for (Attribute attribute : field.getAttributes()) { - GroupingAttribute copy = createCopyWithKey(attribute, field.getBase()); + for (GroupingAttribute attribute : groupingAttributes) { for (Grouping grouping : groupings) { Grouping groupingCopy = new Grouping(grouping); - groupingCopy.add(copy); + groupingCopy.add(attribute); newGroupings.add(groupingCopy); } } @@ -531,6 +616,9 @@ private void trackGroup(Grouping grouping) { Group group = currentGroups.getGroup(grouping); // Create a group for the grouping if one does not already exist. 
if (group == null) { + if (log.isTraceEnabled()) { + log.trace("Creating new Group for grouping {}", grouping); + } group = new Group(grouping); group.setFieldAggregator(fieldAggregatorFactory.newInstance()); currentGroups.putGroup(group); @@ -542,23 +630,56 @@ private void trackGroup(Grouping grouping) { group.addDocumentVisibility(document.getColumnVisibility()); } - private GroupingAttribute createCopyWithKey(Attribute attribute, String key) { - Type type = ((TypeAttribute) attribute).getType(); - GroupingAttribute newAttribute = new GroupingAttribute<>(type, new Key(key), true); - newAttribute.setColumnVisibility(attribute.getColumnVisibility()); - return newAttribute; + private Set> createGroupingAttributes(String field, Set> attributes) { + Set> groupingAttributes = new HashSet<>(); + // Fetch the set of temporal granularities that field values should be transformed by for the given field. + NavigableSet granularities = groupFields.get(field); + // Create grouping attributes for each value of the field. + for (Attribute attribute : attributes) { + Type type = ((TypeAttribute) attribute).getType(); + // If no temporal granularities were specified, add a grouping attribute of the original value. + if (granularities.isEmpty()) { + GroupingAttribute newAttribute = new GroupingAttribute<>(type, new Key(field), true); + newAttribute.setColumnVisibility(attribute.getColumnVisibility()); + groupingAttributes.add(newAttribute); + } else { + // Otherwise, create a grouping attribute for each transformed version of the value resulting from applying each temporal granularity to it. + for (TemporalGranularity granularity : granularities) { + if (granularity == TemporalGranularity.ALL) { + // Do not transform the value for the granularity ALL. + GroupingAttribute newAttribute = new GroupingAttribute<>(type, new Key(field), true); + newAttribute.setColumnVisibility(attribute.getColumnVisibility()); + groupingAttributes.add(newAttribute); + } else { + String comparingValue = granularity.transform(type.getDelegateAsString()); + GroupingAttribute newAttribute = new GroupingAttribute<>(type, new Key(field), true, comparingValue); + newAttribute.setColumnVisibility(attribute.getColumnVisibility()); + groupingAttributes.add(newAttribute); + } + } + } + } + return groupingAttributes; } /** * Aggregate all qualifying events that are from target aggregation fields. */ private void aggregateEntries() { + if (log.isTraceEnabled()) { + log.trace("Aggregating entries for document {}", documentKey); + } + // Groupings were found in the document. Aggregate entries according to their association based on each entry's grouping context and instance. - if (groupEntriesFound()) { + if (!groupFieldsIndex.isEmpty()) { + if (log.isTraceEnabled()) { + log.trace("Groupings found for document {}. Fields will be aggregated to these groupings.", documentKey); + } // If we have any target events for aggregation that have a grouping context and instance, e.g. AGE.FOO.1, attempt to find groups that have matching // grouping context and instance pairs, and aggregate the events into those groups only. If we do not find any direct match at all for a specified // aggregation field, then all events for the aggregation field will be aggregated into each group. if (aggregateFieldsIndex.hasFieldsWithPossibleDirectMatch()) { + log.trace("Aggregating fields with possible direct matches."); // Attempt to find a direct match for the current aggregation target field. 
for (String fieldName : aggregateFieldsIndex.fieldToFieldsByGroupingContextAndInstance.keySet()) { Multimap,Field> groupingContextAndInstanceToFields = aggregateFieldsIndex.fieldToFieldsByGroupingContextAndInstance @@ -587,12 +708,16 @@ private void aggregateEntries() { // If there are any target aggregation events that do not have a grouping context, e.g. AGE or AGE.1, then all target aggregation events should be // aggregated into all groups. if (aggregateFieldsIndex.hasFieldsWithoutDirectMatch()) { + log.trace("Aggregating fields without direct match."); for (String fieldName : aggregateFieldsIndex.fieldsWithoutDirectMatch) { Collection fields = aggregateFieldsIndex.getFields(fieldName); currentGroups.aggregateToAllGroups(fields); } } } else { + if (log.isTraceEnabled()) { + log.trace("No groupings found for document {}. Fields will be aggregated to an empty grouping.", documentKey); + } // No groupings were found in the document. In this case, we will consider this document to contain a placeholder 'empty' grouping, and aggregate // all aggregation entries to the empty grouping. Group group = currentGroups.getGroup(Grouping.emptyGrouping()); @@ -604,10 +729,6 @@ private void aggregateEntries() { } } - private boolean groupEntriesFound() { - return !groupFieldsIndex.isEmpty(); - } - /** * Parses the relevant information from the given entry and returns a {@link Field} that contains the field name, group, instance, and the value. It is * assumed that the entry's key will have the format {@code }, {@code .} or {@code ....}. diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java index dc40eee9bdc..d9c3ee67541 100644 --- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java @@ -162,7 +162,7 @@ public void merge(Group other) { @Override public String toString() { - return new ToStringBuilder(this).append("attributes", grouping).append("attributeVisibilities", attributeVisibilities) + return new ToStringBuilder(this).append("grouping", grouping).append("attributeVisibilities", attributeVisibilities) .append("documentVisibilities", documentVisibilities).append("count", count).append("aggregatedFields", fieldAggregator).toString(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java index 5f7cd133e2b..04b75afa5ca 100644 --- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java @@ -1,6 +1,7 @@ package datawave.query.common.grouping; import java.io.Serializable; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -16,8 +17,10 @@ import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; +import com.google.common.collect.TreeMultimap; import datawave.query.Constants; +import datawave.query.attributes.TemporalGranularity; import datawave.query.jexl.JexlASTHelper; /** @@ -36,7 +39,7 @@ public class GroupFields implements Serializable { private static final String MAX = "MAX"; private static final String MODEL_MAP = "REVERSE_MODEL_MAP"; - private Set groupByFields = new HashSet<>(); + private TreeMultimap 
groupByFieldMap = TreeMultimap.create(); private Set sumFields = new HashSet<>(); private Set countFields = new HashSet<>(); private Set averageFields = new HashSet<>(); @@ -82,7 +85,7 @@ public static GroupFields from(String string) { String elementContents = element.substring(leftParen + 1, rightParen); switch (name) { case GROUP: - groupFields.groupByFields = parseSet(elementContents); + groupFields.groupByFieldMap = parseGroupByFields(elementContents); break; case SUM: groupFields.sumFields = parseSet(elementContents); @@ -108,13 +111,73 @@ public static GroupFields from(String string) { } } else { // Otherwise, the string may be in the legacy format of a comma-delimited string with group-fields only. - String[] groupByFields = StringUtils.split(string, Constants.PARAM_VALUE_SEP); - groupFields.setGroupByFields(Sets.newHashSet(groupByFields)); + groupFields.groupByFieldMap = parseGroupByFields(string); } } return groupFields; } + public static TreeMultimap parseGroupByFields(String string) { + TreeMultimap map = TreeMultimap.create(); + int currentIndex = 0; + int finalIndex = string.length() - 1; + while (currentIndex < finalIndex) { + int nextComma = string.indexOf(Constants.COMMA, currentIndex); + int nextStartBracket = string.indexOf(Constants.BRACKET_START, currentIndex); + // If there is no comma or start bracket to be found, we have a trailing field at the end of the string with no specified granularity, + // e.g. + // + // field1[ALL],field2[HOUR],field3 + // + // Add the field with the ALL granularity. + if (nextComma == -1 && nextStartBracket == -1) { + String field = string.substring(currentIndex); + if (!field.isEmpty()) { + // Add the field only if it's not blank. Ignore cases with consecutive trailing commas like field1[ALL], + map.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), TemporalGranularity.ALL); + } + break; // There are no more fields to be parsed. + } else if (nextComma != -1 && (nextStartBracket == -1 || nextComma < nextStartBracket)) { + // If a comma is located before the next starting bracket, we have a field without a granularity located somewhere before the end of the + // string, e.g. + // + // field1,field2[HOUR,DAY] + // field1[MINUTE],field2,field3[HOUR,DAY] + // field1,field2 + // + // Add the field with the ALL granularity. + String field = string.substring(currentIndex, nextComma); + if (!field.isEmpty()) { + // Add the field only if it's not blank. Ignore cases with consecutive commas like field1,,field2[DAY] + map.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), TemporalGranularity.ALL); + } + currentIndex = nextComma + 1; // Advance to the start of the next field. + } else { + // The current field has granularities defined within brackets, e.g. + // + // field[DAY,MINUTE] + // + // Parse and add each granularity for the field. + String field = string.substring(currentIndex, nextStartBracket); + int nextEndBracket = string.indexOf(Constants.BRACKET_END, currentIndex); + if (!field.isEmpty()) { + String granularityList = string.substring((nextStartBracket + 1), nextEndBracket); + // An empty granularity list, e.g. field[] is equivalent to field[ALL]. 
+ if (granularityList.isEmpty()) { + map.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), TemporalGranularity.ALL); + } else { + String[] granularities = StringUtils.split(granularityList, Constants.COMMA); + for (String granularity : granularities) { + map.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), TemporalGranularity.of(granularity)); + } + } + } + currentIndex = nextEndBracket + 1; // Advance to the start of the next field. + } + } + return map; + } + // Parse a set of fields from the string. private static Set parseSet(String str) { return Sets.newHashSet(StringUtils.split(str, Constants.COMMA)); @@ -146,7 +209,7 @@ public static GroupFields copyOf(GroupFields other) { } GroupFields copy = new GroupFields(); - copy.groupByFields = other.groupByFields == null ? null : Sets.newHashSet(other.groupByFields); + copy.groupByFieldMap = other.groupByFieldMap == null ? null : TreeMultimap.create(other.groupByFieldMap); copy.sumFields = other.sumFields == null ? null : Sets.newHashSet(other.sumFields); copy.countFields = other.countFields == null ? null : Sets.newHashSet(other.countFields); copy.averageFields = other.averageFields == null ? null : Sets.newHashSet(other.averageFields); @@ -157,13 +220,41 @@ public static GroupFields copyOf(GroupFields other) { } /** - * Set the fields to group by. + * Set the fields to group on, as well as the temporal granularities to subsequently group on. * - * @param fields - * the fields + * @param map + * the field map + */ + public void setGroupByFieldMap(Multimap map) { + this.groupByFieldMap.clear(); + this.groupByFieldMap.putAll(map); + } + + /** + * Put a field-{@link TemporalGranularity} mapping into this {@link GroupFields}. + * + * @param field + * the field + * @param temporalGranularity + * the granularity */ - public void setGroupByFields(Set fields) { - this.groupByFields = fields; + public void put(String field, TemporalGranularity temporalGranularity) { + this.groupByFieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), temporalGranularity); + } + + /** + * Put all field-granularity pairings from the provided field map into this {@link GroupFields}. 
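A usage sketch of the parsing branches above (field names are illustrative; the printed form assumes TreeMultimap's default toString()):

```java
import com.google.common.collect.TreeMultimap;

import datawave.query.attributes.TemporalGranularity;
import datawave.query.common.grouping.GroupFields;

public class ParseGroupByFieldsDemo {
    public static void main(String[] args) {
        // Bracketed granularities, empty brackets, and bare fields are all accepted.
        TreeMultimap<String,TemporalGranularity> map = GroupFields.parseGroupByFields("field1[DAY],field2[],field3");

        // Fields are deconstructed and upper-cased; empty or missing brackets default to ALL:
        // {FIELD1=[TRUNCATE_TEMPORAL_TO_DAY], FIELD2=[ALL], FIELD3=[ALL]}
        System.out.println(map);
    }
}
```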
+ * + * @param fieldMap + * the field map to add entries from + */ + public GroupFields putAll(Multimap fieldMap) { + if (fieldMap != null) { + for (String field : fieldMap.keySet()) { + this.groupByFieldMap.putAll(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), fieldMap.get(field)); + } + } + return this; } /** @@ -173,7 +264,8 @@ public void setGroupByFields(Set fields) { * the fields */ public void setSumFields(Set fields) { - this.sumFields = fields; + this.sumFields.clear(); + this.sumFields.addAll(fields); } /** @@ -183,7 +275,8 @@ public void setSumFields(Set fields) { * the fields */ public void setCountFields(Set fields) { - this.countFields = fields; + this.countFields.clear(); + this.countFields.addAll(fields); } /** @@ -193,7 +286,8 @@ public void setCountFields(Set fields) { * the fields */ public void setAverageFields(Set fields) { - this.averageFields = fields; + this.averageFields.clear(); + this.averageFields.addAll(fields); } /** @@ -203,7 +297,8 @@ public void setAverageFields(Set fields) { * the fields */ public void setMinFields(Set fields) { - this.minFields = fields; + this.minFields.clear(); + this.minFields.addAll(fields); } /** @@ -213,16 +308,50 @@ public void setMinFields(Set fields) { * the fields */ public void setMaxFields(Set fields) { - this.maxFields = fields; + this.maxFields.clear(); + this.maxFields.addAll(fields); } /** - * Return the fields to group by. + * Return the fields to group by along with their temporal granularities to group by. + * + * @return the field map + */ + public TreeMultimap getGroupByFieldMap() { + return groupByFieldMap; + } + + /** + * Return the set of fields to group by. * * @return the fields */ public Set getGroupByFields() { - return groupByFields; + return groupByFieldMap.keySet(); + } + + /** + * Return whether this {@link GroupFields} has any fields to group by. + * + * @return true if there are fields to group by, or false otherwise + */ + public boolean hasGroupByFields() { + return groupByFieldMap != null && !groupByFieldMap.isEmpty(); + } + + /** + * Replace a field mapping with another field. + * + * @param field + * the field + * @param replacement + * the replacement + */ + public void replaceGroupByField(String field, String replacement) { + Collection value = groupByFieldMap.removeAll(field); + if (!value.isEmpty()) { + groupByFieldMap.putAll(replacement, value); + } } /** @@ -270,15 +399,6 @@ public Set getMaxFields() { return maxFields; } - /** - * Return whether this {@link GroupFields} has any fields to group by. - * - * @return true if there are fields to group by, or false otherwise - */ - public boolean hasGroupByFields() { - return groupByFields != null && !groupByFields.isEmpty(); - } - /** * Return the set of all fields to group by, sum, count, average, and find the min and max of that must be included in projection. * @@ -286,7 +406,7 @@ public boolean hasGroupByFields() { */ public Set getProjectionFields() { Set fields = new HashSet<>(); - fields.addAll(this.groupByFields); + fields.addAll(this.groupByFieldMap.keySet()); fields.addAll(this.sumFields); fields.addAll(this.countFields); fields.addAll(this.averageFields); @@ -301,7 +421,13 @@ public Set getProjectionFields() { * Deconstruct the identifiers of all fields in this {@link GroupFields}. 
*/ public void deconstructIdentifiers() { - this.groupByFields = deconstructIdentifiers(this.groupByFields); + TreeMultimap newGroupByFieldMap = TreeMultimap.create(); + for (String field : groupByFieldMap.keySet()) { + String deconstructedId = JexlASTHelper.deconstructIdentifier(field); + newGroupByFieldMap.putAll(deconstructedId, groupByFieldMap.get(field)); + } + this.groupByFieldMap = newGroupByFieldMap; + this.sumFields = deconstructIdentifiers(this.sumFields); this.countFields = deconstructIdentifiers(this.countFields); this.averageFields = deconstructIdentifiers(this.averageFields); @@ -323,7 +449,14 @@ private Set deconstructIdentifiers(Set set) { * the reverse model map */ public void remapFields(Multimap modelMap, Map reverseModelMap) { - this.groupByFields = remap(this.groupByFields, modelMap); + final TreeMultimap expandedGroupByFields = TreeMultimap.create(groupByFieldMap); + for (String field : groupByFieldMap.keySet()) { + if (modelMap.containsKey(field.toUpperCase())) { + Collection granularities = groupByFieldMap.get(field); + modelMap.get(field).forEach(newField -> expandedGroupByFields.putAll(newField, granularities)); + } + } + this.groupByFieldMap = expandedGroupByFields; this.sumFields = remap(this.sumFields, modelMap); this.countFields = remap(this.countFields, modelMap); this.averageFields = remap(this.averageFields, modelMap); @@ -332,7 +465,7 @@ public void remapFields(Multimap modelMap, Map rev // Make a copy of the given reverse model map that only contains relevant mappings for efficiency. Set allFields = new HashSet<>(); - allFields.addAll(groupByFields); + allFields.addAll(groupByFieldMap.keySet()); allFields.addAll(sumFields); allFields.addAll(countFields); allFields.addAll(averageFields); @@ -346,8 +479,14 @@ public void remapFields(Multimap modelMap, Map rev } } - // now we can reduce the fields to only those that map to themselves wrt the reverse model map - this.groupByFields = reduce(this.groupByFields, this.reverseModelMap); + // Now we can reduce the fields to only those that map to themselves wrt the reverse model map. 
+ TreeMultimap reducedGroupByFields = TreeMultimap.create(groupByFieldMap); + for (String field : groupByFieldMap.keySet()) { + if (!field.equals(this.reverseModelMap.getOrDefault(field, field))) { + reducedGroupByFields.removeAll(field); + } + } + this.groupByFieldMap = reducedGroupByFields; this.sumFields = reduce(this.sumFields, this.reverseModelMap); this.countFields = reduce(this.countFields, this.reverseModelMap); this.averageFields = reduce(this.averageFields, this.reverseModelMap); @@ -400,21 +539,22 @@ public boolean equals(Object o) { return false; } GroupFields that = (GroupFields) o; - return Objects.equals(groupByFields, that.groupByFields) && Objects.equals(sumFields, that.sumFields) && Objects.equals(countFields, that.countFields) - && Objects.equals(averageFields, that.averageFields) && Objects.equals(minFields, that.minFields) - && Objects.equals(maxFields, that.maxFields) && Objects.equals(reverseModelMap, that.reverseModelMap); + return Objects.equals(groupByFieldMap, that.groupByFieldMap) && Objects.equals(sumFields, that.sumFields) + && Objects.equals(countFields, that.countFields) && Objects.equals(averageFields, that.averageFields) + && Objects.equals(minFields, that.minFields) && Objects.equals(maxFields, that.maxFields) + && Objects.equals(reverseModelMap, that.reverseModelMap); } @Override public int hashCode() { - return Objects.hash(groupByFields, sumFields, countFields, averageFields, minFields, maxFields, reverseModelMap); + return Objects.hash(groupByFieldMap, sumFields, countFields, averageFields, minFields, maxFields, reverseModelMap); } @JsonValue @Override public String toString() { StringBuilder sb = new StringBuilder(); - writeFormattedSet(sb, GROUP, this.groupByFields); + writeGroupByFieldMap(sb); writeFormattedSet(sb, SUM, this.sumFields); writeFormattedSet(sb, COUNT, this.countFields); writeFormattedSet(sb, AVERAGE, this.averageFields); @@ -424,6 +564,35 @@ public String toString() { return sb.toString(); } + // Write the fields to group on to the given string builder. + private void writeGroupByFieldMap(StringBuilder sb) { + if (!groupByFieldMap.isEmpty()) { + sb.append(GROUP).append(Constants.LEFT_PAREN); + Iterator fieldIterator = groupByFieldMap.keySet().iterator(); + while (fieldIterator.hasNext()) { + String field = fieldIterator.next(); + sb.append(field); + Iterator temporalGranularityIterator = groupByFieldMap.get(field).iterator(); + if (temporalGranularityIterator.hasNext()) { + sb.append(Constants.BRACKET_START); + while (temporalGranularityIterator.hasNext()) { + TemporalGranularity granularity = temporalGranularityIterator.next(); + sb.append(granularity.getName()); + if (temporalGranularityIterator.hasNext()) { + sb.append(Constants.COMMA); + } + } + sb.append(Constants.BRACKET_END); + } + + if (fieldIterator.hasNext()) { + sb.append(Constants.COMMA); + } + } + sb.append(Constants.RIGHT_PAREN); + } + } + // Write the given set if not empty to the given string builder.
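To show what writeGroupByFieldMap(...) above emits, a small round-trip sketch with hypothetical fields (the serialized form should parse back to an equal field map, given the @JsonValue/@JsonCreator pairing):

```java
import datawave.query.attributes.TemporalGranularity;
import datawave.query.common.grouping.GroupFields;

public class GroupFieldsRoundTripDemo {
    public static void main(String[] args) {
        GroupFields groupFields = new GroupFields();
        groupFields.put("AGE", TemporalGranularity.ALL);
        groupFields.put("EVENT_DATE", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY);

        // Granularity names are written inside brackets: GROUP(AGE[ALL],EVENT_DATE[DAY])
        String serialized = groupFields.toString();
        System.out.println(serialized);

        // Parsing the serialized form should reproduce the same group-by field map.
        System.out.println(GroupFields.from(serialized).getGroupByFieldMap());
    }
}
```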
private void writeFormattedSet(StringBuilder sb, String name, Set set) { if (!set.isEmpty()) { diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java index 54db8ddf715..ba7c35477e1 100644 --- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java @@ -1,5 +1,7 @@ package datawave.query.common.grouping; +import java.util.Objects; + import org.apache.accumulo.core.data.Key; import org.apache.commons.lang.builder.HashCodeBuilder; @@ -8,16 +10,37 @@ import datawave.query.attributes.TypeAttribute; /** - * This class serves as a wrapper for the {@link TypeAttribute} that overrides the default {@code equals()} and {@code hashCode()} behavior so that equality is - * determined by the attribute's field and value, and the hashCode is generated solely with the attribute's value. + * This class serves as a wrapper for the {@link TypeAttribute} that overrides the default {@link #equals(Object)} and {@link #hashCode()} behavior so that + * equality is determined by the attribute's field and value, and the hashCode is generated solely with the attribute's value. * * @param * the delegate type */ +@SuppressWarnings("rawtypes") public class GroupingAttribute> extends TypeAttribute { + /** + * This value, if not null, will be used in place of the original type's value in {@link #equals(Object)} and {@link #hashCode()}. This + * allows us to preserve the original value while also comparing based on a transformed version of it, such as when applying a temporal + * granularity in the case of {@code #GROUPBY(FOO[DAY])}. + */ + private final String overridingValue; + public GroupingAttribute(Type type, Key key, boolean toKeep) { + this(type, key, toKeep, null); + } + + public GroupingAttribute(Type type, Key key, boolean toKeep, String overridingValue) { super(type, key, toKeep); + this.overridingValue = overridingValue; + } + + public String getOverridingValue() { + return overridingValue; + } + + public boolean hasOverridingValue() { + return overridingValue != null; } /** @@ -32,9 +55,21 @@ public boolean equals(Object other) { if (null == other) { return false; } - if (other instanceof TypeAttribute) { - TypeAttribute otherType = (TypeAttribute) other; - return this.getType().equals(otherType.getType()) && isMetadataRowEqual(otherType); + if (other instanceof GroupingAttribute) { + GroupingAttribute otherType = (GroupingAttribute) other; + // If either attribute has an overriding value, determine equality based on the class of the types, the overriding value, and the metadata row. This + // allows us to make groupings that may involve versions of a field value that have been transformed, such as for #GROUPBY(FIELD[DAY]).
+ if (this.hasOverridingValue() || otherType.hasOverridingValue()) { + // @formatter:off + return Objects.equals(this.getType().getClass(), otherType.getType().getClass()) && + Objects.equals(this.overridingValue, otherType.overridingValue) && + isMetadataRowEqual(otherType); + // @formatter:on + } else { + // If neither attribute has an overriding value, fall back to comparing the original types directly. + return this.getType().equals(otherType.getType()) && isMetadataRowEqual(otherType); + } + + } return false; } @@ -57,6 +92,10 @@ private boolean isMetadataRowEqual(Attribute other) { */ @Override public int hashCode() { - return new HashCodeBuilder(2099, 2129).append(getType().getDelegateAsString()).toHashCode(); + return hasOverridingValue() ? hashCodeOf(overridingValue) : hashCodeOf(getType().getDelegateAsString()); + } + + private int hashCodeOf(String value) { + return new HashCodeBuilder(2099, 2129).append(value).toHashCode(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java index 1f88991207c..68014f9907b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java @@ -15,6 +15,7 @@ import com.google.common.base.Preconditions; import datawave.data.type.NumberType; +import datawave.data.type.StringType; import datawave.marking.MarkingFunctions; import datawave.query.attributes.Document; import datawave.query.attributes.TypeAttribute; @@ -103,7 +104,18 @@ public static Document createDocument(Group group, Key key, MarkingFunctions mar for (GroupingAttribute attribute : group.getGrouping()) { // Update the visibility to the combined visibilities of each visibility seen for this attribute in a grouping. attribute.setColumnVisibility(combineVisibilities(group.getVisibilitiesForAttribute(attribute), markingFunctions, false)); - document.put(attribute.getMetadata().getRow().toString(), attribute); + String attributeKey = attribute.getMetadata().getRow().toString(); + document.put(attributeKey, attribute); + // If the attribute has an overriding value, add an attribute for it so that we may fetch it later if we have subsequent groupings to perform. + if (attribute.hasOverridingValue()) { + // Write the overriding value. + StringType overridingValueType = new StringType(); + overridingValueType.setDelegate(attribute.getOverridingValue()); + TypeAttribute overridingValueAttribute = new TypeAttribute<>(overridingValueType, + new Key(attributeKey + DocumentGrouper.FIELD_VALUE_OVERRIDE), true); + overridingValueAttribute.setColumnVisibility(attribute.getColumnVisibility()); + document.put(attributeKey + DocumentGrouper.FIELD_VALUE_OVERRIDE, overridingValueAttribute); + } } // Add an attribute for the count. @@ -120,10 +132,8 @@ public static Document createDocument(Group group, Key key, MarkingFunctions mar for (Aggregator aggregator : entry.getValue().values()) { String field = aggregator.getField(); // Do not include an entry for the aggregation if it is null (indicating that no entries were found to be aggregated). The exception to this - // is - // the #COUNT aggregation. This will return a non-null value of 0 if no entries were found to be aggregated, and can be included in the - // final - // output. + // is the #COUNT aggregation. This will return a non-null value of 0 if no entries were found to be aggregated, and can be included in the + // final output.
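Tying the GroupingAttribute changes above together: two attributes with different raw datetimes compare equal once they carry the same DAY-truncated overriding value. A sketch with illustrative values:

```java
import org.apache.accumulo.core.data.Key;

import datawave.data.type.StringType;
import datawave.query.attributes.TemporalGranularity;
import datawave.query.common.grouping.GroupingAttribute;

public class OverridingValueEqualityDemo {
    public static void main(String[] args) {
        StringType morning = new StringType();
        morning.setDelegate("2019-01-15T08:05:00");
        StringType evening = new StringType();
        evening.setDelegate("2019-01-15T20:45:00");

        TemporalGranularity day = TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY;
        GroupingAttribute<String> first = new GroupingAttribute<>(morning, new Key("EVENT_DATE"), true,
                        day.transform(morning.getDelegateAsString()));
        GroupingAttribute<String> second = new GroupingAttribute<>(evening, new Key("EVENT_DATE"), true,
                        day.transform(evening.getDelegateAsString()));

        // Same type class, same overriding value (2019-01-15), same metadata row: equal,
        // with matching hash codes, so both land in the same group.
        System.out.println(first.equals(second));
        System.out.println(first.hashCode() == second.hashCode());
    }
}
```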
if (aggregator.getAggregation() != null) { switch (aggregator.getOperation()) { case SUM: diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index 4611a2cee38..657ed804fc6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -26,6 +26,7 @@ import datawave.query.Constants; import datawave.query.attributes.AttributeFactory; import datawave.query.attributes.UniqueFields; +import datawave.query.common.grouping.GroupFields; import datawave.query.config.ShardQueryConfiguration; import datawave.query.jexl.ArithmeticJexlEngines; import datawave.query.jexl.JexlASTHelper; @@ -34,6 +35,7 @@ import datawave.query.jexl.nodes.QueryPropertyMarker; import datawave.query.jexl.visitors.EventDataQueryExpressionVisitor; import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.language.functions.jexl.GroupByDate; import datawave.query.util.DateIndexHelper; import datawave.query.util.MetadataHelper; import datawave.util.StringUtils; @@ -206,7 +208,6 @@ public Set fields(MetadataHelper helper, Set datatypeFilter) { case QueryFunctions.MIN: case QueryFunctions.MAX: case QueryFunctions.AVERAGE: - case QueryFunctions.GROUPBY_FUNCTION: case QueryFunctions.NO_EXPANSION: case QueryFunctions.LENIENT_FIELDS_FUNCTION: case QueryFunctions.STRICT_FIELDS_FUNCTION: @@ -219,6 +220,14 @@ public Set fields(MetadataHelper helper, Set datatypeFilter) { case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION: case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_SECOND_FUNCTION: case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION: + case GroupByDate.GROUPBY_YEAR_FUNCTION: + case GroupByDate.GROUPBY_MONTH_FUNCTION: + case GroupByDate.GROUPBY_DAY_FUNCTION: + case GroupByDate.GROUPBY_HOUR_FUNCTION: + case GroupByDate.GROUPBY_TENTH_OF_HOUR_FUNCTION: + case GroupByDate.GROUPBY_MINUTE_FUNCTION: + case GroupByDate.GROUPBY_SECOND_FUNCTION: + case GroupByDate.GROUPBY_MILLISECOND_FUNCTION: // In practice each of these functions should be parsed from the query // almost immediately. 
This implementation is added for consistency. for (JexlNode arg : args) { @@ -234,6 +243,20 @@ public Set fields(MetadataHelper helper, Set datatypeFilter) { } } break; + case QueryFunctions.GROUPBY_FUNCTION: + for (JexlNode arg : args) { + if (arg instanceof ASTStringLiteral) { + // FIELD[GRANULARITY] is represented by an ASTStringLiteral + String literal = ((ASTStringLiteral) arg).getLiteral(); + fields.addAll(GroupFields.from(literal).getGroupByFields()); + } else { + // otherwise it's just an ASTIdentifier + for (String identifier : JexlASTHelper.getIdentifierNames(arg)) { + fields.addAll(GroupFields.from(identifier).getGroupByFields()); + } + } + } + break; case QueryFunctions.UNIQUE_FUNCTION: for (JexlNode arg : args) { if (arg instanceof ASTStringLiteral) { @@ -353,6 +376,14 @@ private static void verify(String name, int numArgs) { case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MONTH_FUNCTION: case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_YEAR_FUNCTION: case QueryFunctions.GROUPBY_FUNCTION: + case GroupByDate.GROUPBY_MILLISECOND_FUNCTION: + case GroupByDate.GROUPBY_SECOND_FUNCTION: + case GroupByDate.GROUPBY_MINUTE_FUNCTION: + case GroupByDate.GROUPBY_TENTH_OF_HOUR_FUNCTION: + case GroupByDate.GROUPBY_HOUR_FUNCTION: + case GroupByDate.GROUPBY_DAY_FUNCTION: + case GroupByDate.GROUPBY_MONTH_FUNCTION: + case GroupByDate.GROUPBY_YEAR_FUNCTION: case QueryFunctions.EXCERPT_FIELDS_FUNCTION: case QueryFunctions.MATCH_REGEX: case QueryFunctions.INCLUDE_TEXT: diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java index 8a5b5fc2dbc..82a25119907 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java @@ -25,9 +25,11 @@ import com.google.common.collect.ImmutableSet; import datawave.query.QueryParameters; +import datawave.query.attributes.TemporalGranularity; import datawave.query.attributes.UniqueFields; -import datawave.query.attributes.UniqueGranularity; +import datawave.query.common.grouping.GroupFields; import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.language.functions.jexl.GroupByDate; /** * Visits the query tree and extracts the parameters from any options functions present and adds them to the provided data {@link Map}. The supported options * functions are as follows:
 * <ul>
 * <li>{@code f:options()}: Expects a comma-delimited list of key/value pairs, e.g. {@code f:options('hit.list','true','limit.fields','FOO_1_BAR=3')}</li>
- * <li>{@code f:groupby()}: Expects a comma-delimited list of fields to group by, e.g. {@code f:groupby('field1','field2','field3')}</li>
+ * <li>{@code f:groupby()}: Expects a comma-delimited list of fields to group by and their granularity levels, e.g.
+ * {@code f:groupby('field1[ALL]','field2[DAY]','field3[MINUTE,SECOND]')}</li>
 * <li>{@code f:noexpansion()}: Expects a comma-delimited list of fields, e.g. {@code f:noExpansion('field1','field2','field3')}</li>
 * <li>{@code f:lenient()}: Expects a comma-delimited list of fields, e.g. {@code f:lenient('field1','field2','field3')}</li>
 * <li>{@code f:strict()}: Expects a comma-delimited list of fields, e.g. {@code f:strict('field1','field2','field3')}</li>
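// An illustrative pair of queries (field names hypothetical): the bracketed granularity form and the groupby_* functions added in this patch are equivalent
// ways to request day-level grouping on a date field, and both collapse into the single group.fields query option during this visitor pass:
//   GENDER == 'male' && f:groupby('BIRTH_DATE[DAY]','GENDER')
//   GENDER == 'male' && f:groupby_day('BIRTH_DATE') && f:groupby('GENDER')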
  • @@ -62,10 +65,19 @@ public class QueryOptionsFromQueryVisitor extends RebuildingVisitor { private static final Joiner JOINER = Joiner.on(',').skipNulls(); - private static final Set RESERVED = ImmutableSet.of(QueryFunctions.QUERY_FUNCTION_NAMESPACE, QueryFunctions.OPTIONS_FUNCTION, - QueryFunctions.UNIQUE_FUNCTION, UniqueFunction.UNIQUE_BY_DAY_FUNCTION, UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, - UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, - UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, + // @formatter:off + private static final Set RESERVED = ImmutableSet.of( + QueryFunctions.QUERY_FUNCTION_NAMESPACE, + QueryFunctions.OPTIONS_FUNCTION, + QueryFunctions.UNIQUE_FUNCTION, + UniqueFunction.UNIQUE_BY_DAY_FUNCTION, + UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, + UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, + UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, + UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, + UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, + UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, + UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, QueryFunctions.MOST_RECENT_PREFIX + QueryFunctions.UNIQUE_FUNCTION, QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_DAY_FUNCTION, QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, @@ -74,10 +86,26 @@ public class QueryOptionsFromQueryVisitor extends RebuildingVisitor { QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, - QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, QueryFunctions.GROUPBY_FUNCTION, - QueryFunctions.EXCERPT_FIELDS_FUNCTION, QueryFunctions.SUMMARY_FUNCTION, QueryFunctions.NO_EXPANSION, - QueryFunctions.LENIENT_FIELDS_FUNCTION, QueryFunctions.STRICT_FIELDS_FUNCTION, QueryFunctions.SUM, QueryFunctions.MIN, QueryFunctions.MAX, - QueryFunctions.AVERAGE, QueryFunctions.COUNT, QueryFunctions.RENAME_FUNCTION); + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, + QueryFunctions.GROUPBY_FUNCTION, + GroupByDate.GROUPBY_YEAR_FUNCTION, + GroupByDate.GROUPBY_MONTH_FUNCTION, GroupByDate.GROUPBY_DAY_FUNCTION, + GroupByDate.GROUPBY_HOUR_FUNCTION, + GroupByDate.GROUPBY_TENTH_OF_HOUR_FUNCTION, + GroupByDate.GROUPBY_MINUTE_FUNCTION, + GroupByDate.GROUPBY_SECOND_FUNCTION, + GroupByDate.GROUPBY_MILLISECOND_FUNCTION, + QueryFunctions.EXCERPT_FIELDS_FUNCTION, + QueryFunctions.SUMMARY_FUNCTION, + QueryFunctions.NO_EXPANSION, + QueryFunctions.LENIENT_FIELDS_FUNCTION, + QueryFunctions.STRICT_FIELDS_FUNCTION, + QueryFunctions.SUM, QueryFunctions.MIN, + QueryFunctions.MAX, + QueryFunctions.AVERAGE, + QueryFunctions.COUNT, + QueryFunctions.RENAME_FUNCTION); + // @formatter:on @SuppressWarnings("unchecked") public static T collect(T node, Object data) { @@ -150,14 +178,14 @@ private Object visitJunction(JexlNode node, Object data, Supplier crea } public enum UniqueFunction { - UNIQUE_BY_DAY(UniqueFunction.UNIQUE_BY_DAY_FUNCTION, UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY), - UNIQUE_BY_HOUR(UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR), - UNIQUE_BY_MILLISECOND(UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND), - 
UNIQUE_BY_MINUTE(UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE), - UNIQUE_BY_MONTH(UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, UniqueGranularity.TRUNCATE_TEMPORAL_TO_MONTH), - UNIQUE_BY_SECOND(UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND), - UNIQUE_BY_TENTH_OF_HOUR(UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, UniqueGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR), - UNIQUE_BY_YEAR(UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, UniqueGranularity.TRUNCATE_TEMPORAL_TO_YEAR); + UNIQUE_BY_DAY(UniqueFunction.UNIQUE_BY_DAY_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY), + UNIQUE_BY_HOUR(UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR), + UNIQUE_BY_MILLISECOND(UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND), + UNIQUE_BY_MINUTE(UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE), + UNIQUE_BY_MONTH(UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_MONTH), + UNIQUE_BY_SECOND(UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND), + UNIQUE_BY_TENTH_OF_HOUR(UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR), + UNIQUE_BY_YEAR(UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_YEAR); public static final String UNIQUE_BY_DAY_FUNCTION = "unique_by_day"; public static final String UNIQUE_BY_HOUR_FUNCTION = "unique_by_hour"; @@ -169,9 +197,9 @@ public enum UniqueFunction { public static final String UNIQUE_BY_YEAR_FUNCTION = "unique_by_year"; public final String name; - public final UniqueGranularity granularity; + public final TemporalGranularity granularity; - UniqueFunction(String name, UniqueGranularity granularity) { + UniqueFunction(String name, TemporalGranularity granularity) { this.name = name; this.granularity = granularity; } @@ -185,11 +213,6 @@ public static UniqueFunction findByName(String name) { } } - private void updateUniqueFields(ASTFunctionNode node, UniqueFields uniqueFields, Map optionsMap, UniqueFunction uniqueFunction) { - putFieldsFromChildren(node, uniqueFields, uniqueFunction.granularity); - updateUniqueFieldsOption(optionsMap, uniqueFields); - } - /** * If this is a function that contains key/value options, descend the tree with a {@link List} as the data. The function args will be collected into the * list when visiting the child {@link ASTStringLiteral} nodes. @@ -259,15 +282,35 @@ private Object visit(ASTFunctionNode node, Map optionsMap) { case UniqueFunction.UNIQUE_BY_SECOND_FUNCTION: case UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION: { UniqueFields uniqueFields = new UniqueFields(); - updateUniqueFields(node, uniqueFields, optionsMap, UniqueFunction.findByName(function)); + putFieldsFromChildren(node, uniqueFields, UniqueFunction.findByName(function).granularity); + updateUniqueFieldsOption(optionsMap, uniqueFields); return null; } case QueryFunctions.GROUPBY_FUNCTION: { - List optionsList = new ArrayList<>(); - this.visit(node, optionsList); - updateFieldsOption(optionsMap, QueryParameters.GROUP_FIELDS, optionsList); + List fieldList = new ArrayList<>(); + this.visit(node, fieldList); + String fieldString = JOINER.join(fieldList); + + // Parse the group by fields. 
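// A sketch of the parse step that follows (argument values hypothetical; granularity defaulting inferred from the UniqueFields tests later in this patch):
// GroupFields.from(...) accepts the same FIELD[GRANULARITY] grammar, with bare fields presumably defaulting to the ALL granularity:
//   GroupFields.from("AGE[DAY],GENDER").getGroupByFields()  // [AGE, GENDER], with AGE mapped to DAY and GENDER to ALL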
+ GroupFields groupFields = GroupFields.from(fieldString); + updateGroupByFieldsOption(optionsMap, groupFields); return null; } + case GroupByDate.GROUPBY_YEAR_FUNCTION: + case GroupByDate.GROUPBY_MONTH_FUNCTION: + case GroupByDate.GROUPBY_DAY_FUNCTION: + case GroupByDate.GROUPBY_HOUR_FUNCTION: + case GroupByDate.GROUPBY_TENTH_OF_HOUR_FUNCTION: + case GroupByDate.GROUPBY_MINUTE_FUNCTION: + case GroupByDate.GROUPBY_SECOND_FUNCTION: + case GroupByDate.GROUPBY_MILLISECOND_FUNCTION: + GroupFields groupFields = new GroupFields(); + TemporalGranularity granularity = GroupByDate.GroupByDateFunction.findByName(function).granularity; + List fields = new ArrayList<>(); + node.jjtAccept(this, fields); + fields.forEach((field) -> groupFields.put(field, granularity)); + updateGroupByFieldsOption(optionsMap, groupFields); + return null; case QueryFunctions.EXCERPT_FIELDS_FUNCTION: { List optionsList = new ArrayList<>(); this.visit(node, optionsList); @@ -339,8 +382,18 @@ private Object visit(ASTFunctionNode node, Map optionsMap) { return super.visit(node, optionsMap); } + // Update the group.fields option to include the given group by fields. + private void updateGroupByFieldsOption(Map optionsMap, GroupFields groupFields) { + // Combine with any previously found group-by fields. + if (optionsMap.containsKey(QueryParameters.GROUP_FIELDS)) { + GroupFields existingFields = GroupFields.from(optionsMap.get(QueryParameters.GROUP_FIELDS)); + groupFields.putAll(existingFields.getGroupByFieldMap()); + } + optionsMap.put(QueryParameters.GROUP_FIELDS, groupFields.toString()); + } + // Find all unique fields declared in the provided node and add them to the provided {@link UniqueFields} with the specified transformer. - private void putFieldsFromChildren(JexlNode node, UniqueFields uniqueFields, UniqueGranularity transformer) { + private void putFieldsFromChildren(JexlNode node, UniqueFields uniqueFields, TemporalGranularity transformer) { List fields = new ArrayList<>(); node.jjtAccept(this, fields); fields.forEach((field) -> uniqueFields.put(field, transformer)); diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupBy.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupBy.java index 1618b69d45b..13f6a18cf07 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupBy.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupBy.java @@ -3,6 +3,7 @@ import java.text.MessageFormat; import java.util.ArrayList; +import datawave.query.common.grouping.GroupFields; import datawave.query.jexl.functions.QueryFunctions; import datawave.query.language.functions.QueryFunction; import datawave.webservice.query.exception.BadRequestQueryException; @@ -23,8 +24,18 @@ public GroupBy() { @Override public void validate() throws IllegalArgumentException { if (this.parameterList.isEmpty()) { - BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, MessageFormat.format("{0}", this.name)); + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, +
MessageFormat.format("Unable to parse fields from arguments for function {0}", this.name)); + throw new IllegalArgumentException(qe); + } } } @@ -43,7 +54,6 @@ public String toString() { } sb.append(')'); } - return sb.toString(); } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByDate.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByDate.java new file mode 100644 index 00000000000..ccc0517e49c --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByDate.java @@ -0,0 +1,89 @@ +package datawave.query.language.functions.jexl; + +import java.text.MessageFormat; +import java.util.List; + +import datawave.query.Constants; +import datawave.query.attributes.TemporalGranularity; +import datawave.query.jexl.functions.QueryFunctions; +import datawave.webservice.query.exception.BadRequestQueryException; +import datawave.webservice.query.exception.DatawaveErrorCode; + +public abstract class GroupByDate extends JexlQueryFunction { + + public static final String GROUPBY_MILLISECOND_FUNCTION = "groupby_millisecond"; + public static final String GROUPBY_SECOND_FUNCTION = "groupby_second"; + public static final String GROUPBY_MINUTE_FUNCTION = "groupby_minute"; + public static final String GROUPBY_TENTH_OF_HOUR_FUNCTION = "groupby_tenth_of_hour"; + public static final String GROUPBY_HOUR_FUNCTION = "groupby_hour"; + public static final String GROUPBY_DAY_FUNCTION = "groupby_day"; + public static final String GROUPBY_MONTH_FUNCTION = "groupby_month"; + public static final String GROUPBY_YEAR_FUNCTION = "groupby_year"; + + public enum GroupByDateFunction { + GROUPBY_MILLISECOND(GROUPBY_MILLISECOND_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND), + GROUPBY_SECOND(GROUPBY_SECOND_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND), + GROUPBY_MINUTE(GROUPBY_MINUTE_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE), + GROUPBY_TENTH_OF_HOUR(GROUPBY_TENTH_OF_HOUR_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR), + GROUPBY_HOUR(GROUPBY_HOUR_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR), + GROUPBY_DAY(GROUPBY_DAY_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY), + GROUPBY_MONTH(GROUPBY_MONTH_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_MONTH), + GROUPBY_YEAR(GROUPBY_YEAR_FUNCTION, TemporalGranularity.TRUNCATE_TEMPORAL_TO_YEAR); + + public final String name; + public final TemporalGranularity granularity; + + GroupByDateFunction(String name, TemporalGranularity granularity) { + this.name = name; + this.granularity = granularity; + } + + public String getName() { + return name; + } + + public static GroupByDateFunction findByName(String name) { + return GroupByDateFunction.valueOf(name.toUpperCase()); + } + } + + public GroupByDate(String functionName, List parameterList) { + super(functionName, parameterList); + } + + @Override + public void validate() throws IllegalArgumentException { + // Verify at least one parameter was passed in. + if (this.parameterList.isEmpty()) { + BadRequestQueryException e = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, + MessageFormat.format("{0} requires at least one argument", this.name)); + throw new IllegalArgumentException(e); + } + + // Verify that the advanced group-by syntax, e.g. FIELD[DAY,HOUR], is not used. 
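// Illustrative inputs (query strings hypothetical): the bracket syntax is rejected here because for the groupby_* family the granularity is implied by the
// function name itself via GroupByDateFunction.findByName(name), which uppercases the name and delegates to Enum.valueOf, so an unknown name surfaces as an
// IllegalArgumentException:
//   f:groupby_day('BIRTH_DATE')        // supported: BIRTH_DATE grouped at DAY granularity
//   f:groupby_day('BIRTH_DATE[DAY]')   // fails validation with the error below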
+ for (String param : this.parameterList) { + if (param.contains(Constants.BRACKET_START)) { + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, MessageFormat.format( + "{0} does not support the advanced group-by syntax, only a simple comma-delimited list of fields is allowed.", this.name)); + throw new IllegalArgumentException(qe); + } + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(this.name); + if (parameterList.isEmpty()) { + sb.append("()"); + } else { + char separator = '('; + for (String param : parameterList) { + sb.append(separator).append(escapeString(param)); + separator = ','; + } + sb.append(")"); + } + return sb.toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByDay.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByDay.java new file mode 100644 index 00000000000..3390ad27519 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByDay.java @@ -0,0 +1,17 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.language.functions.QueryFunction; + +public class GroupByDay extends GroupByDate { + + public GroupByDay() { + super(GroupByDate.GROUPBY_DAY_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new GroupByDay(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByHour.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByHour.java new file mode 100644 index 00000000000..61bef859edf --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByHour.java @@ -0,0 +1,17 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.language.functions.QueryFunction; + +public class GroupByHour extends GroupByDate { + + public GroupByHour() { + super(GroupByDate.GROUPBY_HOUR_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new GroupByHour(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMillisecond.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMillisecond.java new file mode 100644 index 00000000000..50912e1a46a --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMillisecond.java @@ -0,0 +1,17 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.language.functions.QueryFunction; + +public class GroupByMillisecond extends GroupByDate { + + public GroupByMillisecond() { + super(GroupByDate.GROUPBY_MILLISECOND_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new GroupByMillisecond(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMinute.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMinute.java new file mode 100644 index 00000000000..1accce0d4f2 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMinute.java @@ -0,0 +1,17 @@ +package datawave.query.language.functions.jexl; + +import 
java.util.ArrayList; + +import datawave.query.language.functions.QueryFunction; + +public class GroupByMinute extends GroupByDate { + + public GroupByMinute() { + super(GroupByDate.GROUPBY_MINUTE_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new GroupByMinute(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMonth.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMonth.java new file mode 100644 index 00000000000..d7cdfacaa9f --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByMonth.java @@ -0,0 +1,17 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.language.functions.QueryFunction; + +public class GroupByMonth extends GroupByDate { + + public GroupByMonth() { + super(GroupByDate.GROUPBY_MONTH_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new GroupByMonth(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupBySecond.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupBySecond.java new file mode 100644 index 00000000000..aebf78623ac --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupBySecond.java @@ -0,0 +1,17 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.language.functions.QueryFunction; + +public class GroupBySecond extends GroupByDate { + + public GroupBySecond() { + super(GroupByDate.GROUPBY_SECOND_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new GroupBySecond(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByTenthOfHour.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByTenthOfHour.java new file mode 100644 index 00000000000..89e38233c29 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByTenthOfHour.java @@ -0,0 +1,17 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.language.functions.QueryFunction; + +public class GroupByTenthOfHour extends GroupByDate { + + public GroupByTenthOfHour() { + super(GroupByDate.GROUPBY_TENTH_OF_HOUR_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new GroupByTenthOfHour(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByYear.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByYear.java new file mode 100644 index 00000000000..0a23ea473e9 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/GroupByYear.java @@ -0,0 +1,17 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.language.functions.QueryFunction; + +public class GroupByYear extends GroupByDate { + + public GroupByYear() { + super(GroupByDate.GROUPBY_YEAR_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new GroupByYear(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java 
index ec52b26e895..5e271d1a5ea 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java @@ -44,9 +44,8 @@ public static void apply(Map optionsMap, ShardQueryConfiguration config.setMatchingFieldSets(Sets.newHashSet(mfs)); break; case QueryParameters.GROUP_FIELDS: - String[] groups = StringUtils.split(value, Constants.PARAM_VALUE_SEP); groupFields = config.getGroupFields(); - groupFields.setGroupByFields(Sets.newHashSet(groups)); + groupFields.setGroupByFieldMap(GroupFields.from(value).getGroupByFieldMap()); config.setGroupFields(groupFields); // If there are any group-by fields, update the projection fields to include them. if (groupFields.hasGroupByFields()) { diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index 6744e8c56e3..79fc82d5091 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -42,6 +42,7 @@ import com.google.common.cache.CacheBuilder; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import com.google.common.collect.TreeMultimap; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; @@ -66,6 +67,7 @@ import datawave.query.QueryParameters; import datawave.query.attributes.ExcerptFields; import datawave.query.attributes.SummaryOptions; +import datawave.query.attributes.TemporalGranularity; import datawave.query.attributes.UniqueFields; import datawave.query.cardinality.CardinalityConfiguration; import datawave.query.common.grouping.GroupFields; @@ -995,12 +997,12 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the GROUP_FIELDS parameter if given String groupFieldsParam = settings.findParameter(QueryParameters.GROUP_FIELDS).getParameterValue().trim(); if (StringUtils.isNotBlank(groupFieldsParam)) { - String[] groupFields = StringUtils.split(groupFieldsParam, Constants.PARAM_VALUE_SEP); + TreeMultimap groupByFieldMap = GroupFields.parseGroupByFields(groupFieldsParam); // Only set the group fields if we were actually given some. - if (groupFields.length > 0) { + if (!groupByFieldMap.isEmpty()) { GroupFields groupByFields = config.getGroupFields(); - groupByFields.setGroupByFields(Sets.newHashSet(groupFields)); + groupByFields.setGroupByFieldMap(groupByFieldMap); // Update the sum fields if given. 
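// An illustrative parameter value (hypothetical; granularity defaulting inferred from the grammar shared with UniqueFields): the FIELD[GRANULARITY] syntax
// now also works when group.fields arrives as a query parameter rather than as a JEXL function, and parseGroupByFields(...) yields the field-to-granularity
// TreeMultimap checked with isEmpty() above in place of the old array-length check:
//   GroupFields.parseGroupByFields("BIRTH_DATE[MONTH],GENDER")  // {BIRTH_DATE=[MONTH], GENDER=[ALL]}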
String sumFieldsParam = settings.findParameter(QueryParameters.SUM_FIELDS).getParameterValue().trim(); diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ShardQueryUtils.java b/warehouse/query-core/src/main/java/datawave/query/util/ShardQueryUtils.java index 8e1275ad76a..6ef221cd487 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ShardQueryUtils.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ShardQueryUtils.java @@ -4,11 +4,9 @@ import java.util.Set; import java.util.stream.Collectors; -import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.commons.jexl3.parser.ASTJexlScript; import org.apache.log4j.Logger; -import com.google.common.cache.Cache; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; @@ -43,9 +41,9 @@ public class ShardQueryUtils { public static ASTJexlScript upperCaseIdentifiers(MetadataHelper metadataHelper, ShardQueryConfiguration config, ASTJexlScript script) { GroupFields groupFields = config.getGroupFields(); if (groupFields != null && groupFields.hasGroupByFields()) { + Sets.newHashSet(groupFields.getGroupByFields()).forEach(field -> groupFields.replaceGroupByField(field, field.toUpperCase())); groupFields.setMaxFields(toUpperCase(groupFields.getMaxFields())); groupFields.setSumFields(toUpperCase(groupFields.getSumFields())); - groupFields.setGroupByFields(toUpperCase(groupFields.getGroupByFields())); groupFields.setAverageFields(toUpperCase(groupFields.getAverageFields())); groupFields.setCountFields(toUpperCase(groupFields.getCountFields())); groupFields.setMinFields(toUpperCase(groupFields.getMinFields())); diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/TemporalGranularityTest.java b/warehouse/query-core/src/test/java/datawave/query/attributes/TemporalGranularityTest.java new file mode 100644 index 00000000000..c00ecb301b8 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/attributes/TemporalGranularityTest.java @@ -0,0 +1,137 @@ +package datawave.query.attributes; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; + +import java.util.HashSet; +import java.util.Set; + +import org.junit.Test; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +public class TemporalGranularityTest { + + private static final ObjectMapper objectMapper = new ObjectMapper(); + + @Test + public void testAll() { + assertEquals("ALL", TemporalGranularity.ALL.getName()); + assertNull(TemporalGranularity.ALL.transform(null)); + assertEquals("nonNullValue", TemporalGranularity.ALL.transform("nonNullValue")); + } + + @Test + public void testTruncateTemporalToDay() { + assertEquals("DAY", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY.getName()); + assertNull(TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY.transform(null)); + assertEquals("nonDateValue", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY.transform("nonDateValue")); + assertEquals("2019-01-15", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY.transform("2019-01-15 12:30:15")); + } + + @Test + public void testTruncateTemporalToHour() { + assertEquals("HOUR", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR.getName()); + assertNull(TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR.transform(null)); + assertEquals("nonDateValue", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR.transform("nonDateValue")); + 
assertEquals("2019-01-15T12", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR.transform("2019-01-15 12:30:15")); + } + + @Test + public void testTruncateTemporalToMinute() { + assertEquals("MINUTE", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.getName()); + assertNull(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform(null)); + assertEquals("nonDateValue", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform("nonDateValue")); + assertEquals("2019-01-15T12:30", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform("2019-01-15 12:30:15")); + } + + @Test + public void testTruncateTemporalToSecond() { + assertEquals("SECOND", TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND.getName()); + assertNull(TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND.transform(null)); + assertEquals("nonDateValue", TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND.transform("nonDateValue")); + assertEquals("2019-01-15T12:30:15", TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND.transform("2019-01-15 12:30:15")); + } + + @Test + public void testTruncateTemporalToMillisecond() { + assertEquals("MILLISECOND", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.getName()); + assertNull(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.transform(null)); + assertEquals("nonDateValue", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.transform("nonDateValue")); + assertEquals("2022-11-03T12:30:00.976", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.transform("2022-11-03T12:30:00.976Z")); + } + + @Test + public void testTruncateTemporalToMonth() { + assertEquals("MONTH", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MONTH.getName()); + assertNull(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MONTH.transform(null)); + assertEquals("nonDateValue", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MONTH.transform("nonDateValue")); + assertEquals("2019-01", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MONTH.transform("2019-01-15 12:30:15")); + } + + @Test + public void testMinuteTruncation() { + assertEquals("MINUTE", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.getName()); + assertNull(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform(null)); + assertEquals("nonDateValue", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform("nonDateValue")); + assertEquals("2019-01-15T12:30", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform("2019-01-15 12:30:15")); + } + + @Test + public void testTenthMinuteTruncation() { + assertEquals("TENTH_OF_HOUR", TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.getName()); + assertNull(TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.transform(null)); + assertEquals("nonDateValue", TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.transform("nonDateValue")); + assertEquals("2019-01-15T12:3", TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.transform("2019-01-15 12:30:15")); + assertEquals("2019-01-15T03:1", TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.transform("2019-01-15 3:10:15")); + } + + @Test + public void testNamesForUniqueness() { + Set names = new HashSet<>(); + for (TemporalGranularity transformer : TemporalGranularity.values()) { + assertFalse("Duplicate name found: " + transformer.getName(), names.contains(transformer.getName())); + names.add(transformer.getName()); + } + } + + @Test + public void testStaticOf() { + for (TemporalGranularity transformer : TemporalGranularity.values()) { + TemporalGranularity actual = TemporalGranularity.of(transformer.getName()); + assertEquals("Incorrect transformer " + actual + " returned for 
name " + transformer.getName(), transformer, actual); + } + } + + @Test + public void testSerialization() throws JsonProcessingException { + assertEquals("\"" + TemporalGranularity.ALL.getName() + "\"", objectMapper.writeValueAsString(TemporalGranularity.ALL)); + assertEquals("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY.getName() + "\"", + objectMapper.writeValueAsString(TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY)); + assertEquals("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR.getName() + "\"", + objectMapper.writeValueAsString(TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR)); + assertEquals("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.getName() + "\"", + objectMapper.writeValueAsString(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE)); + assertEquals("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND.getName() + "\"", + objectMapper.writeValueAsString(TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND)); + assertEquals("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.getName() + "\"", + objectMapper.writeValueAsString(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND)); + } + + @Test + public void testDeserialization() throws JsonProcessingException { + assertEquals(TemporalGranularity.ALL, objectMapper.readValue("\"" + TemporalGranularity.ALL.getName() + "\"", TemporalGranularity.class)); + assertEquals(TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY, + objectMapper.readValue("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY.getName() + "\"", TemporalGranularity.class)); + assertEquals(TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR, + objectMapper.readValue("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR.getName() + "\"", TemporalGranularity.class)); + assertEquals(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, + objectMapper.readValue("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.getName() + "\"", TemporalGranularity.class)); + assertEquals(TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND, + objectMapper.readValue("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND.getName() + "\"", TemporalGranularity.class)); + assertEquals(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND, + objectMapper.readValue("\"" + TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.getName() + "\"", TemporalGranularity.class)); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueFieldsTest.java b/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueFieldsTest.java index 00850fba9a6..20859df4f03 100644 --- a/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueFieldsTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueFieldsTest.java @@ -44,7 +44,7 @@ public void testIsEmptyForEmptyUniqueFields() { @Test public void testIsEmptyForNonEmptyUniqueFields() { UniqueFields uniqueFields = new UniqueFields(); - uniqueFields.put("fieldA", UniqueGranularity.ALL); + uniqueFields.put("fieldA", TemporalGranularity.ALL); assertFalse(uniqueFields.isEmpty()); } @@ -64,12 +64,12 @@ public void testEmptyUniqueFieldsToString() { @Test public void testNonEmptyUniqueFieldsToString() { UniqueFields uniqueFields = new UniqueFields(); - uniqueFields.put("fieldA", UniqueGranularity.ALL); - uniqueFields.put("fieldB", UniqueGranularity.ALL); - uniqueFields.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); - uniqueFields.put("fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - uniqueFields.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - 
uniqueFields.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + uniqueFields.put("fieldA", TemporalGranularity.ALL); + uniqueFields.put("fieldB", TemporalGranularity.ALL); + uniqueFields.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + uniqueFields.put("fieldC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + uniqueFields.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + uniqueFields.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); assertEquals("FIELDA[ALL],FIELDB[ALL,DAY],FIELDC[HOUR],FIELDD[HOUR,MINUTE]", uniqueFields.toString()); } @@ -96,7 +96,7 @@ public void testParsingFromEmptyString() { @Test public void testParsingSingleFieldWithoutValueGranularity() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); + expected.put("fieldA", TemporalGranularity.ALL); UniqueFields actual = UniqueFields.from("fieldA"); assertEquals(expected, actual); @@ -105,9 +105,9 @@ public void testParsingSingleFieldWithoutValueGranularity() { @Test public void testParsingMultipleFieldsWithoutValueGranularities() { UniqueFields expected = new UniqueFields(); - expected.put("DEATH_DATE", UniqueGranularity.ALL); - expected.put("$MAGIC", UniqueGranularity.ALL); - expected.put("$BIRTH_DATE", UniqueGranularity.ALL); + expected.put("DEATH_DATE", TemporalGranularity.ALL); + expected.put("$MAGIC", TemporalGranularity.ALL); + expected.put("$BIRTH_DATE", TemporalGranularity.ALL); UniqueFields actual = UniqueFields.from("DEATH_DATE,$MAGIC,$BIRTH_DATE"); @@ -120,7 +120,7 @@ public void testParsingMultipleFieldsWithoutValueGranularities() { @Test public void testParsingSingleFieldWithValueGranularity() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldA", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); UniqueFields actual = UniqueFields.from("fieldA[HOUR]"); assertEquals(expected, actual); @@ -132,8 +132,8 @@ public void testParsingSingleFieldWithValueGranularity() { @Test public void testParsingFieldWithNoGranularityAtStartOfMixedFields() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields actual = UniqueFields.from("fieldA,fieldB[MINUTE]"); assertEquals(expected, actual); @@ -145,8 +145,8 @@ public void testParsingFieldWithNoGranularityAtStartOfMixedFields() { @Test public void testParsingFieldWithNoGranularityAtEndOfMixedFields() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields actual = UniqueFields.from("fieldB[MINUTE],fieldA"); assertEquals(expected, actual); @@ -158,9 +158,9 @@ public void testParsingFieldWithNoGranularityAtEndOfMixedFields() { @Test public void testParsingFieldWithNoGranularityInMiddleOfEndOfMixedFields() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); - expected.put("fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldA", TemporalGranularity.ALL); + 
expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); UniqueFields actual = UniqueFields.from("fieldB[MINUTE],fieldA,fieldC[HOUR]"); assertEquals(expected, actual); @@ -172,7 +172,7 @@ public void testParsingFieldWithNoGranularityInMiddleOfEndOfMixedFields() { @Test public void testParsingSingleFieldWithEmptyGranularityList() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); + expected.put("fieldA", TemporalGranularity.ALL); UniqueFields actual = UniqueFields.from("fieldA[]"); assertEquals(expected, actual); @@ -184,8 +184,8 @@ public void testParsingSingleFieldWithEmptyGranularityList() { @Test public void testParsingFieldWithEmptyGranularityListAtStartOfMixedFields() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields actual = UniqueFields.from("fieldA[],fieldB[MINUTE]"); assertEquals(expected, actual); @@ -197,8 +197,8 @@ public void testParsingFieldWithEmptyGranularityListAtStartOfMixedFields() { @Test public void testParsingFieldWithEmptyGranularityListAtEndOfMixedFields() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields actual = UniqueFields.from("fieldB[MINUTE],fieldA[]"); assertEquals(expected, actual); @@ -210,9 +210,9 @@ public void testParsingFieldWithEmptyGranularityListAtEndOfMixedFields() { @Test public void testParsingFieldWithEmptyGranularityListInMiddleOfMixedFields() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); - expected.put("fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); UniqueFields actual = UniqueFields.from("fieldB[MINUTE],fieldA[],fieldC[HOUR]"); assertEquals(expected, actual); @@ -224,12 +224,12 @@ public void testParsingFieldWithEmptyGranularityListInMiddleOfMixedFields() { @Test public void testParsingMixedFieldsAndGranularities() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); - expected.put("fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + expected.put("fieldC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields actual = 
UniqueFields.from("fieldA[ALL],fieldB[ALL,DAY],fieldC[HOUR],fieldD[HOUR,MINUTE]"); @@ -242,12 +242,12 @@ public void testParsingMixedFieldsAndGranularities() { @Test public void testParsingNonDeconstructedIdentifiers() { UniqueFields expected = new UniqueFields(); - expected.put("$fieldA", UniqueGranularity.ALL); - expected.put("$fieldB", UniqueGranularity.ALL); - expected.put("$fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); - expected.put("$fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("$fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("$fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("$fieldA", TemporalGranularity.ALL); + expected.put("$fieldB", TemporalGranularity.ALL); + expected.put("$fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + expected.put("$fieldC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("$fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("$fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields actual = UniqueFields.from("$fieldA[ALL],$fieldB[ALL,DAY],$fieldC[HOUR],$fieldD[HOUR,MINUTE]"); @@ -260,12 +260,12 @@ public void testParsingNonDeconstructedIdentifiers() { @Test public void testParsingWithWhitespace() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); - expected.put("fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + expected.put("fieldC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields actual = UniqueFields.from("fieldA[ALL], fieldB[ALL, DAY], fieldC[HOUR],fieldD[HOUR, MINUTE]"); @@ -278,12 +278,12 @@ public void testParsingWithWhitespace() { @Test public void testParsingGranularitiesIsCaseInsensitive() { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); - expected.put("fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + expected.put("fieldC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields actual = UniqueFields.from("fieldA[all], fieldB[ALL, day], fieldC[Hour],fieldD[HOUR, minute]"); @@ -297,8 +297,8 @@ public void testParsingGranularitiesIsCaseInsensitive() { public void testParsingConsecutiveCommas() { // Test consecutive commas at the start. 
UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); UniqueFields actual = UniqueFields.from(",,fieldA,fieldB[DAY]"); @@ -306,8 +306,8 @@ public void testParsingConsecutiveCommas() { // Test consecutive commas in the middle. expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); actual = UniqueFields.from("fieldA,,fieldB[DAY]"); @@ -315,8 +315,8 @@ public void testParsingConsecutiveCommas() { // Test consecutive commas at the end. expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); actual = UniqueFields.from("fieldA,fieldB[DAY],,"); @@ -326,7 +326,7 @@ public void testParsingConsecutiveCommas() { @Test public void testParsingInvalidGranularity() { Exception exception = assertThrows(IllegalArgumentException.class, () -> UniqueFields.from("fieldA[BAD]")); - assertEquals("Invalid unique granularity given: BAD", exception.getMessage()); + assertEquals("No TemporalGranularity exists with the name BAD", exception.getMessage()); } /** @@ -334,13 +334,13 @@ public void testParsingInvalidGranularity() { */ @Test public void testSerialization() throws JsonProcessingException { - SortedSetMultimap sortedFields = TreeMultimap.create(); - sortedFields.put("fieldA", UniqueGranularity.ALL); - sortedFields.put("fieldB", UniqueGranularity.ALL); - sortedFields.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); - sortedFields.put("fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - sortedFields.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - sortedFields.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + SortedSetMultimap sortedFields = TreeMultimap.create(); + sortedFields.put("fieldA", TemporalGranularity.ALL); + sortedFields.put("fieldB", TemporalGranularity.ALL); + sortedFields.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + sortedFields.put("fieldC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + sortedFields.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + sortedFields.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); UniqueFields uniqueFields = new UniqueFields(sortedFields); @@ -354,12 +354,12 @@ public void testSerialization() throws JsonProcessingException { @Test public void testDeserialization() throws JsonProcessingException { UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.ALL); - expected.put("fieldB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); - expected.put("fieldC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + expected.put("fieldA", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.ALL); + expected.put("fieldB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + expected.put("fieldC", 
TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); String json = "\"fieldA[ALL],fieldB[ALL,DAY],fieldC[HOUR],fieldD[HOUR,MINUTE]\""; UniqueFields actual = objectMapper.readValue(json, UniqueFields.class); @@ -381,10 +381,10 @@ public void testValueTransformation() { expected.add("nonDateValue"); UniqueFields uniqueFields = new UniqueFields(); - uniqueFields.put("fieldA", UniqueGranularity.ALL); - uniqueFields.put("fieldA", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); - uniqueFields.put("fieldA", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - uniqueFields.put("fieldA", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + uniqueFields.put("fieldA", TemporalGranularity.ALL); + uniqueFields.put("fieldA", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + uniqueFields.put("fieldA", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + uniqueFields.put("fieldA", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); Set values = Sets.newHashSet("2020-01-12 15:30:45", "nonDateValue"); SortedSet actual = Sets.newTreeSet(uniqueFields.transformValues("FIELDA", values)); @@ -403,9 +403,9 @@ public void testDeconstructIdentifierFields() { expected.add("FIELDC"); UniqueFields uniqueFields = new UniqueFields(); - uniqueFields.put("$FIELDA", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - uniqueFields.put("$FIELDB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - uniqueFields.put("FIELDC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + uniqueFields.put("$FIELDA", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + uniqueFields.put("$FIELDB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + uniqueFields.put("FIELDC", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); SortedSet actual = Sets.newTreeSet(uniqueFields.getFields()); @@ -418,7 +418,7 @@ public void testDeconstructIdentifierFields() { @Test public void testRemapFields() { UniqueFields actual = new UniqueFields(); - actual.put("fieldA", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + actual.put("fieldA", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); Multimap model = HashMultimap.create(); model.put("FIELDA", "FIELDB"); @@ -427,9 +427,9 @@ public void testRemapFields() { actual.remapFields(model); UniqueFields expected = new UniqueFields(); - expected.put("fieldA", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("FIELDB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); - expected.put("fieldc", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldA", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("FIELDB", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + expected.put("fieldc", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); assertEquals(expected, actual); } diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueGranularityTest.java b/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueGranularityTest.java deleted file mode 100644 index 29119f8118b..00000000000 --- a/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueGranularityTest.java +++ /dev/null @@ -1,137 +0,0 @@ -package datawave.query.attributes; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; - -import java.util.HashSet; -import java.util.Set; - -import org.junit.Test; - -import com.fasterxml.jackson.core.JsonProcessingException; -import 
-
-public class UniqueGranularityTest {
-
-    private static final ObjectMapper objectMapper = new ObjectMapper();
-
-    @Test
-    public void testAll() {
-        assertEquals("ALL", UniqueGranularity.ALL.getName());
-        assertNull(UniqueGranularity.ALL.transform(null));
-        assertEquals("nonNullValue", UniqueGranularity.ALL.transform("nonNullValue"));
-    }
-
-    @Test
-    public void testTruncateTemporalToDay() {
-        assertEquals("DAY", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY.getName());
-        assertNull(UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY.transform(null));
-        assertEquals("nonDateValue", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY.transform("nonDateValue"));
-        assertEquals("2019-01-15", UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY.transform("2019-01-15 12:30:15"));
-    }
-
-    @Test
-    public void testTruncateTemporalToHour() {
-        assertEquals("HOUR", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR.getName());
-        assertNull(UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR.transform(null));
-        assertEquals("nonDateValue", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR.transform("nonDateValue"));
-        assertEquals("2019-01-15T12", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR.transform("2019-01-15 12:30:15"));
-    }
-
-    @Test
-    public void testTruncateTemporalToMinute() {
-        assertEquals("MINUTE", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.getName());
-        assertNull(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform(null));
-        assertEquals("nonDateValue", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform("nonDateValue"));
-        assertEquals("2019-01-15T12:30", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform("2019-01-15 12:30:15"));
-    }
-
-    @Test
-    public void testTruncateTemporalToSecond() {
-        assertEquals("SECOND", UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND.getName());
-        assertNull(UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND.transform(null));
-        assertEquals("nonDateValue", UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND.transform("nonDateValue"));
-        assertEquals("2019-01-15T12:30:15", UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND.transform("2019-01-15 12:30:15"));
-    }
-
-    @Test
-    public void testTruncateTemporalToMillisecond() {
-        assertEquals("MILLISECOND", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.getName());
-        assertNull(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.transform(null));
-        assertEquals("nonDateValue", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.transform("nonDateValue"));
-        assertEquals("2022-11-03T12:30:00.976", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.transform("2022-11-03T12:30:00.976Z"));
-    }
-
-    @Test
-    public void testTruncateTemporalToMonth() {
-        assertEquals("MONTH", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MONTH.getName());
-        assertNull(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MONTH.transform(null));
-        assertEquals("nonDateValue", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MONTH.transform("nonDateValue"));
-        assertEquals("2019-01", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MONTH.transform("2019-01-15 12:30:15"));
-    }
-
-    @Test
-    public void testMinuteTruncation() {
-        assertEquals("MINUTE", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.getName());
-        assertNull(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform(null));
-        assertEquals("nonDateValue", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform("nonDateValue"));
-        assertEquals("2019-01-15T12:30", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.transform("2019-01-15 12:30:15"));
-    }
-
-    @Test
-    public void testTenthMinuteTruncation() {
-        assertEquals("TENTH_OF_HOUR", UniqueGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.getName());
-        assertNull(UniqueGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.transform(null));
-        assertEquals("nonDateValue", UniqueGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.transform("nonDateValue"));
-        assertEquals("2019-01-15T12:3", UniqueGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.transform("2019-01-15 12:30:15"));
-        assertEquals("2019-01-15T03:1", UniqueGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR.transform("2019-01-15 3:10:15"));
-    }
-
-    @Test
-    public void testNamesForUniqueness() {
-        Set<String> names = new HashSet<>();
-        for (UniqueGranularity transformer : UniqueGranularity.values()) {
-            assertFalse("Duplicate name found: " + transformer.getName(), names.contains(transformer.getName()));
-            names.add(transformer.getName());
-        }
-    }
-
-    @Test
-    public void testStaticOf() {
-        for (UniqueGranularity transformer : UniqueGranularity.values()) {
-            UniqueGranularity actual = UniqueGranularity.of(transformer.getName());
-            assertEquals("Incorrect transformer " + actual + " returned for name " + transformer.getName(), transformer, actual);
-        }
-    }
-
-    @Test
-    public void testSerialization() throws JsonProcessingException {
-        assertEquals("\"" + UniqueGranularity.ALL.getName() + "\"", objectMapper.writeValueAsString(UniqueGranularity.ALL));
-        assertEquals("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY.getName() + "\"",
-                        objectMapper.writeValueAsString(UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY));
-        assertEquals("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR.getName() + "\"",
-                        objectMapper.writeValueAsString(UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR));
-        assertEquals("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.getName() + "\"",
-                        objectMapper.writeValueAsString(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE));
-        assertEquals("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND.getName() + "\"",
-                        objectMapper.writeValueAsString(UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND));
-        assertEquals("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.getName() + "\"",
-                        objectMapper.writeValueAsString(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND));
-    }
-
-    @Test
-    public void testDeserialization() throws JsonProcessingException {
-        assertEquals(UniqueGranularity.ALL, objectMapper.readValue("\"" + UniqueGranularity.ALL.getName() + "\"", UniqueGranularity.class));
-        assertEquals(UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY,
-                        objectMapper.readValue("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY.getName() + "\"", UniqueGranularity.class));
-        assertEquals(UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR,
-                        objectMapper.readValue("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR.getName() + "\"", UniqueGranularity.class));
-        assertEquals(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE,
-                        objectMapper.readValue("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE.getName() + "\"", UniqueGranularity.class));
-        assertEquals(UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND,
-                        objectMapper.readValue("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND.getName() + "\"", UniqueGranularity.class));
-        assertEquals(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND,
-                        objectMapper.readValue("\"" + UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND.getName() + "\"", UniqueGranularity.class));
-    }
-}
diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java
index 93bba6524fd..b50a43f20ac 100644
--- a/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java
@@ -1,12 +1,14 @@
 package datawave.query.common.grouping;
 
 import java.math.BigDecimal;
+import java.text.SimpleDateFormat;
 import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.security.ColumnVisibility;
@@ -18,12 +20,14 @@
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
 
+import datawave.data.type.DateType;
 import datawave.data.type.LcNoDiacriticsType;
 import datawave.data.type.NumberType;
 import datawave.data.type.Type;
 import datawave.query.attributes.Attribute;
 import datawave.query.attributes.Attributes;
 import datawave.query.attributes.Document;
+import datawave.query.attributes.TemporalGranularity;
 import datawave.query.attributes.TypeAttribute;
 import datawave.test.GroupsAssert;
@@ -50,6 +54,8 @@ public static void beforeClass() {
         inverseReverseMap.put("LOC", "BUILDING");
         inverseReverseMap.put("LOC", "LOCATION");
         inverseReverseMap.put("PEAK", "HEIGHT");
+        inverseReverseMap.put("END", "EXPIRATION_DATE");
+        inverseReverseMap.put("START", "CREATION_DATE");
 
         reverseMap.put("GENERE", "GEN");
         reverseMap.put("GENDER", "GEN");
@@ -58,6 +64,8 @@
         reverseMap.put("BUILDING", "LOC");
         reverseMap.put("LOCATION", "LOC");
         reverseMap.put("HEIGHT", "PEAK");
+        reverseMap.put("EXPIRATION_DATE", "END");
+        reverseMap.put("CREATION_DATE", "START");
     }
 
     @Before
@@ -1066,8 +1074,218 @@ public void testDedupingEquivalentEntriesWithModelMapping() {
         // @formatter:on
     }
 
+    /**
+     * Verify that a grouping operation that truncates a date field to the day results in the expected groupings.
+     */
+    @Test
+    public void testGroupingWithTruncateToDay() {
+        Multimap<String,TemporalGranularity> groupByFieldMap = HashMultimap.create();
+        groupByFieldMap.put("GENDER", TemporalGranularity.ALL);
+        groupByFieldMap.put("EXPIRATION_DATE", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY);
+        givenGroupFields(groupByFieldMap);
+
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.A.B.C.1").withDateType("2025-01-05T12:15:30.558Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.A.B.2").withDateType("2025-01-05T20:13:30.343Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.C.3").withDateType("2025-01-05T12:15:30.534Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.B.C.1").withDateType("2025-12-12T04:15:30.4545585"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.2").withDateType("2025-01-12T12:05:30.6545585"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.3").withDateType("2025-12-12T12:22:30.6555474"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.4").withDateType("2025-12-12T11:11:22.6455733"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.HAT.V.B.1").withDateType("2025-01-05T23:15:30.6534070"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.HAT.V.B.2").withDateType("2025-01-05T16:01:15.8755555"));
+
+        // Direct match to EXPIRATION_DATE.FOO.1.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.A.C.1").withLcNoDiacritics("MALE"));
+        // Direct match to EXPIRATION_DATE.FOO.2.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.V.S.2").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.FOO.3.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.F.3").withLcNoDiacritics("FEMALE"));
+        // No direct match to any EXPIRATION_DATE record, should be ignored since we have a direct match for a GENDER entry elsewhere.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.F.G.4").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.1.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.V.C.A.1").withLcNoDiacritics("MALE"));
+        // Direct match to EXPIRATION_DATE.BAR.2.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.2").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.3.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.3").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.4.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.4").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.HAT.1.
+        givenDocumentEntry(DocumentEntry.of("GENDER.HAT.G.S.1").withLcNoDiacritics("MALE"));
+        // Direct match to EXPIRATION_DATE.HAT.2.
+        givenDocumentEntry(DocumentEntry.of("GENDER.HAT.G.S.2").withLcNoDiacritics("MALE"));
+
+        executeGrouping();
+
+        // We should end up with the following groupings:
+        // 2025-01-05-MALE (Count of 3)
+        // 2025-01-05-FEMALE (Count of 2)
+        // 2025-01-12-FEMALE (Count of 1)
+        // 2025-12-12-MALE (Count of 1)
+        // 2025-12-12-FEMALE (Count of 2)
+        GroupsAssert groupsAssert = GroupsAssert.assertThat(groups);
+        groupsAssert.hasTotalGroups(5);
+        groupsAssert.assertGroup(textKey("GENDER", "MALE"), dateKey("EXPIRATION_DATE", "2025-01-05T12:15:30.558Z", "2025-01-05")).hasCount(3);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-05T20:13:30.343Z", "2025-01-05")).hasCount(2);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-12T12:05:30.654Z", "2025-01-12")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "MALE"), dateKey("EXPIRATION_DATE", "2025-12-12T04:15:30.454Z", "2025-12-12")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-12-12T12:22:30.655Z", "2025-12-12")).hasCount(2);
+    }
+
+    /**
+     * Verify that specifying multiple granularities for the same date field results in the expected groupings.
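+     * <p>
+     * For example, grouping on {@code EXPIRATION_DATE[DAY,YEAR]} alongside {@code GENDER[ALL]} is expected to produce one set of groups
+     * keyed by the day-truncated date (e.g. {@code 2025-01-05}) and an independent set keyed by the year-truncated date (e.g.
+     * {@code 2025}), so each document is counted once per granularity.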
+     */
+    @Test
+    public void testGroupingWithTruncateToDayAndYear() {
+        Multimap<String,TemporalGranularity> groupByFieldMap = HashMultimap.create();
+        groupByFieldMap.put("GENDER", TemporalGranularity.ALL);
+        groupByFieldMap.putAll("EXPIRATION_DATE", Set.of(TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY, TemporalGranularity.TRUNCATE_TEMPORAL_TO_YEAR));
+        givenGroupFields(groupByFieldMap);
+
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.A.B.C.1").withDateType("2025-01-05T12:15:30.558Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.A.B.2").withDateType("2025-01-05T20:13:30.343Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.C.3").withDateType("2025-01-05T12:15:30.534Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.B.C.1").withDateType("2025-12-12T04:15:30.454Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.2").withDateType("2025-01-12T12:05:30.654Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.3").withDateType("2025-12-12T12:22:30.655Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.4").withDateType("2025-12-12T11:11:22.645Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.HAT.V.B.1").withDateType("2025-01-05T23:15:30.653Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.HAT.V.B.2").withDateType("2025-01-05T16:01:15.875Z"));
+
+        // Direct match to EXPIRATION_DATE.FOO.1.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.A.C.1").withLcNoDiacritics("MALE"));
+        // Direct match to EXPIRATION_DATE.FOO.2.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.V.S.2").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.FOO.3.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.F.3").withLcNoDiacritics("FEMALE"));
+        // No direct match to any EXPIRATION_DATE record, should be ignored since we have a direct match for a GENDER entry elsewhere.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.F.G.4").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.1.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.V.C.A.1").withLcNoDiacritics("MALE"));
+        // Direct match to EXPIRATION_DATE.BAR.2.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.2").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.3.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.3").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.4.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.4").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.HAT.1.
+        givenDocumentEntry(DocumentEntry.of("GENDER.HAT.G.S.1").withLcNoDiacritics("MALE"));
+        // Direct match to EXPIRATION_DATE.HAT.2.
+        givenDocumentEntry(DocumentEntry.of("GENDER.HAT.G.S.2").withLcNoDiacritics("MALE"));
+
+        executeGrouping();
+
+        // We should end up with the following groupings:
+        // 2025-01-05-MALE (Count of 3)
+        // 2025-01-05-FEMALE (Count of 2)
+        // 2025-01-12-FEMALE (Count of 1)
+        // 2025-12-12-MALE (Count of 1)
+        // 2025-12-12-FEMALE (Count of 2)
+        // 2025-MALE (Count of 4)
+        // 2025-FEMALE (Count of 5)
+        GroupsAssert groupsAssert = GroupsAssert.assertThat(groups);
+        groupsAssert.hasTotalGroups(7);
+        groupsAssert.assertGroup(textKey("GENDER", "MALE"), dateKey("EXPIRATION_DATE", "2025-01-05T12:15:30.558Z", "2025-01-05")).hasCount(3);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-05T20:13:30.343Z", "2025-01-05")).hasCount(2);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-12T12:05:30.654Z", "2025-01-12")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "MALE"), dateKey("EXPIRATION_DATE", "2025-12-12T04:15:30.454Z", "2025-12-12")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-12-12T12:22:30.655Z", "2025-12-12")).hasCount(2);
+        groupsAssert.assertGroup(textKey("GENDER", "MALE"), dateKey("EXPIRATION_DATE", "2025-01-05T12:15:30.558Z", "2025")).hasCount(4);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-05T20:13:30.343Z", "2025")).hasCount(5);
+    }
+
+    /**
+     * Verify that specifying granularities for multiple date fields results in the expected groupings.
+     */
+    @Test
+    public void testGroupingWithTruncateMultipleFields() {
+        Multimap<String,TemporalGranularity> groupByFieldMap = HashMultimap.create();
+        groupByFieldMap.put("GENDER", TemporalGranularity.ALL);
+        groupByFieldMap.putAll("EXPIRATION_DATE", Set.of(TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY, TemporalGranularity.TRUNCATE_TEMPORAL_TO_YEAR));
+        groupByFieldMap.putAll("CREATION_DATE", Set.of(TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR));
+        givenGroupFields(groupByFieldMap);
+
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.A.B.C.1").withDateType("2025-01-05T12:15:30.558Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.A.B.2").withDateType("2025-01-05T20:13:30.343Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.FOO.C.3").withDateType("2025-01-05T12:15:30.534Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.B.C.1").withDateType("2025-12-12T04:15:30.454Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.2").withDateType("2025-01-12T12:05:30.654Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.3").withDateType("2025-12-12T12:22:30.655Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.BAR.V.A.4").withDateType("2025-12-12T11:11:22.645Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.HAT.V.B.1").withDateType("2025-01-05T23:15:30.653Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.HAT.V.B.2").withDateType("2025-01-05T16:01:15.875Z"));
+        givenDocumentEntry(DocumentEntry.of("EXPIRATION_DATE.OTHER.V.B.1").withDateType("2025-01-05T16:01:15.875Z")); // No match to any other entry.
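+
+        // The CREATION_DATE entries below are grouped at the HOUR granularity given in the field map above. Like EXPIRATION_DATE.OTHER,
+        // the CREATION_DATE.BRAIN entry deliberately has no matching entry for any other field.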
+        givenDocumentEntry(DocumentEntry.of("CREATION_DATE.FOO.A.B.C.1").withDateType("2025-01-05T10:15:22.655Z"));
+        givenDocumentEntry(DocumentEntry.of("CREATION_DATE.FOO.A.B.2").withDateType("2025-01-05T22:15:15.653Z"));
+        givenDocumentEntry(DocumentEntry.of("CREATION_DATE.FOO.C.3").withDateType("2025-01-05T10:15:01.558Z"));
+        givenDocumentEntry(DocumentEntry.of("CREATION_DATE.BAR.B.C.1").withDateType("2025-01-05T10:15:30.655Z"));
+        givenDocumentEntry(DocumentEntry.of("CREATION_DATE.BAR.V.A.2").withDateType("2025-01-05T10:15:22.454Z"));
+        givenDocumentEntry(DocumentEntry.of("CREATION_DATE.BRAIN.V.A.1").withDateType("2025-01-05T10:15:22.454Z")); // No match to any other entry.
+
+        // Direct match to EXPIRATION_DATE.FOO.1 and CREATION_DATE.FOO.1.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.A.C.1").withLcNoDiacritics("MALE"));
+        // Direct match to EXPIRATION_DATE.FOO.2 and CREATION_DATE.FOO.2.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.V.S.2").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.FOO.3 and CREATION_DATE.FOO.3.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.F.3").withLcNoDiacritics("FEMALE"));
+        // No direct match to any EXPIRATION_DATE or CREATION_DATE record, should be ignored since we have a direct match for a GENDER entry elsewhere.
+        givenDocumentEntry(DocumentEntry.of("GENDER.FOO.F.G.4").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.1 and CREATION_DATE.BAR.1.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.V.C.A.1").withLcNoDiacritics("MALE"));
+        // Direct match to EXPIRATION_DATE.BAR.2 and CREATION_DATE.BAR.2.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.2").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.3. No match to CREATION_DATE.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.3").withLcNoDiacritics("FEMALE"));
+        // Direct match to EXPIRATION_DATE.BAR.4. No match to CREATION_DATE.
+        givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.4").withLcNoDiacritics("FEMALE"));
+
+        executeGrouping();
+
+        // We should end up with the following groupings: [EXPIRATION_DATE, GENDER, CREATION_DATE]
+        // 2025-01-05, FEMALE, 2025-01-05T10 (Count of 1)
+        // 2025-01-05, MALE, 2025-01-05T10 (Count of 1)
+        // 2025-01-05, FEMALE, 2025-01-05T22 (Count of 1)
+        // 2025-01-12, FEMALE, 2025-01-05T10 (Count of 1)
+        // 2025-12-12, MALE, 2025-01-05T10 (Count of 1)
+        // 2025, FEMALE, 2025-01-05T22 (Count of 1)
+        // 2025, MALE, 2025-01-05T10 (Count of 2)
+        // 2025, FEMALE, 2025-01-05T10 (Count of 2)
+        GroupsAssert groupsAssert = GroupsAssert.assertThat(groups);
+        // Groupings for GENDER[ALL], EXPIRATION_DATE[DAY], CREATION_DATE[HOUR]
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-05T12:15:30.534Z", "2025-01-05"),
+                        dateKey("CREATION_DATE", "2025-01-05T10:15:01.558Z", "2025-01-05T10")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "MALE"), dateKey("EXPIRATION_DATE", "2025-01-05T12:15:30.558Z", "2025-01-05"),
+                        dateKey("CREATION_DATE", "2025-01-05T10:15:22.655Z", "2025-01-05T10")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-05T20:13:30.343Z", "2025-01-05"),
+                        dateKey("CREATION_DATE", "2025-01-05T22:15:15.653Z", "2025-01-05T22")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-12T12:05:30.654Z", "2025-01-12"),
+                        dateKey("CREATION_DATE", "2025-01-05T10:15:22.454Z", "2025-01-05T10")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "MALE"), dateKey("EXPIRATION_DATE", "2025-12-12T04:15:30.454Z", "2025-12-12"),
+                        dateKey("CREATION_DATE", "2025-01-05T10:15:30.655Z", "2025-01-05T10")).hasCount(1);
+        // Groupings for GENDER[ALL], EXPIRATION_DATE[YEAR], CREATION_DATE[HOUR]
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-05T20:13:30.343Z", "2025"),
+                        dateKey("CREATION_DATE", "2025-01-05T22:15:15.653Z", "2025-01-05T22")).hasCount(1);
+        groupsAssert.assertGroup(textKey("GENDER", "MALE"), dateKey("EXPIRATION_DATE", "2025-12-12T04:15:30.454Z", "2025"),
+                        dateKey("CREATION_DATE", "2025-01-05T10:15:30.655Z", "2025-01-05T10")).hasCount(2);
+        groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), dateKey("EXPIRATION_DATE", "2025-01-12T12:05:30.654Z", "2025"),
+                        dateKey("CREATION_DATE", "2025-01-05T10:15:22.454Z", "2025-01-05T10")).hasCount(2);
+    }
+
     private void givenGroupFields(String... fields) {
-        groupFields.setGroupByFields(Sets.newHashSet(Arrays.asList(fields)));
+        Multimap<String,TemporalGranularity> groupByFieldMap = HashMultimap.create();
+        Arrays.asList(fields).forEach((field) -> groupByFieldMap.put(field, TemporalGranularity.ALL));
+        groupFields.setGroupByFieldMap(groupByFieldMap);
+    }
+
+    private void givenGroupFields(Multimap<String,TemporalGranularity> map) {
+        groupFields.setGroupByFieldMap(map);
     }
 
     private void givenSumFields(String... fields) {
@@ -1112,15 +1330,19 @@ private void executeGrouping() {
     }
 
     private GroupingAttribute numericKey(String key, String value) {
-        return createGroupingAttribute(key, new NumberType(value));
+        return createGroupingAttribute(key, new NumberType(value), null);
     }
 
     private GroupingAttribute textKey(String key, String value) {
-        return createGroupingAttribute(key, new LcNoDiacriticsType(value));
+        return createGroupingAttribute(key, new LcNoDiacriticsType(value), null);
+    }
+
+    private GroupingAttribute dateKey(String key, String value, String overridingValue) {
+        return createGroupingAttribute(key, new DateType(value), overridingValue);
     }
 
-    private GroupingAttribute createGroupingAttribute(String key, Type type) {
-        return new GroupingAttribute<>(type, new Key(key), true);
+    private GroupingAttribute createGroupingAttribute(String key, Type type, String comparingTypeValue) {
+        return new GroupingAttribute<>(type, new Key(key), true, comparingTypeValue);
     }
 
     private static class DocumentEntry {
@@ -1153,6 +1375,11 @@ public DocumentEntry withLcNoDiacritics(String value, ColumnVisibility visibilit
             return this;
         }
 
+        public DocumentEntry withDateType(String value) {
+            addTypedAttribute(new DateType(value), COLVIS_ALL);
+            return this;
+        }
+
         private void addTypedAttribute(Type type, ColumnVisibility visibility) {
             TypeAttribute attribute = new TypeAttribute<>(type, new Key("cf", "cq"), true);
             attribute.setColumnVisibility(visibility);
diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupFieldsTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupFieldsTest.java
index e6accddacc2..6519dfa26b0 100644
--- a/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupFieldsTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupFieldsTest.java
@@ -14,6 +14,8 @@
 import com.google.common.collect.Multimap;
 import com.google.common.collect.Sets;
 
+import datawave.query.attributes.TemporalGranularity;
+
 public class GroupFieldsTest {
 
     private static final ObjectMapper objectMapper = new ObjectMapper();
@@ -44,20 +46,26 @@ public void testEmptyGroupFieldsToString() {
     @Test
     public void testGroupFieldsToString() {
         GroupFields groupFields = new GroupFields();
-        groupFields.setGroupByFields(Sets.newHashSet("A", "1"));
+        Multimap<String,TemporalGranularity> map = HashMultimap.create();
+        map.put("A", TemporalGranularity.ALL);
+        map.put("1", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY);
+        groupFields.setGroupByFieldMap(map);
         groupFields.setSumFields(Sets.newHashSet("B", "2"));
         groupFields.setCountFields(Sets.newHashSet("C", "3"));
         groupFields.setAverageFields(Sets.newHashSet("D", "4"));
         groupFields.setMinFields(Sets.newHashSet("E", "5"));
         groupFields.setMaxFields(Sets.newHashSet("F", "6"));
 
-        assertThat(groupFields.toString()).isEqualTo("GROUP(A,1)|SUM(B,2)|COUNT(C,3)|AVERAGE(D,4)|MIN(E,5)|MAX(F,6)");
+        assertThat(groupFields.toString()).isEqualTo("GROUP(1[DAY],A[ALL])|SUM(B,2)|COUNT(C,3)|AVERAGE(D,4)|MIN(E,5)|MAX(F,6)");
     }
 
     @Test
     public void testRemappedGroupFieldsToString() {
         GroupFields groupFields = new GroupFields();
-        groupFields.setGroupByFields(Sets.newHashSet("AG", "GEN"));
+        Multimap<String,TemporalGranularity> map = HashMultimap.create();
+        map.put("AG", TemporalGranularity.ALL);
+        map.put("GEN", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY);
+        groupFields.setGroupByFieldMap(map);
         groupFields.setSumFields(Sets.newHashSet("AG"));
         groupFields.setCountFields(Sets.newHashSet("NOME"));
         groupFields.setAverageFields(Sets.newHashSet("AG"));
@@ -67,7 +75,7 @@ public void testRemappedGroupFieldsToString() {
         groupFields.remapFields(inverseReverseModel, reverseModel);
 
         assertThat(groupFields.toString()).isEqualTo(
-                        "GROUP(GEN,AG)|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)");
+                        "GROUP(AG[ALL],GEN[DAY])|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)");
     }
 
     @Test
@@ -88,7 +96,10 @@ public void testParsingFromWhitespace() {
     @Test
     public void testParsingGroupFieldsWithGroupByFieldsOnly() {
         GroupFields expected = new GroupFields();
-        expected.setGroupByFields(Sets.newHashSet("AGE", "GENDER"));
+        Multimap<String,TemporalGranularity> map = HashMultimap.create();
+        map.put("AGE", TemporalGranularity.ALL);
+        map.put("GENDER", TemporalGranularity.ALL);
+        expected.setGroupByFieldMap(map);
 
         GroupFields actual = GroupFields.from("GROUP(AGE,GENDER)");
 
@@ -98,7 +109,10 @@ public void testParsingGroupFieldsWithGroupByFieldsOnly() {
     @Test
     public void testParsingGroupFieldsWithSomeAggregationFields() {
         GroupFields expected = new GroupFields();
-        expected.setGroupByFields(Sets.newHashSet("AGE", "GENDER"));
+        Multimap<String,TemporalGranularity> map = HashMultimap.create();
+        map.put("AGE", TemporalGranularity.ALL);
+        map.put("GENDER", TemporalGranularity.ALL);
+        expected.setGroupByFieldMap(map);
         expected.setSumFields(Sets.newHashSet("AGE"));
         expected.setMaxFields(Sets.newHashSet("NAME"));
 
@@ -110,14 +124,17 @@ public void testParsingGroupFieldsWithSomeAggregationFields() {
     @Test
     public void testParsingGroupFieldsWithAllAggregationFields() {
         GroupFields expected = new GroupFields();
-        expected.setGroupByFields(Sets.newHashSet("AGE", "GENDER"));
+        Multimap<String,TemporalGranularity> map = HashMultimap.create();
+        map.put("AGE", TemporalGranularity.ALL);
+        map.put("GENDER", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY);
+        expected.setGroupByFieldMap(map);
         expected.setSumFields(Sets.newHashSet("BAT"));
         expected.setCountFields(Sets.newHashSet("FOO"));
         expected.setAverageFields(Sets.newHashSet("BAR"));
         expected.setMinFields(Sets.newHashSet("HAT"));
         expected.setMaxFields(Sets.newHashSet("BAH"));
 
-        GroupFields actual = GroupFields.from("GROUP(AGE,GENDER)|SUM(BAT)|COUNT(FOO)|AVERAGE(BAR)|MIN(HAT)|MAX(BAH)");
+        GroupFields actual = GroupFields.from("GROUP(AGE,GENDER[DAY])|SUM(BAT)|COUNT(FOO)|AVERAGE(BAR)|MIN(HAT)|MAX(BAH)");
 
         assertThat(actual).isEqualTo(expected);
     }
@@ -125,7 +142,9 @@ public void testParsingGroupFieldsWithAllAggregationFields() {
     @Test
     public void testParsingRemappedGroupFields() {
         GroupFields expected = new GroupFields();
-        expected.setGroupByFields(Sets.newHashSet("AG"));
+        Multimap<String,TemporalGranularity> map = HashMultimap.create();
+        map.put("AG", TemporalGranularity.ALL);
+        expected.setGroupByFieldMap(map);
         expected.setSumFields(Sets.newHashSet("AG"));
         expected.setCountFields(Sets.newHashSet("NOME"));
         expected.setAverageFields(Sets.newHashSet("BAR"));
@@ -141,7 +160,11 @@ public void testParsingRemappedGroupFields() {
     @Test
     public void testParsingLegacyFormat() {
         GroupFields expected = new GroupFields();
-        expected.setGroupByFields(Sets.newHashSet("AGE", "GENDER", "NAME"));
+        Multimap<String,TemporalGranularity> map = HashMultimap.create();
+        map.put("AGE", TemporalGranularity.ALL);
+        map.put("GENDER", TemporalGranularity.ALL);
+        map.put("NAME", TemporalGranularity.ALL);
+        expected.setGroupByFieldMap(map);
 
         GroupFields actual = GroupFields.from("AGE,GENDER,NAME");
 
@@ -151,7 +174,10 @@ public void testParsingLegacyFormat() {
     @Test
     public void testDeconstructIdentifiers() {
         GroupFields groupFields = new GroupFields();
-        groupFields.setGroupByFields(Sets.newHashSet("$AGE", "$GENDER"));
"$GENDER")); + Multimap map = HashMultimap.create(); + map.put("$AGE", TemporalGranularity.ALL); + map.put("$GENDER", TemporalGranularity.ALL); + groupFields.setGroupByFieldMap(map); groupFields.setSumFields(Sets.newHashSet("$AGE", "$GENDER")); groupFields.setCountFields(Sets.newHashSet("$AGE", "$GENDER")); groupFields.setAverageFields(Sets.newHashSet("$AGE", "$GENDER")); @@ -171,7 +197,10 @@ public void testDeconstructIdentifiers() { @Test public void testRemapFields() { GroupFields groupFields = new GroupFields(); - groupFields.setGroupByFields(Sets.newHashSet("AG", "GEN")); + Multimap map = HashMultimap.create(); + map.put("AG", TemporalGranularity.ALL); + map.put("GEN", TemporalGranularity.ALL); + groupFields.setGroupByFieldMap(map); groupFields.setSumFields(Sets.newHashSet("AG")); groupFields.setCountFields(Sets.newHashSet("NOME")); groupFields.setAverageFields(Sets.newHashSet("AG")); @@ -193,7 +222,10 @@ public void testRemapFields() { @Test public void testSerialization() throws JsonProcessingException { GroupFields groupFields = new GroupFields(); - groupFields.setGroupByFields(Sets.newHashSet("AG", "GEN")); + Multimap map = HashMultimap.create(); + map.put("AG", TemporalGranularity.ALL); + map.put("GEN", TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY); + groupFields.setGroupByFieldMap(map); groupFields.setSumFields(Sets.newHashSet("AG")); groupFields.setCountFields(Sets.newHashSet("NOME")); groupFields.setAverageFields(Sets.newHashSet("AG")); @@ -204,13 +236,16 @@ public void testSerialization() throws JsonProcessingException { String json = objectMapper.writeValueAsString(groupFields); assertThat(json).isEqualTo( - "\"GROUP(GEN,AG)|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)\""); + "\"GROUP(AG[ALL],GEN[DAY])|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)\""); } @Test public void testDeserialization() throws JsonProcessingException { GroupFields expected = new GroupFields(); - expected.setGroupByFields(Sets.newHashSet("AG", "GEN")); + Multimap map = HashMultimap.create(); + map.put("AG", TemporalGranularity.ALL); + map.put("GEN", TemporalGranularity.ALL); + expected.setGroupByFieldMap(map); expected.setSumFields(Sets.newHashSet("AG")); expected.setCountFields(Sets.newHashSet("NOME")); expected.setAverageFields(Sets.newHashSet("AG")); @@ -218,7 +253,7 @@ public void testDeserialization() throws JsonProcessingException { expected.setMaxFields(Sets.newHashSet("NOME")); expected.remapFields(inverseReverseModel, reverseModel); - String json = "\"GROUP(GEN,AG)|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)\""; + String json = "\"GROUP(GEN[ALL],AG[ALL])|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)\""; GroupFields actual = objectMapper.readValue(json, GroupFields.class); assertThat(actual).isEqualTo(expected); @@ -227,7 +262,10 @@ public void testDeserialization() throws JsonProcessingException { @Test public void testGetFieldAggregatorFactory() { GroupFields groupFields = new GroupFields(); - groupFields.setGroupByFields(Sets.newHashSet("AGE", "GENDER")); + Multimap map = HashMultimap.create(); + map.put("AGE", TemporalGranularity.ALL); + map.put("GENDER", TemporalGranularity.ALL); + groupFields.setGroupByFieldMap(map); groupFields.setSumFields(Sets.newHashSet("AGE")); groupFields.setCountFields(Sets.newHashSet("NAME")); 
 groupFields.setAverageFields(Sets.newHashSet("HEIGHT"));
diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupingAttributeTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupingAttributeTest.java
new file mode 100644
index 00000000000..d54e813af7b
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupingAttributeTest.java
@@ -0,0 +1,79 @@
+package datawave.query.common.grouping;
+
+import java.math.BigDecimal;
+
+import org.apache.accumulo.core.data.Key;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import datawave.data.type.LcNoDiacriticsListType;
+import datawave.data.type.NumberType;
+
+@SuppressWarnings({"unchecked", "rawtypes"})
+class GroupingAttributeTest {
+
+    @Test
+    void testEqualsWithSameRowAndValue() {
+        GroupingAttribute attr1 = new GroupingAttribute<>(new NumberType("123"), new Key("FOO.1"), true);
+        GroupingAttribute attr2 = new GroupingAttribute<>(new NumberType("123"), new Key("FOO.1"), true);
+
+        Assertions.assertEquals(attr1, attr2);
+    }
+
+    @Test
+    void testEqualsWithSameRowAndDifferentValue() {
+        GroupingAttribute attr1 = new GroupingAttribute<>(new NumberType("123"), new Key("FOO.1"), true);
+        GroupingAttribute attr2 = new GroupingAttribute<>(new NumberType("456"), new Key("FOO.1"), true);
+
+        Assertions.assertNotEquals(attr1, attr2);
+    }
+
+    @Test
+    void testEqualsWithDifferentRowAndSameValue() {
+        GroupingAttribute attr1 = new GroupingAttribute<>(new NumberType("123"), new Key("FOO.1"), true);
+        GroupingAttribute attr2 = new GroupingAttribute<>(new NumberType("123"), new Key("BAR.1"), true);
+
+        Assertions.assertNotEquals(attr1, attr2);
+    }
+
+    @Test
+    void testEqualsWithSameRowSameTypeSameOverrideValue() {
+        GroupingAttribute attr1 = new GroupingAttribute<>(new NumberType("12300000"), new Key("FOO.1"), true, "123");
+        GroupingAttribute attr2 = new GroupingAttribute<>(new NumberType("12355555"), new Key("FOO.1"), true, "123");
+
+        Assertions.assertEquals(attr1, attr2);
+    }
+
+    @Test
+    void testEqualsWithSameRowDifferentTypeSameOverrideValue() {
+        GroupingAttribute attr1 = new GroupingAttribute<>(new NumberType("12300000"), new Key("FOO.1"), true, "123");
+        GroupingAttribute attr2 = new GroupingAttribute<>(new LcNoDiacriticsListType("12355555"), new Key("FOO.1"), true, "123");
+
+        Assertions.assertNotEquals(attr1, attr2);
+    }
+
+    @Test
+    void testEqualsWithDifferentRowSameTypeSameOverrideValue() {
+        GroupingAttribute attr1 = new GroupingAttribute<>(new NumberType("12300000"), new Key("FOO.1"), true, "123");
+        GroupingAttribute attr2 = new GroupingAttribute<>(new NumberType("12355555"), new Key("BAR.1"), true, "123");
+
+        Assertions.assertNotEquals(attr1, attr2);
+    }
+
+    @Test
+    void testEqualsWithSameRowSameTypeDifferentOverrideValue() {
+        GroupingAttribute attr1 = new GroupingAttribute<>(new NumberType("12300000"), new Key("FOO.1"), true, "123");
+        GroupingAttribute attr2 = new GroupingAttribute<>(new NumberType("12355555"), new Key("FOO.1"), true, "12");
+
+        Assertions.assertNotEquals(attr1, attr2);
+    }
+
+    @Test
+    void testHashCodeIgnoresRowAndType() {
+        GroupingAttribute attr1 = new GroupingAttribute<>(new NumberType("123"), new Key("FOO.1"), true);
+        GroupingAttribute attr2 = new GroupingAttribute<>(new LcNoDiacriticsListType("123"), new Key("BAR.1"), true);
+
+        Assertions.assertEquals(attr1.hashCode(), attr2.hashCode());
+
+    }
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java
index 6a0e4bf1950..3087ed1d06f 100644
--- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java
@@ -50,8 +50,33 @@ public void testGroupByFunction() throws ParseException {
         assertResult("f:groupby()", "");
         assertOption(QueryParameters.GROUP_FIELDS, "");
 
+        // Verify that fields of no specified granularity are added with the default ALL granularity.
         assertResult("f:groupby('field1','field2','field3')", "");
-        assertOption(QueryParameters.GROUP_FIELDS, "field1,field2,field3");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL],FIELD2[ALL],FIELD3[ALL])");
+
+        // Verify that fields with DAY granularity are added as such.
+        assertResult("f:groupby('field1[DAY]','field2[DAY]','field3[DAY]')", "");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[DAY],FIELD2[DAY],FIELD3[DAY])");
+
+        // Verify that fields with HOUR granularity are added as such.
+        assertResult("f:groupby('field1[HOUR]','field2[HOUR]','field3[HOUR]')", "");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[HOUR],FIELD2[HOUR],FIELD3[HOUR])");
+
+        // Verify that fields with MINUTE granularity are added as such.
+        assertResult("f:groupby('field1[MINUTE]','field2[MINUTE]','field3[MINUTE]')", "");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[MINUTE],FIELD2[MINUTE],FIELD3[MINUTE])");
+
+        // Verify that fields from multiple groupby functions are merged together.
+        assertResult("f:groupby('field1','field2') AND f:groupby('field2[DAY]','field3[DAY]') AND f:groupby('field4')", "");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL],FIELD2[ALL,DAY],FIELD3[DAY],FIELD4[ALL])");
+
+        // Verify more complex fields with multiple granularity levels are merged together.
+        assertResult("f:groupby('field1[DAY]','field2[DAY,HOUR]','field3[HOUR,MINUTE]','field4[ALL,MINUTE]','field5')", "");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[DAY],FIELD2[DAY,HOUR],FIELD3[HOUR,MINUTE],FIELD4[ALL,MINUTE],FIELD5[ALL])");
+
+        // Lucene will parse comma-delimited granularity levels into separate strings. Ensure it still parses correctly.
+        assertResult("f:groupby('field1[DAY]','field2[DAY','HOUR]','field3[HOUR','MINUTE]','field4[ALL','MINUTE]','field5')", "");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[DAY],FIELD2[DAY,HOUR],FIELD3[HOUR,MINUTE],FIELD4[ALL,MINUTE],FIELD5[ALL])");
     }
 
     @Test
@@ -360,11 +385,131 @@ public void testNonFunctionNodesWithJunctions() throws ParseException {
         // Verify that AND nodes are cleaned up.
         assertResult("(FOO == 'bar' OR (BAR == 'foo' AND f:groupby('field1','field2')))", "(FOO == 'bar' OR (BAR == 'foo'))");
-        assertOption(QueryParameters.GROUP_FIELDS, "field1,field2");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL],FIELD2[ALL])");
 
         // Verify that OR nodes are cleaned up.
         assertResult("(FOO == 'bar' AND (BAR == 'foo' OR f:groupby('field1','field2')))", "(FOO == 'bar' AND (BAR == 'foo'))");
-        assertOption(QueryParameters.GROUP_FIELDS, "field1,field2");
+        assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL],FIELD2[ALL])");
+    }
+
+    @Test
+    public void testGroupByDay() throws ParseException {
+        // Verify an empty function results in an empty groupby parameter.
+ assertResult("f:groupby_day()", ""); + assertOption(QueryParameters.GROUP_FIELDS, ""); + + // Verify fields are added with the DAY granularity. + assertResult("f:groupby_day('field1','field2','field3')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[DAY],FIELD2[DAY],FIELD3[DAY])"); + + // Verify fields from multiple functions are merged. + assertResult("f:groupby('field1','field2[HOUR]') AND f:groupby_day('field1','field2','field3') AND f:groupby_day('field4')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL,DAY],FIELD2[DAY,HOUR],FIELD3[DAY],FIELD4[DAY])"); + } + + @Test + public void testGroupByHour() throws ParseException { + // Verify an empty function results in an empty groupby parameter. + assertResult("f:groupby_hour()", ""); + assertOption(QueryParameters.GROUP_FIELDS, ""); + + // Verify fields are added with the HOUR granularity. + assertResult("f:groupby_hour('field1','field2','field3')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[HOUR],FIELD2[HOUR],FIELD3[HOUR])"); + + // Verify fields from multiple functions are merged. + assertResult("f:groupby('field1','field2[DAY]') AND f:groupby_hour('field1','field2','field3') AND f:groupby_hour('field4')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL,HOUR],FIELD2[DAY,HOUR],FIELD3[HOUR],FIELD4[HOUR])"); + } + + @Test + public void testGroupByMonth() throws ParseException { + // Verify an empty function results in an empty groupby parameter. + assertResult("f:groupby_month()", ""); + assertOption(QueryParameters.GROUP_FIELDS, ""); + + // Verify fields are added with the HOUR granularity. + assertResult("f:groupby_month('field1','field2','field3')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[MONTH],FIELD2[MONTH],FIELD3[MONTH])"); + + // Verify fields from multiple functions are merged. + assertResult("f:groupby('field1','field2[DAY]') AND f:groupby_month('field1','field2','field3') AND f:groupby_month('field4')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL,MONTH],FIELD2[DAY,MONTH],FIELD3[MONTH],FIELD4[MONTH])"); + } + + @Test + public void testGroupBySecond() throws ParseException { + // Verify an empty function results in an empty groupby parameter. + assertResult("f:groupby_second()", ""); + assertOption(QueryParameters.GROUP_FIELDS, ""); + + // Verify fields are added with the HOUR granularity. + assertResult("f:groupby_second('field1','field2','field3')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[SECOND],FIELD2[SECOND],FIELD3[SECOND])"); + + // Verify fields from multiple functions are merged. + assertResult("f:groupby('field1','field2[DAY]') AND f:groupby_second('field1','field2','field3') AND f:groupby_second('field4')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL,SECOND],FIELD2[DAY,SECOND],FIELD3[SECOND],FIELD4[SECOND])"); + } + + @Test + public void testGroupByMillisecond() throws ParseException { + // Verify an empty function results in an empty groupby parameter. + assertResult("f:groupby_millisecond()", ""); + assertOption(QueryParameters.GROUP_FIELDS, ""); + + // Verify fields are added with the HOUR granularity. + assertResult("f:groupby_millisecond('field1','field2','field3')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[MILLISECOND],FIELD2[MILLISECOND],FIELD3[MILLISECOND])"); + + // Verify fields from multiple functions are merged. 
+ assertResult("f:groupby('field1','field2[DAY]') AND f:groupby_millisecond('field1','field2','field3') AND f:groupby_millisecond('field4')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL,MILLISECOND],FIELD2[DAY,MILLISECOND],FIELD3[MILLISECOND],FIELD4[MILLISECOND])"); + } + + @Test + public void testGroupByYear() throws ParseException { + // Verify an empty function results in an empty groupby parameter. + assertResult("f:groupby_year()", ""); + assertOption(QueryParameters.GROUP_FIELDS, ""); + + // Verify fields are added with the MINUTE granularity. + assertResult("f:groupby_year('field1','field2','field3')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[YEAR],FIELD2[YEAR],FIELD3[YEAR])"); + + // Verify fields from multiple functions are merged. + assertResult("f:groupby('field1','field2[DAY]') AND f:groupby_year('field1','field2','field3') AND f:groupby_year('field4')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL,YEAR],FIELD2[DAY,YEAR],FIELD3[YEAR],FIELD4[YEAR])"); + } + + @Test + public void testGroupByMinute() throws ParseException { + // Verify an empty function results in an empty groupby parameter. + assertResult("f:groupby_minute()", ""); + assertOption(QueryParameters.GROUP_FIELDS, ""); + + // Verify fields are added with the MINUTE granularity. + assertResult("f:groupby_minute('field1','field2','field3')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[MINUTE],FIELD2[MINUTE],FIELD3[MINUTE])"); + + // Verify fields from multiple functions are merged. + assertResult("f:groupby('field1','field2[DAY]') AND f:groupby_minute('field1','field2','field3') AND f:groupby_minute('field4')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL,MINUTE],FIELD2[DAY,MINUTE],FIELD3[MINUTE],FIELD4[MINUTE])"); + } + + @Test + public void testGroupByTenth() throws ParseException { + // Verify an empty function results in an empty groupby parameter. + assertResult("f:groupby_tenth_of_hour()", ""); + assertOption(QueryParameters.GROUP_FIELDS, ""); + + // Verify fields are added with the MINUTE granularity. + assertResult("f:groupby_tenth_of_hour('field1','field2','field3')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[TENTH_OF_HOUR],FIELD2[TENTH_OF_HOUR],FIELD3[TENTH_OF_HOUR])"); + + // Verify fields from multiple functions are merged. + assertResult("f:groupby('field1','field2[DAY]') AND f:groupby_tenth_of_hour('field1','field2','field3') AND f:groupby_tenth_of_hour('field4')", ""); + assertOption(QueryParameters.GROUP_FIELDS, "GROUP(FIELD1[ALL,TENTH_OF_HOUR],FIELD2[DAY,TENTH_OF_HOUR],FIELD3[TENTH_OF_HOUR],FIELD4[TENTH_OF_HOUR])"); } private void assertOption(String option, String value) { diff --git a/warehouse/query-core/src/test/java/datawave/query/language/functions/jexl/GroupByTest.java b/warehouse/query-core/src/test/java/datawave/query/language/functions/jexl/GroupByTest.java new file mode 100644 index 00000000000..437c790b676 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/language/functions/jexl/GroupByTest.java @@ -0,0 +1,68 @@ +package datawave.query.language.functions.jexl; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; + +import com.google.common.collect.Lists; + +class GroupByTest { + + /** + * Verify that {@link GroupBy#validate()} throws no error for the query {@code #GROUPBY(field1,field2,field3)}. 
+     */
+    @Test
+    public void testValidateWithParameters() {
+        GroupBy groupBy = new GroupBy();
+        groupBy.setParameterList(Lists.newArrayList("field1", "field2", "field3"));
+        groupBy.validate();
+    }
+
+    /**
+     * Verify that {@link GroupBy#validate()} throws an error for the query {@code #GROUPBY()}.
+     */
+    @Test
+    public void testValidateWithNoParameters() {
+        GroupBy groupBy = new GroupBy();
+        Exception exception = assertThrows(IllegalArgumentException.class, groupBy::validate);
+        assertEquals("datawave.webservice.query.exception.BadRequestQueryException: Invalid arguments to function. groupby requires at least one argument",
+                        exception.getMessage());
+    }
+
+    /**
+     * Verify that {@link GroupBy#validate()} throws no error for the query {@code #GROUPBY(field1[DAY],field2[HOUR,MINUTE],field3[ALL,DAY])}.
+     */
+    @Test
+    public void testValidateWithComplexParameters() {
+        GroupBy groupBy = new GroupBy();
+        groupBy.setParameterList(Lists.newArrayList("field1[DAY]", "field2[HOUR,MINUTE]", "field3[ALL,DAY]"));
+        groupBy.validate();
+    }
+
+    /**
+     * Verify that {@link GroupBy#validate()} throws an error for the query {@code #GROUPBY(field1[BAD_TRANSFORMER],field2[HOUR,MINUTE],field3[ALL,DAY])}.
+     */
+    @Test
+    public void testValidateWithInvalidTransformer() {
+        GroupBy groupBy = new GroupBy();
+        groupBy.setParameterList(Lists.newArrayList("field1[BAD_TRANSFORMER]", "field2[HOUR,MINUTE]", "field3[ALL,DAY]"));
+        Exception exception = assertThrows(IllegalArgumentException.class, groupBy::validate);
+        assertEquals("datawave.webservice.query.exception.BadRequestQueryException: Invalid arguments to function. Unable to parse fields from arguments for function groupby",
+                        exception.getMessage());
+    }
+
+    @Test
+    public void testToStringWithNoParameters() {
+        GroupBy groupBy = new GroupBy();
+        assertEquals("f:groupby()", groupBy.toString());
+    }
+
+    @Test
+    public void testToStringWithParameters() {
+        GroupBy groupBy = new GroupBy();
+        groupBy.setParameterList(Lists.newArrayList("field1", "field2[HOUR]", "field3[DAY]"));
+        assertEquals("f:groupby('field1','field2[HOUR]','field3[DAY]')", groupBy.toString());
+    }
+
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTest.java
index 866d2dfabad..c6aeea573b5 100644
--- a/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTest.java
@@ -238,7 +238,7 @@ public Aggregate withMax(String max) {
     }
 
     private static final String COUNT_FIELD = "COUNT";
-    private static final Set<String> FIELDS_OF_INTEREST = ImmutableSet.of("GENDER", "GEN", "BIRTHDAY", "AGE", "AG", "RECORD");
+    private static final Set<String> FIELDS_OF_INTEREST = ImmutableSet.of("GENDER", "GEN", "BIRTHDAY", "AGE", "AG", "RECORD", "BIRTH_DATE");
     private static final Logger log = Logger.getLogger(GroupingTest.class);
    private static final String COLVIS_MARKING = "columnVisibility";
     private static final String REDUCED_COLVIS = "ALL&E&I";
@@ -1088,4 +1088,73 @@ public void testFilteringOutDuplicateDatumAfterModelMapping() throws Exception {
         // Verify the results.
         assertGroups();
     }
+
+    /**
+     * Verify that when a field is truncated to the year while grouping via a Lucene function, the correct grouping is performed.
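+     * <p>
+     * The Lucene form exercised here is {@code #GROUPBY('GENDER','BIRTH_DATE[YEAR]')}; the bracketed granularity is applied to the
+     * matched BIRTH_DATE values before groups are formed.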
+     */
+    @Test
+    public void testGroupingWhileTruncatingToYearViaLucene() throws Exception {
+        givenNonModelData();
+
+        givenQuery("(UUID:CORLEONE) and #GROUPBY('GENDER','BIRTH_DATE[YEAR]')");
+
+        givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6");
+
+        givenLuceneParserForLogic();
+
+        expectGroup(Group.of("Tue Dec 01 00:00:05 GMT 1925", "FEMALE").withCount(1));
+        expectGroup(Group.of("Sat Dec 12 00:00:05 GMT 1925", "MALE").withCount(1));
+        expectGroup(Group.of("Thu Dec 01 00:00:05 GMT 1910", "MALE").withCount(3));
+
+        // Run the test queries and collect their results.
+        collectQueryResults();
+
+        // Verify the results.
+        assertGroups();
+    }
+
+    /**
+     * Verify that when a field is truncated to the year while grouping via a Jexl function, the correct grouping is performed.
+     */
+    @Test
+    public void testGroupingWhileTruncatingToYearViaJexl() throws Exception {
+        givenNonModelData();
+
+        givenQuery("UUID =~ 'CORLEONE' && f:groupby('GENDER','BIRTH_DATE[YEAR]')");
+
+        givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6");
+
+        expectGroup(Group.of("Tue Dec 01 00:00:05 GMT 1925", "FEMALE").withCount(1));
+        expectGroup(Group.of("Sat Dec 12 00:00:05 GMT 1925", "MALE").withCount(1));
+        expectGroup(Group.of("Thu Dec 01 00:00:05 GMT 1910", "MALE").withCount(3));
+
+        // Run the test queries and collect their results.
+        collectQueryResults();
+
+        // Verify the results.
+        assertGroups();
+    }
+
+    /**
+     * Verify that when a field is truncated to the year while grouping via a query parameter, the correct grouping is performed.
+     */
+    @Test
+    public void testGroupingWhileTruncatingToYearViaQueryParameter() throws Exception {
+        givenNonModelData();
+
+        givenQuery("UUID =~ 'CORLEONE'");
+
+        givenQueryParameter(QueryParameters.GROUP_FIELDS, "GENDER,BIRTH_DATE[YEAR]");
+        givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6");
+
+        expectGroup(Group.of("Tue Dec 01 00:00:05 GMT 1925", "FEMALE").withCount(1));
+        expectGroup(Group.of("Sat Dec 12 00:00:05 GMT 1925", "MALE").withCount(1));
+        expectGroup(Group.of("Thu Dec 01 00:00:05 GMT 1910", "MALE").withCount(3));
+
+        // Run the test queries and collect their results.
+        collectQueryResults();
+
+        // Verify the results.
+        assertGroups();
+    }
 }
diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformMostRecentTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformMostRecentTest.java
index f309f4a2b9c..6f7913bc165 100644
--- a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformMostRecentTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformMostRecentTest.java
@@ -12,7 +12,7 @@
 import org.junit.rules.TemporaryFolder;
 
 import datawave.microservice.query.QueryImpl;
-import datawave.query.attributes.UniqueGranularity;
+import datawave.query.attributes.TemporalGranularity;
 import datawave.query.iterator.ivarator.IvaratorCacheDirConfig;
 import datawave.query.tables.ShardQueryLogic;
 import datawave.query.util.sortedset.FileSortedSet;
@@ -78,7 +78,7 @@ public void testMostRecentUniqueness() {
         givenInputDocument(2).withKeyValue("attr2", randomValues.get(0)).isExpectedToBeUnique();
         givenInputDocument(3).withKeyValue("attr2", randomValues.get(4)).isExpectedToBeUnique();
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, "attr0", "Attr1", "ATTR2");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "attr0", "Attr1", "ATTR2");
 
         assertUniqueDocuments();
     }
diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java
index 60749416648..1b5db7d4098 100644
--- a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java
@@ -41,15 +41,14 @@
 import com.google.common.collect.TreeMultimap;
 import com.google.common.primitives.Longs;
 
-import datawave.ingest.time.Now;
 import datawave.query.attributes.Attribute;
 import datawave.query.attributes.Attributes;
 import datawave.query.attributes.DiacriticContent;
 import datawave.query.attributes.Document;
 import datawave.query.attributes.DocumentKey;
+import datawave.query.attributes.TemporalGranularity;
 import datawave.query.attributes.TimingMetadata;
 import datawave.query.attributes.UniqueFields;
-import datawave.query.attributes.UniqueGranularity;
 import datawave.query.function.LogTiming;
 import datawave.query.iterator.profile.FinalDocumentTrackingIterator;
 import datawave.query.jexl.JexlASTHelper;
@@ -84,7 +83,7 @@ public void tearDown() throws Exception {
 
     @Test
     public void testTransformingNullReturnsNull() {
-        givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "Attr0");
 
         UniqueTransform uniqueTransform = getUniqueTransform();
 
@@ -109,7 +108,7 @@ public void testUniquenessWithRandomDocuments() {
             expectedUniqueDocuments = countUniqueness(inputDocuments, fields);
         }
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, fields.toArray(new String[0]));
+        givenValueTransformerForFields(TemporalGranularity.ALL, fields.toArray(new String[0]));
 
         List<Document> uniqueDocuments = getUniqueDocuments(inputDocuments);
         assertEquals(expectedUniqueDocuments, uniqueDocuments.size());
@@ -168,7 +167,7 @@ public void testUniquenessForCaseInsensitivity() {
         givenInputDocument().withKeyValue("ATTR2", randomValues.get(0)).isExpectedToBeUnique();
         givenInputDocument().withKeyValue("ATTR2", randomValues.get(4));
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, "attr0", "Attr1", "ATTR2");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "attr0", "Attr1", "ATTR2");
"Attr1", "ATTR2"); assertUniqueDocuments(); } @@ -184,7 +183,7 @@ public void testUniquenessWithValueTransformer_DAY() { givenInputDocument().withKeyValue("ATTR0", "2001-03-12 05:04:20").isExpectedToBeUnique(); givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); - givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY, "Attr0"); + givenValueTransformerForFields(TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY, "Attr0"); assertUniqueDocuments(); } @@ -200,7 +199,7 @@ public void testUniquenessWithValueTransformer_HOUR() { givenInputDocument().withKeyValue("ATTR0", "2001-03-10 05:04:30"); givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); - givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR, "Attr0"); + givenValueTransformerForFields(TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR, "Attr0"); assertUniqueDocuments(); } @@ -216,7 +215,7 @@ public void testUniquenessWithValueTransformer_MINUTE() { givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:04:15"); givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); - givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, "Attr0"); + givenValueTransformerForFields(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, "Attr0"); assertUniqueDocuments(); } @@ -240,9 +239,9 @@ public void testUniquenessWithMixedValueTransformersForDifferentFields() { givenInputDocument().withKeyValue("ATTR2", "2001-03-10 10:04:20").isExpectedToBeUnique(); givenInputDocument().withKeyValue("ATTR2", "2001-03-10 10:04:15"); - givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY, "Attr0"); - givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR, "Attr1"); - givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, "Attr2"); + givenValueTransformerForFields(TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY, "Attr0"); + givenValueTransformerForFields(TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR, "Attr1"); + givenValueTransformerForFields(TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, "Attr2"); assertUniqueDocuments(); } @@ -260,7 +259,7 @@ public void testThatValueTransformer_ALL_Supersedes_MINUTE() { givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:04"); givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); - givenValueTransformersForField("Attr0", UniqueGranularity.ALL, UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); + givenValueTransformersForField("Attr0", TemporalGranularity.ALL, TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); assertUniqueDocuments(); } @@ -278,7 +277,7 @@ public void testThatValueTransformer_MINUTE_Supersedes_HOUR() { givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:04:20"); givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); - givenValueTransformersForField("Attr0", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); + givenValueTransformersForField("Attr0", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR); assertUniqueDocuments(); } @@ -296,7 +295,7 @@ public void testThatValueTransformer_HOUR_Supersedes_DAY() { givenInputDocument().withKeyValue("ATTR0", "2001-03-10 13:20:15"); givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); - givenValueTransformersForField("Attr0", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR, 
@@ -296,7 +295,7 @@ public void testThatValueTransformer_HOUR_Supersedes_DAY() {
         givenInputDocument().withKeyValue("ATTR0", "2001-03-10 13:20:15");
         givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique();
 
-        givenValueTransformersForField("Attr0", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR, UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY);
+        givenValueTransformersForField("Attr0", TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR, TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY);
 
         assertUniqueDocuments();
     }
@@ -315,7 +314,7 @@ public void testUniquenessWithTimingMetric() {
         givenInputDocument().withKeyValue("ATTR1", randomValues.get(1)).isExpectedToBeUnique();
         givenInputDocument().withKeyValue("ATTR1", randomValues.get(2));
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "Attr0");
 
         assertUniqueDocuments();
     }
@@ -345,7 +344,7 @@ public void testUniquenessWithTwoGroups() {
                         .withKeyValue("ATTR1", randomValues.get(3)).build();
         // @formatter:on
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "Attr0", "Attr1");
 
         assertOrderedFieldValues();
     }
@@ -378,7 +377,7 @@ public void testUniquenessWithTwoGroupsAndUngrouped() {
                         .withKeyValue("ATTR3", randomValues.get(4)).build();
         // @formatter:on
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1", "Attr3");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "Attr0", "Attr1", "Attr3");
 
         assertOrderedFieldValues();
     }
@@ -411,7 +410,7 @@ public void testUniquenessWithTwoGroupsAndSeparateGroup() {
                         .withKeyValue("ATTR3", randomValues.get(4)).build();
         // @formatter:on
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1", "Attr3");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "Attr0", "Attr1", "Attr3");
 
         assertOrderedFieldValues();
     }
@@ -447,7 +446,7 @@ public void testUniquenessWithTwoGroupsAndSeparateGroups() {
                         .withKeyValue("ATTR3", randomValues.get(0)).build();
         // @formatter:on
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1", "Attr3");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "Attr0", "Attr1", "Attr3");
 
         assertOrderedFieldValues();
     }
@@ -479,15 +478,15 @@ public void testUniquenessWithTwoGroupsAndPartialGroups() {
                         .withKeyValue("ATTR3", randomValues.get(0)).build();
         // @formatter:on
 
-        givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1", "Attr3");
+        givenValueTransformerForFields(TemporalGranularity.ALL, "Attr0", "Attr1", "Attr3");
 
         assertOrderedFieldValues();
     }
 
     @Test
     public void testFinalDocIgnored() {
-        SortedSetMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
-        fieldMap.put("FIELD", UniqueGranularity.ALL);
+        SortedSetMultimap<String,TemporalGranularity> fieldMap = TreeMultimap.create();
+        fieldMap.put("FIELD", TemporalGranularity.ALL);
         UniqueFields fields = new UniqueFields(fieldMap);
         UniqueTransform transform = new UniqueTransform(fields, 10000000L);
         Key key = new Key("shard", "dt\u0000uid", FinalDocumentTrackingIterator.MARKER_TEXT.toString());
@@ -500,8 +499,8 @@
 
     @Test
     public void testIntermediateIgnored() {
-        SortedSetMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
-        fieldMap.put("FIELD", UniqueGranularity.ALL);
+        SortedSetMultimap<String,TemporalGranularity> fieldMap = TreeMultimap.create();
+        fieldMap.put("FIELD", TemporalGranularity.ALL);
         UniqueFields fields = new UniqueFields(fieldMap);
         UniqueTransform transform = new UniqueTransform(fields, 10000000L);
         Key key = new Key("shard", "dt\u0000uid");
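For orientation, a minimal sketch of wiring a UniqueTransform by hand, using only the constructors and calls exercised by testFinalDocIgnored() above (10000000L is the buffer-size argument these tests pass); how the transform is then applied to entries is left to the tests themselves.

    import com.google.common.collect.SortedSetMultimap;
    import com.google.common.collect.TreeMultimap;

    import datawave.query.attributes.TemporalGranularity;
    import datawave.query.attributes.UniqueFields;
    import datawave.query.transformer.UniqueTransform;

    public class UniqueTransformSketch {
        public static void main(String[] args) {
            // A field may carry several granularities; TreeMultimap keeps them sorted per field.
            SortedSetMultimap<String,TemporalGranularity> fieldMap = TreeMultimap.create();
            fieldMap.put("FIELD", TemporalGranularity.ALL);
            fieldMap.put("FIELD", TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE);

            UniqueFields fields = new UniqueFields(fieldMap);
            UniqueTransform transform = new UniqueTransform(fields, 10000000L);
            // transform is now ready to be applied to Key/Document entries,
            // as in testFinalDocIgnored() and testIntermediateIgnored() above.
        }
    }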
@@ -565,11 +564,11 @@ protected void assertOrderedFieldValues() {
         }
     }
 
-    protected void givenValueTransformerForFields(UniqueGranularity transformer, String... fields) {
+    protected void givenValueTransformerForFields(TemporalGranularity transformer, String... fields) {
         Arrays.stream(fields).forEach((field) -> uniqueFields.put(field, transformer));
     }
 
-    protected void givenValueTransformersForField(String field, UniqueGranularity... transformers) {
+    protected void givenValueTransformersForField(String field, TemporalGranularity... transformers) {
         Arrays.stream(transformers).forEach((transformer) -> uniqueFields.put(field, transformer));
     }
 
diff --git a/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngest.java b/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngest.java
index 8c12c2f3a9f..68eab7cd5df 100644
--- a/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngest.java
@@ -14,6 +14,7 @@
 
 import datawave.data.ColumnFamilyConstants;
 import datawave.data.hash.UID;
+import datawave.data.type.DateType;
 import datawave.data.type.LcNoDiacriticsType;
 import datawave.data.type.NumberType;
 import datawave.data.type.Type;
@@ -29,6 +30,7 @@ public enum WhatKindaRange {
 
     private static final Type lcNoDiacriticsType = new LcNoDiacriticsType();
     private static final Type numberType = new NumberType();
+    private static final Type dateType = new DateType();
 
     protected static final String datatype = "test";
     protected static final String date = "20130101";
@@ -82,7 +84,17 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throws
         mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.3" + "\u0000" + "4", columnVisibilityItalian, timeStamp, emptyValue);
         mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.4" + "\u0000" + "5", columnVisibilityItalian, timeStamp, emptyValue);
         mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.5" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue);
-        mutation.put(datatype + "\u0000" + corleoneUID, "UUID.FOO.0" + "\u0000" + "CORLEONE", columnVisibilityItalian, timeStamp, emptyValue);
+        mutation.put(datatype + "\u0000" + corleoneUID, "BIRTH_DATE.FOO.0" + "\u0000" + "1910-12-01T00:00:05.000Z", columnVisibility, timeStamp,
+                        emptyValue);
+        mutation.put(datatype + "\u0000" + corleoneUID, "BIRTH_DATE.FOO.1" + "\u0000" + "1910-12-12T00:00:05.000Z", columnVisibility, timeStamp,
+                        emptyValue);
+        mutation.put(datatype + "\u0000" + corleoneUID, "BIRTH_DATE.FOO.2" + "\u0000" + "1910-12-15T00:00:05.000Z", columnVisibility, timeStamp,
+                        emptyValue);
+        mutation.put(datatype + "\u0000" + corleoneUID, "BIRTH_DATE.FOO.3" + "\u0000" + "1925-12-01T00:00:05.000Z", columnVisibility, timeStamp,
+                        emptyValue);
+        mutation.put(datatype + "\u0000" + corleoneUID, "BIRTH_DATE.FOO.4" + "\u0000" + "1925-12-12T00:00:05.000Z", columnVisibility, timeStamp,
+                        emptyValue);
+        mutation.put(datatype + "\u0000" + corleoneUID, "UUID.FOO.FOO.0" + "\u0000" + "CORLEONE", columnVisibilityItalian, timeStamp, emptyValue);
         mutation.put(datatype + "\u0000" + corleoneUID, "GROUP" + "\u0000" + "MAFIA", columnVisibilityItalian, timeStamp, emptyValue);
         mutation.put(datatype + "\u0000" + corleoneUID, "RECORD" + "\u0000" + "1", columnVisibilityItalian, timeStamp, emptyValue);
         mutation.put(datatype + "\u0000" + corleoneUID, "RECORD" + "\u0000" + "2", columnVisibilityItalian, timeStamp, emptyValue);
@@ -402,6 +414,14 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throws
         mutation.put(ColumnFamilyConstants.COLF_T, new Text(datatype + "\u0000" + numberType.getClass().getName()), emptyValue);
         bw.addMutation(mutation);
 
+        mutation = new Mutation("BIRTH_DATE");
Mutation("BIRTH_DATE"); + mutation.put(ColumnFamilyConstants.COLF_E, new Text(datatype), emptyValue); + mutation.put(ColumnFamilyConstants.COLF_F, new Text(datatype + "\u0000" + date), new Value(SummingCombiner.VAR_LEN_ENCODER.encode(12L))); + mutation.put(ColumnFamilyConstants.COLF_I, new Text(datatype), emptyValue); + mutation.put(ColumnFamilyConstants.COLF_RI, new Text(datatype), emptyValue); + mutation.put(ColumnFamilyConstants.COLF_T, new Text(datatype + "\u0000" + dateType.getClass().getName()), emptyValue); + bw.addMutation(mutation); + mutation = new Mutation("GROUP"); mutation.put(ColumnFamilyConstants.COLF_E, new Text(datatype), emptyValue); mutation.put(ColumnFamilyConstants.COLF_F, new Text(datatype + "\u0000" + date), new Value(SummingCombiner.VAR_LEN_ENCODER.encode(3L))); diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml index e8b9e2bbb84..82ff4638d0c 100644 --- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml +++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml @@ -29,6 +29,14 @@ + + + + + + + + diff --git a/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml b/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml index ad53f615676..626aeef659d 100644 --- a/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml +++ b/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml @@ -39,6 +39,14 @@ + + + + + + + +