Use of datawave.data.type.DiscreteIndexType in the datawave project by NationalSecurityAgency.
From the class CompositeSeeker: the method nextLowerBound.
/**
 * Builds the next composite lower bound for a seek, component by component.
 * <p>
 * Walks the composite's components from least-significant (last) to most-significant (first),
 * comparing each current value against its per-component start/end range. A component whose
 * field has no DiscreteIndexType is variable-length and can only be snapped back to its start;
 * a component with a DiscreteIndexType is fixed-length and can additionally be incremented to
 * absorb a carry-over from a less-significant component that ran past its end value.
 *
 * @param fields the component field names of the composite
 * @param values the current component values (may be shorter than fields)
 * @param separator the string used to join components into the returned bound
 * @param startValues per-component range start values
 * @param isStartInclusive whether the last start component is inclusive
 * @param endValues per-component range end values
 * @param isEndInclusive whether the last end component is inclusive
 * @return the separator-joined lower bound, truncated at the first unset component
 */
String nextLowerBound(List<String> fields, List<String> values, String separator, List<String> startValues, boolean isStartInclusive, List<String> endValues, boolean isEndInclusive) {
String[] newValues = new String[fields.size()];
// set when a less-significant component exceeded its end value; the next more-significant
// fixed-length component must then be incremented to compensate
boolean carryOver = false;
// iterate from the least-significant component to the most-significant one
for (int i = fields.size() - 1; i >= 0; i--) {
DiscreteIndexType discreteIndexType = fieldToDiscreteIndexType.get(fields.get(i));
String value = (i < values.size()) ? values.get(i) : null;
String start = (i < startValues.size()) ? startValues.get(i) : null;
String end = (i < endValues.size()) ? endValues.get(i) : null;
// only the final start/end component carries the caller's inclusivity; earlier components are always inclusive
boolean isStartValueInclusive = (i != startValues.size() - 1) || isStartInclusive;
boolean isEndValueInclusive = (i != endValues.size() - 1) || isEndInclusive;
// if start and end are equal, and one side is exclusive while the other is inclusive, just mark both as inclusive for our purposes
if (start != null && end != null && isStartValueInclusive != isEndValueInclusive && start.equals(end)) {
isStartValueInclusive = true;
isEndValueInclusive = true;
}
// NOTE(review): when value == null, newValues[i] stays null, so the builder loop at the
// bottom truncates the returned bound at this component.
if (value != null) {
// if it's not fixed length, check to see if we are in range
if (discreteIndexType == null) {
// value precedes start value. need to seek forward.
if (start != null && !isStartValid(value, start, isStartValueInclusive)) {
newValues[i] = start;
// subsequent values set to start
for (int j = i + 1; j < startValues.size(); j++) newValues[j] = startValues.get(j);
} else // value exceeds end value. need to seek forward, and carry over.
if (end != null && !isEndValid(value, end, isEndValueInclusive)) {
newValues[i] = start;
carryOver = true;
// subsequent values set to start
for (int j = i + 1; j < startValues.size(); j++) newValues[j] = startValues.get(j);
} else // value is in range.
{
newValues[i] = values.get(i);
}
} else // if it's fixed length, determine whether or not we need to increment
{
// carry over means we need to increase our value
if (carryOver) {
// value precedes start value. just seek forward and ignore previous carry over.
if (start != null && !isStartValid(value, start, isStartValueInclusive)) {
newValues[i] = start;
carryOver = false;
// subsequent values set to start
for (int j = i + 1; j < startValues.size(); j++) newValues[j] = startValues.get(j);
} else // value is at or exceeds end value. need to seek forward, and maintain carry over.
// (end is deliberately treated as exclusive here: incrementing a value already at end would overshoot)
if (end != null && !isEndValid(value, end, false)) {
newValues[i] = start;
carryOver = true;
// subsequent values set to start
for (int j = i + 1; j < startValues.size(); j++) newValues[j] = startValues.get(j);
} else // value is in range. just increment, and finish carry over
{
newValues[i] = discreteIndexType.incrementIndex(values.get(i));
carryOver = false;
}
} else {
// value precedes start value. need to seek forward.
if (start != null && !isStartValid(value, start, isStartValueInclusive)) {
newValues[i] = start;
// subsequent values set to start
for (int j = i + 1; j < startValues.size(); j++) newValues[j] = startValues.get(j);
} else // value exceeds end value. need to seek forward, and carry over.
if (end != null && !isEndValid(value, end, isEndValueInclusive)) {
newValues[i] = start;
carryOver = true;
// subsequent values set to start
for (int j = i + 1; j < startValues.size(); j++) newValues[j] = startValues.get(j);
} else // value is in range.
{
newValues[i] = values.get(i);
}
}
}
}
}
// join the computed components with the separator, stopping at the first null component
StringBuilder builder = new StringBuilder();
for (int i = 0; i < newValues.length; i++) {
if (newValues[i] != null)
if (i > 0)
builder.append(separator).append(newValues[i]);
else
builder.append(newValues[i]);
else
break;
}
return builder.toString();
}
Use of datawave.data.type.DiscreteIndexType in the datawave project by NationalSecurityAgency.
From the class CompositeSeekingIterator: the method init.
/**
 * Initializes this iterator from its configured options: the composite's component fields,
 * any per-field DiscreteIndexType implementations, the component separator, and the seek
 * threshold. Also constructs the ShardIndexCompositeSeeker used during iteration.
 *
 * @param source the underlying source iterator
 * @param options iterator options keyed by COMPONENT_FIELDS, SEPARATOR, SEEK_THRESHOLD,
 *            and per-field DISCRETE_INDEX_TYPE entries
 * @param env the iterator environment
 * @throws IOException if the underlying source fails to initialize
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    super.init(source, options, env);

    // comma-delimited list of the component fields that make up the composite
    final String compFields = options.get(COMPONENT_FIELDS);
    if (compFields != null)
        this.fieldNames = Arrays.asList(compFields.split(","));

    // resolve the optional DiscreteIndexType implementation configured for each field
    for (String fieldName : fieldNames) {
        DiscreteIndexType type = null;
        String typeClass = options.get(fieldName + DISCRETE_INDEX_TYPE);
        if (typeClass != null) {
            try {
                // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(),
                // which rethrows checked constructor exceptions without wrapping them
                type = Class.forName(typeClass).asSubclass(DiscreteIndexType.class).getDeclaredConstructor().newInstance();
            } catch (Exception e) {
                // include the cause in the log instead of silently dropping it
                log.warn("Unable to create DiscreteIndexType for class name: " + typeClass, e);
            }
        }
        if (type != null)
            fieldToDiscreteIndexType.put(fieldName, type);
    }

    this.separator = options.get(SEPARATOR);
    // default seek threshold of 10 next() calls before issuing a seek
    this.seekThreshold = Integer.parseInt(options.getOrDefault(SEEK_THRESHOLD, "10"));
    compositeSeeker = new ShardIndexCompositeSeeker(fieldNames, separator, fieldToDiscreteIndexType);
}
Use of datawave.data.type.DiscreteIndexType in the datawave project by NationalSecurityAgency.
From the class ExpandCompositeTerms: the method createCompositeNode.
/**
 * Builds a jexl node (or conjunction of nodes) representing the given composite.
 *
 * @param composite
 *            the composite whose node classes and appended expressions are expanded into jexl nodes
 * @return the jexl node generated for the given composite
 */
private JexlNode createCompositeNode(Composite composite) {
    String compositeName = composite.getCompositeName();

    List<Class<? extends JexlNode>> nodeClasses = new ArrayList<>();
    List<String> expressions = new ArrayList<>();

    // pre-composite ("old") events must be included when the query begins before this composite's transition date
    boolean includeOldData = false;
    if (config.getCompositeTransitionDates().containsKey(compositeName)) {
        Date transitionDate = config.getCompositeTransitionDates().get(compositeName);
        includeOldData = config.getBeginDate().compareTo(transitionDate) < 0;
    }

    composite.getNodesAndExpressions(nodeClasses, expressions, config.getFieldToDiscreteIndexTypes(), includeOldData);

    // When a query against an overloaded composite field contains only the first component term, it is
    // effectively running against the base composite term, so its ranges must be widened to fully cover
    // both the composite and non-composite events.
    boolean expandRangeForBaseTerm = CompositeIngest.isOverloadedCompositeField(config.getCompositeToFieldMap(), compositeName)
                    && composite.getJexlNodeList().size() == 1;
    DiscreteIndexType<?> baseTermDiscreteIndexType = config.getFieldToDiscreteIndexTypes().get(composite.getFieldNameList().get(0));

    List<JexlNode> nodes = new ArrayList<>();
    for (int i = 0; i < nodeClasses.size(); i++) {
        Class<? extends JexlNode> nodeClass = nodeClasses.get(i);
        String expression = expressions.get(i);

        JexlNode node = null;
        if (nodeClass.equals(ASTGTNode.class)) {
            // when expanding for the base term, widen '>' to '>=' against the inclusive lower bound
            if (expandRangeForBaseTerm) {
                node = JexlNodeFactory.buildNode((ASTGENode) null, compositeName,
                                CompositeUtils.getInclusiveLowerBound(expression, baseTermDiscreteIndexType));
            } else {
                node = JexlNodeFactory.buildNode((ASTGTNode) null, compositeName, expression);
            }
        } else if (nodeClass.equals(ASTGENode.class)) {
            node = JexlNodeFactory.buildNode((ASTGENode) null, compositeName, expression);
        } else if (nodeClass.equals(ASTLTNode.class)) {
            node = JexlNodeFactory.buildNode((ASTLTNode) null, compositeName, expression);
        } else if (nodeClass.equals(ASTLENode.class)) {
            // when expanding for the base term, widen '<=' to '<' against the exclusive upper bound
            if (expandRangeForBaseTerm) {
                node = JexlNodeFactory.buildNode((ASTLTNode) null, compositeName,
                                CompositeUtils.getExclusiveUpperBound(expression, baseTermDiscreteIndexType));
            } else {
                node = JexlNodeFactory.buildNode((ASTLENode) null, compositeName, expression);
            }
        } else if (nodeClass.equals(ASTERNode.class)) {
            node = JexlNodeFactory.buildERNode(compositeName, expression);
        } else if (nodeClass.equals(ASTNENode.class)) {
            node = JexlNodeFactory.buildNode((ASTNENode) null, compositeName, expression);
        } else if (nodeClass.equals(ASTEQNode.class)) {
            if (expandRangeForBaseTerm) {
                // equality against the base term of an overloaded composite becomes the range [value, nextValue)
                JexlNode lowerBound = JexlNodeFactory.buildNode((ASTGENode) null, compositeName, expression);
                JexlNode upperBound = JexlNodeFactory.buildNode((ASTLTNode) null, compositeName,
                                CompositeUtils.getExclusiveUpperBound(expression, baseTermDiscreteIndexType));
                node = createUnwrappedAndNode(Arrays.asList(lowerBound, upperBound));
            } else {
                node = JexlNodeFactory.buildEQNode(compositeName, expression);
            }
        } else {
            log.error("Invalid or unknown node type for composite map.");
        }
        nodes.add(node);
    }

    JexlNode result;
    if (nodes.size() > 1) {
        result = createUnwrappedAndNode(nodes);
        // when the composite was built from multiple source terms, keep the originals as a delayed evaluation-only conjunction
        if (composite.getJexlNodeList().size() > 1) {
            JexlNode delayedNode = ASTEvaluationOnly.create(createUnwrappedAndNode(
                            composite.getJexlNodeList().stream().map(node -> JexlNodeFactory.wrap(copy(node))).collect(Collectors.toList())));
            result = createUnwrappedAndNode(Arrays.asList(JexlNodeFactory.wrap(result), delayedNode));
        }
    } else {
        result = nodes.get(0);
        if (composite.getJexlNodeList().size() > 1 && !(result instanceof ASTEQNode)) {
            JexlNode delayedNode = ASTEvaluationOnly.create(createUnwrappedAndNode(
                            composite.getJexlNodeList().stream().map(node -> JexlNodeFactory.wrap(copy(node))).collect(Collectors.toList())));
            result = createUnwrappedAndNode(Arrays.asList(result, delayedNode));
        }
    }

    // a non-overloaded composite introduces a brand-new indexed field; register it with the config
    if (!CompositeIngest.isOverloadedCompositeField(config.getCompositeToFieldMap(), compositeName)) {
        config.getIndexedFields().add(compositeName);
        config.getQueryFieldsDatatypes().put(compositeName, new NoOpType());
    }

    // save a mapping of generated composites to their component parts for later processing
    jexlNodeToCompMap.put(result, composite);
    return result;
}
Use of datawave.data.type.DiscreteIndexType in the datawave project by NationalSecurityAgency.
From the class ShardQueryConfigurationTest: the method testDeepCopyConstructor.
/**
 * Test that for a given set of collections, stored in a ShardQueryConfiguration, will in fact be deep-copied into a new ShardQueryConfiguration object.
 * <p>
 * Every collection set on the original configuration is mutated after the copy is taken; the assertions
 * then verify the copy still holds the original contents, proving a deep (not shallow) copy was made.
 */
@Test
public void testDeepCopyConstructor() {
    // Instantiate a 'other' ShardQueryConfiguration
    ShardQueryConfiguration other = ShardQueryConfiguration.create();
    // Setup collections for deep copy
    List<String> realmSuffixExclusionPatterns = Lists.newArrayList("somePattern");
    SimpleDateFormat shardDateFormatter = new SimpleDateFormat("yyyyMMdd");
    List<String> enricherClassNames = Lists.newArrayList("enricherClassNameA");
    List<String> filterClassNames = Lists.newArrayList("filterClassNameA");
    List<String> indexFilteringClassNames = Lists.newArrayList("indexFilteringClassNameA");
    Set<String> nonEventKeyPrefixes = Sets.newHashSet("nonEventKeyPrefixA");
    Set<String> unevaluatedFields = Sets.newHashSet("unevaluatedFieldA");
    Set<String> dataTypeFilter = Sets.newHashSet("dataTypeFilterA");
    IndexHole indexHole = new IndexHole(new String[] { "0", "1" }, new String[] { "2", "3" });
    List<IndexHole> indexHoles = Lists.newArrayList(indexHole);
    Set<String> projectFields = Sets.newHashSet("projectFieldA");
    Set<String> blacklistedFields = Sets.newHashSet("blacklistedFieldA");
    Set<String> indexedFields = Sets.newHashSet("indexedFieldA");
    Set<String> normalizedFields = Sets.newHashSet("normalizedFieldA");
    Multimap<String, Type<?>> dataTypes = HashMultimap.create();
    dataTypes.put("K001", new NoOpType("V"));
    Multimap<String, Type<?>> queryFieldsDatatypes = HashMultimap.create();
    queryFieldsDatatypes.put("K002", new NoOpType("V"));
    Multimap<String, Type<?>> normalizedFieldsDatatypes = HashMultimap.create();
    normalizedFieldsDatatypes.put("K003", new NoOpType("V"));
    Multimap<String, String> compositeToFieldMap = HashMultimap.create();
    compositeToFieldMap.put("K004", "V");
    Map<String, DiscreteIndexType<?>> fieldToDiscreteIndexType = Maps.newHashMap();
    fieldToDiscreteIndexType.put("GEO", new GeometryType());
    Map<String, Date> compositeTransitionDates = Maps.newHashMap();
    Date transitionDate = new Date();
    compositeTransitionDates.put("K005", transitionDate);
    Map<String, String> compositeFieldSeparators = Maps.newHashMap();
    compositeFieldSeparators.put("GEO", " ");
    Set<String> queryTermFrequencyFields = Sets.newHashSet("fieldA");
    Set<String> limitFields = Sets.newHashSet("limitFieldA");
    Map<String, String> hierarchyFieldOptions = Maps.newHashMap();
    hierarchyFieldOptions.put("K006", "V");
    List<String> documentPermutations = Lists.newArrayList(DocumentPermutation.class.getName());
    QueryModel queryModel = new QueryModel();
    QueryImpl query = new QueryImpl();
    Set<String> groupFields = Sets.newHashSet("groupFieldA");
    UniqueFields uniqueFields = new UniqueFields();
    uniqueFields.put("uniqueFieldA", UniqueGranularity.ALL);
    List<String> contentFieldNames = Lists.newArrayList("fieldA");
    Set<String> noExpansionFields = Sets.newHashSet("NoExpansionFieldA");
    Set<String> disallowedRegexPatterns = Sets.newHashSet(".*", ".*?");
    // Set collections on 'other' ShardQueryConfiguration
    other.setRealmSuffixExclusionPatterns(realmSuffixExclusionPatterns);
    other.setShardDateFormatter(shardDateFormatter);
    other.setEnricherClassNames(enricherClassNames);
    other.setFilterClassNames(filterClassNames);
    other.setIndexFilteringClassNames(indexFilteringClassNames);
    other.setNonEventKeyPrefixes(nonEventKeyPrefixes);
    other.setUnevaluatedFields(unevaluatedFields);
    other.setDatatypeFilter(dataTypeFilter);
    other.setIndexHoles(indexHoles);
    other.setProjectFields(projectFields);
    other.setBlacklistedFields(blacklistedFields);
    other.setIndexedFields(indexedFields);
    other.setNormalizedFields(normalizedFields);
    other.setDataTypes(dataTypes);
    other.setQueryFieldsDatatypes(queryFieldsDatatypes);
    other.setNormalizedFieldsDatatypes(normalizedFieldsDatatypes);
    other.setCompositeToFieldMap(compositeToFieldMap);
    other.setFieldToDiscreteIndexTypes(fieldToDiscreteIndexType);
    other.setCompositeTransitionDates(compositeTransitionDates);
    other.setCompositeFieldSeparators(compositeFieldSeparators);
    other.setQueryTermFrequencyFields(queryTermFrequencyFields);
    other.setLimitFields(limitFields);
    other.setHierarchyFieldOptions(hierarchyFieldOptions);
    other.setDocumentPermutations(documentPermutations);
    other.setQueryModel(queryModel);
    other.setQuery(query);
    other.setGroupFields(groupFields);
    other.setUniqueFields(uniqueFields);
    other.setContentFieldNames(contentFieldNames);
    other.setNoExpansionFields(noExpansionFields);
    other.setDisallowedRegexPatterns(disallowedRegexPatterns);
    // Copy 'other' ShardQueryConfiguration into a new config
    ShardQueryConfiguration config = ShardQueryConfiguration.create(other);
    // Modify original collections; the copy must be unaffected by every one of these
    realmSuffixExclusionPatterns.add("anotherPattern");
    shardDateFormatter = new SimpleDateFormat("yyyyMMdd-mm:SS");
    enricherClassNames.add("enricherClassNameB");
    filterClassNames.add("filterClassNameB");
    indexFilteringClassNames.add("indexFilteringClassNameB");
    nonEventKeyPrefixes.add("nonEventKeyPrefixB");
    unevaluatedFields.add("unevaluatedFieldB");
    dataTypeFilter.add("dataTypeFilterB");
    IndexHole otherIndexHole = new IndexHole(new String[] { "4", "5" }, new String[] { "6", "7" });
    indexHoles.add(otherIndexHole);
    projectFields.add("projectFieldB");
    blacklistedFields.add("blacklistedFieldB");
    indexedFields.add("indexedFieldB");
    normalizedFields.add("normalizedFieldB");
    dataTypes.put("K2", new NoOpType("V2"));
    queryFieldsDatatypes.put("K", new NoOpType("V2"));
    normalizedFieldsDatatypes.put("K2", new NoOpType("V2"));
    compositeToFieldMap.put("K2", "V2");
    // these four collections were previously never mutated, leaving the deep-copy property
    // untested for them; mutate them so the assertions below actually prove a deep copy
    fieldToDiscreteIndexType.put("GEO2", new GeometryType());
    compositeTransitionDates.put("K2", new Date());
    compositeFieldSeparators.put("GEO2", ",");
    noExpansionFields.add("NoExpansionFieldB");
    queryTermFrequencyFields.add("fieldB");
    limitFields.add("limitFieldB");
    hierarchyFieldOptions.put("K2", "V2");
    documentPermutations.add(DocumentProjection.class.getName());
    queryModel.addTermToModel("aliasA", "diskNameA");
    query.setId(UUID.randomUUID());
    groupFields.add("groupFieldB");
    uniqueFields.put("uniqueFieldB", UniqueGranularity.ALL);
    contentFieldNames.add("fieldB");
    disallowedRegexPatterns.add("blah");
    // Assert that copied collections were deep copied and remain unchanged
    Assert.assertEquals(Lists.newArrayList("somePattern"), config.getRealmSuffixExclusionPatterns());
    Assert.assertEquals(new SimpleDateFormat("yyyyMMdd"), config.getShardDateFormatter());
    Assert.assertEquals(Lists.newArrayList("enricherClassNameA"), config.getEnricherClassNames());
    Assert.assertEquals(Lists.newArrayList("filterClassNameA"), config.getFilterClassNames());
    Assert.assertEquals(Lists.newArrayList("indexFilteringClassNameA"), config.getIndexFilteringClassNames());
    Assert.assertEquals(Sets.newHashSet("nonEventKeyPrefixA"), config.getNonEventKeyPrefixes());
    Assert.assertEquals(Sets.newHashSet("unevaluatedFieldA"), config.getUnevaluatedFields());
    Assert.assertEquals(Sets.newHashSet("dataTypeFilterA"), config.getDatatypeFilter());
    IndexHole expectedIndexHole = new IndexHole(new String[] { "0", "1" }, new String[] { "2", "3" });
    Assert.assertEquals(Lists.newArrayList(expectedIndexHole), config.getIndexHoles());
    Assert.assertEquals(Sets.newHashSet("projectFieldA"), config.getProjectFields());
    Assert.assertEquals(Sets.newHashSet("blacklistedFieldA"), config.getBlacklistedFields());
    Assert.assertEquals(Sets.newHashSet("indexedFieldA"), config.getIndexedFields());
    // This assert is different from the setter as setNormalizedFieldsAsDatatypes will overwrite the normalizedFields with
    // a new keyset.
    Assert.assertEquals(Sets.newHashSet("K003"), config.getNormalizedFields());
    Multimap<String, Type<?>> expectedDataTypes = HashMultimap.create();
    expectedDataTypes.put("K001", new NoOpType("V"));
    Assert.assertEquals(expectedDataTypes, config.getDataTypes());
    Multimap<String, Type<?>> expectedQueryFieldsDatatypes = HashMultimap.create();
    expectedQueryFieldsDatatypes.put("K002", new NoOpType("V"));
    Assert.assertEquals(expectedQueryFieldsDatatypes, config.getQueryFieldsDatatypes());
    Multimap<String, Type<?>> expectedNormalizedFieldsDatatypes = HashMultimap.create();
    expectedNormalizedFieldsDatatypes.put("K003", new NoOpType("V"));
    Assert.assertEquals(expectedNormalizedFieldsDatatypes, config.getNormalizedFieldsDatatypes());
    Multimap<String, String> expectedCompositeToFieldMap = ArrayListMultimap.create();
    expectedCompositeToFieldMap.put("K004", "V");
    Assert.assertEquals(expectedCompositeToFieldMap, config.getCompositeToFieldMap());
    Map<String, DiscreteIndexType<?>> expectedFieldToDiscreteIndexType = Maps.newHashMap();
    expectedFieldToDiscreteIndexType.put("GEO", new GeometryType());
    Assert.assertEquals(expectedFieldToDiscreteIndexType, config.getFieldToDiscreteIndexTypes());
    Map<String, Date> expectedCompositeTransitionDates = Maps.newHashMap();
    expectedCompositeTransitionDates.put("K005", transitionDate);
    Assert.assertEquals(expectedCompositeTransitionDates, config.getCompositeTransitionDates());
    Map<String, String> expectedCompositeFieldSeparators = Maps.newHashMap();
    expectedCompositeFieldSeparators.put("GEO", " ");
    Assert.assertEquals(expectedCompositeFieldSeparators, config.getCompositeFieldSeparators());
    Assert.assertEquals(Sets.newHashSet("fieldA"), config.getQueryTermFrequencyFields());
    Assert.assertEquals(Sets.newHashSet("limitFieldA"), config.getLimitFields());
    Map<String, String> expectedHierarchyFieldOptions = Maps.newHashMap();
    expectedHierarchyFieldOptions.put("K006", "V");
    Assert.assertEquals(expectedHierarchyFieldOptions, config.getHierarchyFieldOptions());
    Assert.assertEquals(Lists.newArrayList(DocumentPermutation.class.getName()), config.getDocumentPermutations());
    QueryModel expectedQueryModel = new QueryModel();
    Assert.assertEquals(expectedQueryModel.getForwardQueryMapping(), config.getQueryModel().getForwardQueryMapping());
    Assert.assertEquals(expectedQueryModel.getReverseQueryMapping(), config.getQueryModel().getReverseQueryMapping());
    Assert.assertEquals(expectedQueryModel.getUnevaluatedFields(), config.getQueryModel().getUnevaluatedFields());
    Assert.assertEquals(Sets.newHashSet(".*", ".*?"), config.getDisallowedRegexPatterns());
    // Account for QueryImpl.duplicate() generating a random UUID on the duplicate
    QueryImpl expectedQuery = new QueryImpl();
    expectedQuery.setId(config.getQuery().getId());
    Assert.assertEquals(expectedQuery, config.getQuery());
    Assert.assertEquals(Sets.newHashSet("groupFieldA"), config.getGroupFields());
    UniqueFields expectedUniqueFields = new UniqueFields();
    expectedUniqueFields.put("uniqueFieldA", UniqueGranularity.ALL);
    Assert.assertEquals(expectedUniqueFields, config.getUniqueFields());
    Assert.assertEquals(Lists.newArrayList("fieldA"), config.getContentFieldNames());
    Assert.assertEquals(Sets.newHashSet("NoExpansionFieldA"), config.getNoExpansionFields());
}
Aggregations