use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class FetchDataTypesVisitor method genericVisit.
/**
 * Use the MetadataHelper to fetch the {@code Set<Type<?>>} for the field referenced by a query term and record it in the supplied multimap. Handles the
 * case of spoofing the NumberType for fields which are numeric but have no configured types.
 * <p>
 * Fields that already have an entry in the multimap are left untouched. When the lookup fails, the error is logged and the field is treated as though no
 * types were found for it.
 *
 * @param node
 *            the node from which the identifier and literal are extracted; nodes that do not yield an identifier/literal pair are ignored
 * @param data
 *            the accumulator, which must be a {@code HashMultimap<String, Type<?>>} of field name to types
 * @return the same multimap that was passed in as {@code data}
 */
private Object genericVisit(JexlNode node, Object data) {
    // The accumulator arrives as Object, so the cast cannot be checked by the compiler.
    @SuppressWarnings("unchecked")
    HashMultimap<String, Type<?>> dataTypes = (HashMultimap<String, Type<?>>) data;
    JexlASTHelper.IdentifierOpLiteral op = JexlASTHelper.getIdentifierOpLiteral(node);
    if (op == null) {
        return dataTypes;
    }
    final String fieldName = op.deconstructIdentifier();
    if (!dataTypes.containsKey(fieldName)) {
        Set<Type<?>> dataTypesForField = Collections.emptySet();
        try {
            if (useCache) {
                // cache entries are keyed on the field name together with the datatype filter
                Tuple2<String, Set<String>> cacheKey = new Tuple2<>(fieldName, datatypeFilter);
                Set<Type<?>> types = typeCache.getIfPresent(cacheKey);
                if (null == types) {
                    dataTypesForField = this.helper.getDatatypesForField(fieldName, datatypeFilter);
                    typeCache.put(cacheKey, dataTypesForField);
                } else {
                    dataTypesForField = types;
                }
            } else {
                dataTypesForField = this.helper.getDatatypesForField(fieldName, datatypeFilter);
            }
        } catch (InstantiationException | TableNotFoundException | IllegalAccessException e) {
            // Best effort: fall through with the empty set, but keep the field name and the
            // full cause in the log so the failure can be diagnosed.
            log.error("Unable to fetch data types for field " + fieldName, e);
        }
        if (!dataTypesForField.isEmpty()) {
            dataTypes.putAll(fieldName, dataTypesForField);
        } else {
            if (op.getLiteralValue() instanceof Number) {
                // This is a hack to get around the following case:
                // 1) A user enters a query with a numeric term on a field
                // that isn't indexed: e.g. AGE < 4
                // 2) To get proper comparisons during evaluation, the
                // NumberType needs to be set on the AGE, otherwise
                // lexicographic comparisons will occur.
                // This causes a problem though, because the
                // RangeBuildingVisitor thinks that AGE is indexed, it is
                // not, so it incorrectly fails queries where "AGE < 4" is
                // intersected with an indexed field.
                // If this is unindexed (no normalizers for it) and the
                // literal is a Number
                dataTypes.put(fieldName, new NumberType());
                if (log.isTraceEnabled()) {
                    log.trace("Unindexed numeric field, adding NumberType for " + fieldName);
                }
            } else {
                // add LcNoDiacritics and NoOpType to ensure that we
                // query against both forms of the string
                dataTypes.put(fieldName, new LcNoDiacriticsType());
                dataTypes.put(fieldName, new NoOpType());
                if (log.isTraceEnabled()) {
                    log.trace("Unindexed field, adding LcNoDiacriticsType and NoOpType for " + fieldName);
                }
            }
        }
    }
    return dataTypes;
}
use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class ExpandMultiNormalizedTermsTest method testMixedCaseWithNumber.
/**
 * Expands a query that mixes a text field carrying two types (NAME) with a numeric field (NUM), and checks the result against the expected expanded
 * query string.
 */
@Test
public void testMixedCaseWithNumber() throws ParseException {
    Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.put("NUM", new NumberType());
    typesByField.putAll("NAME", Sets.newHashSet(new LcNoDiacriticsType(), new NoOpType()));

    // every field with a type is registered as indexed, index-only and term-frequency
    helper.setIndexedFields(typesByField.keySet());
    helper.setIndexOnlyFields(typesByField.keySet());
    helper.addTermFrequencyFields(typesByField.keySet());

    config.setQueryFieldsDatatypes(typesByField);

    expandTerms("(NAME == 'Alice' || NAME == 'BOB') && NUM < '1'",
                    "((NAME == 'Alice' || NAME == 'alice') || (NAME == 'bob' || NAME == 'BOB')) && NUM < '+aE1'");
}
use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class ExpandMultiNormalizedTermsTest method testNumber.
/**
 * Checks expansion of a numeric literal for a field typed only as a number (NUM) and for a field typed as both text and number (MULTI).
 */
@Test
public void testNumber() throws ParseException {
    Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.putAll("MULTI", Sets.newHashSet(new LcNoDiacriticsType(), new NumberType()));
    typesByField.put("NUM", new NumberType());

    // every field with a type is registered as indexed, index-only and term-frequency
    helper.setIndexedFields(typesByField.keySet());
    helper.setIndexOnlyFields(typesByField.keySet());
    helper.addTermFrequencyFields(typesByField.keySet());

    config.setQueryFieldsDatatypes(typesByField);

    // single numeric type: the literal is expected to be replaced by one form
    expandTerms("NUM == '1'", "NUM == '+aE1'");

    // text + numeric types: both forms of the literal are expected
    expandTerms("MULTI == '1'", "(MULTI == '1' || MULTI == '+aE1')");
}
use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class ExpandMultiNormalizedTermsTest method testFilterFunctionNormalizationWithIndexedNumeric.
/**
 * Runs a filter:includeRegex query against a field typed as a number and expects the query to come back identical to the input.
 */
@Test
public void testFilterFunctionNormalizationWithIndexedNumeric() throws ParseException {
    Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.put("NUM", new NumberType());
    config.setQueryFieldsDatatypes(typesByField);

    // the same string is used as both the input and the expected output
    final String query = "filter:includeRegex(NUM, '1')";
    expandTerms(query, query);
}
use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class GroupingTransform method flush.
/**
 * Turns any pending entries of the counting map into documents (one per grouping, each with a "COUNT" attribute), optionally flattens them, and then
 * hands back the next queued document.
 *
 * @return the next document paired with its key, or {@code null} when no documents are queued
 */
@Override
public Entry<Key, Document> flush() {
    if (!countingMap.isEmpty()) {
        log.trace("flush will use the countingMap: {}", countingMap);

        for (Collection<GroupingTypeAttribute<?>> grouping : countingMap.keySet()) {
            log.trace("from countingMap, got entry: {}", grouping);

            ColumnVisibility mergedVisibility;
            try {
                mergedVisibility = toColumnVisibility(fieldVisibilities.get(grouping));
            } catch (Exception e) {
                throw new IllegalStateException("Unable to merge column visibilities: " + fieldVisibilities.get(grouping), e);
            }

            // grab a key from those saved during getListKeyCounts
            Assert.notEmpty(keys, "no available keys for grouping results");

            // use the last (most recent) key so a new iterator will know where to start
            Document groupDocument = new Document(keys.get(keys.size() - 1), true);
            groupDocument.setColumnVisibility(mergedVisibility);

            // copy each grouped attribute into the document under its field name
            for (GroupingTypeAttribute<?> base : grouping) {
                groupDocument.put(getFieldName(base), base);
            }

            // attach the count for this grouping as a numeric attribute
            NumberType countType = new NumberType();
            countType.setDelegate(new BigDecimal(countingMap.get(grouping)));
            groupDocument.put("COUNT", new TypeAttribute<BigDecimal>(countType, new Key("count"), true));

            documents.add(groupDocument);
        }

        if (flatten) {
            // flatten to just one document on the tserver.
            flatten(documents);
        }
    }

    if (documents.isEmpty()) {
        return null;
    }

    log.trace("{} will flush first of {} documents: {}", this.hashCode(), documents.size(), documents);
    Document next = documents.pop();

    // when flattening, use the last (most recent) key so a new iterator will know where to
    // start; otherwise fall back to the document's own metadata key
    Key resultKey = (!keys.isEmpty() && flatten) ? keys.get(keys.size() - 1) : next.getMetadata();

    Entry<Key, Document> result = Maps.immutableEntry(resultKey, next);
    log.trace("flushing out {}", result);
    countingMap.clear();
    return result;
}
Aggregations