use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class ExpandMultiNormalizedTermsTest method testBoundedUnNormalizedBoundsCase.
/**
 * Registers the field NEW with an {@link LcNoDiacriticsType} and runs a query containing a bounded range over NEW through {@code expandTerms}, supplying the
 * unmodified query text as the expected result.
 */
@Test
public void testBoundedUnNormalizedBoundsCase() throws ParseException {
    // The expected text is the same as the input text, so a single constant serves as both.
    final String query = "NEW == 'boo' && ((_Bounded_ = true) && (NEW > '1' && NEW < '10'))";

    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("NEW", new LcNoDiacriticsType());

    // Every field that has a datatype is also reported as indexed.
    helper.setIndexedFields(fieldTypes.keySet());
    config.setQueryFieldsDatatypes(fieldTypes);

    expandTerms(query, query);
}
use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class FetchDataTypesVisitorTest method setup.
/**
 * One-time fixture: gives the shared query config a date range from the epoch to the current time and registers normalizers for the fields FOO, FOO2, FOO3
 * and FOO4 on the shared metadata helper.
 */
@BeforeClass
public static void setup() {
    // Query config: begin at the epoch, end at the moment the fixture is built.
    final Date beginDate = new Date(0);
    config.setBeginDate(beginDate);
    final Date endDate = new Date(System.currentTimeMillis());
    config.setEndDate(endDate);

    // Metadata helper: FOO, FOO2 and FOO3 each get a single normalizer.
    helper.addNormalizers("FOO", Sets.newHashSet(new LcNoDiacriticsType()));
    helper.addNormalizers("FOO2", Sets.newHashSet(new LcType()));
    helper.addNormalizers("FOO3", Sets.newHashSet(new NumberType()));

    // FOO4 is the only field registered with more than one normalizer.
    helper.addNormalizers("FOO4", Sets.newHashSet(new LcType(), new LcNoDiacriticsType()));
}
use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class FetchDataTypesVisitor method genericVisit.
/**
 * Use the MetadataHelper to fetch the {@code Set<Type>} for the field referenced by a query term and record it in the supplied multimap. Handle the case of
 * spoofing the NumberType for fields which have no datatypes but are compared against a numeric literal.
 * <p>
 * Fields already present in the multimap are left untouched. When the lookup yields no datatypes (including when the lookup throws), a fallback is
 * recorded instead: a NumberType if the term's literal is a {@link Number}, otherwise both an LcNoDiacriticsType and a NoOpType.
 *
 * @param node
 *            the query term to inspect; nodes from which no identifier/operator/literal triple can be extracted are ignored
 * @param data
 *            the accumulator, which must be a {@code HashMultimap<String, Type<?>>} keyed by field name
 * @return the same multimap instance that was passed in as {@code data}
 */
private Object genericVisit(JexlNode node, Object data) {
    HashMultimap<String, Type<?>> dataTypes = (HashMultimap<String, Type<?>>) data;

    JexlASTHelper.IdentifierOpLiteral op = JexlASTHelper.getIdentifierOpLiteral(node);
    if (op == null) {
        return dataTypes;
    }

    final String fieldName = op.deconstructIdentifier();
    if (dataTypes.containsKey(fieldName)) {
        // Types for this field were resolved by an earlier term.
        return dataTypes;
    }

    Set<Type<?>> dataTypesForField = Collections.emptySet();
    try {
        if (useCache) {
            // The cache is keyed on the (field, datatype filter) pair.
            Tuple2<String, Set<String>> cacheKey = new Tuple2<>(fieldName, datatypeFilter);
            Set<Type<?>> types = typeCache.getIfPresent(cacheKey);
            if (null == types) {
                dataTypesForField = this.helper.getDatatypesForField(fieldName, datatypeFilter);
                typeCache.put(cacheKey, dataTypesForField);
            } else {
                dataTypesForField = types;
            }
        } else {
            dataTypesForField = this.helper.getDatatypesForField(fieldName, datatypeFilter);
        }
    } catch (InstantiationException | TableNotFoundException | IllegalAccessException e) {
        // Best effort: fall through with the empty set so the field is handled by the fallback below.
        // Pass the exception as the throwable argument so the stack trace is logged along with the field name.
        log.error("Unable to fetch datatypes for field " + fieldName, e);
    }

    if (!dataTypesForField.isEmpty()) {
        dataTypes.putAll(fieldName, dataTypesForField);
        return dataTypes;
    }

    if (op.getLiteralValue() instanceof Number) {
        // This is a hack to get around the following case:
        // 1) A user enters a query with a numeric term on a field
        // that isn't indexed: e.g. AGE < 4
        // 2) To get proper comparisons during evaluation, the
        // NumberType needs to be
        // set on the AGE, otherwise lexicographic comparisons will
        // occur.
        // This causes a problem though, because the
        // RangeBuildingVisitor thinks that
        // AGE is indexed, it is not, so it incorrectly fails
        // queries where "AGE < 4" is
        // intersected with an indexed field.
        // If this is unindexed (no normalizers for it) and the
        // literal is a Number
        dataTypes.put(fieldName, new NumberType());
        if (log.isTraceEnabled()) {
            log.trace("Unindexed numeric field, adding NumberType for " + fieldName);
        }
    } else {
        // add LcNoDiacritics and NoOpType to ensure that we
        // query against both forms of the string
        dataTypes.put(fieldName, new LcNoDiacriticsType());
        dataTypes.put(fieldName, new NoOpType());
        if (log.isTraceEnabled()) {
            log.trace("Unindexed field, adding LcNoDiacriticsType and NoOpType for " + fieldName);
        }
    }
    return dataTypes;
}
use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class QueryModelVisitorTest method testAppliedModelWithNullNoFail.
/**
 * Applies a model that maps FOO1 to BAR1 and OTHER to 9_2 to a query containing a {@code == null} term, then fetches datatypes for the resulting script and
 * checks that four types come back, each of them either an {@link LcNoDiacriticsType} or a {@link NoOpType}.
 */
@Test
public void testAppliedModelWithNullNoFail() throws ParseException {
    model.addTermToModel("FOO1", "BAR1");
    model.addTermToModel("OTHER", "9_2");

    String original = "FOO1 == 'baz' and OTHER == null";
    ASTJexlScript groomed = JexlASTHelper.InvertNodeVisitor.invertSwappedNodes(JexlASTHelper.parseJexlQuery(original));
    String expected = "BAR1 == 'baz' and $9_2 == null";
    ASTJexlScript actualScript = assertResult(JexlStringBuildingVisitor.buildQuery(groomed), expected);

    // Normalizers are registered under the pre-model name FOO1; 9_2 is only associated with datatype1.
    MockMetadataHelper helper = new MockMetadataHelper();
    helper.addNormalizers("FOO1", Sets.newHashSet(new LcNoDiacriticsType()));
    Multimap<String, String> maps = ArrayListMultimap.create();
    maps.put("9_2", "datatype1");
    helper.addFieldsToDatatypes(maps);

    Multimap<String, Type<?>> types = FetchDataTypesVisitor.fetchDataTypes(helper, Collections.singleton("datatype1"), actualScript);

    // assertEquals takes (expected, actual); keeping that order makes a failure message report the right values.
    assertEquals(4, types.size());
    assertTrue(types.values().stream().allMatch((o) -> o instanceof LcNoDiacriticsType || o instanceof NoOpType));
}
use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class ExpandMultiNormalizedTermsTest method testMixedCaseWithNumber.
/**
 * Registers NUM with a {@link NumberType} and NAME with both an {@link LcNoDiacriticsType} and a {@link NoOpType}, then runs a query with mixed-case NAME
 * values and a numeric bound through {@code expandTerms}. The expected text contains both the original and the lower-cased spelling of each NAME value,
 * and the encoded form {@code '+aE1'} for the NUM bound.
 */
@Test
public void testMixedCaseWithNumber() throws ParseException {
    final String input = "(NAME == 'Alice' || NAME == 'BOB') && NUM < '1'";
    final String output = "((NAME == 'Alice' || NAME == 'alice') || (NAME == 'bob' || NAME == 'BOB')) && NUM < '+aE1'";

    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("NUM", new NumberType());
    fieldTypes.putAll("NAME", Sets.newHashSet(new LcNoDiacriticsType(), new NoOpType()));

    // Both fields are reported as indexed, index-only and term-frequency fields.
    helper.setIndexedFields(fieldTypes.keySet());
    helper.setIndexOnlyFields(fieldTypes.keySet());
    helper.addTermFrequencyFields(fieldTypes.keySet());

    config.setQueryFieldsDatatypes(fieldTypes);

    expandTerms(input, output);
}
Aggregations