use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class DefaultExtendedEdgeQueryLogic method normalizeQualifiedSource.
/**
 * Normalizes a source value, which may carry a trailing qualifier, with each available data type.
 * <p>
 * The qualifier is the part of the value starting at the last {@code '<'}; it is only recognized when that
 * character is not the first character of the value. The qualifier is lower-cased, while the remaining
 * source text is passed through each data type's {@code normalize} method. Each non-blank normalized
 * source is then re-joined with the lower-cased qualifier.
 * <p>
 * When {@code getDataTypes()} returns {@code null}, a single {@link LcNoDiacriticsType} is used instead.
 *
 * @param qualifiedSource
 *            the source value, optionally followed by a qualifier; must not be {@code null}
 * @return the distinct normalized values in the order they were produced; empty when no data type yields a
 *         non-blank result
 */
protected Collection<String> normalizeQualifiedSource(String qualifiedSource) {
    String source = qualifiedSource;
    String normalizedQualifier = "";

    int qualifierStart = qualifiedSource.lastIndexOf('<');
    if (qualifierStart > 0) {
        source = qualifiedSource.substring(0, qualifierStart);
        // Use a fixed locale: the no-arg toLowerCase() depends on the JVM default locale and would, for
        // example, map 'I' to a dotless 'ı' under a Turkish default, producing a different qualifier.
        normalizedQualifier = qualifiedSource.substring(qualifierStart).toLowerCase(java.util.Locale.ROOT);
    }

    List<? extends Type<?>> dataTypes = getDataTypes();
    if (dataTypes == null) {
        dataTypes = Arrays.asList((Type<?>) new LcNoDiacriticsType());
    }

    // LinkedHashSet: de-duplicates while keeping the order in which the data types produced values.
    Set<String> sources = new LinkedHashSet<>();
    for (Type<?> type : dataTypes) {
        try {
            String normalizedSource = type.normalize(source);
            if (normalizedSource != null && !normalizedSource.trim().isEmpty()) {
                sources.add(normalizedSource + normalizedQualifier);
            }
        } catch (Exception ignored) {
            // Deliberate best effort: this type could not normalize the value, so move on to the next one.
        }
    }
    return sources;
}
use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class RangeStreamTest method testBothIndexedPrune.
// FOO == 'barter' || FOO == 'baggy', both terms on an indexed field: every range produced by the
// query plans must be one of the four expected ranges, and all four must be produced.
@Test
public void testBothIndexedPrune() throws Exception {
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("(FOO == 'barter' || FOO == 'baggy')");

    // Query window: from the epoch up to the current time.
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(System.currentTimeMillis()));

    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("FOO", new LcNoDiacriticsType());
    fieldTypes.put("NUM", new NumberType());
    config.setQueryFieldsDatatypes(fieldTypes);
    config.setIndexedFields(fieldTypes);

    MockMetadataHelper metadataHelper = new MockMetadataHelper();
    metadataHelper.setIndexedFields(fieldTypes.keySet());

    Set<Range> remaining = Sets.newHashSet(
                    makeTestRange("20190314_1", "datatype1\u0000123"),
                    makeTestRange("20190314_1", "datatype1\u0000345"),
                    makeTestRange("20190414_1", "datatype1\u0000123"),
                    makeTestRange("20190414_1", "datatype1\u0000345"));

    ScannerFactory scanners = new ScannerFactory(config.getConnector(), 1);
    RangeStream rangeStream = new RangeStream(config, scanners, metadataHelper).setLimitScanners(true);

    for (QueryPlan plan : rangeStream.streamPlans(script)) {
        for (Range produced : plan.getRanges()) {
            // Build the message first so it shows the expected set as it was before the removal attempt.
            String unexpected = "Tried to remove unexpected range " + produced.toString() + " from expected ranges: " + remaining.toString();
            assertTrue(unexpected, remaining.remove(produced));
        }
    }

    String leftover = "Expected ranges not found in query plan: " + remaining.toString();
    assertTrue(leftover, remaining.isEmpty());
}
use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class RangeStreamTest method testIntersection_NestedUnionOfLowCardinalityTerm_withSeek.
// Query shape: A && (B || C)
// The stream context must be PRESENT and the plans must yield exactly the six expected ranges.
@Test
public void testIntersection_NestedUnionOfLowCardinalityTerm_withSeek() throws Exception {
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("(FOO == 'highest_card' && (FOO == 'low_card' || FOO == 'lowest_card'))");

    // Restrict the query to 20190310 - 20190320 and to two datatypes.
    SimpleDateFormat dayFormat = new SimpleDateFormat("yyyyMMdd");
    config.setBeginDate(dayFormat.parse("20190310"));
    config.setEndDate(dayFormat.parse("20190320"));
    config.setDatatypeFilter(Sets.newHashSet("datatype1", "datatype2"));

    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("FOO", new LcNoDiacriticsType());
    fieldTypes.put("LAUGH", new LcNoDiacriticsType());
    config.setQueryFieldsDatatypes(fieldTypes);
    config.setIndexedFields(fieldTypes);

    MockMetadataHelper metadataHelper = new MockMetadataHelper();
    metadataHelper.setIndexedFields(fieldTypes.keySet());

    Set<Range> remaining = Sets.newHashSet(
                    makeTestRange("20190310_1", "datatype1\u0000a.b.c"),
                    makeTestRange("20190312_1", "datatype1\u0000a.b.c"),
                    makeTestRange("20190314_22", "datatype1\u0000a.b.c"),
                    makeTestRange("20190315_33", "datatype1\u0000a.b.c"),
                    makeTestRange("20190315_49", "datatype1\u0000a.b.c"),
                    makeTestRange("20190317_1", "datatype1\u0000a.b.c"));

    ScannerFactory scanners = new ScannerFactory(config.getConnector(), 1);
    RangeStream rangeStream = new RangeStream(config, scanners, metadataHelper);
    rangeStream.setLimitScanners(true);

    // The context is checked after streamPlans(..) has been called and before the plans are consumed.
    CloseableIterable<QueryPlan> plans = rangeStream.streamPlans(script);
    assertEquals(IndexStream.StreamContext.PRESENT, rangeStream.context());

    for (QueryPlan plan : plans) {
        for (Range produced : plan.getRanges()) {
            // Build the message first so it shows the expected set as it was before the removal attempt.
            String unexpected = "Tried to remove unexpected range " + produced.toString() + "\nfrom expected ranges: " + remaining.toString();
            assertTrue(unexpected, remaining.remove(produced));
        }
    }

    String leftover = "Expected ranges not found in query plan: " + remaining.toString();
    assertTrue(leftover, remaining.isEmpty());
}
use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class RangeStreamTest method testIntersection_ofDayRangesAndShardRange.
// Query shape: A && B, where the A term is day ranges and the B term is a single shard range within the last day.
@Test
public void testIntersection_ofDayRangesAndShardRange() throws Exception {
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("FOO == 'day_ranges' && FOO == 'shard_range'");

    // Restrict the query to 20190310 - 20190320 and to two datatypes.
    SimpleDateFormat dayFormat = new SimpleDateFormat("yyyyMMdd");
    config.setBeginDate(dayFormat.parse("20190310"));
    config.setEndDate(dayFormat.parse("20190320"));
    config.setDatatypeFilter(Sets.newHashSet("datatype1", "datatype2"));

    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("FOO", new LcNoDiacriticsType());
    fieldTypes.put("LAUGH", new LcNoDiacriticsType());
    config.setQueryFieldsDatatypes(fieldTypes);
    config.setIndexedFields(fieldTypes);
    config.setShardsPerDayThreshold(2);

    MockMetadataHelper metadataHelper = new MockMetadataHelper();
    metadataHelper.setIndexedFields(fieldTypes.keySet());

    // Expected ranges are spelled out so it is obvious which shards contribute to the results.
    // Only shard 20190310_21 is expected. Per the original author's note, it is hard to roll up to a day
    // range when the seek lands most of the way through the day and not all shards for that day are
    // present, so a range for 20190315_51 is intentionally not expected here.
    Set<Range> remaining = Sets.newHashSet(makeTestRange("20190310_21", "datatype1\u0000a.b.c"));

    ScannerFactory scanners = new ScannerFactory(config.getConnector(), 1);
    RangeStream rangeStream = new RangeStream(config, scanners, metadataHelper);
    rangeStream.setLimitScanners(true);

    // The context is checked after streamPlans(..) has been called and before the plans are consumed.
    CloseableIterable<QueryPlan> plans = rangeStream.streamPlans(script);
    assertEquals(IndexStream.StreamContext.PRESENT, rangeStream.context());

    for (QueryPlan plan : plans) {
        for (Range produced : plan.getRanges()) {
            // Build the message first so it shows the expected set as it was before the removal attempt.
            String unexpected = "Tried to remove unexpected range " + produced.toString() + "\nfrom expected ranges: " + remaining.toString();
            assertTrue(unexpected, remaining.remove(produced));
        }
    }

    String leftover = "Expected ranges not found in query plan: " + remaining.toString();
    assertTrue(leftover, remaining.isEmpty());
}
use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.
the class RangeStreamTest method testNonExistentFieldInAnd.
// FOO == 'bag' && CANDY_TYPE == 'candy corn': the stream context must be PRESENT while the range stream
// itself yields nothing.
@Test
public void testNonExistentFieldInAnd() throws Exception {
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("FOO == 'bag' && CANDY_TYPE == 'candy corn'");

    // Query window: from the epoch up to the current time.
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(System.currentTimeMillis()));

    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("FOO", new LcNoDiacriticsType());
    fieldTypes.put("CANDY_TYPE", new LcNoDiacriticsType());
    config.setQueryFieldsDatatypes(fieldTypes);
    config.setIndexedFields(fieldTypes);

    MockMetadataHelper metadataHelper = new MockMetadataHelper();
    metadataHelper.setIndexedFields(fieldTypes.keySet());

    ScannerFactory scanners = new ScannerFactory(config.getConnector());
    RangeStream rangeStream = new RangeStream(config, scanners, metadataHelper).setLimitScanners(true);

    // The returned plans are not needed; streamPlans(script) is called to populate the StreamContext.
    rangeStream.streamPlans(script);

    assertEquals(IndexStream.StreamContext.PRESENT, rangeStream.context());
    assertFalse(rangeStream.iterator().hasNext());
}
Aggregations