Use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.
Class ExpandCompositeTermsTest, method test13.
/**
 * Runs a bounded GEO range combined with a lower-bounded WKT_BYTE_LENGTH term through runTestQuery,
 * with GEO configured as a composite of GEO and WKT_BYTE_LENGTH joined by ",".
 */
@Test
public void test13() throws Exception {
    // GEO maps to itself plus WKT_BYTE_LENGTH, in that order
    Multimap<String, String> compositeFields = LinkedListMultimap.create();
    compositeFields.put("GEO", "GEO");
    compositeFields.put("GEO", "WKT_BYTE_LENGTH");

    Map<String, String> separators = new HashMap<>();
    separators.put("GEO", ",");

    ShardQueryConfiguration conf = new ShardQueryConfiguration();
    conf.setCompositeToFieldMap(compositeFields);
    conf.setCompositeFieldSeparators(separators);
    conf.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());

    Set<String> indexedFields = new HashSet<>();
    indexedFields.add("GEO");

    // the WKT_BYTE_LENGTH lower bound is the normalized form of "0"
    String normalizedZero = Normalizer.NUMBER_NORMALIZER.normalize("0");
    String query = "((_Bounded_ = true) && (GEO >= '0100' && GEO <= '0103')) && WKT_BYTE_LENGTH >= '" + normalizedZero + "'";
    String expected = "((_Bounded_ = true) && (GEO >= '0100,+AE0' && GEO < '0104')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0100' && GEO <= '0103')) && WKT_BYTE_LENGTH >= '+AE0'))";

    runTestQuery(query, expected, indexedFields, conf);
}
Use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.
Class ExpandCompositeTermsTest, method test18b.
/**
 * Composite range testing with an overloaded composite field against legacy data.
 * <p>
 * GEO is configured as a composite of GEO and WKT (separator ","), with a GEO transition date (day 15)
 * that falls inside the query date range (day 0 to day 30). Each case below hands a query and its
 * expected rewrite to runTestQuery.
 * <p>
 * In every expected bounded expansion in this test, the lower bound is an inclusive GEO-only term
 * (incremented when the query used GT), and only the upper bound carries the WKT component, using the
 * WKT upper-bound operator (LE or LT).
 * <p>
 * NOTE(review): the per-case "use GT" lower-bound comments do not match the expected strings, which
 * always use GE for the lower bound; presumably they were carried over from a non-legacy variant of
 * this test. Confirm before relying on them.
 */
@Test
public void test18b() throws Exception {
ShardQueryConfiguration conf = new ShardQueryConfiguration();
// query window: day 0 through day 30 (epoch-relative)
conf.setBeginDate(new Date(0));
conf.setEndDate(new Date(TimeUnit.DAYS.toMillis(30)));
// GEO maps to itself plus WKT, in that order
Multimap<String, String> compositeToFieldMap = LinkedListMultimap.create();
compositeToFieldMap.put("GEO", "GEO");
compositeToFieldMap.put("GEO", "WKT");
conf.setCompositeToFieldMap(compositeToFieldMap);
Map<String, String> compositeToSeparatorMap = new HashMap<>();
compositeToSeparatorMap.put("GEO", ",");
conf.setCompositeFieldSeparators(compositeToSeparatorMap);
Set<String> indexedFields = new HashSet<>();
indexedFields.add("GEO");
conf.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());
// transition date for GEO is day 15, i.e. in the middle of the query window set above
Map<String, Date> compositeWithOldDataMap = new HashMap<>();
compositeWithOldDataMap.put("GEO", new Date(TimeUnit.DAYS.toMillis(15)));
conf.setCompositeTransitionDates(compositeWithOldDataMap);
// normalized WKT upper bound; the expected strings below spell this value as '+eE1.2345'
String upperBound = Normalizer.NUMBER_NORMALIZER.normalize("12345");
// COMPOSITE QUERY AGAINST THE COMPOSITE INDEX
// if incrementing/decrementing is an option
// NOTE: Because we are combining two ranges, our bounds will already include some unwanted composite terms.
// Those will be taken care of via a combination of accumulo iterator filtering against the shard index,
// and field index filtering against the field index within the index iterators.
// GE to GE -> GE
// GE to GT -> GT
// GT to GT -> increment base, GT
// GT to GE -> increment base, GE
// GT to EQ -> increment base, GE
// EQ to GT -> GT
// EQ to GE -> GE
// LE to LE -> LE
// LE to LT -> LT
// LT to LT -> decrement base, LT
// LT to LE -> decrement base, LE
// LT to EQ -> decrement base, LE
// EQ to LT -> LT
// EQ to LE -> LE
// NON-COMPOSITE QUERY AGAINST AN OVERLOADED COMPOSITE INDEX
// if incrementing/decrementing is an option
// NOTE: The proposed solutions only work IFF the underlying data is truly a unicode string
// GE -> GE
// GT -> increment base, GE
// LE -> increment base, LT
// LT -> LT
// EQ -> EQ convert to range, lower bound -> inclusive term, upper bound -> exclusive incremented term
// e.g. GEO == '0202'
// GEO >= '0202' && GEO < '0203'
String query, expected;
// ---- Bounded GEO range AND bounded WKT range, WKT upper bound inclusive (LE) ----
// GE to GE, use GE
// LE to LE, use LE
query = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GT to GE, increment fixed term, use GE
// LE to LE, use LE
query = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0203' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GE to GE, use GE
// LT to LE, decrement fixed term, use LE
query = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GT to GE, increment fixed term, use GE
// LT to LE, decrement fixed term, use LE
query = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0203' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GE to GT, use GT
// LE to LE, use LE
query = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GT to GT, increment fixed term, use GT
// LE to LE, use LE
query = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0203' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GE to GT, use GT
// LT to LE, decrement fixed term, use LE
query = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GT to GT, increment fixed term, use GT
// LT to LE, decrement fixed term, use LE
query = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0203' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// ---- Bounded GEO range AND bounded WKT range, WKT upper bound exclusive (LT) ----
// GE to GE, use GE
// LE to LT, use LT
query = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GT to GE, increment fixed term, use GE
// LE to LT, use LT
query = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0203' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GE to GE, use GE
// LT to LT, decrement fixed term, use LT
query = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GT to GE, increment fixed term, use GE
// LT to LT, decrement fixed term, use LT
query = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0203' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GE to GT, use GT
// LE to LT, use LT
query = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GT to GT, increment fixed term, use GT
// LE to LT, use LT
query = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0203' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GE to GT, use GT
// LT to LT, decrement fixed term, use LT
query = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// GT to GT, increment fixed term, use GT
// LT to LT, decrement fixed term, use LT
query = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0203' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// ---- GEO equality AND bounded WKT range ----
// EQ to GE, use GE
// EQ to LE, use LE
query = "(GEO == '0202') && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// EQ to GE, use GE
// EQ to LT, use LT
query = "(GEO == '0202') && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// EQ to GT, use GT
// EQ to LE, use LE
query = "(GEO == '0202') && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// EQ to GT, use GT
// EQ to LT, use LT
query = "(GEO == '0202') && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
runTestQuery(query, expected, indexedFields, conf);
// ---- GEO equality on its own ----
// EQ, convert to range [keep base - use GE, increment base - use LT]
query = "GEO == '0202'";
expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '0203'))";
runTestQuery(query, expected, indexedFields, conf);
// Unbounded range w/ composite term
// (in each case below the expected string equals the query, i.e. no rewrite is expected)
query = "GEO >= '0202' && WKT < '" + upperBound + "'";
expected = "GEO >= '0202' && WKT < '" + upperBound + "'";
runTestQuery(query, expected, indexedFields, conf);
query = "GEO >= '0202' && WKT > '" + upperBound + "'";
expected = "GEO >= '0202' && WKT > '" + upperBound + "'";
runTestQuery(query, expected, indexedFields, conf);
query = "GEO <= '0202' && WKT < '" + upperBound + "'";
expected = "GEO <= '0202' && WKT < '" + upperBound + "'";
runTestQuery(query, expected, indexedFields, conf);
query = "GEO <= '0202' && WKT > '" + upperBound + "'";
expected = "GEO <= '0202' && WKT > '" + upperBound + "'";
runTestQuery(query, expected, indexedFields, conf);
// Unbounded range w/out composite term
// (again the expected string equals the query in every case)
query = "GEO >= '0202'";
expected = "GEO >= '0202'";
runTestQuery(query, expected, indexedFields, conf);
query = "GEO > '0202'";
expected = "GEO > '0202'";
runTestQuery(query, expected, indexedFields, conf);
query = "GEO <= '0202'";
expected = "GEO <= '0202'";
runTestQuery(query, expected, indexedFields, conf);
query = "GEO < '0202'";
expected = "GEO < '0202'";
runTestQuery(query, expected, indexedFields, conf);
}
Use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.
Class ExceededOrThresholdMarkerJexlNodeTest, method getResultsIterator.
/**
 * Builds an "EventQuery" request around the given query string, initializes and sets up the supplied
 * logic with it, and returns the logic's transform iterator.
 *
 * @param queryString
 *            the query text stored under the QUERY_STRING parameter
 * @param logic
 *            the query logic that is initialized, set up, and asked for the iterator
 * @return the result of {@code logic.getTransformIterator(query)}
 * @throws Exception
 *             propagated from parameter validation, connector lookup, or the query logic
 */
private Iterator getResultsIterator(String queryString, ShardQueryLogic logic) throws Exception {
    // raw request parameters, in the same order the query settings are declared
    MultivaluedMap<String, String> requestParams = new MultivaluedMapImpl<>();
    requestParams.putSingle(QUERY_LOGIC_NAME, "EventQuery");
    requestParams.putSingle(QUERY_STRING, queryString);
    requestParams.putSingle(QUERY_NAME, "geoQuery");
    requestParams.putSingle(QUERY_PERSISTENCE, "PERSISTENT");
    requestParams.putSingle(QUERY_AUTHORIZATIONS, AUTHS);
    requestParams.putSingle(QUERY_EXPIRATION, "20200101 000000.000");
    requestParams.putSingle(QUERY_BEGIN, BEGIN_DATE);
    requestParams.putSingle(QUERY_END, END_DATE);

    QueryParameters validatedParams = new QueryParametersImpl();
    validatedParams.validate(requestParams);

    Set<Authorizations> authSet = new HashSet<>();
    authSet.add(new Authorizations(AUTHS));

    Query settings = new QueryImpl();
    settings.initialize(USER, Arrays.asList(USER_DN), null, validatedParams, null);

    // configure the logic against the connector for the "root" user, then plan the query
    ShardQueryConfiguration queryConfig = ShardQueryConfiguration.create(logic, settings);
    logic.initialize(queryConfig, instance.getConnector("root", PASSWORD), settings, authSet);
    logic.setupQuery(queryConfig);

    return logic.getTransformIterator(settings);
}
Use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.
Class DateIndexQueryExpansionVisitorTest, method assertExpansion.
/**
 * Parses {@code original}, expands its functions using a configuration bounded by the test's
 * {@code startDate}/{@code endDate}, and asserts on both the expanded and the input script.
 *
 * @param original
 *            the query to parse and expand
 * @param expected
 *            the query the expanded script must equal
 * @throws ParseException
 *             if {@code original} cannot be parsed
 */
private void assertExpansion(String original, String expected) throws ParseException {
    ShardQueryConfiguration queryConfig = new ShardQueryConfiguration();
    queryConfig.setBeginDate(startDate);
    queryConfig.setEndDate(endDate);

    ASTJexlScript parsed = JexlASTHelper.parseJexlQuery(original);
    ASTJexlScript expanded = FunctionIndexQueryExpansionVisitor.expandFunctions(queryConfig, metadataHelper, dateIndexHelper, parsed);

    // the expanded script must match the expectation and keep a valid lineage
    JexlNodeAssert.assertThat(expanded).isEqualTo(expected).hasValidLineage();
    // the parsed input must still match the original query text after expansion
    JexlNodeAssert.assertThat(parsed).isEqualTo(original).hasValidLineage();
}
Use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.
Class GeoWavePruningVisitorTest, method testNonIntersectingTermIsPruned.
/**
 * ORs an extra GEO_FIELD == '0100' term with the index query generated for a geowave:intersects
 * function, and expects that extra term to be replaced by {@code false} and reported as pruned.
 */
@Test
public void testNonIntersectingTermIsPruned() throws ParseException {
    String geoFunction = "geowave:intersects(GEO_FIELD, 'POLYGON((10 10, 20 10, 20 20, 10 20, 10 10))')";

    // index terms produced for the function under a default configuration
    String expandedTerms = convertFunctionToIndexQuery(geoFunction, new ShardQueryConfiguration());

    // '0100' is the term the test expects to be pruned
    String query = geoFunction + " && (GEO_FIELD == '0100' || " + expandedTerms + ")";
    String expected = geoFunction + " && (false || " + expandedTerms + ")";

    Multimap<String, String> prunedTerms = HashMultimap.create();
    prunedTerms.put("GEO_FIELD", "0100");

    assertResult(query, expected, prunedTerms);
}
Aggregations