Use of datawave.data.type.GeometryType in project datawave by NationalSecurityAgency.
Class ExpandCompositeTermsTest, method test21.
/**
 * Overloaded composite field: {@code GEO} is configured as a composite of the components {@code GEO} and
 * {@code WKT}, joined with a {@code ','} separator.
 * <p>
 * The query ORs five bounded {@code GEO} ranges and ANDs the result with one bounded {@code WKT} range. The
 * expectation is that each {@code GEO} range is paired with the {@code WKT} range to form a bounded composite
 * range ({@code 'lo,+AE0'} inclusive up to {@code 'hi,+bE4'} exclusive), and that each composite range is
 * accompanied by an {@code _Eval_} group holding the original {@code GEO} and {@code WKT} ranges.
 */
@Test
public void test21() throws Exception {
    // Component fields of the composite, in order, and the separator used between them.
    Multimap<String, String> componentsByComposite = LinkedListMultimap.create();
    componentsByComposite.put("GEO", "GEO");
    componentsByComposite.put("GEO", "WKT");

    Map<String, String> separatorsByComposite = new HashMap<>();
    separatorsByComposite.put("GEO", ",");

    ShardQueryConfiguration conf = new ShardQueryConfiguration();
    conf.setCompositeToFieldMap(componentsByComposite);
    conf.setCompositeFieldSeparators(separatorsByComposite);
    conf.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());

    Set<String> indexedFields = new HashSet<>();
    indexedFields.add("GEO");

    // One segment per OR'd GEO range; the final segment is the shared WKT range.
    String query = "(((((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d'))))"
                    + " || ((((_Bounded_ = true) && (GEO >= '030a' && GEO <= '0335'))))"
                    + " || ((((_Bounded_ = true) && (GEO >= '0428' && GEO <= '0483'))))"
                    + " || ((((_Bounded_ = true) && (GEO >= '0500aa' && GEO <= '050355'))))"
                    + " || ((((_Bounded_ = true) && (GEO >= '1f0aaaaaaaaaaaaaaa' && GEO <= '1f36c71c71c71c71c7')))))"
                    + " && (((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+bE4')))";

    // One segment per expanded GEO range: composite range first, then the _Eval_ group with the original ranges.
    String expected = "((((((_Bounded_ = true) && (GEO >= '0202,+AE0' && GEO < '020d,+bE4')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+bE4')))))))"
                    + " || (((((_Bounded_ = true) && (GEO >= '030a,+AE0' && GEO < '0335,+bE4')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '030a' && GEO <= '0335')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+bE4')))))))"
                    + " || (((((_Bounded_ = true) && (GEO >= '0428,+AE0' && GEO < '0483,+bE4')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0428' && GEO <= '0483')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+bE4')))))))"
                    + " || (((((_Bounded_ = true) && (GEO >= '0500aa,+AE0' && GEO < '050355,+bE4')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0500aa' && GEO <= '050355')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+bE4')))))))"
                    + " || (((((_Bounded_ = true) && (GEO >= '1f0aaaaaaaaaaaaaaa,+AE0' && GEO < '1f36c71c71c71c71c7,+bE4')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '1f0aaaaaaaaaaaaaaa' && GEO <= '1f36c71c71c71c71c7')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+bE4')))))))"
                    + ")";

    runTestQuery(query, expected, indexedFields, conf);
}
Use of datawave.data.type.GeometryType in project datawave by NationalSecurityAgency.
Class ExpandCompositeTermsTest, method test15.
/**
 * Overloaded composite field: {@code GEO} is configured as a composite of the components {@code GEO} and
 * {@code WKT_BYTE_LENGTH}. No separator is configured in this test.
 * <p>
 * The query pairs a lower-bound-only {@code GEO} term with an upper-bound-only {@code WKT_BYTE_LENGTH} term;
 * the expected output is textually identical to the query, i.e. no composite is expected to be created.
 */
@Test
public void test15() throws Exception {
    // The same normalized number appears in both the query and the expectation.
    String normalizedLength = Normalizer.NUMBER_NORMALIZER.normalize("12345");

    Multimap<String, String> componentsByComposite = LinkedListMultimap.create();
    componentsByComposite.put("GEO", "GEO");
    componentsByComposite.put("GEO", "WKT_BYTE_LENGTH");

    ShardQueryConfiguration conf = new ShardQueryConfiguration();
    conf.setCompositeToFieldMap(componentsByComposite);
    conf.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());

    Set<String> indexedFields = new HashSet<>();
    indexedFields.add("GEO");

    String query = "GEO >= '0100' && WKT_BYTE_LENGTH <= '" + normalizedLength + "'";
    String expected = "GEO >= '0100' && WKT_BYTE_LENGTH <= '" + normalizedLength + "'";

    runTestQuery(query, expected, indexedFields, conf);
}
Use of datawave.data.type.GeometryType in project datawave by NationalSecurityAgency.
Class ExpandCompositeTermsTest, method test26.
/**
 * Overloaded composite field {@code GEO} (components {@code GEO} and {@code WKT}, separator {@code ','}) with
 * a composite transition date configured at day 15 and a query window running from day 0 to day 30.
 * <p>
 * The query ORs an equality term and a bounded range on {@code GEO}, then ANDs the result with an equality on
 * {@code WKT}. In the expected output each {@code GEO} branch becomes a bounded range whose lower bound is the
 * bare {@code GEO} value and whose upper bound is the composite value {@code 'geo,wkt'}, accompanied by an
 * {@code _Eval_} group holding the original terms.
 * NOTE(review): the bare lower bound is presumably needed because the query window straddles the transition
 * date - confirm against the expansion logic.
 */
@Test
public void test26() throws Exception {
    Multimap<String, String> componentsByComposite = LinkedListMultimap.create();
    componentsByComposite.put("GEO", "GEO");
    componentsByComposite.put("GEO", "WKT");

    Map<String, String> separatorsByComposite = new HashMap<>();
    separatorsByComposite.put("GEO", ",");

    // Transition date for the GEO composite: day 15, inside the day 0..30 query window set below.
    Map<String, Date> transitionDatesByComposite = new HashMap<>();
    transitionDatesByComposite.put("GEO", new Date(TimeUnit.DAYS.toMillis(15)));

    ShardQueryConfiguration conf = new ShardQueryConfiguration();
    conf.setCompositeToFieldMap(componentsByComposite);
    conf.setCompositeFieldSeparators(separatorsByComposite);
    conf.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());
    conf.setBeginDate(new Date(0));
    conf.setEndDate(new Date(TimeUnit.DAYS.toMillis(30)));
    conf.setCompositeTransitionDates(transitionDatesByComposite);

    Set<String> indexedFields = new HashSet<>();
    indexedFields.add("GEO");

    String normNum = Normalizer.NUMBER_NORMALIZER.normalize("55");

    String query = "(GEO == '0202' || (((_Bounded_ = true) && (GEO >= '030a' && GEO <= '0335'))))"
                    + " && WKT == '" + normNum + "'";

    // Expansion expected for the GEO == '0202' branch.
    String equalityBranch = "(((_Bounded_ = true) && (GEO >= '0202' && GEO <= '0202," + normNum + "'))"
                    + " && ((_Eval_ = true) && (GEO == '0202' && WKT == '" + normNum + "')))";

    // Expansion expected for the bounded GEO range branch.
    String rangeBranch = "((((_Bounded_ = true) && (GEO >= '030a' && GEO <= '0335," + normNum + "'))"
                    + " && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '030a' && GEO <= '0335')) && WKT == '" + normNum + "'))))";

    String expected = "(" + equalityBranch + " || " + rangeBranch + ")";

    runTestQuery(query, expected, indexedFields, conf);
}
Use of datawave.data.type.GeometryType in project datawave by NationalSecurityAgency.
Class ExpandCompositeTermsTest, method test28.
/**
 * Overloaded composite field {@code GEO} (components {@code GEO} and {@code WKT}, separator {@code ','}) with
 * a composite transition date configured at day 15 and a query window running from day 0 to day 30.
 * <p>
 * The query ORs an equality term and an <em>unbounded</em> lower-bound term on {@code GEO}, then ANDs the
 * result with an equality on {@code WKT}. In the expected output the unbounded branch is left as plain
 * {@code WKT}/{@code GEO} terms, while the equality branch becomes a bounded range from the bare {@code GEO}
 * value up to the composite value {@code 'geo,wkt'}, accompanied by an {@code _Eval_} group holding the
 * original terms.
 */
@Test
public void test28() throws Exception {
    ShardQueryConfiguration conf = new ShardQueryConfiguration();

    Multimap<String, String> compositeToFieldMap = LinkedListMultimap.create();
    compositeToFieldMap.put("GEO", "GEO");
    compositeToFieldMap.put("GEO", "WKT");
    conf.setCompositeToFieldMap(compositeToFieldMap);

    Map<String, String> compositeToSeparatorMap = new HashMap<>();
    compositeToSeparatorMap.put("GEO", ",");
    conf.setCompositeFieldSeparators(compositeToSeparatorMap);

    Set<String> indexedFields = new HashSet<>();
    indexedFields.add("GEO");

    conf.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());

    // Query window of day 0..30 with the GEO composite transition date at day 15.
    conf.setBeginDate(new Date(0));
    conf.setEndDate(new Date(TimeUnit.DAYS.toMillis(30)));

    Map<String, Date> compositeWithOldDataMap = new HashMap<>();
    compositeWithOldDataMap.put("GEO", new Date(TimeUnit.DAYS.toMillis(15)));
    conf.setCompositeTransitionDates(compositeWithOldDataMap);

    String normNum = Normalizer.NUMBER_NORMALIZER.normalize("55");

    String query = "(GEO == '0202' || GEO >= '030a') && WKT == '" + normNum + "'";

    // Build the expectation from normNum as well, so the query and the expectation always carry the same
    // normalized WKT value instead of relying on a separately hard-coded copy of it.
    String expected = "((WKT == '" + normNum + "' && GEO >= '030a')"
                    + " || (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '0202," + normNum + "'))"
                    + " && ((_Eval_ = true) && (GEO == '0202' && WKT == '" + normNum + "'))))";

    runTestQuery(query, expected, indexedFields, conf);
}
Use of datawave.data.type.GeometryType in project datawave by NationalSecurityAgency.
Class ExpandCompositeTermsTest, method test19.
/**
 * Composite range testing with a normal (non-overloaded) composite field: {@code GEO_WKT} is configured as a
 * composite of the components {@code GEO} and {@code WKT}, joined with a {@code ','} separator.
 * <p>
 * Three groups of queries are run, in this order:
 * <ol>
 * <li>a bounded {@code GEO} range AND'd with a bounded {@code WKT} range, for every combination of
 * inclusive/exclusive operators on all four bounds (16 cases);</li>
 * <li>{@code GEO == '0202'} AND'd with a bounded {@code WKT} range, for every combination of
 * inclusive/exclusive operators on the two {@code WKT} bounds (4 cases);</li>
 * <li>queries whose expected output is identical to the input: a lone equality and unbounded ranges with
 * and without a {@code WKT} term (9 cases).</li>
 * </ol>
 */
@Test
public void test19() throws Exception {
    ShardQueryConfiguration conf = new ShardQueryConfiguration();

    Multimap<String, String> compositeToFieldMap = LinkedListMultimap.create();
    compositeToFieldMap.put("GEO_WKT", "GEO");
    compositeToFieldMap.put("GEO_WKT", "WKT");
    conf.setCompositeToFieldMap(compositeToFieldMap);

    Map<String, String> compositeToSeparatorMap = new HashMap<>();
    compositeToSeparatorMap.put("GEO_WKT", ",");
    conf.setCompositeFieldSeparators(compositeToSeparatorMap);

    Set<String> indexedFields = new HashSet<>();
    indexedFields.add("GEO");

    conf.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());

    // Normalized WKT upper bound; used in every query AND every expectation so the two cannot drift apart.
    String upperBound = Normalizer.NUMBER_NORMALIZER.normalize("12345");

    // COMPOSITE QUERY AGAINST THE COMPOSITE INDEX
    // if incrementing/decrementing is an option
    // NOTE: Because we are combining two ranges, our bounds will already include some unwanted composite terms.
    // Those will be taken care of via a combination of accumulo iterator filtering against the shard index,
    // and field index filtering against the field index within the index iterators.
    // GE to GE -> GE
    // GE to GT -> GT
    // GT to GT -> increment base, GT
    // GT to GE -> increment base, GE
    // GT to EQ -> increment base, GE
    // EQ to GT -> GT
    // EQ to GE -> GE
    // LE to LE -> LE
    // LE to LT -> LT
    // LT to LT -> decrement base, LT
    // LT to LE -> decrement base, LE
    // LT to EQ -> decrement base, LE
    // EQ to LT -> LT
    // EQ to LE -> LE

    // NON-COMPOSITE QUERY AGAINST AN OVERLOADED COMPOSITE INDEX
    // if incrementing/decrementing is an option
    // NOTE: The proposed solutions only work IFF the underlying data is truly a unicode string
    // GE -> GE
    // GT -> increment base, GE
    // LE -> increment base, LT
    // LT -> LT
    // EQ -> EQ convert to range, lower bound -> inclusive term, upper bound -> exclusive incremented term
    // e.g. GEO == '0202'
    // GEO >= '0202' && GEO < '0203'

    String[] lowerOps = {">=", ">"};
    String[] upperOps = {"<=", "<"};

    // Group 1: bounded GEO range && bounded WKT range.
    // The expected composite bound takes its operator from the WKT bound. An exclusive GEO lower bound
    // increments the GEO term ('0202' -> '0203'); an exclusive GEO upper bound decrements it
    // ('020d' -> '020c'). The loop nesting reproduces the order in which the cases were originally listed:
    // the GEO lower operator varies fastest, the WKT upper operator slowest.
    for (String wktUpperOp : upperOps) {
        for (String wktLowerOp : lowerOps) {
            for (String geoUpperOp : upperOps) {
                for (String geoLowerOp : lowerOps) {
                    String geoRange = "((_Bounded_ = true) && (GEO " + geoLowerOp + " '0202' && GEO " + geoUpperOp + " '020d'))";
                    String wktRange = "((_Bounded_ = true) && (WKT " + wktLowerOp + " '+AE0' && WKT " + wktUpperOp + " '" + upperBound + "'))";

                    String compositeLower = (">".equals(geoLowerOp) ? "0203" : "0202") + ",+AE0";
                    String compositeUpper = ("<".equals(geoUpperOp) ? "020c" : "020d") + "," + upperBound;

                    String query = geoRange + " && " + wktRange;
                    String expected = "((_Bounded_ = true) && (GEO_WKT " + wktLowerOp + " '" + compositeLower + "' && GEO_WKT " + wktUpperOp + " '"
                                    + compositeUpper + "')) && ((_Eval_ = true) && (" + geoRange + " && " + wktRange + "))";
                    runTestQuery(query, expected, indexedFields, conf);
                }
            }
        }
    }

    // Group 2: GEO equality && bounded WKT range.
    // EQ to GE -> GE, EQ to GT -> GT, EQ to LE -> LE, EQ to LT -> LT; the GEO term is used unchanged on both
    // composite bounds. Here the WKT upper operator varies fastest, matching the original case order.
    for (String wktLowerOp : lowerOps) {
        for (String wktUpperOp : upperOps) {
            String wktRange = "((_Bounded_ = true) && (WKT " + wktLowerOp + " '+AE0' && WKT " + wktUpperOp + " '" + upperBound + "'))";

            String query = "(GEO == '0202') && " + wktRange;
            String expected = "((_Bounded_ = true) && (GEO_WKT " + wktLowerOp + " '0202,+AE0' && GEO_WKT " + wktUpperOp + " '0202," + upperBound
                            + "')) && ((_Eval_ = true) && (GEO == '0202' && " + wktRange + "))";
            runTestQuery(query, expected, indexedFields, conf);
        }
    }

    // Group 3: queries expected to come back exactly as written.
    String[] unchangedQueries = {
                    // EQ, for non-overloaded, keep as-is
                    "GEO == '0202'",
                    // Unbounded range w/ composite term
                    "GEO >= '0202' && WKT < '" + upperBound + "'",
                    "GEO >= '0202' && WKT > '" + upperBound + "'",
                    "GEO <= '0202' && WKT < '" + upperBound + "'",
                    "GEO <= '0202' && WKT > '" + upperBound + "'",
                    // Unbounded range w/out composite term
                    "GEO >= '0202'",
                    "GEO > '0202'",
                    "GEO <= '0202'",
                    "GEO < '0202'"};
    for (String unchanged : unchangedQueries) {
        runTestQuery(unchanged, unchanged, indexedFields, conf);
    }
}
Aggregations