Search in sources:

Example 16 with GeometryType

use of datawave.data.type.GeometryType in project datawave by NationalSecurityAgency.

the class ExpandCompositeTermsTest method test18a.

/**
 * Composite range testing with an overloaded composite field.
 *
 * <p>The composite field {@code GEO} is mapped to the component fields {@code GEO} and
 * {@code WKT} (so the composite shares its name with its first component) and uses
 * {@code ","} as the separator. {@code GEO} is the only indexed field and is registered
 * with a {@link GeometryType} discrete index type.
 *
 * <p>Each case hands a query and the expected rewritten query to {@code runTestQuery}
 * (defined elsewhere in this class; presumably it expands the query and compares the
 * result against {@code expected} - confirm against the helper). The cases cover:
 * <ul>
 * <li>bounded GEO range combined with bounded WKT range, for every mix of inclusive and
 * exclusive bounds;</li>
 * <li>GEO equality combined with a bounded WKT range;</li>
 * <li>GEO equality on its own, expected to become a bounded range;</li>
 * <li>unbounded ranges, with and without the WKT term, whose expected text equals the
 * input text.</li>
 * </ul>
 *
 * @throws Exception
 *             declared by the test; propagated from {@code runTestQuery}
 */
@Test
public void test18a() throws Exception {
    ShardQueryConfiguration conf = new ShardQueryConfiguration();
    // Overloaded composite: "GEO" is both the composite name and its first component.
    Multimap<String, String> compositeToFieldMap = LinkedListMultimap.create();
    compositeToFieldMap.put("GEO", "GEO");
    compositeToFieldMap.put("GEO", "WKT");
    conf.setCompositeToFieldMap(compositeToFieldMap);
    // Component values of the GEO composite are joined with a comma.
    Map<String, String> compositeToSeparatorMap = new HashMap<>();
    compositeToSeparatorMap.put("GEO", ",");
    conf.setCompositeFieldSeparators(compositeToSeparatorMap);
    Set<String> indexedFields = new HashSet<>();
    indexedFields.add("GEO");
    conf.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());
    // Normalized upper bound for WKT; the expected strings below spell it out as '+eE1.2345'.
    String upperBound = Normalizer.NUMBER_NORMALIZER.normalize("12345");
    // COMPOSITE QUERY AGAINST THE COMPOSITE INDEX
    // if incrementing/decrementing is an option
    // NOTE: Because we are combining two ranges, our bounds will already include some unwanted composite terms.
    // Those will be taken care of via a combination of accumulo iterator filtering against the shard index,
    // and field index filtering against the field index within the index iterators.
    // GE to GE -> GE
    // GE to GT -> GT
    // GT to GT -> increment base, GT
    // GT to GE -> increment base, GE
    // GT to EQ -> increment base, GE
    // EQ to GT -> GT
    // EQ to GE -> GE
    // LE to LE -> LE
    // LE to LT -> LT
    // LT to LT -> decrement base, LT
    // LT to LE -> decrement base, LE
    // LT to EQ -> decrement base, LE
    // EQ to LT -> LT
    // EQ to LE -> LE
    // NON-COMPOSITE QUERY AGAINST AN OVERLOADED COMPOSITE INDEX
    // if incrementing/decrementing is an option
    // NOTE: The proposed solutions only work IFF the underlying data is truly a unicode string
    // GE -> GE
    // GT -> increment base, GE
    // LE -> increment base, LT
    // LT -> LT
    // EQ -> EQ convert to range, lower bound -> inclusive term, upper bound -> exclusive incremented term
    // e.g. GEO == '0202'
    // GEO >= '0202' && GEO < '0203'
    String query, expected;

    // ---- Bounded GEO range && bounded WKT range, WKT upper bound inclusive (<=) ----
    // In each pair of comments below, the first line describes how the lower bounds combine
    // and the second line describes how the upper bounds combine.

    // GE to GE, use GE
    // LE to LE, use LE
    query = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0202,+AE0' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GT to GE, increment fixed term, use GE
    // LE to LE, use LE
    query = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0203,+AE0' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GE to GE, use GE
    // LT to LE, decrement fixed term, use LE
    query = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0202,+AE0' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GT to GE, increment fixed term, use GE
    // LT to LE, decrement fixed term, use LE
    // NOTE(review): the ") )&&" spacing in this query literal differs from the other cases;
    // it is runtime text and is intentionally left exactly as written.
    query = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d') )&& ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0203,+AE0' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GE to GT, use GT
    // LE to LE, use LE
    query = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0202,+AE0' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GT to GT, increment fixed term, use GT
    // LE to LE, use LE
    query = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0203,+AE0' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GE to GT, use GT
    // LT to LE, decrement fixed term, use LE
    query = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0202,+AE0' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GT to GT, increment base, use GT
    // LT to LE, decrement fixed term, use LE
    query = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0203,+AE0' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);

    // ---- Bounded GEO range && bounded WKT range, WKT upper bound exclusive (<) ----

    // GE to GE, use GE
    // LE to LT, use LT
    query = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0202,+AE0' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GT to GE, increment fixed term, use GE
    // LE to LT, use LT
    query = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0203,+AE0' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GE to GE, use GE
    // LT to LT, decrement fixed term, use LT
    query = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0202,+AE0' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GT to GE, increment fixed term, use GE
    // LT to LT, decrement fixed term, use LT
    query = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0203,+AE0' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GE to GT, use GT
    // LE to LT, use LT
    query = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0202,+AE0' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GT to GT, increment fixed term, use GT
    // LE to LT, use LT
    query = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0203,+AE0' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GE to GT, use GT
    // LT to LT, decrement fixed term, use LT
    query = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0202,+AE0' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // GT to GT, increment fixed term, use GT
    // LT to LT, decrement fixed term, use LT
    query = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0203,+AE0' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);

    // ---- GEO equality && bounded WKT range: the same GEO value is used for both composite bounds ----

    // EQ to GE, use GE
    // EQ to LE, use LE
    query = "(GEO == '0202') && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0202,+AE0' && GEO <= '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // EQ to GE, use GE
    // EQ to LT, use LT
    query = "(GEO == '0202') && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO >= '0202,+AE0' && GEO < '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // EQ to GT, use GT
    // EQ to LE, use LE
    query = "(GEO == '0202') && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0202,+AE0' && GEO <= '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);
    // EQ to GT, use GT
    // EQ to LT, use LT
    query = "(GEO == '0202') && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + upperBound + "'))";
    expected = "((_Bounded_ = true) && (GEO > '0202,+AE0' && GEO < '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(query, expected, indexedFields, conf);

    // ---- GEO alone against the overloaded composite index ----

    // EQ, convert to range [keep base - use GE, increment base - use LT]
    query = "GEO == '0202'";
    expected = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '0203'))";
    runTestQuery(query, expected, indexedFields, conf);

    // ---- Unbounded ranges: in every case below the expected text equals the query text ----

    // Unbounded range w/ composite term
    query = "GEO >= '0202' && WKT < '" + upperBound + "'";
    expected = "GEO >= '0202' && WKT < '" + upperBound + "'";
    runTestQuery(query, expected, indexedFields, conf);
    query = "GEO >= '0202' && WKT > '" + upperBound + "'";
    expected = "GEO >= '0202' && WKT > '" + upperBound + "'";
    runTestQuery(query, expected, indexedFields, conf);
    query = "GEO <= '0202' && WKT < '" + upperBound + "'";
    expected = "GEO <= '0202' && WKT < '" + upperBound + "'";
    runTestQuery(query, expected, indexedFields, conf);
    query = "GEO <= '0202' && WKT > '" + upperBound + "'";
    expected = "GEO <= '0202' && WKT > '" + upperBound + "'";
    runTestQuery(query, expected, indexedFields, conf);
    // Unbounded range w/out composite term
    query = "GEO >= '0202'";
    expected = "GEO >= '0202'";
    runTestQuery(query, expected, indexedFields, conf);
    query = "GEO > '0202'";
    expected = "GEO > '0202'";
    runTestQuery(query, expected, indexedFields, conf);
    query = "GEO <= '0202'";
    expected = "GEO <= '0202'";
    runTestQuery(query, expected, indexedFields, conf);
    query = "GEO < '0202'";
    expected = "GEO < '0202'";
    runTestQuery(query, expected, indexedFields, conf);
}
Also used : GeometryType(datawave.data.type.GeometryType) HashMap(java.util.HashMap) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 17 with GeometryType

use of datawave.data.type.GeometryType in project datawave by NationalSecurityAgency.

the class ExpandCompositeTermsTest method test24.

/**
 * A disjunction of bounded GEO ranges with no WKT term present: the expected text is
 * the same as the query text.
 *
 * <p>The composite {@code GEO_WKT} is mapped to {@code GEO} and {@code WKT}; only
 * {@code GEO} is indexed, and it is registered with a {@link GeometryType} discrete
 * index type.
 *
 * @throws Exception
 *             declared by the test; propagated from {@code runTestQuery}
 */
@Test
public void test24() throws Exception {
    Set<String> indexed = new HashSet<>();
    indexed.add("GEO");

    Multimap<String, String> componentsByComposite = LinkedListMultimap.create();
    componentsByComposite.put("GEO_WKT", "GEO");
    componentsByComposite.put("GEO_WKT", "WKT");

    ShardQueryConfiguration config = new ShardQueryConfiguration();
    config.setCompositeToFieldMap(componentsByComposite);
    config.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());

    // One bounded GEO range per line; the same text serves as both input and expectation.
    final String geoRanges = "(((((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d'))))"
                    + " || ((((_Bounded_ = true) && (GEO >= '030a' && GEO <= '0335'))))"
                    + " || ((((_Bounded_ = true) && (GEO >= '0428' && GEO <= '0483'))))"
                    + " || ((((_Bounded_ = true) && (GEO >= '0500aa' && GEO <= '050355'))))"
                    + " || ((((_Bounded_ = true) && (GEO >= '1f0aaaaaaaaaaaaaaa' && GEO <= '1f36c71c71c71c71c7')))))";

    runTestQuery(geoRanges, geoRanges, indexed, config);
}
Also used : GeometryType(datawave.data.type.GeometryType) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 18 with GeometryType

use of datawave.data.type.GeometryType in project datawave by NationalSecurityAgency.

the class ExpandCompositeTermsTest method test12.

/**
 * A disjunction of GEO terms (two bounded ranges and one equality) ANDed with a bounded
 * {@code WKT_BYTE_LENGTH} range. The expected text pairs every GEO disjunct with the
 * length range as a composite {@code GEO} range, each followed by an {@code _Eval_}
 * clause holding the original terms.
 *
 * <p>The composite {@code GEO} is overloaded: it is mapped to the components {@code GEO}
 * and {@code WKT_BYTE_LENGTH}, joined with {@code ","}.
 *
 * @throws Exception
 *             declared by the test; propagated from {@code runTestQuery}
 */
@Test
public void test12() throws Exception {
    Set<String> indexed = new HashSet<>();
    indexed.add("GEO");

    Multimap<String, String> componentsByComposite = LinkedListMultimap.create();
    componentsByComposite.put("GEO", "GEO");
    componentsByComposite.put("GEO", "WKT_BYTE_LENGTH");

    Map<String, String> separators = new HashMap<>();
    separators.put("GEO", ",");

    ShardQueryConfiguration config = new ShardQueryConfiguration();
    config.setCompositeToFieldMap(componentsByComposite);
    config.setCompositeFieldSeparators(separators);
    config.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());

    // Normalized byte-length bounds, computed in the same order the query text uses them.
    String minLength = Normalizer.NUMBER_NORMALIZER.normalize("0");
    String maxLength = Normalizer.NUMBER_NORMALIZER.normalize("12345");

    String geoTerms = "(((_Bounded_ = true) && (GEO >= '1f0155640000000000' && GEO <= '1f01556bffffffffff'))"
                    + " || GEO == '00'"
                    + " || ((_Bounded_ = true) && (GEO >= '0100' && GEO <= '0103')))";
    String lengthRange = "((_Bounded_ = true) && (WKT_BYTE_LENGTH >= '" + minLength + "' && WKT_BYTE_LENGTH <= '" + maxLength + "'))";
    String input = geoTerms + " && " + lengthRange;

    // One expected disjunct per original GEO term.
    String wanted = "((((_Bounded_ = true) && (GEO >= '1f0155640000000000,+AE0' && GEO <= '1f01556bffffffffff,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '1f0155640000000000' && GEO <= '1f01556bffffffffff')) && ((_Bounded_ = true) && (WKT_BYTE_LENGTH >= '+AE0' && WKT_BYTE_LENGTH <= '+eE1.2345')))))"
                    + " || (((_Bounded_ = true) && (GEO >= '00,+AE0' && GEO <= '00,+eE1.2345')) && ((_Eval_ = true) && (GEO == '00' && ((_Bounded_ = true) && (WKT_BYTE_LENGTH >= '+AE0' && WKT_BYTE_LENGTH <= '+eE1.2345')))))"
                    + " || (((_Bounded_ = true) && (GEO >= '0100,+AE0' && GEO <= '0103,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0100' && GEO <= '0103')) && ((_Bounded_ = true) && (WKT_BYTE_LENGTH >= '+AE0' && WKT_BYTE_LENGTH <= '+eE1.2345'))))))";

    runTestQuery(input, wanted, indexed, config);
}
Also used : GeometryType(datawave.data.type.GeometryType) HashMap(java.util.HashMap) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

GeometryType (datawave.data.type.GeometryType): 18, Test (org.junit.Test): 18, ShardQueryConfiguration (datawave.query.config.ShardQueryConfiguration): 17, HashSet (java.util.HashSet): 17, HashMap (java.util.HashMap): 14, Date (java.util.Date): 4, DateType (datawave.data.type.DateType): 1, DiscreteIndexType (datawave.data.type.DiscreteIndexType): 1, NoOpType (datawave.data.type.NoOpType): 1, StringType (datawave.data.type.StringType): 1, Type (datawave.data.type.Type): 1, UniqueFields (datawave.query.attributes.UniqueFields): 1, DocumentPermutation (datawave.query.function.DocumentPermutation): 1, DocumentProjection (datawave.query.function.DocumentProjection): 1, QueryModel (datawave.query.model.QueryModel): 1, QueryImpl (datawave.webservice.query.QueryImpl): 1, SimpleDateFormat (java.text.SimpleDateFormat): 1