Search in sources :

Example 41 with ShardQueryConfiguration

use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.

the class ExpandCompositeTermsTest method test13.

/**
 * Runs the composite expansion on a bounded GEO range AND-ed with a lower-bound-only
 * WKT_BYTE_LENGTH term, with GEO registered as a composite of GEO and WKT_BYTE_LENGTH
 * (separator ",") and given a GeometryType discrete index type.
 *
 * @throws Exception if the query helper fails
 */
@Test
public void test13() throws Exception {
    // Composite name -> component fields, in insertion order.
    Multimap<String, String> componentsByComposite = LinkedListMultimap.create();
    componentsByComposite.put("GEO", "GEO");
    componentsByComposite.put("GEO", "WKT_BYTE_LENGTH");

    Map<String, String> separatorsByComposite = new HashMap<>();
    separatorsByComposite.put("GEO", ",");

    Set<String> indexed = new HashSet<>();
    indexed.add("GEO");

    ShardQueryConfiguration config = new ShardQueryConfiguration();
    config.setCompositeToFieldMap(componentsByComposite);
    config.setCompositeFieldSeparators(separatorsByComposite);
    config.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());

    // The WKT_BYTE_LENGTH bound is fed in normalized form; the expectation spells it out as '+AE0'.
    String normalizedZero = Normalizer.NUMBER_NORMALIZER.normalize("0");
    String input = "((_Bounded_ = true) && (GEO >= '0100' && GEO <= '0103')) && WKT_BYTE_LENGTH >= '" + normalizedZero + "'";
    String want = "((_Bounded_ = true) && (GEO >= '0100,+AE0' && GEO < '0104')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0100' && GEO <= '0103')) && WKT_BYTE_LENGTH >= '+AE0'))";

    runTestQuery(input, want, indexed, config);
}
Also used : GeometryType(datawave.data.type.GeometryType) HashMap(java.util.HashMap) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 42 with ShardQueryConfiguration

use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.

the class ExpandCompositeTermsTest method test18b.

/**
 * Composite range expansion for an overloaded composite field (GEO = GEO + WKT, separator ",")
 * when a composite transition date is configured for GEO.
 *
 * <p>The query window runs from epoch day 0 to day 30 and the GEO transition date is day 15,
 * so the transition falls inside the queried range.
 *
 * @throws Exception if the query helper fails
 */
@Test
public void test18b() throws Exception {
    // Composite name -> component fields, in insertion order.
    Multimap<String, String> componentsByComposite = LinkedListMultimap.create();
    componentsByComposite.put("GEO", "GEO");
    componentsByComposite.put("GEO", "WKT");

    Map<String, String> separatorsByComposite = new HashMap<>();
    separatorsByComposite.put("GEO", ",");

    Set<String> indexed = new HashSet<>();
    indexed.add("GEO");

    Map<String, Date> transitionDates = new HashMap<>();
    transitionDates.put("GEO", new Date(TimeUnit.DAYS.toMillis(15)));

    ShardQueryConfiguration config = new ShardQueryConfiguration();
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(TimeUnit.DAYS.toMillis(30)));
    config.setCompositeToFieldMap(componentsByComposite);
    config.setCompositeFieldSeparators(separatorsByComposite);
    config.getFieldToDiscreteIndexTypes().put("GEO", new GeometryType());
    config.setCompositeTransitionDates(transitionDates);

    // Normalized WKT upper bound; the expectations below spell it out as '+eE1.2345'.
    String wktUpper = Normalizer.NUMBER_NORMALIZER.normalize("12345");

    // Reference rules recorded with this test.
    //
    // Composite query against the composite index (when incrementing/decrementing is possible).
    // Combining two ranges means the bounds already admit some unwanted composite terms; those
    // are removed by accumulo iterator filtering against the shard index and by field index
    // filtering inside the index iterators.
    //   GE to GE -> GE
    //   GE to GT -> GT
    //   GT to GT -> increment base, GT
    //   GT to GE -> increment base, GE
    //   GT to EQ -> increment base, GE
    //   EQ to GT -> GT
    //   EQ to GE -> GE
    //   LE to LE -> LE
    //   LE to LT -> LT
    //   LT to LT -> decrement base, LT
    //   LT to LE -> decrement base, LE
    //   LT to EQ -> decrement base, LE
    //   EQ to LT -> LT
    //   EQ to LE -> LE
    //
    // Non-composite query against an overloaded composite index (when incrementing/decrementing
    // is possible). These only work if the underlying data is truly a unicode string.
    //   GE -> GE
    //   GT -> increment base, GE
    //   LE -> increment base, LT
    //   LT -> LT
    //   EQ -> range: inclusive lower bound on the term, exclusive upper bound on the incremented term
    //         e.g. GEO == '0202' becomes GEO >= '0202' && GEO < '0203'
    //
    // NOTE(review): in every bounded case below the expected lower bound is a plain GEO >= term
    // with no WKT component, while the upper bound carries ",+eE1.2345" and takes its operator
    // from the WKT upper bound. The lower-bound rows of the first table do not match that, so
    // they may describe the case without a transition date - confirm.
    String input;
    String want;

    // ---- WKT >= lower, WKT <= upper ----

    // GEO >= / <=
    input = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO > / <= : lower GEO term is incremented to '0203'
    input = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0203' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO >= / < : upper GEO term is decremented to '020c'
    input = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO > / < : lower incremented to '0203', upper decremented to '020c'
    input = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0203' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // ---- WKT > lower, WKT <= upper ----

    // GEO >= / <=
    input = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO > / <= : lower GEO term is incremented to '0203'
    input = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0203' && GEO <= '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO >= / < : upper GEO term is decremented to '020c'
    input = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO > / < : lower incremented to '0203', upper decremented to '020c'
    input = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0203' && GEO <= '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // ---- WKT >= lower, WKT < upper ----

    // GEO >= / <=
    input = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO > / <= : lower GEO term is incremented to '0203'
    input = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0203' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO >= / < : upper GEO term is decremented to '020c'
    input = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO > / < : lower incremented to '0203', upper decremented to '020c'
    input = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0203' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // ---- WKT > lower, WKT < upper ----

    // GEO >= / <=
    input = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO > / <= : lower GEO term is incremented to '0203'
    input = "((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0203' && GEO < '020d,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO <= '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO >= / < : upper GEO term is decremented to '020c'
    input = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO >= '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // GEO > / < : lower incremented to '0203', upper decremented to '020c'
    input = "((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0203' && GEO < '020c,+eE1.2345')) && ((_Eval_ = true) && (((_Bounded_ = true) && (GEO > '0202' && GEO < '020d')) && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // ---- GEO equality combined with a bounded WKT range ----

    // WKT >= / <=
    input = "(GEO == '0202') && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // WKT >= / <
    input = "(GEO == '0202') && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT >= '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // WKT > / <=
    input = "(GEO == '0202') && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO <= '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT > '+AE0' && WKT <= '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // WKT > / <
    input = "(GEO == '0202') && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '" + wktUpper + "'))";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '0202,+eE1.2345')) && ((_Eval_ = true) && (GEO == '0202' && ((_Bounded_ = true) && (WKT > '+AE0' && WKT < '+eE1.2345'))))";
    runTestQuery(input, want, indexed, config);

    // ---- GEO equality on its own: becomes the range ['0202', '0203') ----
    input = "GEO == '0202'";
    want = "((_Bounded_ = true) && (GEO >= '0202' && GEO < '0203'))";
    runTestQuery(input, want, indexed, config);

    // ---- One-sided GEO and WKT terms together: expected unchanged ----
    input = "GEO >= '0202' && WKT < '" + wktUpper + "'";
    want = "GEO >= '0202' && WKT < '" + wktUpper + "'";
    runTestQuery(input, want, indexed, config);

    input = "GEO >= '0202' && WKT > '" + wktUpper + "'";
    want = "GEO >= '0202' && WKT > '" + wktUpper + "'";
    runTestQuery(input, want, indexed, config);

    input = "GEO <= '0202' && WKT < '" + wktUpper + "'";
    want = "GEO <= '0202' && WKT < '" + wktUpper + "'";
    runTestQuery(input, want, indexed, config);

    input = "GEO <= '0202' && WKT > '" + wktUpper + "'";
    want = "GEO <= '0202' && WKT > '" + wktUpper + "'";
    runTestQuery(input, want, indexed, config);

    // ---- One-sided GEO term alone: expected unchanged ----
    input = "GEO >= '0202'";
    want = "GEO >= '0202'";
    runTestQuery(input, want, indexed, config);

    input = "GEO > '0202'";
    want = "GEO > '0202'";
    runTestQuery(input, want, indexed, config);

    input = "GEO <= '0202'";
    want = "GEO <= '0202'";
    runTestQuery(input, want, indexed, config);

    input = "GEO < '0202'";
    want = "GEO < '0202'";
    runTestQuery(input, want, indexed, config);
}
Also used : GeometryType(datawave.data.type.GeometryType) HashMap(java.util.HashMap) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) Date(java.util.Date) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 43 with ShardQueryConfiguration

use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.

the class ExceededOrThresholdMarkerJexlNodeTest method getResultsIterator.

/**
 * Builds an "EventQuery" request named "geoQuery" for the given query string, initializes and
 * sets up the supplied logic against the test instance, and returns the logic's transform
 * iterator for that query.
 *
 * @param queryString the query text placed in the QUERY_STRING parameter
 * @param logic the query logic to initialize, set up, and read results from
 * @return the iterator produced by {@code logic.getTransformIterator}
 * @throws Exception if parameter validation, initialization, or setup fails
 */
private Iterator getResultsIterator(String queryString, ShardQueryLogic logic) throws Exception {
    // Raw request parameters, as they would arrive from a caller.
    MultivaluedMap<String, String> requestParams = new MultivaluedMapImpl<>();
    requestParams.putSingle(QUERY_LOGIC_NAME, "EventQuery");
    requestParams.putSingle(QUERY_STRING, queryString);
    requestParams.putSingle(QUERY_NAME, "geoQuery");
    requestParams.putSingle(QUERY_PERSISTENCE, "PERSISTENT");
    requestParams.putSingle(QUERY_AUTHORIZATIONS, AUTHS);
    requestParams.putSingle(QUERY_EXPIRATION, "20200101 000000.000");
    requestParams.putSingle(QUERY_BEGIN, BEGIN_DATE);
    requestParams.putSingle(QUERY_END, END_DATE);

    QueryParameters validatedParams = new QueryParametersImpl();
    validatedParams.validate(requestParams);

    Set<Authorizations> authorizationSet = new HashSet<>();
    authorizationSet.add(new Authorizations(AUTHS));

    Query settings = new QueryImpl();
    settings.initialize(USER, Arrays.asList(USER_DN), null, validatedParams, null);

    // Initialize the logic as "root" on the test instance, then plan the query.
    ShardQueryConfiguration shardConfig = ShardQueryConfiguration.create(logic, settings);
    logic.initialize(shardConfig, instance.getConnector("root", PASSWORD), settings, authorizationSet);
    logic.setupQuery(shardConfig);

    return logic.getTransformIterator(settings);
}
Also used : Authorizations(org.apache.accumulo.core.security.Authorizations) QueryImpl(datawave.webservice.query.QueryImpl) Query(datawave.webservice.query.Query) MultivaluedMapImpl(org.jboss.resteasy.specimpl.MultivaluedMapImpl) QueryParameters(datawave.webservice.query.QueryParameters) QueryParametersImpl(datawave.webservice.query.QueryParametersImpl) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) HashSet(java.util.HashSet)

Example 44 with ShardQueryConfiguration

use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.

the class DateIndexQueryExpansionVisitorTest method assertExpansion.

/**
 * Parses {@code original}, runs function expansion over it with a configuration whose date
 * range is taken from this test's {@code startDate}/{@code endDate}, and asserts that the
 * result equals {@code expected} while the parsed input script still equals {@code original}.
 * Both scripts are also checked for valid lineage.
 *
 * @param original the query to expand
 * @param expected the query the expansion is expected to produce
 * @throws ParseException if {@code original} cannot be parsed
 */
private void assertExpansion(String original, String expected) throws ParseException {
    ShardQueryConfiguration dateRangeConfig = new ShardQueryConfiguration();
    dateRangeConfig.setBeginDate(startDate);
    dateRangeConfig.setEndDate(endDate);

    ASTJexlScript parsedInput = JexlASTHelper.parseJexlQuery(original);
    ASTJexlScript expanded = FunctionIndexQueryExpansionVisitor.expandFunctions(dateRangeConfig, metadataHelper, dateIndexHelper, parsedInput);

    // The expansion must yield the expected query...
    JexlNodeAssert.assertThat(expanded).isEqualTo(expected).hasValidLineage();
    // ...and the input script must still match the original text afterwards.
    JexlNodeAssert.assertThat(parsedInput).isEqualTo(original).hasValidLineage();
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration)

Example 45 with ShardQueryConfiguration

use of datawave.query.config.ShardQueryConfiguration in project datawave by NationalSecurityAgency.

the class GeoWavePruningVisitorTest method testNonIntersectingTermIsPruned.

/**
 * OR-s an extra {@code GEO_FIELD == '0100'} term with the index terms generated for a
 * geowave:intersects polygon and expects that extra term to be replaced by {@code false}
 * and reported as pruned.
 *
 * @throws ParseException if a query cannot be parsed
 */
@Test
public void testNonIntersectingTermIsPruned() throws ParseException {
    // The single term expected to be reported as pruned.
    Multimap<String, String> prunedTerms = HashMultimap.create();
    prunedTerms.put("GEO_FIELD", "0100");

    String geoFunction = "geowave:intersects(GEO_FIELD, 'POLYGON((10 10, 20 10, 20 20, 10 20, 10 10))')";
    // Index terms generated for the function itself.
    String expandedTerms = convertFunctionToIndexQuery(geoFunction, new ShardQueryConfiguration());

    // Input carries the extra '0100' term; the expectation has 'false' in its place.
    String input = geoFunction + " && (GEO_FIELD == '0100' || " + expandedTerms + ")";
    String want = geoFunction + " && (false || " + expandedTerms + ")";

    assertResult(input, want, prunedTerms);
}
Also used : ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) Test(org.junit.Test)

Aggregations

ShardQueryConfiguration (datawave.query.config.ShardQueryConfiguration): 108; Test (org.junit.Test): 75; ASTJexlScript (org.apache.commons.jexl2.parser.ASTJexlScript): 48; HashSet (java.util.HashSet): 42; Date (java.util.Date): 38; GeometryType (datawave.data.type.GeometryType): 17; MetadataHelper (datawave.query.util.MetadataHelper): 17; HashMap (java.util.HashMap): 16; CompositeFunctionsTest (datawave.query.CompositeFunctionsTest): 13; Query (datawave.webservice.query.Query): 10; Authorizations (org.apache.accumulo.core.security.Authorizations): 10; Before (org.junit.Before): 10; QueryImpl (datawave.webservice.query.QueryImpl): 9; QueryParameters (datawave.webservice.query.QueryParameters): 9; QueryParametersImpl (datawave.webservice.query.QueryParametersImpl): 9; MultivaluedMapImpl (org.jboss.resteasy.specimpl.MultivaluedMapImpl): 8; MockMetadataHelper (datawave.query.util.MockMetadataHelper): 7; JexlNode (org.apache.commons.jexl2.parser.JexlNode): 6; ExceededOrThresholdMarkerJexlNode (datawave.query.jexl.nodes.ExceededOrThresholdMarkerJexlNode): 5; ExceededValueThresholdMarkerJexlNode (datawave.query.jexl.nodes.ExceededValueThresholdMarkerJexlNode): 5