Search in sources :

Example 16 with ScannerFactory

use of datawave.query.tables.ScannerFactory in project datawave by NationalSecurityAgency.

the class RangeStreamTest method testDropTwoPredicates.

/**
 * Streams query plans for {@code LAUGH == 'bahahaha'} intersected with a union of three {@code FOO} terms and
 * verifies that exactly the expected ranges are produced.
 * <p>
 * The query window is 20190314 to 20190315, the shards-per-day threshold is set to 3 and the datatype filter is
 * limited to {@code datatype1} and {@code datatype2}. Every planned range must be one of the expected ranges, and
 * no expected range may be left over once all plans have been consumed.
 * <p>
 * NOTE(review): judging by the test name and the term values, two of the {@code FOO} terms presumably have no
 * index entries and are dropped -- confirm against the test data setup.
 *
 * @throws Exception
 *             if the query cannot be parsed, the dates cannot be parsed, or the plans cannot be streamed or closed
 */
@Test
public void testDropTwoPredicates() throws Exception {
    String originalQuery = "LAUGH == 'bahahaha' && ( FOO == 'boohoo' || FOO == 'idontexist' || FOO == 'neitherdoi!' )";
    ASTJexlScript script = JexlASTHelper.parseJexlQuery(originalQuery);
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
    config.setBeginDate(sdf.parse("20190314"));
    config.setEndDate(sdf.parse("20190315"));
    config.setShardsPerDayThreshold(3);
    config.setDatatypeFilter(Sets.newHashSet("datatype1", "datatype2"));
    Multimap<String, Type<?>> dataTypes = HashMultimap.create();
    dataTypes.putAll("FOO", Sets.newHashSet(new LcNoDiacriticsType()));
    dataTypes.putAll("LAUGH", Sets.newHashSet(new LcNoDiacriticsType()));
    config.setQueryFieldsDatatypes(dataTypes);
    config.setIndexedFields(dataTypes);
    MockMetadataHelper helper = new MockMetadataHelper();
    helper.setIndexedFields(dataTypes.keySet());
    helper.addFields(Arrays.asList("FOO", "LAUGH"));
    // Expected ranges are listed shard by shard, so it stays obvious which shards contribute to the results.
    // Each row pairs a shard with the datatype of the documents expected from that shard.
    String[][] shardsAndDatatypes = { {"20190314_0", "datatype1"}, {"20190314_1", "datatype1"}, {"20190314_10", "datatype2"},
            {"20190314_100", "datatype2"}, {"20190314_9", "datatype1"}};
    // The same six uids are expected from every contributing shard.
    String[] uids = {"345", "456", "567", "1345", "2456", "3567"};
    Set<Range> expectedRanges = Sets.newHashSet();
    for (String[] shardAndDatatype : shardsAndDatatypes) {
        for (String uid : uids) {
            // The document id is the datatype and the uid joined by a null byte.
            expectedRanges.add(makeTestRange(shardAndDatatype[0], shardAndDatatype[1] + "\u0000" + uid));
        }
    }
    RangeStream rangeStream = new RangeStream(config, new ScannerFactory(config.getConnector(), 1), helper).setLimitScanners(true);
    // try-with-resources guarantees the plans are closed even when an assertion fails part-way through.
    try (CloseableIterable<QueryPlan> queryPlans = rangeStream.streamPlans(script)) {
        // streamPlans(script) has to run first because it populates the StreamContext.
        assertEquals(IndexStream.StreamContext.PRESENT, rangeStream.context());
        for (QueryPlan queryPlan : queryPlans) {
            for (Range range : queryPlan.getRanges()) {
                assertTrue("Tried to remove unexpected range " + range.toString() + "\nfrom expected ranges: " + expectedRanges.toString(), expectedRanges.remove(range));
            }
        }
    }
    assertTrue("Expected ranges not found in query plan: " + expectedRanges.toString(), expectedRanges.isEmpty());
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) RangeFactoryForTests.makeTestRange(datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange) Range(org.apache.accumulo.core.data.Range) ScannerFactory(datawave.query.tables.ScannerFactory) QueryPlan(datawave.query.planner.QueryPlan) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) MockMetadataHelper(datawave.query.util.MockMetadataHelper) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) SimpleDateFormat(java.text.SimpleDateFormat) Test(org.junit.Test)

Example 17 with ScannerFactory

use of datawave.query.tables.ScannerFactory in project datawave by NationalSecurityAgency.

the class RangeStreamTest method testOrBothIndexed.

/**
 * Streams query plans for a union of two {@code FOO} terms and checks that the planned ranges are exactly the
 * three expected document ranges in shard {@code 20190314}.
 * <p>
 * The query window runs from the epoch up to the current time, and both {@code FOO} and {@code NUM} are
 * registered as indexed fields.
 *
 * @throws Exception
 *             if the query cannot be parsed or the plans cannot be streamed
 */
@Test
public void testOrBothIndexed() throws Exception {
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("(FOO == 'bag' || FOO == 'ba')");

    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(System.currentTimeMillis()));

    // Field-to-type mapping shared by the query configuration and the mock metadata helper.
    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("FOO", new LcNoDiacriticsType());
    fieldTypes.put("NUM", new NumberType());
    config.setQueryFieldsDatatypes(fieldTypes);
    config.setIndexedFields(fieldTypes);

    MockMetadataHelper helper = new MockMetadataHelper();
    helper.setIndexedFields(fieldTypes.keySet());

    Set<Range> expectedRanges = Sets.newHashSet();
    expectedRanges.add(makeTestRange("20190314", "datatype1\u0000234"));
    expectedRanges.add(makeTestRange("20190314", "datatype1\u0000345"));
    expectedRanges.add(makeTestRange("20190314", "datatype1\u0000123"));

    RangeStream rangeStream = new RangeStream(config, new ScannerFactory(config.getConnector()), helper);
    for (QueryPlan plan : rangeStream.streamPlans(script)) {
        for (Range planned : plan.getRanges()) {
            // Build the message before removal so it shows the set the range was looked up in.
            String failure = "Tried to remove unexpected range " + planned.toString() + " from expected ranges: " + expectedRanges.toString();
            assertTrue(failure, expectedRanges.remove(planned));
        }
    }

    // Anything still left was expected but never planned.
    assertTrue("Expected ranges not found in query plan: " + expectedRanges.toString(), expectedRanges.isEmpty());
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) RangeFactoryForTests.makeTestRange(datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange) Range(org.apache.accumulo.core.data.Range) QueryPlan(datawave.query.planner.QueryPlan) ScannerFactory(datawave.query.tables.ScannerFactory) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) MockMetadataHelper(datawave.query.util.MockMetadataHelper) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) Test(org.junit.Test)

Example 18 with ScannerFactory

use of datawave.query.tables.ScannerFactory in project datawave by NationalSecurityAgency.

the class RangeStreamTest method testUnion_OfTwoNestedIntersections_LeftLowCardTerms_withSeek.

/**
 * Streams query plans for a union of two nested intersections, {@code (A && B) || (C && D)}, where all four
 * terms are on the {@code FOO} field, and verifies the planned ranges.
 * <p>
 * The query window is 20190310 to 20190320 with the datatype filter limited to {@code datatype1} and
 * {@code datatype2}. The expected ranges cover shards 1 through 49 of the days 20190310, 20190312, 20190314 and
 * 20190316, with two {@code datatype1} documents per shard.
 *
 * @throws Exception
 *             if the query cannot be parsed, the dates cannot be parsed, or the plans cannot be streamed or closed
 */
@Test
public void testUnion_OfTwoNestedIntersections_LeftLowCardTerms_withSeek() throws Exception {
    String originalQuery = "(FOO == 'low_card' && FOO == 'lowest_card') || (FOO == 'high_card' && FOO == 'highest_card')";
    ASTJexlScript script = JexlASTHelper.parseJexlQuery(originalQuery);
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
    config.setBeginDate(sdf.parse("20190310"));
    config.setEndDate(sdf.parse("20190320"));
    config.setDatatypeFilter(Sets.newHashSet("datatype1", "datatype2"));
    Multimap<String, Type<?>> dataTypes = HashMultimap.create();
    dataTypes.putAll("FOO", Sets.newHashSet(new LcNoDiacriticsType()));
    dataTypes.putAll("LAUGH", Sets.newHashSet(new LcNoDiacriticsType()));
    config.setQueryFieldsDatatypes(dataTypes);
    config.setIndexedFields(dataTypes);
    MockMetadataHelper helper = new MockMetadataHelper();
    helper.setIndexedFields(dataTypes.keySet());
    Set<Range> expectedRanges = new HashSet<>();
    // Every other day starting at 20190310 (day digits 0, 2, 4, 6), shards 1..49, two documents per shard.
    for (int day = 0; day < 8; day += 2) {
        for (int ii = 1; ii < 50; ii++) {
            String shard = "2019031" + day + "_" + ii;
            expectedRanges.add(makeTestRange(shard, "datatype1\u0000a.b.c"));
            expectedRanges.add(makeTestRange(shard, "datatype1\u0000d.e.f"));
        }
    }
    RangeStream rangeStream = new RangeStream(config, new ScannerFactory(config.getConnector(), 1), helper);
    rangeStream.setLimitScanners(true);
    // try-with-resources guarantees the plans are closed even when an assertion fails part-way through.
    try (CloseableIterable<QueryPlan> queryPlans = rangeStream.streamPlans(script)) {
        // The stream context is only checked after streamPlans(script) has been invoked.
        assertEquals(IndexStream.StreamContext.PRESENT, rangeStream.context());
        for (QueryPlan queryPlan : queryPlans) {
            for (Range range : queryPlan.getRanges()) {
                assertTrue("Tried to remove unexpected range " + range.toString() + "\nfrom expected ranges: " + expectedRanges.toString(), expectedRanges.remove(range));
            }
        }
    }
    assertTrue("Expected ranges not found in query plan: " + expectedRanges.toString(), expectedRanges.isEmpty());
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) RangeFactoryForTests.makeTestRange(datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange) Range(org.apache.accumulo.core.data.Range) ScannerFactory(datawave.query.tables.ScannerFactory) QueryPlan(datawave.query.planner.QueryPlan) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) MockMetadataHelper(datawave.query.util.MockMetadataHelper) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) SimpleDateFormat(java.text.SimpleDateFormat) Test(org.junit.Test)

Example 19 with ScannerFactory

use of datawave.query.tables.ScannerFactory in project datawave by NationalSecurityAgency.

the class RangeStreamTest method testNestedOr.

/**
 * Streams query plans for a union of three {@code FOO} terms with scanner limiting enabled and checks that the
 * planned ranges are exactly the three expected document ranges in shard {@code 20190314}.
 *
 * @throws Exception
 *             if the query cannot be parsed or the plans cannot be streamed
 */
@Test
public void testNestedOr() throws Exception {
    final String query = "(FOO == 'bag' || FOO == 'ba' || FOO == 'barglegarglebarsh')";
    final ASTJexlScript parsed = JexlASTHelper.parseJexlQuery(query);

    // Query window: from the epoch up to "now".
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(System.currentTimeMillis()));

    final Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.put("FOO", new LcNoDiacriticsType());
    typesByField.put("NUM", new NumberType());
    config.setQueryFieldsDatatypes(typesByField);
    config.setIndexedFields(typesByField);

    final MockMetadataHelper helper = new MockMetadataHelper();
    helper.setIndexedFields(typesByField.keySet());

    final Set<Range> expectedRanges = Sets.newHashSet(makeTestRange("20190314", "datatype1\u0000234"), makeTestRange("20190314", "datatype1\u0000345"),
            makeTestRange("20190314", "datatype1\u0000123"));

    final RangeStream stream = new RangeStream(config, new ScannerFactory(config.getConnector(), 1), helper);
    stream.setLimitScanners(true);

    for (QueryPlan plan : stream.streamPlans(parsed)) {
        for (Range planned : plan.getRanges()) {
            // The message is built before removal, so it reflects the set the range was looked up in.
            final String unexpected = "Tried to remove unexpected range " + planned.toString() + " from expected ranges: " + expectedRanges.toString();
            assertTrue(unexpected, expectedRanges.remove(planned));
        }
    }

    // Whatever remains was expected but never showed up in a plan.
    final String leftover = "Expected ranges not found in query plan: " + expectedRanges.toString();
    assertTrue(leftover, expectedRanges.isEmpty());
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) RangeFactoryForTests.makeTestRange(datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange) Range(org.apache.accumulo.core.data.Range) ScannerFactory(datawave.query.tables.ScannerFactory) QueryPlan(datawave.query.planner.QueryPlan) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) MockMetadataHelper(datawave.query.util.MockMetadataHelper) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) Test(org.junit.Test)

Example 20 with ScannerFactory

use of datawave.query.tables.ScannerFactory in project datawave by NationalSecurityAgency.

the class DiscoveryLogic method initialize.

/**
 * Builds the {@link DiscoveryQueryConfiguration} for a discovery query.
 * <p>
 * The method creates the scanner factory and metadata helper for this logic, applies the optional query
 * parameters (model name, model table name, separate-counts-by-visibility, show-reference-count and datatype
 * filter), copies connector, authorizations, table names and date range into the configuration, converts the
 * Lucene query text to JEXL, applies the query model, and finally normalizes the literals, patterns and ranges
 * found in the query against the known field datatypes.
 *
 * @param connection
 *            connector used for the scanner factory, the metadata helper and the returned configuration
 * @param settings
 *            the query to run; supplies the query text, optional parameters and the begin/end dates
 * @param auths
 *            authorizations used for the metadata helper and stored on the configuration
 * @return the populated discovery configuration
 * @throws IllegalArgumentException
 *             if {@code StringUtils.isEmpty} reports the query text as empty
 * @throws Exception
 *             if any of the metadata lookups, parsing or normalization steps fail
 */
@Override
public GenericQueryConfiguration initialize(Connector connection, Query settings, Set<Authorizations> auths) throws Exception {
    DiscoveryQueryConfiguration config = new DiscoveryQueryConfiguration(this, settings);
    this.scannerFactory = new ScannerFactory(connection);
    this.metadataHelper = initializeMetadataHelper(connection, config.getMetadataTableName(), auths);
    if (StringUtils.isEmpty(settings.getQuery())) {
        throw new IllegalArgumentException("Query cannot be null");
    }
    if (log.isDebugEnabled()) {
        log.debug("Query parameters set to " + settings.getParameters());
    }
    // Check if the default modelName and modelTableNames have been overridden by custom parameters.
    if (null != settings.findParameter(QueryParameters.PARAMETER_MODEL_NAME) && !settings.findParameter(QueryParameters.PARAMETER_MODEL_NAME).getParameterValue().trim().isEmpty()) {
        modelName = settings.findParameter(QueryParameters.PARAMETER_MODEL_NAME).getParameterValue().trim();
    }
    if (null != settings.findParameter(QueryParameters.PARAMETER_MODEL_TABLE_NAME) && !settings.findParameter(QueryParameters.PARAMETER_MODEL_TABLE_NAME).getParameterValue().trim().isEmpty()) {
        modelTableName = settings.findParameter(QueryParameters.PARAMETER_MODEL_TABLE_NAME).getParameterValue().trim();
    }
    // Check if user would like counts separated by column visibility
    if (null != settings.findParameter(SEPARATE_COUNTS_BY_COLVIS) && !settings.findParameter(SEPARATE_COUNTS_BY_COLVIS).getParameterValue().trim().isEmpty()) {
        separateCountsByColVis = Boolean.valueOf(settings.findParameter(SEPARATE_COUNTS_BY_COLVIS).getParameterValue().trim());
        config.setSeparateCountsByColVis(separateCountsByColVis);
    }
    // Check if user would like to show reference counts instead of term counts
    if (null != settings.findParameter(SHOW_REFERENCE_COUNT) && !settings.findParameter(SHOW_REFERENCE_COUNT).getParameterValue().trim().isEmpty()) {
        showReferenceCount = Boolean.valueOf(settings.findParameter(SHOW_REFERENCE_COUNT).getParameterValue().trim());
        config.setShowReferenceCount(showReferenceCount);
    }
    // The model is loaded only after the parameter overrides above have been applied.
    this.queryModel = metadataHelper.getQueryModel(modelTableName, modelName, null);
    // get the data type filter set if any
    if (null != settings.findParameter(QueryParameters.DATATYPE_FILTER_SET) && !settings.findParameter(QueryParameters.DATATYPE_FILTER_SET).getParameterValue().trim().isEmpty()) {
        Set<String> dataTypeFilter = new HashSet<>(Arrays.asList(StringUtils.split(settings.findParameter(QueryParameters.DATATYPE_FILTER_SET).getParameterValue().trim(), Constants.PARAM_VALUE_SEP)));
        config.setDatatypeFilter(dataTypeFilter);
        if (log.isDebugEnabled()) {
            log.debug("Data type filter set to " + dataTypeFilter);
        }
    }
    // Set the connector
    config.setConnector(connection);
    // Set the auths
    config.setAuthorizations(auths);
    // set the table names
    if (getIndexTableName() != null) {
        config.setIndexTableName(getIndexTableName());
    }
    if (getReverseIndexTableName() != null) {
        config.setReverseIndexTableName(getReverseIndexTableName());
    }
    // Set the date range
    config.setBeginDate(settings.getBeginDate());
    config.setEndDate(settings.getEndDate());
    // NOTE(review): when only one of the two dates is missing, the supplied one is discarded as well and the
    // entire date range is used -- confirm this is intended.
    if (null == config.getBeginDate() || null == config.getEndDate()) {
        config.setBeginDate(new Date(0));
        config.setEndDate(new Date(Long.MAX_VALUE));
        log.warn("Dates not specified, using entire date range");
    }
    // start with a trimmed version of the query, converted to JEXL
    LuceneToJexlQueryParser parser = new LuceneToJexlQueryParser();
    parser.setAllowLeadingWildCard(this.isAllowLeadingWildcard());
    QueryNode node = parser.parse(settings.getQuery().trim());
    // TODO: Validate that this is a simple list of terms type of query
    config.setQueryString(node.getOriginalQuery());
    if (log.isDebugEnabled()) {
        log.debug("Original Query = " + settings.getQuery().trim());
        log.debug("JEXL Query = " + node.getOriginalQuery());
    }
    // Parse & flatten the query
    ASTJexlScript script = JexlASTHelper.parseAndFlattenJexlQuery(config.getQueryString());
    script = CaseSensitivityVisitor.upperCaseIdentifiers(config, metadataHelper, script);
    Set<String> dataTypes = config.getDatatypeFilter();
    Set<String> allFields = metadataHelper.getAllFields(dataTypes);
    script = QueryModelVisitor.applyModel(script, getQueryModel(), allFields);
    QueryValues literalsAndPatterns = FindLiteralsAndPatternsVisitor.find(script);
    Stopwatch timer = new Stopwatch();
    timer.start();
    // no caching for getAllNormalizers, so try some magic with getFields...
    Multimap<String, Type<?>> dataTypeMap = ArrayListMultimap.create(metadataHelper.getFieldsToDatatypes(config.getDatatypeFilter()));
    /*
     * we have a mapping of FIELD->DataType, but not a mapping of ANYFIELD->DataType which should be all dataTypes
     */
    dataTypeMap.putAll(Constants.ANY_FIELD, uniqueByType(dataTypeMap.values()));
    timer.stop();
    // Guarded like the other debug statements so the message is only built when it will be logged.
    if (log.isDebugEnabled()) {
        log.debug("Took " + timer.elapsed(TimeUnit.MILLISECONDS) + "ms to get all the dataTypes.");
    }
    config.setLiterals(normalize(new LiteralNormalization(), literalsAndPatterns.getLiterals(), dataTypeMap));
    config.setPatterns(normalize(new PatternNormalization(), literalsAndPatterns.getPatterns(), dataTypeMap));
    config.setRanges(normalizeRanges(new LiteralNormalization(), literalsAndPatterns.getRanges(), dataTypeMap));
    if (log.isDebugEnabled()) {
        log.debug("Normalized Literals = " + config.getLiterals());
        log.debug("Normalized Patterns = " + config.getPatterns());
    }
    return config;
}
Also used : QueryValues(datawave.query.discovery.FindLiteralsAndPatternsVisitor.QueryValues) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) Stopwatch(com.google.common.base.Stopwatch) ScannerFactory(datawave.query.tables.ScannerFactory) LuceneToJexlQueryParser(datawave.query.language.parser.jexl.LuceneToJexlQueryParser) Date(java.util.Date) Type(datawave.data.type.Type) QueryNode(datawave.query.language.tree.QueryNode) HashSet(java.util.HashSet)

Aggregations

ScannerFactory (datawave.query.tables.ScannerFactory): 40
Type (datawave.data.type.Type): 36
ASTJexlScript (org.apache.commons.jexl2.parser.ASTJexlScript): 35
LcNoDiacriticsType (datawave.data.type.LcNoDiacriticsType): 34
MockMetadataHelper (datawave.query.util.MockMetadataHelper): 34
NoOpType (datawave.data.type.NoOpType): 33
NumberType (datawave.data.type.NumberType): 33
Test (org.junit.Test): 32
Range (org.apache.accumulo.core.data.Range): 26
RangeFactoryForTests.makeTestRange (datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange): 25
QueryPlan (datawave.query.planner.QueryPlan): 25
SimpleDateFormat (java.text.SimpleDateFormat): 9
ShardQueryConfiguration (datawave.query.config.ShardQueryConfiguration): 2
MetadataHelper (datawave.query.util.MetadataHelper): 2
Stopwatch (com.google.common.base.Stopwatch): 1
ContentQueryConfiguration (datawave.query.config.ContentQueryConfiguration): 1
EdgeQueryConfiguration (datawave.query.config.EdgeQueryConfiguration): 1
QueryValues (datawave.query.discovery.FindLiteralsAndPatternsVisitor.QueryValues): 1
RangeStream (datawave.query.index.lookup.RangeStream): 1
LuceneToJexlQueryParser (datawave.query.language.parser.jexl.LuceneToJexlQueryParser): 1