Search in sources :

Example 1 with ShardIndexQueryConfiguration

use of datawave.query.config.ShardIndexQueryConfiguration in project datawave by NationalSecurityAgency.

the class ShardIndexQueryTable method setupQuery.

/**
 * Prepares the discovery scan for a shard index query.
 * <p>
 * One {@link BatchScanner} is configured per normalized term and per normalized pattern found in the
 * supplied configuration. The scanners are collected in order and exposed through {@code this.iterator},
 * which deserializes each scanned {@link Value} into the {@link DiscoveredThing}s it contains.
 * Finally {@code this.scanner} is set to a placeholder {@link ScannerBase} whose methods do nothing.
 *
 * @param genericConfig
 *            the query configuration; its runtime class must be exactly {@link ShardIndexQueryConfiguration}
 *            (the check compares class names, so subclasses are rejected)
 * @throws QueryException
 *             if {@code genericConfig} is not a {@link ShardIndexQueryConfiguration}
 * @throws TableNotFoundException
 *             declared by the signature; presumably raised while configuring a scanner - confirm against
 *             configureBatchScannerForDiscovery
 * @throws IOException
 *             declared by the signature
 * @throws ExecutionException
 *             declared by the signature
 */
@Override
public void setupQuery(GenericQueryConfiguration genericConfig) throws QueryException, TableNotFoundException, IOException, ExecutionException {
    // Exact class-name match on purpose kept as-is: a subclass of ShardIndexQueryConfiguration is rejected too.
    if (!genericConfig.getClass().getName().equals(ShardIndexQueryConfiguration.class.getName())) {
        throw new QueryException("Did not receive a ShardIndexQueryConfiguration instance!!");
    }
    ShardIndexQueryConfiguration config = (ShardIndexQueryConfiguration) genericConfig;
    // Each entry pairs a scanner with a flag; for pattern scanners the flag is the same boolean
    // that selects the reverse index table below, for term scanners it is always false.
    final List<Entry<BatchScanner, Boolean>> batchscanners = Lists.newLinkedList();
    for (Entry<String, String> termEntry : config.getNormalizedTerms().entries()) {
        // scan the table: one range, one literal (the entry value), no patterns, restricted to the entry key
        BatchScanner bs = configureBatchScannerForDiscovery(config, this.scannerFactory, TableName.SHARD_INDEX, Collections.singleton(config.getRangesForTerms().get(termEntry)), Collections.singleton(termEntry.getValue()), Collections.emptySet(), config.getTableName().equals(config.getReverseIndexTableName()), false, Collections.singleton(termEntry.getKey()));
        batchscanners.add(Maps.immutableEntry(bs, false));
    }
    for (Entry<String, String> patternEntry : config.getNormalizedPatterns().entries()) {
        Entry<Range, Boolean> rangeEntry = config.getRangesForPatterns().get(patternEntry);
        // The boolean attached to the range decides which index table is scanned.
        String tName = rangeEntry.getValue() ? TableName.SHARD_RINDEX : TableName.SHARD_INDEX;
        // scan the table: one range, no literals, one pattern (the entry value), restricted to the entry key
        BatchScanner bs = configureBatchScannerForDiscovery(config, this.scannerFactory, tName, Collections.singleton(rangeEntry.getKey()), Collections.emptySet(), Collections.singleton(patternEntry.getValue()), rangeEntry.getValue(), false, Collections.singleton(patternEntry.getKey()));
        batchscanners.add(Maps.immutableEntry(bs, rangeEntry.getValue()));
    }
    final Iterator<Entry<BatchScanner, Boolean>> batchScannerIterator = batchscanners.iterator();
    this.iterator = concat(transform(new CloseableIterator(batchScannerIterator), new Function<Entry<Key, Value>, Iterator<DiscoveredThing>>() {

        // Reused across calls to avoid allocating a buffer per scanned entry.
        DataInputBuffer in = new DataInputBuffer();

        /**
         * Deserializes the value of a scanned entry as an {@link ArrayWritable} of {@link DiscoveredThing}
         * and returns an iterator over its elements.
         *
         * @throws IllegalStateException
         *             if the value cannot be deserialized
         */
        @Override
        public Iterator<DiscoveredThing> apply(Entry<Key, Value> from) {
            Value value = from.getValue();
            in.reset(value.get(), value.getSize());
            ArrayWritable aw = new ArrayWritable(DiscoveredThing.class);
            try {
                aw.readFields(in);
            } catch (IOException e) {
                // Previously this returned null, which dropped the cause and handed a null iterator to the
                // enclosing concat(...) (presumably Guava's Iterators.concat, which rejects null elements).
                // Fail with the real cause attached instead.
                throw new IllegalStateException("Unable to deserialize DiscoveredThing array from key " + from.getKey(), e);
            }
            ArrayList<DiscoveredThing> thangs = Lists.newArrayListWithCapacity(aw.get().length);
            for (Writable w : aw.get()) {
                thangs.add((DiscoveredThing) w);
            }
            return thangs.iterator();
        }
    }));
    // Placeholder scanner: every method below is a no-op or returns null/0.
    // NOTE(review): presumably results are consumed through this.iterator and this.scanner only needs to
    // be non-null for the surrounding framework - confirm against callers.
    this.scanner = new ScannerBase() {

        @Override
        public void addScanIterator(IteratorSetting cfg) {
        }

        @Override
        public void clearColumns() {
        }

        @Override
        public void clearScanIterators() {
        }

        @Override
        public void close() {
        }

        @Override
        public Authorizations getAuthorizations() {
            return null;
        }

        @Override
        public void setSamplerConfiguration(SamplerConfiguration samplerConfiguration) {
        }

        @Override
        public SamplerConfiguration getSamplerConfiguration() {
            return null;
        }

        @Override
        public void clearSamplerConfiguration() {
        }

        @Override
        public void setBatchTimeout(long l, TimeUnit timeUnit) {
        }

        @Override
        public long getBatchTimeout(TimeUnit timeUnit) {
            return 0;
        }

        @Override
        public void setClassLoaderContext(String s) {
        }

        @Override
        public void clearClassLoaderContext() {
        }

        @Override
        public String getClassLoaderContext() {
            return null;
        }

        @Override
        public void fetchColumn(Text colFam, Text colQual) {
        }

        @Override
        public void fetchColumn(IteratorSetting.Column column) {
        }

        @Override
        public void fetchColumnFamily(Text col) {
        }

        @Override
        public long getTimeout(TimeUnit timeUnit) {
            return 0;
        }

        @Override
        public Iterator<Entry<Key, Value>> iterator() {
            return null;
        }

        @Override
        public void removeScanIterator(String iteratorName) {
        }

        @Override
        public void setTimeout(long timeOut, TimeUnit timeUnit) {
        }

        @Override
        public void updateScanIteratorOption(String iteratorName, String key, String value) {
        }
    };
}
Also used : DiscoveredThing(datawave.query.discovery.DiscoveredThing) BatchScanner(org.apache.accumulo.core.client.BatchScanner) ArrayList(java.util.ArrayList) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) Writable(org.apache.hadoop.io.Writable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) ShardIndexQueryConfiguration(datawave.query.config.ShardIndexQueryConfiguration) Entry(java.util.Map.Entry) ArrayWritable(org.apache.hadoop.io.ArrayWritable) Iterator(java.util.Iterator) DiscoveryIterator(datawave.query.discovery.DiscoveryIterator) TimeUnit(java.util.concurrent.TimeUnit) Authorizations(org.apache.accumulo.core.security.Authorizations) ScannerBase(org.apache.accumulo.core.client.ScannerBase) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) LongRange(org.apache.commons.lang.math.LongRange) Range(org.apache.accumulo.core.data.Range) QueryException(datawave.webservice.query.exception.QueryException) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key)

Example 2 with ShardIndexQueryConfiguration

use of datawave.query.config.ShardIndexQueryConfiguration in project datawave by NationalSecurityAgency.

the class ShardIndexQueryTable method initialize.

/**
 * Builds the {@link ShardIndexQueryConfiguration} for a query.
 * <p>
 * The method creates the scanner factory and metadata helper, copies model, datatype-filter, table-name and
 * date settings onto the configuration, converts the Lucene query to JEXL, expands and normalizes it, and
 * finally records the literal terms, the patterns and the scan range computed for each of them.
 * If unfielded-term expansion reports that nothing matched, the configuration is returned early with empty
 * term and pattern maps.
 *
 * @param connection
 *            connector used for the scanner factory, the metadata helper and the configuration
 * @param settings
 *            the query settings; its query string must not be empty
 * @param auths
 *            authorizations applied to the metadata helper and the configuration
 * @return the populated configuration
 * @throws IllegalArgumentException
 *             if the query string is empty
 * @throws Exception
 *             anything raised by parsing, metadata lookups or range computation
 */
@Override
public GenericQueryConfiguration initialize(Connector connection, Query settings, Set<Authorizations> auths) throws Exception {
    final ShardIndexQueryConfiguration indexConfig = new ShardIndexQueryConfiguration(this, settings);
    this.scannerFactory = new ScannerFactory(connection);
    final MetadataHelper helper = initializeMetadataHelper(connection, indexConfig.getMetadataTableName(), auths);

    if (StringUtils.isEmpty(settings.getQuery())) {
        throw new IllegalArgumentException("Query cannot be null");
    }
    if (log.isDebugEnabled()) {
        log.debug("Query parameters set to " + settings.getParameters());
    }

    // Per-query overrides of the model name and model table, applied only when supplied.
    final String requestedModelName = getTrimmedOrNull(settings, QueryParameters.PARAMETER_MODEL_NAME);
    if (null != requestedModelName) {
        modelName = requestedModelName;
    }
    final String requestedModelTable = getTrimmedOrNull(settings, QueryParameters.PARAMETER_MODEL_TABLE_NAME);
    if (null != requestedModelTable) {
        modelTableName = requestedModelTable;
    }
    queryModel = helper.getQueryModel(modelTableName, modelName, null);

    // Optional datatype filter, supplied as a separator-delimited list.
    final String rawDatatypeFilter = getTrimmedOrNull(settings, QueryParameters.DATATYPE_FILTER_SET);
    if (null != rawDatatypeFilter) {
        final String[] datatypeTokens = rawDatatypeFilter.split(PARAM_VALUE_SEP_STR);
        indexConfig.setDatatypeFilter(new HashSet<>(Arrays.asList(datatypeTokens)));
        if (log.isDebugEnabled()) {
            log.debug("Data type filter set to " + indexConfig.getDatatypeFilterAsString());
        }
    }

    indexConfig.setConnector(connection);
    indexConfig.setAuthorizations(auths);
    if (null != indexTableName) {
        indexConfig.setIndexTableName(indexTableName);
    }
    if (null != reverseIndexTableName) {
        indexConfig.setReverseIndexTableName(reverseIndexTableName);
    }

    // Missing dates fall back to the widest possible window.
    final Date requestedBegin = settings.getBeginDate();
    if (requestedBegin == null) {
        indexConfig.setBeginDate(new Date(0));
        if (log.isDebugEnabled()) {
            log.debug("No begin date supplied in settings.");
        }
    } else {
        indexConfig.setBeginDate(requestedBegin);
    }
    final Date requestedEnd = settings.getEndDate();
    if (requestedEnd == null) {
        indexConfig.setEndDate(new Date(Long.MAX_VALUE));
        if (log.isDebugEnabled()) {
            log.debug("No end date supplied in settings.");
        }
    } else {
        indexConfig.setEndDate(requestedEnd);
    }

    // start with a trimmed version of the query, converted to JEXL
    final LuceneToJexlQueryParser luceneParser = new LuceneToJexlQueryParser();
    luceneParser.setAllowLeadingWildCard(this.isAllowLeadingWildcard());
    final QueryNode parsedQuery = luceneParser.parse(settings.getQuery().trim());
    // TODO: Validate that this is a simple list of terms type of query
    final String jexlQuery = parsedQuery.getOriginalQuery();
    indexConfig.setQueryString(jexlQuery);
    if (log.isDebugEnabled()) {
        log.debug("Original Query = " + settings.getQuery().trim());
        log.debug("JEXL Query = " + jexlQuery);
    }

    // Parse & flatten the query.
    final ASTJexlScript flattenedScript = JexlASTHelper.parseAndFlattenJexlQuery(indexConfig.getQueryString());
    ASTJexlScript workingScript;
    try {
        workingScript = UnfieldedIndexExpansionVisitor.expandUnfielded(indexConfig, this.scannerFactory, helper, flattenedScript);
    } catch (EmptyUnfieldedTermExpansionException e) {
        // Nothing to look up: hand back a configuration with no terms and no patterns.
        Multimap<String, String> nothing = Multimaps.unmodifiableMultimap(HashMultimap.create());
        indexConfig.setNormalizedTerms(nothing);
        indexConfig.setNormalizedPatterns(nothing);
        return indexConfig;
    }

    final Set<String> datatypeFilter = indexConfig.getDatatypeFilter();
    final Set<String> knownFields = helper.getAllFields(datatypeFilter);
    workingScript = QueryModelVisitor.applyModel(workingScript, queryModel, knownFields);

    if (log.isTraceEnabled()) {
        log.trace("fetching dataTypes from FetchDataTypesVisitor");
    }
    final Multimap<String, Type<?>> typesByField = FetchDataTypesVisitor.fetchDataTypes(helper, indexConfig.getDatatypeFilter(), workingScript);
    indexConfig.setDataTypes(typesByField);
    indexConfig.setQueryFieldsDatatypes(typesByField);

    final Set<String> forwardIndexed = helper.getIndexedFields(datatypeFilter);
    indexConfig.setIndexedFields(forwardIndexed);

    final Set<String> reverseIndexed = helper.getReverseIndexedFields(datatypeFilter);
    indexConfig.setReverseIndexedFields(reverseIndexed);

    final Multimap<String, Type<?>> typesForNormalization = helper.getFieldsToDatatypes(datatypeFilter);
    indexConfig.setNormalizedFieldsDatatypes(typesForNormalization);

    if (log.isTraceEnabled()) {
        log.trace("Normalizers:");
        for (String fieldName : typesByField.keySet()) {
            log.trace(fieldName + ": " + typesByField.get(fieldName));
        }
    }

    workingScript = ExpandMultiNormalizedTerms.expandTerms(indexConfig, helper, workingScript);

    final Multimap<String, String> literalTerms = LiteralNodeVisitor.getLiterals(workingScript);
    final Multimap<String, String> patternTerms = PatternNodeVisitor.getPatterns(workingScript);
    indexConfig.setNormalizedTerms(literalTerms);
    indexConfig.setNormalizedPatterns(patternTerms);
    if (log.isDebugEnabled()) {
        log.debug("Normalized Literals = " + literalTerms);
        log.debug("Normalized Patterns = " + patternTerms);
    }

    // One scan range per literal term.
    final Map<Entry<String, String>, Range> literalRanges = Maps.newHashMap();
    for (Entry<String, String> literal : literalTerms.entries()) {
        literalRanges.put(literal, ShardIndexQueryTableStaticMethods.getLiteralRange(literal));
    }

    // One scan range per pattern, paired with the flag saying whether it targets the reverse index.
    final Map<Entry<String, String>, Entry<Range, Boolean>> patternRanges = Maps.newHashMap();
    for (Entry<String, String> pattern : patternTerms.entries()) {
        final ShardIndexQueryTableStaticMethods.RefactoredRangeDescription description = ShardIndexQueryTableStaticMethods.getRegexRange(pattern, isFullTableScanEnabled(), helper, indexConfig);
        patternRanges.put(pattern, Maps.immutableEntry(description.range, description.isForReverseIndex));
    }

    indexConfig.setRangesForTerms(literalRanges);
    indexConfig.setRangesForPatterns(patternRanges);
    return indexConfig;
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) LuceneToJexlQueryParser(datawave.query.language.parser.jexl.LuceneToJexlQueryParser) LongRange(org.apache.commons.lang.math.LongRange) Range(org.apache.accumulo.core.data.Range) Date(java.util.Date) HashMultimap(com.google.common.collect.HashMultimap) Multimap(com.google.common.collect.Multimap) ShardIndexQueryConfiguration(datawave.query.config.ShardIndexQueryConfiguration) MetadataHelper(datawave.query.util.MetadataHelper) Type(datawave.data.type.Type) Entry(java.util.Map.Entry) ShardIndexQueryTableStaticMethods(datawave.query.jexl.lookups.ShardIndexQueryTableStaticMethods) QueryNode(datawave.query.language.tree.QueryNode) EmptyUnfieldedTermExpansionException(datawave.query.exceptions.EmptyUnfieldedTermExpansionException)

Aggregations

ShardIndexQueryConfiguration (datawave.query.config.ShardIndexQueryConfiguration)2 Entry (java.util.Map.Entry)2 Range (org.apache.accumulo.core.data.Range)2 LongRange (org.apache.commons.lang.math.LongRange)2 HashMultimap (com.google.common.collect.HashMultimap)1 Multimap (com.google.common.collect.Multimap)1 Type (datawave.data.type.Type)1 DiscoveredThing (datawave.query.discovery.DiscoveredThing)1 DiscoveryIterator (datawave.query.discovery.DiscoveryIterator)1 EmptyUnfieldedTermExpansionException (datawave.query.exceptions.EmptyUnfieldedTermExpansionException)1 ShardIndexQueryTableStaticMethods (datawave.query.jexl.lookups.ShardIndexQueryTableStaticMethods)1 LuceneToJexlQueryParser (datawave.query.language.parser.jexl.LuceneToJexlQueryParser)1 QueryNode (datawave.query.language.tree.QueryNode)1 MetadataHelper (datawave.query.util.MetadataHelper)1 QueryException (datawave.webservice.query.exception.QueryException)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1 Iterator (java.util.Iterator)1 TimeUnit (java.util.concurrent.TimeUnit)1