Search in sources :

Example 1 with AggregatedDfs

use of org.elasticsearch.search.dfs.AggregatedDfs in project elasticsearch by elastic.

In the class DfsQueryPhase, the method run:

/**
 * Merges the per-shard distributed frequency statistics collected by the DFS phase
 * and fans out one query-phase request per shard, attaching the aggregated
 * statistics to each request. When every shard has reported a result or a failure,
 * the next search phase is started.
 */
@Override
public void run() throws IOException {
    // TODO we can potentially also consume the actual per shard results from the initial phase here in the aggregateDfs
    // to free up memory early
    final AggregatedDfs dfs = searchPhaseController.aggregateDfs(dfsSearchResults);
    // Counts down one slot per shard; once all shards have either delivered a
    // result or failed, the completion callback kicks off the next phase.
    final CountedCollector<QuerySearchResultProvider> counter = new CountedCollector<>(queryResult::consumeResult, dfsSearchResults.asList().size(), () -> {
        context.executeNextPhase(this, nextPhaseFactory.apply(queryResult));
    }, context);
    for (final AtomicArray.Entry<DfsSearchResult> entry : dfsSearchResults.asList()) {
        DfsSearchResult dfsResult = entry.value;
        final int shardIndex = entry.index;
        final SearchShardTarget searchShardTarget = dfsResult.shardTarget();
        Transport.Connection connection = context.getConnection(searchShardTarget.getNodeId());
        // Reuses the search context opened by the DFS phase (dfsResult.id()) and
        // ships the merged statistics along with the original search request.
        QuerySearchRequest querySearchRequest = new QuerySearchRequest(context.getRequest(), dfsResult.id(), dfs);
        searchTransportService.sendExecuteQuery(connection, querySearchRequest, context.getTask(), ActionListener.wrap(result -> counter.onResult(shardIndex, result, searchShardTarget), exception -> {
            try {
                if (context.getLogger().isDebugEnabled()) {
                    context.getLogger().debug((Supplier<?>) () -> new ParameterizedMessage("[{}] Failed to execute query phase", querySearchRequest.id()), exception);
                }
                // Record the failure so the counter still reaches zero and the
                // next phase can run with partial results.
                counter.onFailure(shardIndex, searchShardTarget, exception);
            } finally {
                // The shard still holds an open search context for this request;
                // release it so the failure does not leak the context.
                context.sendReleaseSearchContext(querySearchRequest.id(), connection);
            }
        }));
    }
}
Also used : SearchShardTarget(org.elasticsearch.search.SearchShardTarget) Transport(org.elasticsearch.transport.Transport) Supplier(org.apache.logging.log4j.util.Supplier) AtomicArray(org.elasticsearch.common.util.concurrent.AtomicArray) AggregatedDfs(org.elasticsearch.search.dfs.AggregatedDfs) IOException(java.io.IOException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) QuerySearchRequest(org.elasticsearch.search.query.QuerySearchRequest) Function(java.util.function.Function) ActionListener(org.elasticsearch.action.ActionListener) DfsSearchResult(org.elasticsearch.search.dfs.DfsSearchResult) QuerySearchResultProvider(org.elasticsearch.search.query.QuerySearchResultProvider) AtomicArray(org.elasticsearch.common.util.concurrent.AtomicArray) QuerySearchResultProvider(org.elasticsearch.search.query.QuerySearchResultProvider) DfsSearchResult(org.elasticsearch.search.dfs.DfsSearchResult) AggregatedDfs(org.elasticsearch.search.dfs.AggregatedDfs) SearchShardTarget(org.elasticsearch.search.SearchShardTarget) QuerySearchRequest(org.elasticsearch.search.query.QuerySearchRequest) Supplier(org.apache.logging.log4j.util.Supplier) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Transport(org.elasticsearch.transport.Transport)

Example 2 with AggregatedDfs

use of org.elasticsearch.search.dfs.AggregatedDfs in project elasticsearch by elastic.

In the class SearchPhaseController, the method aggregateDfs:

/**
 * Merges the distributed frequency statistics reported by each shard into a
 * single {@link AggregatedDfs}: term statistics and field statistics are summed
 * per term / per field, and the maximum document counts are accumulated.
 */
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
    ObjectObjectHashMap<Term, TermStatistics> mergedTermStats = HppcMaps.newNoNullKeysMap();
    ObjectObjectHashMap<String, CollectionStatistics> mergedFieldStats = HppcMaps.newNoNullKeysMap();
    long totalMaxDoc = 0;
    for (AtomicArray.Entry<DfsSearchResult> entry : results.asList()) {
        final DfsSearchResult shardResult = entry.value;
        final Term[] shardTerms = shardResult.terms();
        final TermStatistics[] shardTermStats = shardResult.termStatistics();
        assert shardTerms.length == shardTermStats.length;
        for (int t = 0; t < shardTerms.length; t++) {
            assert shardTerms[t] != null;
            final TermStatistics previous = mergedTermStats.get(shardTerms[t]);
            if (previous == null) {
                mergedTermStats.put(shardTerms[t], shardTermStats[t]);
            } else {
                assert shardTerms[t].bytes().equals(previous.term());
                // totalTermFreq is an optional statistic: optionalSum keeps the
                // merged value at -1 ("not present") if either side reports -1.
                mergedTermStats.put(shardTerms[t], new TermStatistics(previous.term(), previous.docFreq() + shardTermStats[t].docFreq(), optionalSum(previous.totalTermFreq(), shardTermStats[t].totalTermFreq())));
            }
        }
        assert !shardResult.fieldStatistics().containsKey(null);
        // Walk the HPPC map's backing arrays directly; a null key marks an empty slot.
        final Object[] slotKeys = shardResult.fieldStatistics().keys;
        final Object[] slotValues = shardResult.fieldStatistics().values;
        for (int slot = 0; slot < slotKeys.length; slot++) {
            if (slotKeys[slot] == null) {
                continue;
            }
            final String field = (String) slotKeys[slot];
            assert field != null;
            final CollectionStatistics incoming = (CollectionStatistics) slotValues[slot];
            final CollectionStatistics previous = mergedFieldStats.get(field);
            if (previous == null) {
                mergedFieldStats.put(field, incoming);
            } else {
                // Sum the field-level statistics, treating -1 as "not present"
                // via optionalSum for every optional component.
                mergedFieldStats.put(field, new CollectionStatistics(field, previous.maxDoc() + incoming.maxDoc(), optionalSum(previous.docCount(), incoming.docCount()), optionalSum(previous.sumTotalTermFreq(), incoming.sumTotalTermFreq()), optionalSum(previous.sumDocFreq(), incoming.sumDocFreq())));
            }
        }
        totalMaxDoc += shardResult.maxDoc();
    }
    return new AggregatedDfs(mergedTermStats, mergedFieldStats, totalMaxDoc);
}
Also used : AtomicArray(org.elasticsearch.common.util.concurrent.AtomicArray) DfsSearchResult(org.elasticsearch.search.dfs.DfsSearchResult) Term(org.apache.lucene.index.Term) TermStatistics(org.apache.lucene.search.TermStatistics) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) AggregatedDfs(org.elasticsearch.search.dfs.AggregatedDfs)

Example 3 with AggregatedDfs

use of org.elasticsearch.search.dfs.AggregatedDfs in project elasticsearch by elastic.

In the class TermVectorsService, the method getTermVectors:

/**
 * Builds a {@link TermVectorsResponse} for the given request. Term vectors come
 * either from an artificial document supplied in the request, from an existing
 * document's stored (or on-the-fly generated) vectors, or the response reports
 * the document as absent.
 *
 * @param indexShard       the shard to read the document and statistics from
 * @param request          the term vectors request (artificial doc, selected
 *                         fields, per-field analyzers, filter settings, ...)
 * @param nanoTimeSupplier nano-time clock used to compute the took time;
 *                         injected so tests can control timing
 */
static TermVectorsResponse getTermVectors(IndexShard indexShard, TermVectorsRequest request, LongSupplier nanoTimeSupplier) {
    final long startTime = nanoTimeSupplier.getAsLong();
    final TermVectorsResponse termVectorsResponse = new TermVectorsResponse(indexShard.shardId().getIndex().getName(), request.type(), request.id());
    final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
    // Realtime get: may serve the doc from the engine before it is searchable.
    Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm).version(request.version()).versionType(request.versionType()));
    Fields termVectorsByField = null;
    // NOTE(review): dfs is declared but never assigned in this method, so the
    // filter and response writing below always receive null here — confirm
    // whether dfs aggregation was intentionally dropped from this path.
    AggregatedDfs dfs = null;
    TermVectorsFilter termVectorsFilter = null;
    /* handle potential wildcards in fields */
    if (request.selectedFields() != null) {
        handleFieldWildcards(indexShard, request);
    }
    final Engine.Searcher searcher = indexShard.acquireSearcher("term_vector");
    try {
        // Prefer the reader attached to the realtime get result when available.
        Fields topLevelFields = MultiFields.getFields(get.searcher() != null ? get.searcher().reader() : searcher.reader());
        Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
        /* from an artificial document */
        if (request.doc() != null) {
            termVectorsByField = generateTermVectorsFromDoc(indexShard, request);
            // if no document indexed in shard, take the queried document itself for stats
            if (topLevelFields == null) {
                topLevelFields = termVectorsByField;
            }
            termVectorsResponse.setArtificial(true);
            termVectorsResponse.setExists(true);
        } else /* or from an existing document */
        if (docIdAndVersion != null) {
            // fields with stored term vectors
            termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
            Set<String> selectedFields = request.selectedFields();
            // generate tvs for fields where analyzer is overridden
            if (selectedFields == null && request.perFieldAnalyzer() != null) {
                selectedFields = getFieldsToGenerate(request.perFieldAnalyzer(), termVectorsByField);
            }
            // fields without term vectors
            if (selectedFields != null) {
                termVectorsByField = addGeneratedTermVectors(indexShard, get, termVectorsByField, request, selectedFields);
            }
            termVectorsResponse.setDocVersion(docIdAndVersion.version);
            termVectorsResponse.setExists(true);
        } else /* no term vectors generated or found */
        {
            termVectorsResponse.setExists(false);
        }
        /* if there are term vectors, optional compute dfs and/or terms filtering */
        if (termVectorsByField != null) {
            if (request.filterSettings() != null) {
                termVectorsFilter = new TermVectorsFilter(termVectorsByField, topLevelFields, request.selectedFields(), dfs);
                termVectorsFilter.setSettings(request.filterSettings());
                try {
                    termVectorsFilter.selectBestTerms();
                } catch (IOException e) {
                    throw new ElasticsearchException("failed to select best terms", e);
                }
            }
            // write term vectors
            termVectorsResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields, dfs, termVectorsFilter);
        }
        termVectorsResponse.setTookInMillis(TimeUnit.NANOSECONDS.toMillis(nanoTimeSupplier.getAsLong() - startTime));
    } catch (Exception ex) {
        throw new ElasticsearchException("failed to execute term vector request", ex);
    } finally {
        // Always release the acquired searcher and the engine get result,
        // even on failure, to avoid leaking reader references.
        searcher.close();
        get.release();
    }
    return termVectorsResponse;
}
Also used : TermVectorsResponse(org.elasticsearch.action.termvectors.TermVectorsResponse) HashSet(java.util.HashSet) Set(java.util.Set) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) ElasticsearchException(org.elasticsearch.ElasticsearchException) ElasticsearchException(org.elasticsearch.ElasticsearchException) IOException(java.io.IOException) TermVectorsFilter(org.elasticsearch.action.termvectors.TermVectorsFilter) Fields(org.apache.lucene.index.Fields) MultiFields(org.apache.lucene.index.MultiFields) Versions(org.elasticsearch.common.lucene.uid.Versions) AggregatedDfs(org.elasticsearch.search.dfs.AggregatedDfs) Engine(org.elasticsearch.index.engine.Engine)

Aggregations

AggregatedDfs (org.elasticsearch.search.dfs.AggregatedDfs)3 IOException (java.io.IOException)2 Term (org.apache.lucene.index.Term)2 AtomicArray (org.elasticsearch.common.util.concurrent.AtomicArray)2 DfsSearchResult (org.elasticsearch.search.dfs.DfsSearchResult)2 HashSet (java.util.HashSet)1 Set (java.util.Set)1 Function (java.util.function.Function)1 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)1 Supplier (org.apache.logging.log4j.util.Supplier)1 Fields (org.apache.lucene.index.Fields)1 MultiFields (org.apache.lucene.index.MultiFields)1 CollectionStatistics (org.apache.lucene.search.CollectionStatistics)1 TermStatistics (org.apache.lucene.search.TermStatistics)1 ElasticsearchException (org.elasticsearch.ElasticsearchException)1 ActionListener (org.elasticsearch.action.ActionListener)1 TermVectorsFilter (org.elasticsearch.action.termvectors.TermVectorsFilter)1 TermVectorsResponse (org.elasticsearch.action.termvectors.TermVectorsResponse)1 Versions (org.elasticsearch.common.lucene.uid.Versions)1 Engine (org.elasticsearch.index.engine.Engine)1