Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class StringTermsTests, method createTestInstance.
@Override
protected InternalTerms<?, ?> createTestInstance(String name, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
    Terms.Order order = Terms.Order.count(false);
    long minDocCount = 1;
    int requiredSize = 3;
    int shardSize = requiredSize + 2;
    DocValueFormat format = DocValueFormat.RAW;
    boolean showTermDocCountError = false;
    long docCountError = -1;
    long otherDocCount = 0;
    List<StringTerms.Bucket> buckets = new ArrayList<>();
    final int numBuckets = randomInt(shardSize);
    Set<BytesRef> terms = new HashSet<>();
    for (int i = 0; i < numBuckets; ++i) {
        BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAsciiOfLength(10)));
        int docCount = randomIntBetween(1, 100);
        buckets.add(new StringTerms.Bucket(term, docCount, InternalAggregations.EMPTY, showTermDocCountError, docCountError, format));
    }
    return new StringTerms(name, order, requiredSize, minDocCount, pipelineAggregators, metaData, format, shardSize, showTermDocCountError, otherDocCount, buckets, docCountError);
}
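The only subtlety in this test factory is how it guarantees distinct bucket terms: the predicate passed to randomValueOtherThanMany exploits the fact that Set.add returns false for a value that is already present, so duplicate terms are rejected and redrawn. Below is a minimal, self-contained sketch of that retry-until-unique pattern in plain Java; the class and helper below are illustrative stand-ins, not the ESTestCase implementation.

import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.function.Predicate;
import java.util.function.Supplier;

public class UniqueRandomTerms {

    // Hypothetical stand-in for the ESTestCase randomValueOtherThanMany helper:
    // keep drawing from the supplier until the predicate no longer rejects the value.
    static <T> T randomValueOtherThanMany(Predicate<T> reject, Supplier<T> supplier) {
        T value;
        do {
            value = supplier.get();
        } while (reject.test(value));
        return value;
    }

    public static void main(String[] args) {
        Random random = new Random();
        Set<String> seen = new HashSet<>();
        for (int i = 0; i < 5; i++) {
            // seen.add(t) == false means "already generated", so duplicates are rejected and redrawn
            String term = randomValueOtherThanMany(t -> seen.add(t) == false,
                    () -> "term" + random.nextInt(8));
            System.out.println(term);
        }
    }
}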
Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class SortBuilder, method buildSort.
public static Optional<SortAndFormats> buildSort(List<SortBuilder<?>> sortBuilders, QueryShardContext context) throws IOException {
    List<SortField> sortFields = new ArrayList<>(sortBuilders.size());
    List<DocValueFormat> sortFormats = new ArrayList<>(sortBuilders.size());
    for (SortBuilder<?> builder : sortBuilders) {
        SortFieldAndFormat sf = builder.build(context);
        sortFields.add(sf.field);
        sortFormats.add(sf.format);
    }
    if (!sortFields.isEmpty()) {
        // optimize if we just sort on score non reversed, we don't really need sorting
        boolean sort;
        if (sortFields.size() > 1) {
            sort = true;
        } else {
            SortField sortField = sortFields.get(0);
            if (sortField.getType() == SortField.Type.SCORE && !sortField.getReverse()) {
                sort = false;
            } else {
                sort = true;
            }
        }
        if (sort) {
            return Optional.of(new SortAndFormats(new Sort(sortFields.toArray(new SortField[sortFields.size()])), sortFormats.toArray(new DocValueFormat[sortFormats.size()])));
        }
    }
    return Optional.empty();
}
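The short-circuit in the middle encodes the fact that Lucene's default order is already relevance, descending: a single non-reversed SCORE SortField adds nothing, so no SortAndFormats is built and the caller gets score ordering for free. Here is a small illustrative check over plain Lucene SortFields showing the same condition; the class and method names are invented for the sketch, and only lucene-core is assumed on the classpath.

import java.util.Arrays;
import java.util.List;
import org.apache.lucene.search.SortField;

public class ScoreSortCheck {

    // Illustrative check mirroring the buildSort short-circuit: true when the sort is
    // exactly one non-reversed SCORE field, i.e. plain relevance order.
    static boolean isDefaultRelevanceSort(List<SortField> sortFields) {
        if (sortFields.size() != 1) {
            return false;
        }
        SortField sortField = sortFields.get(0);
        return sortField.getType() == SortField.Type.SCORE && sortField.getReverse() == false;
    }

    public static void main(String[] args) {
        // SortField.FIELD_SCORE is Lucene's "sort by relevance, descending" field
        System.out.println(isDefaultRelevanceSort(Arrays.asList(SortField.FIELD_SCORE)));                        // true
        System.out.println(isDefaultRelevanceSort(Arrays.asList(new SortField("price", SortField.Type.LONG)))); // false
    }
}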
Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class SignificantTermsAggregatorFactory, method doCreateInternal.
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (collectsFromSingleBucket == false) {
        return asMultiBucketAggregator(this, context, parent);
    }
    numberOfAggregatorsCreated++;
    BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
    if (bucketCountThresholds.getShardSize() == SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // The user has not made a shardSize selection. Use default heuristic to avoid
        // any wrong-ranking caused by distributed counting but request double the usual
        // amount. We typically need more than the number of "top" terms requested by
        // other aggregations as the significance algorithm is in less of a position to
        // down-select at shard-level - some of the things we want to find have only one
        // occurrence on each shard and as such are impossible to differentiate from
        // non-significant terms at that early stage.
        bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
    }
    if (valuesSource instanceof ValuesSource.Bytes) {
        ExecutionMode execution = null;
        if (executionHint != null) {
            execution = ExecutionMode.fromString(executionHint);
        }
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
        }
        if (execution == null) {
            if (Aggregator.descendsFromBucketAggregator(parent)) {
                execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else {
                execution = ExecutionMode.GLOBAL_ORDINALS;
            }
        }
        assert execution != null;
        DocValueFormat format = config.format();
        if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic, this, pipelineAggregators, metaData);
    }
    if ((includeExclude != null) && (includeExclude.isRegexBased())) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }
    if (valuesSource instanceof ValuesSource.Numeric) {
        if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
            throw new UnsupportedOperationException("No support for examining floating point numerics");
        }
        IncludeExclude.LongFilter longFilter = null;
        if (includeExclude != null) {
            longFilter = includeExclude.convertToLongFilter(config.format());
        }
        return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter, pipelineAggregators, metaData);
    }
    throw new AggregationExecutionException("significant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
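Stripped of the aggregator plumbing, the execution-mode selection above is a three-step decision: an explicit execution_hint is parsed first, a values source without ordinals is forced to map regardless of the hint, and otherwise the choice between plain and hashed global ordinals depends on whether the aggregator descends from another bucket aggregator. The following standalone sketch reproduces just that decision; the enum, class name, and boolean flags are illustrative stand-ins, not the ES ExecutionMode type.

import java.util.Locale;

public class ExecutionModeChoice {

    // Hypothetical stand-in for the aggregator's ExecutionMode enum
    enum Mode { MAP, GLOBAL_ORDINALS, GLOBAL_ORDINALS_HASH }

    static Mode choose(String executionHint, boolean hasOrdinals, boolean underBucketAggregator) {
        // an explicit execution_hint is honoured first
        Mode mode = executionHint != null ? Mode.valueOf(executionHint.toUpperCase(Locale.ROOT)) : null;
        if (hasOrdinals == false) {
            // sources without ordinals can only be collected with a map, whatever the hint says
            mode = Mode.MAP;
        }
        if (mode == null) {
            // under another bucket aggregator, hashed global ordinals avoid dense per-bucket arrays
            mode = underBucketAggregator ? Mode.GLOBAL_ORDINALS_HASH : Mode.GLOBAL_ORDINALS;
        }
        return mode;
    }

    public static void main(String[] args) {
        System.out.println(choose(null, true, false));   // GLOBAL_ORDINALS
        System.out.println(choose(null, true, true));    // GLOBAL_ORDINALS_HASH
        System.out.println(choose("map", false, false)); // MAP
    }
}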
Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class DerivativePipelineAggregationBuilder, method createInternal.
@Override
protected PipelineAggregator createInternal(Map<String, Object> metaData) throws IOException {
    DocValueFormat formatter;
    if (format != null) {
        formatter = new DocValueFormat.Decimal(format);
    } else {
        formatter = DocValueFormat.RAW;
    }
    Long xAxisUnits = null;
    if (units != null) {
        DateTimeUnit dateTimeUnit = DateHistogramAggregationBuilder.DATE_FIELD_UNITS.get(units);
        if (dateTimeUnit != null) {
            xAxisUnits = dateTimeUnit.field(DateTimeZone.UTC).getDurationField().getUnitMillis();
        } else {
            TimeValue timeValue = TimeValue.parseTimeValue(units, null, getClass().getSimpleName() + ".unit");
            if (timeValue != null) {
                xAxisUnits = timeValue.getMillis();
            }
        }
    }
    return new DerivativePipelineAggregator(name, bucketsPaths, formatter, gapPolicy, xAxisUnits, metaData);
}
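Only the presentation of the derivative values is decided in the first branch: without a format string the raw doubles pass through as DocValueFormat.RAW, and with one they are rendered through DocValueFormat.Decimal, which applies a DecimalFormat-style pattern. The quick sketch below shows what such a pattern does to a derivative value; it uses java.text.DecimalFormat directly rather than the ES wrapper, and the class name and sample value are invented for illustration.

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;

public class DerivativeFormatDemo {
    public static void main(String[] args) {
        // e.g. the change of a metric between two adjacent histogram buckets
        double derivative = 1234.5678;

        // no "format" parameter (DocValueFormat.RAW): the raw double passes through
        System.out.println(derivative);                  // 1234.5678

        // "format": "#,##0.00" -- a DecimalFormat-style pattern, broadly what DocValueFormat.Decimal applies
        DecimalFormat pattern = new DecimalFormat("#,##0.00", DecimalFormatSymbols.getInstance(Locale.ROOT));
        System.out.println(pattern.format(derivative));  // 1,234.57
    }
}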
Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class QueryPhase, method execute.
/**
 * In a package-private method so that it can be tested without having to
 * wire everything (mapperService, etc.)
 * @return whether the rescoring phase should be executed
 */
static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
    QuerySearchResult queryResult = searchContext.queryResult();
    queryResult.searchTimedOut(false);
    final boolean doProfile = searchContext.getProfilers() != null;
    final SearchType searchType = searchContext.searchType();
    boolean rescore = false;
    try {
        queryResult.from(searchContext.from());
        queryResult.size(searchContext.size());
        Query query = searchContext.query();
        final int totalNumDocs = searcher.getIndexReader().numDocs();
        int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
        Collector collector;
        Callable<TopDocs> topDocsCallable;
        DocValueFormat[] sortValueFormats = new DocValueFormat[0];
        // already rewritten
        assert query == searcher.rewrite(query);
        if (searchContext.size() == 0) {
            // no matter what the value of from is
            final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
            collector = totalHitCountCollector;
            if (searchContext.getProfilers() != null) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_COUNT, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {
                @Override
                public TopDocs call() throws Exception {
                    return new TopDocs(totalHitCountCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
                }
            };
        } else {
            // Perhaps have a dedicated scroll phase?
            final ScrollContext scrollContext = searchContext.scrollContext();
            assert (scrollContext != null) == (searchContext.request().scroll() != null);
            final Collector topDocsCollector;
            ScoreDoc after = null;
            if (searchContext.request().scroll() != null) {
                numDocs = Math.min(searchContext.size(), totalNumDocs);
                after = scrollContext.lastEmittedDoc;
                if (returnsDocsInOrder(query, searchContext.sort())) {
                    if (scrollContext.totalHits == -1) {
                        // first round
                        assert scrollContext.lastEmittedDoc == null;
                        // there is not much that we can optimize here since we want to collect all
                        // documents in order to get the total number of hits
                    } else {
                        // skip to the desired doc and stop collecting after ${size} matches
                        if (scrollContext.lastEmittedDoc != null) {
                            BooleanQuery bq = new BooleanQuery.Builder().add(query, BooleanClause.Occur.MUST).add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER).build();
                            query = bq;
                        }
                        searchContext.terminateAfter(numDocs);
                    }
                }
            } else {
                after = searchContext.searchAfter();
            }
            if (totalNumDocs == 0) {
                // top collectors don't like a size of 0
                numDocs = 1;
            }
            assert numDocs > 0;
            if (searchContext.collapse() == null) {
                if (searchContext.sort() != null) {
                    SortAndFormats sf = searchContext.sort();
                    topDocsCollector = TopFieldCollector.create(sf.sort, numDocs, (FieldDoc) after, true, searchContext.trackScores(), searchContext.trackScores());
                    sortValueFormats = sf.formats;
                } else {
                    rescore = !searchContext.rescore().isEmpty();
                    for (RescoreSearchContext rescoreContext : searchContext.rescore()) {
                        numDocs = Math.max(rescoreContext.window(), numDocs);
                    }
                    topDocsCollector = TopScoreDocCollector.create(numDocs, after);
                }
            } else {
                Sort sort = Sort.RELEVANCE;
                if (searchContext.sort() != null) {
                    sort = searchContext.sort().sort;
                }
                CollapseContext collapse = searchContext.collapse();
                topDocsCollector = collapse.createTopDocs(sort, numDocs, searchContext.trackScores());
                if (searchContext.sort() == null) {
                    sortValueFormats = new DocValueFormat[] { DocValueFormat.RAW };
                } else {
                    sortValueFormats = searchContext.sort().formats;
                }
            }
            collector = topDocsCollector;
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TOP_HITS, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {
                @Override
                public TopDocs call() throws Exception {
                    final TopDocs topDocs;
                    if (topDocsCollector instanceof TopDocsCollector) {
                        topDocs = ((TopDocsCollector<?>) topDocsCollector).topDocs();
                    } else if (topDocsCollector instanceof CollapsingTopDocsCollector) {
                        topDocs = ((CollapsingTopDocsCollector) topDocsCollector).getTopDocs();
                    } else {
                        throw new IllegalStateException("Unknown top docs collector " + topDocsCollector.getClass().getName());
                    }
                    if (scrollContext != null) {
                        if (scrollContext.totalHits == -1) {
                            // first round
                            scrollContext.totalHits = topDocs.totalHits;
                            scrollContext.maxScore = topDocs.getMaxScore();
                        } else {
                            // subsequent round: the total number of hits and
                            // the maximum score were computed on the first round
                            topDocs.totalHits = scrollContext.totalHits;
                            topDocs.setMaxScore(scrollContext.maxScore);
                        }
                        if (searchContext.request().numberOfShards() == 1) {
                            // if we fetch the document in the same roundtrip, we already know the last emitted doc
                            if (topDocs.scoreDocs.length > 0) {
                                // set the last emitted doc
                                scrollContext.lastEmittedDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
                            }
                        }
                    }
                    return topDocs;
                }
            };
        }
        final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
        if (terminateAfterSet) {
            final Collector child = collector;
            // throws Lucene.EarlyTerminationException when given count is reached
            collector = Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (searchContext.parsedPostFilter() != null) {
            final Collector child = collector;
            // this will only get applied to the actual search collector and not
            // to any scoped collectors, also, it will only be applied to the main collector
            // since that is where the filter should only work
            final Weight filterWeight = searcher.createNormalizedWeight(searchContext.parsedPostFilter().query(), false);
            collector = new FilteredCollector(collector, filterWeight);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_POST_FILTER, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        // plug in additional collectors, like aggregations
        final List<Collector> subCollectors = new ArrayList<>();
        subCollectors.add(collector);
        subCollectors.addAll(searchContext.queryCollectors().values());
        collector = MultiCollector.wrap(subCollectors);
        if (doProfile && collector instanceof InternalProfileCollector == false) {
            // When there is a single collector to wrap, MultiCollector returns it
            // directly, so only wrap in the case that there are several sub collectors
            final List<InternalProfileCollector> children = new AbstractList<InternalProfileCollector>() {
                @Override
                public InternalProfileCollector get(int index) {
                    return (InternalProfileCollector) subCollectors.get(index);
                }

                @Override
                public int size() {
                    return subCollectors.size();
                }
            };
            collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MULTI, children);
        }
        // apply the minimum score after multi collector so we filter aggs as well
        if (searchContext.minimumScore() != null) {
            final Collector child = collector;
            collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MIN_SCORE, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector.getClass() == TotalHitCountCollector.class) {
            // compute the hit count directly from index statistics instead of using a collector
            while (true) {
                // unwrap a constant_score query
                if (query instanceof ConstantScoreQuery) {
                    query = ((ConstantScoreQuery) query).getQuery();
                } else {
                    break;
                }
            }
            if (query.getClass() == MatchAllDocsQuery.class) {
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {
                    @Override
                    public TopDocs call() throws Exception {
                        int count = searcher.getIndexReader().numDocs();
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
                final Term term = ((TermQuery) query).getTerm();
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {
                    @Override
                    public TopDocs call() throws Exception {
                        int count = 0;
                        for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
                            count += context.reader().docFreq(term);
                        }
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            }
        }
        final boolean timeoutSet = searchContext.timeout() != null && !searchContext.timeout().equals(SearchService.NO_TIMEOUT);
        if (timeoutSet && collector != null) {
            // collector might be null if no collection is actually needed
            final Collector child = collector;
            // TODO: change to use our own counter that uses the scheduler in ThreadPool
            // throws TimeLimitingCollector.TimeExceededException when timeout has reached
            collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeout().millis());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TIMEOUT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector != null) {
            final Collector child = collector;
            collector = new CancellableCollector(searchContext.getTask()::isCancelled, searchContext.lowLevelCancellation(), collector);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_CANCELLED, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        try {
            if (collector != null) {
                if (doProfile) {
                    searchContext.getProfilers().getCurrentQueryProfiler().setCollector((InternalProfileCollector) collector);
                }
                searcher.search(query, collector);
            }
        } catch (TimeLimitingCollector.TimeExceededException e) {
            assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
            queryResult.searchTimedOut(true);
        } catch (Lucene.EarlyTerminationException e) {
            assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
            queryResult.terminatedEarly(true);
        } finally {
            searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
        }
        if (terminateAfterSet && queryResult.terminatedEarly() == null) {
            queryResult.terminatedEarly(false);
        }
        queryResult.topDocs(topDocsCallable.call(), sortValueFormats);
        if (searchContext.getProfilers() != null) {
            ProfileShardResult shardResults = SearchProfileShardResults.buildShardResults(searchContext.getProfilers());
            searchContext.queryResult().profileResults(shardResults);
        }
        return rescore;
    } catch (Exception e) {
        throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
    }
}
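The TotalHitCountCollector branch near the end is the part most worth isolating: when only a hit count is needed, a match_all count is simply the reader's live-document count, and a term query over an index with no deletions is the term's document frequency summed across segments, so nothing has to be collected at all. The following self-contained sketch shows those two shortcuts against plain Lucene; the class and method names are invented, and the code mirrors, but does not reuse, the QueryPhase logic.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class CountShortcuts {

    /**
     * Returns the hit count without collecting, or -1 when no shortcut applies
     * and a real, collector-based count is required.
     */
    static int shortcutCount(IndexReader reader, Query query) throws IOException {
        if (query instanceof MatchAllDocsQuery) {
            // every live document matches
            return reader.numDocs();
        }
        if (query instanceof TermQuery && reader.hasDeletions() == false) {
            // without deletions, docFreq is exact per segment; sum it across leaves
            Term term = ((TermQuery) query).getTerm();
            int count = 0;
            for (LeafReaderContext context : reader.leaves()) {
                count += context.reader().docFreq(term);
            }
            return count;
        }
        return -1; // caller falls back to searching with a collector
    }
}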