Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class StringTermsTests, method createTestInstance.
@Override
protected InternalTerms<?, ?> createTestInstance(String name, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
    Terms.Order order = Terms.Order.count(false);
    long minDocCount = 1;
    int requiredSize = 3;
    int shardSize = requiredSize + 2;
    DocValueFormat format = DocValueFormat.RAW;
    boolean showTermDocCountError = false;
    long docCountError = -1;
    long otherDocCount = 0;
    List<StringTerms.Bucket> buckets = new ArrayList<>();
    final int numBuckets = randomInt(shardSize);
    Set<BytesRef> terms = new HashSet<>();
    for (int i = 0; i < numBuckets; ++i) {
        BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAsciiOfLength(10)));
        int docCount = randomIntBetween(1, 100);
        buckets.add(new StringTerms.Bucket(term, docCount, InternalAggregations.EMPTY, showTermDocCountError, docCountError, format));
    }
    return new StringTerms(name, order, requiredSize, minDocCount, pipelineAggregators, metaData, format, shardSize, showTermDocCountError, otherDocCount, buckets, docCountError);
}
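The only subtlety in this test factory is how it guarantees distinct bucket terms: the predicate passed to randomValueOtherThanMany exploits the fact that Set.add returns false for a value that is already present, so duplicate terms are rejected and redrawn. Below is a minimal, self-contained sketch of that retry-until-unique pattern in plain Java; the class and helper below are illustrative stand-ins, not the ESTestCase implementation.

import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.function.Predicate;
import java.util.function.Supplier;

public class UniqueRandomTerms {

    // Hypothetical stand-in for the ESTestCase randomValueOtherThanMany helper:
    // keep drawing from the supplier until the predicate no longer rejects the value.
    static <T> T randomValueOtherThanMany(Predicate<T> reject, Supplier<T> supplier) {
        T value;
        do {
            value = supplier.get();
        } while (reject.test(value));
        return value;
    }

    public static void main(String[] args) {
        Random random = new Random();
        Set<String> seen = new HashSet<>();
        for (int i = 0; i < 5; i++) {
            // seen.add(t) == false means "already generated", so duplicates are rejected and redrawn
            String term = randomValueOtherThanMany(t -> seen.add(t) == false,
                    () -> "term" + random.nextInt(8));
            System.out.println(term);
        }
    }
}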
Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class SortBuilder, method buildSort.
public static Optional<SortAndFormats> buildSort(List<SortBuilder<?>> sortBuilders, QueryShardContext context) throws IOException {
    List<SortField> sortFields = new ArrayList<>(sortBuilders.size());
    List<DocValueFormat> sortFormats = new ArrayList<>(sortBuilders.size());
    for (SortBuilder<?> builder : sortBuilders) {
        SortFieldAndFormat sf = builder.build(context);
        sortFields.add(sf.field);
        sortFormats.add(sf.format);
    }
    if (!sortFields.isEmpty()) {
        // optimize if we just sort on score non reversed, we don't really need sorting
        boolean sort;
        if (sortFields.size() > 1) {
            sort = true;
        } else {
            SortField sortField = sortFields.get(0);
            if (sortField.getType() == SortField.Type.SCORE && !sortField.getReverse()) {
                sort = false;
            } else {
                sort = true;
            }
        }
        if (sort) {
            return Optional.of(new SortAndFormats(new Sort(sortFields.toArray(new SortField[sortFields.size()])), sortFormats.toArray(new DocValueFormat[sortFormats.size()])));
        }
    }
    return Optional.empty();
}
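The short-circuit in the middle encodes the fact that Lucene's default order is already relevance, descending: a single non-reversed SCORE SortField adds nothing, so no SortAndFormats is built and the caller gets score ordering for free. Here is a small illustrative check over plain Lucene SortFields showing the same condition; the class and method names are invented for the sketch, and only lucene-core is assumed on the classpath.

import java.util.Arrays;
import java.util.List;
import org.apache.lucene.search.SortField;

public class ScoreSortCheck {

    // Illustrative check mirroring the buildSort short-circuit: true when the sort is
    // exactly one non-reversed SCORE field, i.e. plain relevance order.
    static boolean isDefaultRelevanceSort(List<SortField> sortFields) {
        if (sortFields.size() != 1) {
            return false;
        }
        SortField sortField = sortFields.get(0);
        return sortField.getType() == SortField.Type.SCORE && sortField.getReverse() == false;
    }

    public static void main(String[] args) {
        // SortField.FIELD_SCORE is Lucene's "sort by relevance, descending" field
        System.out.println(isDefaultRelevanceSort(Arrays.asList(SortField.FIELD_SCORE)));                        // true
        System.out.println(isDefaultRelevanceSort(Arrays.asList(new SortField("price", SortField.Type.LONG)))); // false
    }
}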
Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class SignificantTermsAggregatorFactory, method doCreateInternal.
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (collectsFromSingleBucket == false) {
        return asMultiBucketAggregator(this, context, parent);
    }
    numberOfAggregatorsCreated++;
    BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
    if (bucketCountThresholds.getShardSize() == SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // The user has not made a shardSize selection. Use default heuristic to avoid
        // any wrong-ranking caused by distributed counting but request double the usual
        // amount. We typically need more than the number of "top" terms requested by
        // other aggregations as the significance algorithm is in less of a position to
        // down-select at shard-level - some of the things we want to find have only one
        // occurrence on each shard and as such are impossible to differentiate from
        // non-significant terms at that early stage.
        bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
    }
    if (valuesSource instanceof ValuesSource.Bytes) {
        ExecutionMode execution = null;
        if (executionHint != null) {
            execution = ExecutionMode.fromString(executionHint);
        }
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
        }
        if (execution == null) {
            if (Aggregator.descendsFromBucketAggregator(parent)) {
                execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else {
                execution = ExecutionMode.GLOBAL_ORDINALS;
            }
        }
        assert execution != null;
        DocValueFormat format = config.format();
        if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic, this, pipelineAggregators, metaData);
    }
    if ((includeExclude != null) && (includeExclude.isRegexBased())) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }
    if (valuesSource instanceof ValuesSource.Numeric) {
        if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
            throw new UnsupportedOperationException("No support for examining floating point numerics");
        }
        IncludeExclude.LongFilter longFilter = null;
        if (includeExclude != null) {
            longFilter = includeExclude.convertToLongFilter(config.format());
        }
        return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter, pipelineAggregators, metaData);
    }
    throw new AggregationExecutionException("significant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
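Stripped of the aggregator plumbing, the execution-mode selection above is a three-step decision: an explicit execution_hint is parsed first, a values source without ordinals is forced to map regardless of the hint, and otherwise the choice between plain and hashed global ordinals depends on whether the aggregator descends from another bucket aggregator. The following standalone sketch reproduces just that decision; the enum, class name, and boolean flags are illustrative stand-ins, not the ES ExecutionMode type.

import java.util.Locale;

public class ExecutionModeChoice {

    // Hypothetical stand-in for the aggregator's ExecutionMode enum
    enum Mode { MAP, GLOBAL_ORDINALS, GLOBAL_ORDINALS_HASH }

    static Mode choose(String executionHint, boolean hasOrdinals, boolean underBucketAggregator) {
        // an explicit execution_hint is honoured first
        Mode mode = executionHint != null ? Mode.valueOf(executionHint.toUpperCase(Locale.ROOT)) : null;
        if (hasOrdinals == false) {
            // sources without ordinals can only be collected with a map, whatever the hint says
            mode = Mode.MAP;
        }
        if (mode == null) {
            // under another bucket aggregator, hashed global ordinals avoid dense per-bucket arrays
            mode = underBucketAggregator ? Mode.GLOBAL_ORDINALS_HASH : Mode.GLOBAL_ORDINALS;
        }
        return mode;
    }

    public static void main(String[] args) {
        System.out.println(choose(null, true, false));   // GLOBAL_ORDINALS
        System.out.println(choose(null, true, true));    // GLOBAL_ORDINALS_HASH
        System.out.println(choose("map", false, false)); // MAP
    }
}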
Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class DerivativePipelineAggregationBuilder, method createInternal.
@Override
protected PipelineAggregator createInternal(Map<String, Object> metaData) throws IOException {
    DocValueFormat formatter;
    if (format != null) {
        formatter = new DocValueFormat.Decimal(format);
    } else {
        formatter = DocValueFormat.RAW;
    }
    Long xAxisUnits = null;
    if (units != null) {
        DateTimeUnit dateTimeUnit = DateHistogramAggregationBuilder.DATE_FIELD_UNITS.get(units);
        if (dateTimeUnit != null) {
            xAxisUnits = dateTimeUnit.field(DateTimeZone.UTC).getDurationField().getUnitMillis();
        } else {
            TimeValue timeValue = TimeValue.parseTimeValue(units, null, getClass().getSimpleName() + ".unit");
            if (timeValue != null) {
                xAxisUnits = timeValue.getMillis();
            }
        }
    }
    return new DerivativePipelineAggregator(name, bucketsPaths, formatter, gapPolicy, xAxisUnits, metaData);
}
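Only the presentation of the derivative values is decided in the first branch: without a format string the raw doubles pass through as DocValueFormat.RAW, and with one they are rendered through DocValueFormat.Decimal, which applies a DecimalFormat-style pattern. The quick sketch below shows what such a pattern does to a derivative value; it uses java.text.DecimalFormat directly rather than the ES wrapper, and the class name and sample value are invented for illustration.

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;

public class DerivativeFormatDemo {
    public static void main(String[] args) {
        // e.g. the change of a metric between two adjacent histogram buckets
        double derivative = 1234.5678;

        // no "format" parameter (DocValueFormat.RAW): the raw double passes through
        System.out.println(derivative);                  // 1234.5678

        // "format": "#,##0.00" -- a DecimalFormat-style pattern, broadly what DocValueFormat.Decimal applies
        DecimalFormat pattern = new DecimalFormat("#,##0.00", DecimalFormatSymbols.getInstance(Locale.ROOT));
        System.out.println(pattern.format(derivative));  // 1,234.57
    }
}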
Use of org.elasticsearch.search.DocValueFormat in project elasticsearch by elastic.
The class QueryPhase, method execute.
/**
 * In a package-private method so that it can be tested without having to
 * wire everything (mapperService, etc.)
 * @return whether the rescoring phase should be executed
 */
static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
    QuerySearchResult queryResult = searchContext.queryResult();
    queryResult.searchTimedOut(false);
    final boolean doProfile = searchContext.getProfilers() != null;
    final SearchType searchType = searchContext.searchType();
    boolean rescore = false;
    try {
        queryResult.from(searchContext.from());
        queryResult.size(searchContext.size());
        Query query = searchContext.query();
        final int totalNumDocs = searcher.getIndexReader().numDocs();
        int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
        Collector collector;
        Callable<TopDocs> topDocsCallable;
        DocValueFormat[] sortValueFormats = new DocValueFormat[0];
        // already rewritten
        assert query == searcher.rewrite(query);
        if (searchContext.size() == 0) {
            // no matter what the value of from is
            final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
            collector = totalHitCountCollector;
            if (searchContext.getProfilers() != null) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_COUNT, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {
                @Override
                public TopDocs call() throws Exception {
                    return new TopDocs(totalHitCountCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
                }
            };
        } else {
            // Perhaps have a dedicated scroll phase?
            final ScrollContext scrollContext = searchContext.scrollContext();
            assert (scrollContext != null) == (searchContext.request().scroll() != null);
            final Collector topDocsCollector;
            ScoreDoc after = null;
            if (searchContext.request().scroll() != null) {
                numDocs = Math.min(searchContext.size(), totalNumDocs);
                after = scrollContext.lastEmittedDoc;
                if (returnsDocsInOrder(query, searchContext.sort())) {
                    if (scrollContext.totalHits == -1) {
                        // first round
                        assert scrollContext.lastEmittedDoc == null;
                        // there is not much that we can optimize here since we want to collect all
                        // documents in order to get the total number of hits
                    } else {
                        // skip to the desired doc and stop collecting after ${size} matches
                        if (scrollContext.lastEmittedDoc != null) {
                            BooleanQuery bq = new BooleanQuery.Builder().add(query, BooleanClause.Occur.MUST).add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER).build();
                            query = bq;
                        }
                        searchContext.terminateAfter(numDocs);
                    }
                }
            } else {
                after = searchContext.searchAfter();
            }
            if (totalNumDocs == 0) {
                // top collectors don't like a size of 0
                numDocs = 1;
            }
            assert numDocs > 0;
            if (searchContext.collapse() == null) {
                if (searchContext.sort() != null) {
                    SortAndFormats sf = searchContext.sort();
                    topDocsCollector = TopFieldCollector.create(sf.sort, numDocs, (FieldDoc) after, true, searchContext.trackScores(), searchContext.trackScores());
                    sortValueFormats = sf.formats;
                } else {
                    rescore = !searchContext.rescore().isEmpty();
                    for (RescoreSearchContext rescoreContext : searchContext.rescore()) {
                        numDocs = Math.max(rescoreContext.window(), numDocs);
                    }
                    topDocsCollector = TopScoreDocCollector.create(numDocs, after);
                }
            } else {
                Sort sort = Sort.RELEVANCE;
                if (searchContext.sort() != null) {
                    sort = searchContext.sort().sort;
                }
                CollapseContext collapse = searchContext.collapse();
                topDocsCollector = collapse.createTopDocs(sort, numDocs, searchContext.trackScores());
                if (searchContext.sort() == null) {
                    sortValueFormats = new DocValueFormat[] { DocValueFormat.RAW };
                } else {
                    sortValueFormats = searchContext.sort().formats;
                }
            }
            collector = topDocsCollector;
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TOP_HITS, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {
                @Override
                public TopDocs call() throws Exception {
                    final TopDocs topDocs;
                    if (topDocsCollector instanceof TopDocsCollector) {
                        topDocs = ((TopDocsCollector<?>) topDocsCollector).topDocs();
                    } else if (topDocsCollector instanceof CollapsingTopDocsCollector) {
                        topDocs = ((CollapsingTopDocsCollector) topDocsCollector).getTopDocs();
                    } else {
                        throw new IllegalStateException("Unknown top docs collector " + topDocsCollector.getClass().getName());
                    }
                    if (scrollContext != null) {
                        if (scrollContext.totalHits == -1) {
                            // first round
                            scrollContext.totalHits = topDocs.totalHits;
                            scrollContext.maxScore = topDocs.getMaxScore();
                        } else {
                            // subsequent round: the total number of hits and
                            // the maximum score were computed on the first round
                            topDocs.totalHits = scrollContext.totalHits;
                            topDocs.setMaxScore(scrollContext.maxScore);
                        }
                        if (searchContext.request().numberOfShards() == 1) {
                            // if we fetch the document in the same roundtrip, we already know the last emitted doc
                            if (topDocs.scoreDocs.length > 0) {
                                // set the last emitted doc
                                scrollContext.lastEmittedDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
                            }
                        }
                    }
                    return topDocs;
                }
            };
        }
        final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
        if (terminateAfterSet) {
            final Collector child = collector;
            // throws Lucene.EarlyTerminationException when given count is reached
            collector = Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (searchContext.parsedPostFilter() != null) {
            final Collector child = collector;
            // this will only get applied to the actual search collector and not
            // to any scoped collectors, also, it will only be applied to the main collector
            // since that is where the filter should only work
            final Weight filterWeight = searcher.createNormalizedWeight(searchContext.parsedPostFilter().query(), false);
            collector = new FilteredCollector(collector, filterWeight);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_POST_FILTER, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        // plug in additional collectors, like aggregations
        final List<Collector> subCollectors = new ArrayList<>();
        subCollectors.add(collector);
        subCollectors.addAll(searchContext.queryCollectors().values());
        collector = MultiCollector.wrap(subCollectors);
        if (doProfile && collector instanceof InternalProfileCollector == false) {
            // When there is a single collector to wrap, MultiCollector returns it
            // directly, so only wrap in the case that there are several sub collectors
            final List<InternalProfileCollector> children = new AbstractList<InternalProfileCollector>() {
                @Override
                public InternalProfileCollector get(int index) {
                    return (InternalProfileCollector) subCollectors.get(index);
                }

                @Override
                public int size() {
                    return subCollectors.size();
                }
            };
            collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MULTI, children);
        }
        // apply the minimum score after multi collector so we filter aggs as well
        if (searchContext.minimumScore() != null) {
            final Collector child = collector;
            collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MIN_SCORE, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector.getClass() == TotalHitCountCollector.class) {
            // compute the hit count directly from index statistics instead of using a collector
            while (true) {
                // unwrap a constant_score query
                if (query instanceof ConstantScoreQuery) {
                    query = ((ConstantScoreQuery) query).getQuery();
                } else {
                    break;
                }
            }
            if (query.getClass() == MatchAllDocsQuery.class) {
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {
                    @Override
                    public TopDocs call() throws Exception {
                        int count = searcher.getIndexReader().numDocs();
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
                final Term term = ((TermQuery) query).getTerm();
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {
                    @Override
                    public TopDocs call() throws Exception {
                        int count = 0;
                        for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
                            count += context.reader().docFreq(term);
                        }
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            }
        }
        final boolean timeoutSet = searchContext.timeout() != null && !searchContext.timeout().equals(SearchService.NO_TIMEOUT);
        if (timeoutSet && collector != null) {
            // collector might be null if no collection is actually needed
            final Collector child = collector;
            // TODO: change to use our own counter that uses the scheduler in ThreadPool
            // throws TimeLimitingCollector.TimeExceededException when timeout has reached
            collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeout().millis());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TIMEOUT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector != null) {
            final Collector child = collector;
            collector = new CancellableCollector(searchContext.getTask()::isCancelled, searchContext.lowLevelCancellation(), collector);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_CANCELLED, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        try {
            if (collector != null) {
                if (doProfile) {
                    searchContext.getProfilers().getCurrentQueryProfiler().setCollector((InternalProfileCollector) collector);
                }
                searcher.search(query, collector);
            }
        } catch (TimeLimitingCollector.TimeExceededException e) {
            assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
            queryResult.searchTimedOut(true);
        } catch (Lucene.EarlyTerminationException e) {
            assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
            queryResult.terminatedEarly(true);
        } finally {
            searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
        }
        if (terminateAfterSet && queryResult.terminatedEarly() == null) {
            queryResult.terminatedEarly(false);
        }
        queryResult.topDocs(topDocsCallable.call(), sortValueFormats);
        if (searchContext.getProfilers() != null) {
            ProfileShardResult shardResults = SearchProfileShardResults.buildShardResults(searchContext.getProfilers());
            searchContext.queryResult().profileResults(shardResults);
        }
        return rescore;
    } catch (Exception e) {
        throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
    }
}
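The TotalHitCountCollector branch near the end is the part most worth isolating: when only a hit count is needed, a match_all count is simply the reader's live-document count, and a term query over an index with no deletions is the term's document frequency summed across segments, so nothing has to be collected at all. The following self-contained sketch shows those two shortcuts against plain Lucene; the class and method names are invented, and the code mirrors, but does not reuse, the QueryPhase logic.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class CountShortcuts {

    /**
     * Returns the hit count without collecting, or -1 when no shortcut applies
     * and a real, collector-based count is required.
     */
    static int shortcutCount(IndexReader reader, Query query) throws IOException {
        if (query instanceof MatchAllDocsQuery) {
            // every live document matches
            return reader.numDocs();
        }
        if (query instanceof TermQuery && reader.hasDeletions() == false) {
            // without deletions, docFreq is exact per segment; sum it across leaves
            Term term = ((TermQuery) query).getTerm();
            int count = 0;
            for (LeafReaderContext context : reader.leaves()) {
                count += context.reader().docFreq(term);
            }
            return count;
        }
        return -1; // caller falls back to searching with a collector
    }
}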