Search in sources :

Example 1 with BucketCountThresholds

use of org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds in project elasticsearch by elastic.

In the class SignificantTermsAggregatorFactory, the method doCreateInternal.

/**
 * Builds the significant_terms aggregator for the given values source.
 *
 * <p>When {@code collectsFromSingleBucket} is false the work is delegated to
 * {@code asMultiBucketAggregator}. Otherwise a private copy of the bucket count
 * thresholds is taken (with a substituted shard size when the shard size still
 * equals the builder default) and the aggregator is chosen from the kind of
 * values source: bytes sources go through an {@code ExecutionMode}, numeric
 * sources through {@code SignificantLongTermsAggregator}.
 *
 * @param valuesSource the source of values to aggregate on
 * @param parent the parent aggregator, passed on to the created aggregator
 * @param collectsFromSingleBucket whether the aggregator collects from a single bucket
 * @param pipelineAggregators pipeline aggregators passed on to the created aggregator
 * @param metaData metadata passed on to the created aggregator
 * @return the aggregator to use
 * @throws AggregationExecutionException if a regex-based include/exclude is used where it
 *         is not supported, or the values source is neither bytes nor numeric
 * @throws UnsupportedOperationException if the numeric values source is floating point
 * @throws IOException declared by the overridden method
 */
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (!collectsFromSingleBucket) {
        return asMultiBucketAggregator(this, context, parent);
    }

    numberOfAggregatorsCreated++;

    // Work on a copy so the factory's own thresholds are never modified.
    BucketCountThresholds thresholds = new BucketCountThresholds(this.bucketCountThresholds);
    int defaultShardSize = SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize();
    if (thresholds.getShardSize() == defaultShardSize) {
        // No explicit shardSize was chosen by the user, so fall back to the
        // default heuristic that guards against wrong ranking caused by
        // distributed counting - but ask for twice the usual amount.
        // Significance scoring is poorly placed to down-select at shard
        // level: some interesting terms occur only once per shard and, at
        // that early stage, cannot be told apart from non-significant ones.
        thresholds.setShardSize(
            2 * BucketUtils.suggestShardSideQueueSize(thresholds.getRequiredSize(), context.numberOfShards()));
    }

    final boolean regexFilter = includeExclude != null && includeExclude.isRegexBased();

    if (valuesSource instanceof ValuesSource.Bytes) {
        // The hint is always parsed first so an invalid hint is still reported.
        ExecutionMode mode = executionHint == null ? null : ExecutionMode.fromString(executionHint);
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            // Without ordinals the map-based mode is forced, whatever the hint said.
            mode = ExecutionMode.MAP;
        } else if (mode == null) {
            mode = Aggregator.descendsFromBucketAggregator(parent)
                ? ExecutionMode.GLOBAL_ORDINALS_HASH
                : ExecutionMode.GLOBAL_ORDINALS;
        }
        assert mode != null;

        DocValueFormat format = config.format();
        if (regexFilter && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return mode.create(name, factories, valuesSource, format, thresholds, includeExclude, context, parent,
            significanceHeuristic, this, pipelineAggregators, metaData);
    }

    // Everything below deals with non-bytes sources, where regex filters are rejected.
    if (regexFilter) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }

    if (!(valuesSource instanceof ValuesSource.Numeric)) {
        throw new AggregationExecutionException("significant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
    }

    ValuesSource.Numeric numericSource = (ValuesSource.Numeric) valuesSource;
    if (numericSource.isFloatingPoint()) {
        throw new UnsupportedOperationException("No support for examining floating point numerics");
    }
    IncludeExclude.LongFilter longFilter =
        includeExclude == null ? null : includeExclude.convertToLongFilter(config.format());
    return new SignificantLongTermsAggregator(name, factories, numericSource, config.format(), thresholds, context,
        parent, significanceHeuristic, this, longFilter, pipelineAggregators, metaData);
}
Also used : BucketCountThresholds(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds) DocValueFormat(org.elasticsearch.search.DocValueFormat) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) ValuesSource(org.elasticsearch.search.aggregations.support.ValuesSource) AggregationExecutionException(org.elasticsearch.search.aggregations.AggregationExecutionException)

Example 2 with BucketCountThresholds

use of org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds in project elasticsearch by elastic.

In the class TermsAggregatorFactory, the method doCreateInternal.

/**
 * Builds the terms aggregator for the given values source.
 *
 * <p>When {@code collectsFromSingleBucket} is false the work is delegated to
 * {@code asMultiBucketAggregator}. Otherwise a private copy of the bucket count
 * thresholds is taken, adjusted and validated, and the aggregator is chosen
 * from the kind of values source: bytes sources go through an
 * {@code ExecutionMode}, floating point numerics through
 * {@code DoubleTermsAggregator} and other numerics through
 * {@code LongTermsAggregator}.
 *
 * @param valuesSource the source of values to aggregate on
 * @param parent the parent aggregator, passed on to the created aggregator
 * @param collectsFromSingleBucket whether the aggregator collects from a single bucket
 * @param pipelineAggregators pipeline aggregators passed on to the created aggregator
 * @param metaData metadata passed on to the created aggregator
 * @return the aggregator to use
 * @throws AggregationExecutionException if a regex-based include/exclude is used where it
 *         is not supported, or the values source is neither bytes nor numeric
 * @throws IOException declared by the overridden method
 */
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (!collectsFromSingleBucket) {
        return asMultiBucketAggregator(this, context, parent);
    }

    // Work on a copy so the factory's own thresholds are never modified.
    BucketCountThresholds thresholds = new BucketCountThresholds(this.bucketCountThresholds);
    final boolean orderedByTerm = order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC;
    if (!orderedByTerm
            && thresholds.getShardSize() == TermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // No explicit shardSize was chosen by the user: apply the default
        // heuristic to avoid wrong ranking caused by distributed counting.
        thresholds.setShardSize(
            BucketUtils.suggestShardSideQueueSize(thresholds.getRequiredSize(), context.numberOfShards()));
    }
    thresholds.ensureValidity();

    final boolean regexFilter = includeExclude != null && includeExclude.isRegexBased();

    if (valuesSource instanceof ValuesSource.Bytes) {
        // The hint is always parsed first so an invalid hint is still reported.
        ExecutionMode mode = executionHint == null ? null : ExecutionMode.fromString(executionHint);
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            // Ordinals are not available for this source, so override whatever was hinted.
            mode = ExecutionMode.MAP;
        }

        // -1 marks "not computed"; the real values are only looked up when
        // global ordinals may be used.
        long maxOrd = -1;
        double ratio = -1;
        if (mode == null || mode.needsGlobalOrdinals()) {
            ValuesSource.Bytes.WithOrdinals ordinalsSource = (ValuesSource.Bytes.WithOrdinals) valuesSource;
            IndexSearcher searcher = context.searcher();
            maxOrd = ordinalsSource.globalMaxOrd(searcher);
            ratio = (double) maxOrd / searcher.getIndexReader().numDocs();
        }

        // No usable hint: pick a sensible default.
        if (mode == null) {
            final boolean sparseOrdinals = Aggregator.descendsFromBucketAggregator(parent)
                || (includeExclude != null && includeExclude.isPartitionBased());
            if (sparseOrdinals) {
                // Ordinals would be sparse, so hashing is preferred.
                mode = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else if (factories == AggregatorFactories.EMPTY && ratio <= 0.5 && maxOrd <= 2048) {
                // 0.5: the number of global ordinal look-ups must at least be halved.
                // 2048: GLOBAL_ORDINALS_LOW_CARDINALITY uses extra memory that is
                // directly linked to maxOrd, so maxOrd has to be bounded.
                mode = ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY;
            } else {
                mode = ExecutionMode.GLOBAL_ORDINALS;
            }
        }

        SubAggCollectionMode collectionMode = collectMode;
        if (collectionMode == null) {
            collectionMode = factories == AggregatorFactories.EMPTY
                ? SubAggCollectionMode.DEPTH_FIRST
                : subAggCollectionMode(thresholds.getShardSize(), maxOrd);
        }

        DocValueFormat format = config.format();
        if (regexFilter && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return mode.create(name, factories, valuesSource, order, format, thresholds, includeExclude, context, parent,
            collectionMode, showTermDocCountError, pipelineAggregators, metaData);
    }

    // Everything below deals with non-bytes sources, where regex filters are rejected.
    if (regexFilter) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }

    if (!(valuesSource instanceof ValuesSource.Numeric)) {
        throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
    }

    ValuesSource.Numeric numericSource = (ValuesSource.Numeric) valuesSource;
    SubAggCollectionMode collectionMode = collectMode;
    if (collectionMode == null) {
        collectionMode = factories == AggregatorFactories.EMPTY
            ? SubAggCollectionMode.DEPTH_FIRST
            : subAggCollectionMode(thresholds.getShardSize(), -1);
    }

    if (numericSource.isFloatingPoint()) {
        IncludeExclude.LongFilter doubleFilter =
            includeExclude == null ? null : includeExclude.convertToDoubleFilter();
        return new DoubleTermsAggregator(name, factories, numericSource, config.format(), order, thresholds, context,
            parent, collectionMode, showTermDocCountError, doubleFilter, pipelineAggregators, metaData);
    }

    IncludeExclude.LongFilter longFilter =
        includeExclude == null ? null : includeExclude.convertToLongFilter(config.format());
    return new LongTermsAggregator(name, factories, numericSource, config.format(), order, thresholds, context,
        parent, collectionMode, showTermDocCountError, longFilter, pipelineAggregators, metaData);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) DocValueFormat(org.elasticsearch.search.DocValueFormat) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) ValuesSource(org.elasticsearch.search.aggregations.support.ValuesSource) BucketCountThresholds(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds) SubAggCollectionMode(org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode) AggregationExecutionException(org.elasticsearch.search.aggregations.AggregationExecutionException)

Aggregations

DocValueFormat (org.elasticsearch.search.DocValueFormat): 2, AggregationExecutionException (org.elasticsearch.search.aggregations.AggregationExecutionException): 2, BucketCountThresholds (org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds): 2, IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude): 2, ValuesSource (org.elasticsearch.search.aggregations.support.ValuesSource): 2, IndexSearcher (org.apache.lucene.search.IndexSearcher): 1, SubAggCollectionMode (org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode): 1