Example 16 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class DruidQuery method toGroupByQuery.

/**
 * Return this query as a GroupBy query, or null if this query is not compatible with GroupBy.
 *
 * @return query or null
 */
@Nullable
private GroupByQuery toGroupByQuery(final QueryFeatureInspector queryFeatureInspector) {
    if (grouping == null) {
        return null;
    }
    if (sorting != null && sorting.getOffsetLimit().hasLimit() && sorting.getOffsetLimit().getLimit() <= 0) {
        // Cannot handle zero or negative limits.
        return null;
    }
    final Pair<DataSource, Filtration> dataSourceFiltrationPair = getFiltration(dataSource, filter, virtualColumnRegistry);
    final DataSource newDataSource = dataSourceFiltrationPair.lhs;
    final Filtration filtration = dataSourceFiltrationPair.rhs;
    final DimFilterHavingSpec havingSpec;
    if (grouping.getHavingFilter() != null) {
        havingSpec = new DimFilterHavingSpec(Filtration.create(grouping.getHavingFilter()).optimizeFilterOnly(grouping.getOutputRowSignature()).getDimFilter(), true);
    } else {
        havingSpec = null;
    }
    final List<PostAggregator> postAggregators = new ArrayList<>(grouping.getPostAggregators());
    if (sorting != null && sorting.getProjection() != null) {
        postAggregators.addAll(sorting.getProjection().getPostAggregators());
    }
    GroupByQuery query = new GroupByQuery(
        newDataSource,
        filtration.getQuerySegmentSpec(),
        getVirtualColumns(true),
        filtration.getDimFilter(),
        Granularities.ALL,
        grouping.getDimensionSpecs(),
        grouping.getAggregatorFactories(),
        postAggregators,
        havingSpec,
        Optional.ofNullable(sorting).orElse(Sorting.none()).limitSpec(),
        grouping.getSubtotals().toSubtotalsSpec(grouping.getDimensionSpecs()),
        ImmutableSortedMap.copyOf(plannerContext.getQueryContext())
    );
    // We don't apply timestamp computation optimization yet when limit is pushed down. Maybe someday.
    if (query.getLimitSpec() instanceof DefaultLimitSpec && query.isApplyLimitPushDown()) {
        return query;
    }
    Map<String, Object> theContext = new HashMap<>();
    Granularity queryGranularity = null;
    // Note: part of the query-plan logic is now handled in GroupByStrategyV2.
    if (!grouping.getDimensions().isEmpty()) {
        for (DimensionExpression dimensionExpression : grouping.getDimensions()) {
            Granularity granularity = Expressions.toQueryGranularity(dimensionExpression.getDruidExpression(), plannerContext.getExprMacroTable());
            if (granularity == null) {
                continue;
            }
            if (queryGranularity != null) {
                // Grouping on more than one timestamp_floor expression,
                // e.g.: GROUP BY timestamp_floor(__time, 'P1D'), timestamp_floor(__time, 'PT1H')
                queryGranularity = null;
                break;
            }
            queryGranularity = granularity;
            int timestampDimensionIndexInDimensions = grouping.getDimensions().indexOf(dimensionExpression);
            // These settings only affect the innermost query sent to the downstream compute nodes.
            theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, dimensionExpression.getOutputName());
            theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX, timestampDimensionIndexInDimensions);
            theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_GRANULARITY, queryGranularity);
        }
    }
    if (queryGranularity == null) {
        return query;
    }
    return query.withOverriddenContext(theContext);
}
Also used : DimFilterHavingSpec(org.apache.druid.query.groupby.having.DimFilterHavingSpec) Filtration(org.apache.druid.sql.calcite.filtration.Filtration) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) HashMap(java.util.HashMap) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) ArrayList(java.util.ArrayList) DimensionExpression(org.apache.druid.sql.calcite.aggregation.DimensionExpression) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) JoinDataSource(org.apache.druid.query.JoinDataSource) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Nullable(javax.annotation.Nullable)
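
The loop above implements an "exactly one timestamp_floor dimension" rule: the timestamp-result-field optimization applies only when a single dimension maps to a granularity, and a second match resets queryGranularity to null so the context override is skipped. A minimal standalone sketch of that collapse-to-null pattern, with plain strings standing in for DimensionExpression and Granularity:

import java.util.Arrays;
import java.util.List;

public class SingleGranularitySketch {
    // Returns the sole non-null granularity among the candidates, or null when
    // zero or more than one dimension maps to a granularity (mirroring the
    // "more than one timestamp_floor" bail-out above).
    static String detectSingle(List<String> candidates) {
        String found = null;
        for (String granularity : candidates) {
            if (granularity == null) {
                continue; // this dimension is not a timestamp_floor
            }
            if (found != null) {
                return null; // a second match: the optimization cannot apply
            }
            found = granularity;
        }
        return found;
    }

    public static void main(String[] args) {
        System.out.println(detectSingle(Arrays.asList("DAY", null)));   // DAY
        System.out.println(detectSingle(Arrays.asList("DAY", "HOUR"))); // null
    }
}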

Example 17 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class TopNQueryEngine method query.

/**
 * Do the thing - process a {@link StorageAdapter} into a {@link Sequence} of {@link TopNResultValue}, with one of the
 * fine {@link TopNAlgorithm}s available chosen based on the type of column being aggregated. The algorithm provides a
 * mapping function to process rows from the adapter {@link org.apache.druid.segment.Cursor}, applying
 * {@link AggregatorFactory} to create or update {@link TopNResultValue}.
 */
public Sequence<Result<TopNResultValue>> query(final TopNQuery query, final StorageAdapter adapter, @Nullable final TopNQueryMetrics queryMetrics) {
    if (adapter == null) {
        throw new SegmentMissingException("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    final List<Interval> queryIntervals = query.getQuerySegmentSpec().getIntervals();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
    final Granularity granularity = query.getGranularity();
    final TopNMapFn mapFn = getMapFn(query, adapter, queryMetrics);
    Preconditions.checkArgument(queryIntervals.size() == 1, "Can only handle a single interval, got[%s]", queryIntervals);
    return Sequences.filter(Sequences.map(adapter.makeCursors(filter, queryIntervals.get(0), query.getVirtualColumns(), granularity, query.isDescending(), queryMetrics), input -> {
        if (queryMetrics != null) {
            queryMetrics.cursor(input);
        }
        return mapFn.apply(input, queryMetrics);
    }), Predicates.notNull());
}
Also used : Sequence(org.apache.druid.java.util.common.guava.Sequence) Granularity(org.apache.druid.java.util.common.granularity.Granularity) NonBlockingPool(org.apache.druid.collections.NonBlockingPool) ExtractionFn(org.apache.druid.query.extraction.ExtractionFn) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ValueType(org.apache.druid.segment.column.ValueType) SegmentMissingException(org.apache.druid.segment.SegmentMissingException) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) Result(org.apache.druid.query.Result) Interval(org.joda.time.Interval) List(java.util.List) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) Types(org.apache.druid.segment.column.Types) Predicates(com.google.common.base.Predicates) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Preconditions(com.google.common.base.Preconditions) Filters(org.apache.druid.segment.filter.Filters) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) Filter(org.apache.druid.query.filter.Filter)
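
The shape of the pipeline above is map-then-drop-nulls: each granularity bucket yields a cursor, mapFn turns a cursor into a per-bucket TopN result (or null for an empty bucket), and null results are filtered out. A rough standalone analogy using plain Java streams rather than Druid's Sequence API; the cursor strings and the lambda standing in for mapFn are hypothetical:

import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

public class MapThenFilterSketch {
    public static void main(String[] args) {
        // One "cursor" per granularity bucket; an empty bucket maps to null.
        List<String> cursors = List.of("bucket-1", "empty", "bucket-2");
        List<String> results = cursors.stream()
                .map(c -> c.equals("empty") ? null : "topN(" + c + ")") // mapFn.apply
                .filter(Objects::nonNull)                               // Predicates.notNull()
                .collect(Collectors.toList());
        System.out.println(results); // [topN(bucket-1), topN(bucket-2)]
    }
}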

Example 18 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class Expressions method toQueryGranularity.

/**
 * Converts an expression to a Granularity, if possible. This is possible if, and only if, the expression
 * is a timestamp_floor function on the __time column with literal parameters for period, origin, and timeZone.
 *
 * @return granularity or null if not possible
 */
@Nullable
public static Granularity toQueryGranularity(final DruidExpression expression, final ExprMacroTable macroTable) {
    final TimestampFloorExprMacro.TimestampFloorExpr expr = asTimestampFloorExpr(expression, macroTable);
    if (expr == null) {
        return null;
    }
    final Expr arg = expr.getArg();
    final Granularity granularity = expr.getGranularity();
    if (ColumnHolder.TIME_COLUMN_NAME.equals(arg.getBindingIfIdentifier())) {
        return granularity;
    } else {
        return null;
    }
}
Also used : TimestampFloorExprMacro(org.apache.druid.query.expression.TimestampFloorExprMacro) Expr(org.apache.druid.math.expr.Expr) Granularity(org.apache.druid.java.util.common.granularity.Granularity) Nullable(javax.annotation.Nullable)
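
In other words, the conversion succeeds only when the floored argument is the __time column itself; flooring any other column or expression yields null. A minimal sketch of that qualifying check, with plain strings standing in for the Expr binding and the Granularity:

public class QueryGranularitySketch {
    static final String TIME_COLUMN = "__time"; // ColumnHolder.TIME_COLUMN_NAME

    // Returns the floor period when the floored argument is the __time column
    // itself, null otherwise; flooredArg and period are hypothetical stand-ins
    // for the TimestampFloorExpr pieces.
    static String toQueryGranularity(String flooredArg, String period) {
        return TIME_COLUMN.equals(flooredArg) ? period : null;
    }

    public static void main(String[] args) {
        System.out.println(toQueryGranularity("__time", "P1D")); // P1D
        System.out.println(toQueryGranularity("added", "P1D"));  // null: not the time column
    }
}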

Example 19 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class ResultGranularTimestampComparatorTest method testCompareDay.

@Test
public void testCompareDay() {
    Result<Object> res = new Result<Object>(time, null);
    Result<Object> same = new Result<Object>(time.plusHours(12), null);
    Result<Object> greater = new Result<Object>(time.plusHours(25), null);
    Result<Object> less = new Result<Object>(time.minusHours(1), null);
    Granularity day = Granularities.DAY;
    Assert.assertEquals(ResultGranularTimestampComparator.create(day, descending).compare(res, same), 0);
    Assert.assertEquals(ResultGranularTimestampComparator.create(day, descending).compare(res, greater), descending ? 1 : -1);
    Assert.assertEquals(ResultGranularTimestampComparator.create(day, descending).compare(res, less), descending ? -1 : 1);
}
Also used : Granularity(org.apache.druid.java.util.common.granularity.Granularity) Test(org.junit.Test)
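
The test above pins down the comparator's contract: timestamps are compared by their granular bucket, so instants within the same DAY bucket compare equal, and the sign of the result flips with the descending flag. A minimal standalone sketch of that bucketing idea, assuming UTC and using a hand-rolled dayBucket helper in place of Druid's granularity bucketing:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class GranularCompareSketch {
    // Floors an instant to UTC midnight; a stand-in for DAY bucketing.
    static long dayBucket(DateTime t) {
        long dayMillis = 24L * 60 * 60 * 1000;
        return t.getMillis() - Math.floorMod(t.getMillis(), dayMillis);
    }

    public static void main(String[] args) {
        DateTime time = new DateTime("2011-01-01T00:00:00Z", DateTimeZone.UTC);
        System.out.println(dayBucket(time) == dayBucket(time.plusHours(12))); // true: same day
        System.out.println(dayBucket(time) == dayBucket(time.plusHours(25))); // false: next day
    }
}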

Example 20 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class GranularUnprocessedPathSpec method addInputPaths.

@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    // This PathSpec breaks so many abstractions that we might as well break some more.
    Preconditions.checkState(config.getGranularitySpec() instanceof UniformGranularitySpec, StringUtils.format("Cannot use %s without %s", GranularUnprocessedPathSpec.class.getSimpleName(), UniformGranularitySpec.class.getSimpleName()));
    final Path betaInput = new Path(getInputPath());
    final FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
    final Granularity segmentGranularity = config.getGranularitySpec().getSegmentGranularity();
    Map<Long, Long> inputModifiedTimes = new TreeMap<>(Ordering.natural().reverse());
    for (FileStatus status : FSSpideringIterator.spiderIterable(fs, betaInput)) {
        final DateTime key = segmentGranularity.toDate(status.getPath().toString());
        final Long currVal = inputModifiedTimes.get(key.getMillis());
        final long mTime = status.getModificationTime();
        inputModifiedTimes.put(key.getMillis(), currVal == null ? mTime : Math.max(currVal, mTime));
    }
    Set<Interval> bucketsToRun = new TreeSet<>(Comparators.intervals());
    for (Map.Entry<Long, Long> entry : inputModifiedTimes.entrySet()) {
        DateTime timeBucket = DateTimes.utc(entry.getKey());
        long mTime = entry.getValue();
        String bucketOutput = StringUtils.format("%s/%s", config.getSchema().getIOConfig().getSegmentOutputPath(), segmentGranularity.toPath(timeBucket));
        for (FileStatus fileStatus : FSSpideringIterator.spiderIterable(fs, new Path(bucketOutput))) {
            if (fileStatus.getModificationTime() > mTime) {
                bucketsToRun.add(new Interval(timeBucket, segmentGranularity.increment(timeBucket)));
                break;
            }
        }
        if (bucketsToRun.size() >= maxBuckets) {
            break;
        }
    }
    config.setGranularitySpec(new UniformGranularitySpec(segmentGranularity, config.getGranularitySpec().getQueryGranularity(), config.getGranularitySpec().isRollup(), Lists.newArrayList(bucketsToRun)));
    return super.addInputPaths(config, job);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Granularity(org.apache.druid.java.util.common.granularity.Granularity) TreeMap(java.util.TreeMap) DateTime(org.joda.time.DateTime) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) Map(java.util.Map) Interval(org.joda.time.Interval)
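
This PathSpec leans on Granularity's path helpers, both called above: toPath renders the directory for a time bucket, and toDate recovers the bucket start from a file path containing those date components. A small sketch of the round trip, assuming DAY granularity; the exact directory layout in the comment is illustrative:

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class GranularPathSketch {
    public static void main(String[] args) {
        Granularity day = Granularities.DAY;
        DateTime bucket = new DateTime("2011-03-15T00:00:00Z", DateTimeZone.UTC);
        String path = day.toPath(bucket);               // e.g. y=2011/m=03/d=15
        DateTime parsed = day.toDate("input/" + path + "/part-0000");
        System.out.println(path + " -> " + parsed);     // parsed is the bucket start
    }
}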

Aggregations

Granularity (org.apache.druid.java.util.common.granularity.Granularity) - 58
Interval (org.joda.time.Interval) - 27
ArrayList (java.util.ArrayList) - 22
DateTime (org.joda.time.DateTime) - 19
Test (org.junit.Test) - 16
List (java.util.List) - 14
Map (java.util.Map) - 14
HashMap (java.util.HashMap) - 13
Nullable (javax.annotation.Nullable) - 12
PeriodGranularity (org.apache.druid.java.util.common.granularity.PeriodGranularity) - 12
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory) - 12
Period (org.joda.time.Period) - 11
ISE (org.apache.druid.java.util.common.ISE) - 8
Result (org.apache.druid.query.Result) - 8
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) - 7
ImmutableList (com.google.common.collect.ImmutableList) - 7
VisibleForTesting (com.google.common.annotations.VisibleForTesting) - 6
ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) - 6
LockGranularity (org.apache.druid.indexing.common.LockGranularity) - 6
Sequence (org.apache.druid.java.util.common.guava.Sequence) - 6