Search in sources :

Example 16 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project druid by druid-io.

the class SegmentMetadataQueryQueryToolChest method mergeAnalyses.

/**
 * Combines two segment analyses into a single analysis.
 *
 * <p>If either argument is {@code null} the other one is returned untouched. Otherwise intervals are
 * concatenated, columns are folded pairwise by name, sizes and row counts are summed, and aggregators,
 * timestamp spec and query granularity are merged by their respective merge helpers.
 *
 * @param arg1 left-hand analysis, may be {@code null}
 * @param arg2 right-hand analysis, may be {@code null}
 * @param lenientAggregatorMerge when {@code true}, aggregators are merged one name at a time and a name
 *        whose factories cannot be merged maps to {@code null}; when {@code false}, the aggregator sets
 *        are merged all at once through {@code AggregatorFactory.mergeAggregators}
 * @return the merged analysis, or the non-null argument when the other is {@code null}
 */
@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(final SegmentAnalysis arg1, final SegmentAnalysis arg2, boolean lenientAggregatorMerge) {
    // A missing side contributes nothing, so the other side is the answer as-is.
    if (arg1 == null || arg2 == null) {
        return arg1 == null ? arg2 : arg1;
    }

    // Intervals: stay null only when neither side reports any; otherwise left followed by right.
    final List<Interval> leftIntervals = arg1.getIntervals();
    final List<Interval> rightIntervals = arg2.getIntervals();
    final List<Interval> mergedIntervals;
    if (leftIntervals == null && rightIntervals == null) {
        mergedIntervals = null;
    } else {
        mergedIntervals = new ArrayList<>();
        if (leftIntervals != null) {
            mergedIntervals.addAll(leftIntervals);
        }
        if (rightIntervals != null) {
            mergedIntervals.addAll(rightIntervals);
        }
    }

    // Columns: every left column is folded with its right counterpart (possibly null);
    // columns that exist only on the right are copied over unchanged.
    final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
    final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
    final Map<String, ColumnAnalysis> mergedColumns = new TreeMap<>();
    for (Map.Entry<String, ColumnAnalysis> left : leftColumns.entrySet()) {
        final String name = left.getKey();
        mergedColumns.put(name, left.getValue().fold(rightColumns.get(name)));
    }
    for (Map.Entry<String, ColumnAnalysis> right : rightColumns.entrySet()) {
        if (!leftColumns.containsKey(right.getKey())) {
            mergedColumns.put(right.getKey(), right.getValue());
        }
    }

    final Map<String, AggregatorFactory> mergedAggregators = new HashMap<>();
    if (lenientAggregatorMerge) {
        // Merge each aggregator individually, ignoring nulls
        for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
            final Map<String, AggregatorFactory> current = analysis.getAggregators();
            if (current == null) {
                continue;
            }
            for (Map.Entry<String, AggregatorFactory> entry : current.entrySet()) {
                final String name = entry.getKey();
                final AggregatorFactory candidate = entry.getValue();
                final AggregatorFactory existing = mergedAggregators.get(name);
                AggregatorFactory result;
                if (existing == null) {
                    result = candidate;
                } else {
                    try {
                        result = existing.getMergingFactory(candidate);
                    } catch (AggregatorFactoryNotMergeableException e) {
                        // Unmergeable factories are recorded as a null entry for this name.
                        result = null;
                    }
                }
                mergedAggregators.put(name, result);
            }
        }
    } else {
        // Strict mode: hand both aggregator arrays (either may be null) to the bulk merge helper.
        AggregatorFactory[] leftAggs = null;
        if (arg1.getAggregators() != null) {
            leftAggs = arg1.getAggregators().values().toArray(new AggregatorFactory[0]);
        }
        AggregatorFactory[] rightAggs = null;
        if (arg2.getAggregators() != null) {
            rightAggs = arg2.getAggregators().values().toArray(new AggregatorFactory[0]);
        }
        final AggregatorFactory[] bulkMerged = AggregatorFactory.mergeAggregators(Arrays.asList(leftAggs, rightAggs));
        if (bulkMerged != null) {
            for (AggregatorFactory aggregator : bulkMerged) {
                mergedAggregators.put(aggregator.getName(), aggregator);
            }
        }
    }

    final TimestampSpec mergedTimestampSpec = TimestampSpec.mergeTimestampSpec(Lists.newArrayList(arg1.getTimestampSpec(), arg2.getTimestampSpec()));
    final Granularity mergedGranularity = Granularity.mergeGranularities(Lists.newArrayList(arg1.getQueryGranularity(), arg2.getQueryGranularity()));

    // The id survives only when both sides carry the same non-null id.
    final String leftId = arg1.getId();
    final String mergedId = (leftId != null && leftId.equals(arg2.getId())) ? leftId : "merged";

    // Likewise, rollup survives only when both sides agree on a non-null value.
    final Boolean leftRollup = arg1.isRollup();
    final Boolean mergedRollup = (leftRollup != null && leftRollup.equals(arg2.isRollup())) ? leftRollup : null;

    return new SegmentAnalysis(
        mergedId,
        mergedIntervals,
        mergedColumns,
        arg1.getSize() + arg2.getSize(),
        arg1.getNumRows() + arg2.getNumRows(),
        mergedAggregators.isEmpty() ? null : mergedAggregators,
        mergedTimestampSpec,
        mergedGranularity,
        mergedRollup
    );
}
Also used : HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Granularity(org.apache.druid.java.util.common.granularity.Granularity) AggregatorFactoryNotMergeableException(org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 17 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project druid by druid-io.

the class SegmentMetadataQueryRunnerFactory method createRunner.

/**
 * Builds a runner that analyzes the given segment and emits exactly one {@link SegmentAnalysis}.
 *
 * <p>The analysis types of the incoming query are finalized against the tool chest configuration
 * first. Segment metadata is only looked up when the query asks for aggregators, timestamp spec,
 * query granularity or rollup.
 *
 * @param segment the segment to analyze
 * @return a runner producing a single-element sequence with the segment's analysis
 */
@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
    return new QueryRunner<SegmentAnalysis>() {

        @Override
        public Sequence<SegmentAnalysis> run(QueryPlus<SegmentAnalysis> inQ, ResponseContext responseContext) {
            final SegmentMetadataQuery updatedQuery = ((SegmentMetadataQuery) inQ.getQuery()).withFinalizedAnalysisTypes(toolChest.getConfig());
            final SegmentAnalyzer analyzer = new SegmentAnalyzer(updatedQuery.getAnalysisTypes());
            final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
            final long numRows = analyzer.numRows(segment);

            // When size analysis is on, seed the total with one byte per analyzed column per row
            // (the original comment attributes this to whitespace); otherwise start from zero.
            long totalSize = analyzer.analyzingSize() ? analyzedColumns.size() * numRows : 0;

            final ColumnIncluderator includerator = updatedQuery.getToInclude();
            final Map<String, ColumnAnalysis> columns = new TreeMap<>();
            for (Map.Entry<String, ColumnAnalysis> analyzed : analyzedColumns.entrySet()) {
                final ColumnAnalysis column = analyzed.getValue();
                // Every non-error column counts toward the size, even if it is filtered out below.
                if (!column.isError()) {
                    totalSize += column.getSize();
                }
                if (includerator.include(analyzed.getKey())) {
                    columns.put(analyzed.getKey(), column);
                }
            }

            List<Interval> retIntervals = null;
            if (updatedQuery.analyzingInterval()) {
                retIntervals = Collections.singletonList(segment.getDataInterval());
            }

            // Metadata is fetched lazily: each section below asks for it only if no earlier
            // section already obtained a non-null instance.
            Metadata metadata = null;

            Map<String, AggregatorFactory> aggregators = null;
            if (updatedQuery.hasAggregators()) {
                metadata = segment.asStorageAdapter().getMetadata();
                if (metadata != null && metadata.getAggregators() != null) {
                    aggregators = new HashMap<>();
                    for (AggregatorFactory aggregator : metadata.getAggregators()) {
                        aggregators.put(aggregator.getName(), aggregator);
                    }
                }
            }

            TimestampSpec timestampSpec = null;
            if (updatedQuery.hasTimestampSpec()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                if (metadata != null) {
                    timestampSpec = metadata.getTimestampSpec();
                }
            }

            Granularity queryGranularity = null;
            if (updatedQuery.hasQueryGranularity()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                if (metadata != null) {
                    queryGranularity = metadata.getQueryGranularity();
                }
            }

            Boolean rollup = null;
            if (updatedQuery.hasRollup()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                if (metadata != null) {
                    rollup = metadata.isRollup();
                }
                if (rollup == null) {
                    // in this case, this segment is built before no-rollup function is coded,
                    // thus it is built with rollup
                    rollup = Boolean.TRUE;
                }
            }

            final SegmentAnalysis analysis = new SegmentAnalysis(
                segment.getId().toString(),
                retIntervals,
                columns,
                totalSize,
                numRows,
                aggregators,
                timestampSpec,
                queryGranularity,
                rollup
            );
            return Sequences.simple(Collections.singletonList(analysis));
        }
    };
}
Also used : Metadata(org.apache.druid.segment.Metadata) TreeMap(java.util.TreeMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Granularity(org.apache.druid.java.util.common.granularity.Granularity) ColumnIncluderator(org.apache.druid.query.metadata.metadata.ColumnIncluderator) ConcatQueryRunner(org.apache.druid.query.ConcatQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) SegmentMetadataQuery(org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) ResponseContext(org.apache.druid.query.context.ResponseContext) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) QueryPlus(org.apache.druid.query.QueryPlus) Interval(org.joda.time.Interval)

Example 18 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project druid by druid-io.

the class SegmentMetadataQueryRunnerFactory method mergeRunners.

/**
 * Wraps every per-segment runner so that it executes on the processing pool, and concatenates
 * the wrapped runners.
 *
 * <p>Each wrapped runner submits its work to {@code queryProcessingPool}, materializes the result
 * into a list on the pool thread, and blocks the calling thread until that result is available
 * (bounded by the query timeout when the query has one).
 *
 * @param queryProcessingPool pool on which each runner's work is submitted
 * @param queryRunners the runners to wrap
 * @return a runner that concatenates the results of all wrapped runners
 */
@Override
public QueryRunner<SegmentAnalysis> mergeRunners(QueryProcessingPool queryProcessingPool, Iterable<QueryRunner<SegmentAnalysis>> queryRunners) {
    return new ConcatQueryRunner<SegmentAnalysis>(Sequences.map(Sequences.simple(queryRunners), new Function<QueryRunner<SegmentAnalysis>, QueryRunner<SegmentAnalysis>>() {

        @Override
        public QueryRunner<SegmentAnalysis> apply(final QueryRunner<SegmentAnalysis> input) {
            return new QueryRunner<SegmentAnalysis>() {

                @Override
                public Sequence<SegmentAnalysis> run(final QueryPlus<SegmentAnalysis> queryPlus, final ResponseContext responseContext) {
                    final Query<SegmentAnalysis> query = queryPlus.getQuery();
                    final int priority = QueryContexts.getPriority(query);
                    final QueryPlus<SegmentAnalysis> threadSafeQueryPlus = queryPlus.withoutThreadUnsafeState();
                    final ListenableFuture<Sequence<SegmentAnalysis>> pending = queryProcessingPool.submitRunnerTask(new AbstractPrioritizedQueryRunnerCallable<Sequence<SegmentAnalysis>, SegmentAnalysis>(priority, input) {

                        @Override
                        public Sequence<SegmentAnalysis> call() {
                            // Materialize on the pool thread so the caller receives a fully computed result.
                            return Sequences.simple(input.run(threadSafeQueryPlus, responseContext).toList());
                        }
                    });
                    return awaitSegmentAnalyses(query, pending);
                }
            };
        }
    }));
}

/**
 * Registers the future with the query watcher and blocks until it completes, translating
 * interruption, cancellation, timeout and execution failures into runtime exceptions.
 */
private Sequence<SegmentAnalysis> awaitSegmentAnalyses(final Query<SegmentAnalysis> query, final ListenableFuture<Sequence<SegmentAnalysis>> future) {
    try {
        queryWatcher.registerQueryFuture(query, future);
        if (!QueryContexts.hasTimeout(query)) {
            return future.get();
        }
        return future.get(QueryContexts.getTimeout(query), TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        log.warn(e, "Query interrupted, cancelling pending results, query id [%s]", query.getId());
        future.cancel(true);
        throw new QueryInterruptedException(e);
    } catch (CancellationException e) {
        throw new QueryInterruptedException(e);
    } catch (TimeoutException e) {
        log.info("Query timeout, cancelling pending results for query id [%s]", query.getId());
        future.cancel(true);
        throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
    } catch (ExecutionException e) {
        throw new RuntimeException(e);
    }
}
Also used : Sequence(org.apache.druid.java.util.common.guava.Sequence) QueryInterruptedException(org.apache.druid.query.QueryInterruptedException) ConcatQueryRunner(org.apache.druid.query.ConcatQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) Function(com.google.common.base.Function) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) CancellationException(java.util.concurrent.CancellationException) ResponseContext(org.apache.druid.query.context.ResponseContext) ConcatQueryRunner(org.apache.druid.query.ConcatQueryRunner) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) ExecutionException(java.util.concurrent.ExecutionException) QueryPlus(org.apache.druid.query.QueryPlus) QueryInterruptedException(org.apache.druid.query.QueryInterruptedException) TimeoutException(java.util.concurrent.TimeoutException) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException)

Example 19 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project druid by druid-io.

the class SegmentMetadataQueryTest method testSegmentMetadataQueryWithHasMultipleValuesMerge.

/**
 * Checks that a merging segment metadata query over {@code runner1} and {@code runner2}, restricted
 * to the {@code placement} and {@code placementish} columns with CARDINALITY analysis, yields the
 * expected merged analysis.
 */
@Test
public void testSegmentMetadataQueryWithHasMultipleValuesMerge() {
    SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
        // The merged id is "merged" unless both inputs share the same segment id.
        differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
        null,
        ImmutableMap.of(
            "placement",
            new ColumnAnalysis(ColumnType.STRING, ValueType.STRING.toString(), false, false, 0, 1, null, null, null),
            "placementish",
            new ColumnAnalysis(ColumnType.STRING, ValueType.STRING.toString(), true, false, 0, 9, null, null, null)
        ),
        0,
        expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
        null,
        null,
        null,
        null
    );
    QueryToolChest toolChest = FACTORY.getToolchest();
    // The runners are merged on a direct executor, so no dedicated thread pool is created here;
    // the previously allocated cached pool was never used and leaked whenever the assertion failed.
    QueryRunner myRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(
            FACTORY.mergeRunners(
                Execs.directExecutor(),
                Lists.newArrayList(toolChest.preMergeQueryDecoration(runner1), toolChest.preMergeQueryDecoration(runner2))
            )
        ),
        toolChest
    );
    SegmentMetadataQuery query = Druids.newSegmentMetadataQueryBuilder()
        .dataSource("testing")
        .intervals("2013/2014")
        .toInclude(new ListColumnIncluderator(Arrays.asList("placement", "placementish")))
        .analysisTypes(SegmentMetadataQuery.AnalysisType.CARDINALITY)
        .merge(true)
        .build();
    TestHelper.assertExpectedObjects(ImmutableList.of(mergedSegmentAnalysis), myRunner.run(QueryPlus.wrap(query)), "failed SegmentMetadata merging query");
}
Also used : FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) SegmentMetadataQuery(org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) ListColumnIncluderator(org.apache.druid.query.metadata.metadata.ListColumnIncluderator) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) ExecutorService(java.util.concurrent.ExecutorService) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) QueryToolChest(org.apache.druid.query.QueryToolChest) QueryRunner(org.apache.druid.query.QueryRunner) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) Test(org.junit.Test)

Example 20 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project druid by druid-io.

the class SegmentMetadataQueryQueryToolChestTest method testCacheStrategy.

/**
 * Exercises the segment metadata cache strategy: the cache key computed for a fixed query must match
 * the expected bytes, and an analysis must come back equal after being prepared for the segment-level
 * cache, serialized, deserialized and pulled from the cache again.
 */
@Test
public void testCacheStrategy() throws Exception {
    final SegmentMetadataQuery query = new SegmentMetadataQuery(
        new TableDataSource("dummy"),
        new LegacySegmentSpec("2015-01-01/2015-01-02"),
        null,
        null,
        null,
        null,
        false,
        false
    );
    final SegmentMetadataQueryQueryToolChest toolChest = new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig());
    final CacheStrategy<SegmentAnalysis, SegmentAnalysis, SegmentMetadataQuery> strategy = toolChest.getCacheStrategy(query);

    // Test cache key generation
    final byte[] expectedKey = { 0x04, 0x09, 0x01, 0x0A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x02, 0x04 };
    Assert.assertArrayEquals(expectedKey, strategy.computeCacheKey(query));

    // Test the cache round trip through JSON serialization
    final ColumnAnalysis placement = new ColumnAnalysis(ColumnType.STRING, ValueType.STRING.name(), true, false, 10881, 1, "preferred", "preferred", null);
    final SegmentAnalysis original = new SegmentAnalysis(
        "testSegment",
        ImmutableList.of(Intervals.of("2011-01-12T00:00:00.000Z/2011-04-15T00:00:00.001Z")),
        ImmutableMap.of("placement", placement),
        71982,
        100,
        null,
        null,
        null,
        null
    );
    final Object prepared = strategy.prepareForSegmentLevelCache().apply(original);

    final ObjectMapper mapper = new DefaultObjectMapper();
    final byte[] serialized = mapper.writeValueAsBytes(prepared);
    final SegmentAnalysis deserialized = mapper.readValue(serialized, strategy.getCacheObjectClazz());
    final SegmentAnalysis restored = strategy.pullFromSegmentLevelCache().apply(deserialized);

    Assert.assertEquals(original, restored);
}
Also used : TableDataSource(org.apache.druid.query.TableDataSource) SegmentMetadataQuery(org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) LegacySegmentSpec(org.apache.druid.query.spec.LegacySegmentSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Test(org.junit.Test)

Aggregations

SegmentAnalysis (org.apache.druid.query.metadata.metadata.SegmentAnalysis)30 ColumnAnalysis (org.apache.druid.query.metadata.metadata.ColumnAnalysis)20 Test (org.junit.Test)18 SegmentMetadataQuery (org.apache.druid.query.metadata.metadata.SegmentMetadataQuery)16 QueryRunner (org.apache.druid.query.QueryRunner)11 ListColumnIncluderator (org.apache.druid.query.metadata.metadata.ListColumnIncluderator)11 ExecutorService (java.util.concurrent.ExecutorService)9 FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner)8 QueryToolChest (org.apache.druid.query.QueryToolChest)8 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)5 IOException (java.io.IOException)4 Map (java.util.Map)4 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)4 HashMap (java.util.HashMap)3 TableDataSource (org.apache.druid.query.TableDataSource)3 DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory)3 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 List (java.util.List)2