Example 96 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in the project druid by druid-io.

From the class InputRowSchemasTest, method test_createColumnsFilter_normal.

@Test
public void test_createColumnsFilter_normal() {
    final ColumnsFilter columnsFilter = InputRowSchemas.createColumnsFilter(
        new TimestampSpec("ts", "auto", null),
        new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("foo"))),
        new TransformSpec(
            new SelectorDimFilter("bar", "x", null),
            ImmutableList.of(new ExpressionTransform("baz", "qux + 3", ExprMacroTable.nil()))
        ),
        new AggregatorFactory[]{ new LongSumAggregatorFactory("billy", "bob") }
    );
    Assert.assertEquals(
        ColumnsFilter.inclusionBased(ImmutableSet.of("ts", "foo", "bar", "qux", "bob")),
        columnsFilter
    );
}
Also used: SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), ColumnsFilter (org.apache.druid.data.input.ColumnsFilter), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), ExpressionTransform (org.apache.druid.segment.transform.ExpressionTransform), TransformSpec (org.apache.druid.segment.transform.TransformSpec), Test (org.junit.Test), NullHandlingTest (org.apache.druid.common.config.NullHandlingTest)
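
For context on the first argument: this TimestampSpec names the timestamp column ("ts"), a format ("auto" lets Druid detect ISO-8601 strings or epoch millis), and a default value for rows missing a timestamp (null here). A minimal sketch of how the resulting inclusion-based filter behaves, assuming ColumnsFilter exposes a boolean apply(String column) check; "unrelated" is an illustrative column name:

    TimestampSpec spec = new TimestampSpec("ts", "auto", null);
    spec.getTimestampColumn();        // "ts": the column the spec contributes to the filter
    columnsFilter.apply("ts");        // true: "ts" is in the inclusion set
    columnsFilter.apply("unrelated"); // false: not referenced by any spec above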

Example 97 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in the project druid by druid-io.

From the class InputRowSchemasTest, method test_createColumnsFilter_schemaless.

@Test
public void test_createColumnsFilter_schemaless() {
    final ColumnsFilter columnsFilter = InputRowSchemas.createColumnsFilter(
        new TimestampSpec("ts", "auto", null),
        DimensionsSpec.builder()
                      .setDimensionExclusions(ImmutableList.of("ts", "foo", "bar", "qux", "bob"))
                      .build(),
        new TransformSpec(
            new SelectorDimFilter("bar", "x", null),
            ImmutableList.of(new ExpressionTransform("baz", "qux + 3", ExprMacroTable.nil()))
        ),
        new AggregatorFactory[]{ new LongSumAggregatorFactory("billy", "bob") }
    );
    Assert.assertEquals(ColumnsFilter.exclusionBased(ImmutableSet.of("foo")), columnsFilter);
}
Also used: SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), ColumnsFilter (org.apache.druid.data.input.ColumnsFilter), ExpressionTransform (org.apache.druid.segment.transform.ExpressionTransform), TransformSpec (org.apache.druid.segment.transform.TransformSpec), Test (org.junit.Test), NullHandlingTest (org.apache.druid.common.config.NullHandlingTest)
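
The assertion is worth unpacking: the builder starts with five excluded dimensions, but createColumnsFilter removes from the exclusion list every column ingestion still has to read, namely the timestamp column "ts", the filter input "bar", the expression input "qux", and the aggregator input "bob", so only "foo" stays excluded. A minimal check under the same apply(String) assumption as above:

    columnsFilter.apply("foo"); // false: still excluded
    columnsFilter.apply("qux"); // true: input to the "baz" transform
    columnsFilter.apply("bob"); // true: input to the "billy" aggregator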

Example 98 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in the project druid by druid-io.

From the class SegmentMetadataQueryQueryToolChest, method mergeAnalyses.

@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(final SegmentAnalysis arg1, final SegmentAnalysis arg2, boolean lenientAggregatorMerge) {
    if (arg1 == null) {
        return arg2;
    }
    if (arg2 == null) {
        return arg1;
    }
    List<Interval> newIntervals = null;
    if (arg1.getIntervals() != null) {
        newIntervals = new ArrayList<>(arg1.getIntervals());
    }
    if (arg2.getIntervals() != null) {
        if (newIntervals == null) {
            newIntervals = new ArrayList<>();
        }
        newIntervals.addAll(arg2.getIntervals());
    }
    final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
    final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
    Map<String, ColumnAnalysis> columns = new TreeMap<>();
    Set<String> rightColumnNames = Sets.newHashSet(rightColumns.keySet());
    for (Map.Entry<String, ColumnAnalysis> entry : leftColumns.entrySet()) {
        final String columnName = entry.getKey();
        columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName)));
        rightColumnNames.remove(columnName);
    }
    for (String columnName : rightColumnNames) {
        columns.put(columnName, rightColumns.get(columnName));
    }
    final Map<String, AggregatorFactory> aggregators = new HashMap<>();
    if (lenientAggregatorMerge) {
        // Merge each aggregator individually, ignoring nulls
        for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
            if (analysis.getAggregators() != null) {
                for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
                    final String aggregatorName = entry.getKey();
                    final AggregatorFactory aggregator = entry.getValue();
                    AggregatorFactory merged = aggregators.get(aggregatorName);
                    if (merged != null) {
                        try {
                            merged = merged.getMergingFactory(aggregator);
                        } catch (AggregatorFactoryNotMergeableException e) {
                            merged = null;
                        }
                    } else {
                        merged = aggregator;
                    }
                    aggregators.put(aggregatorName, merged);
                }
            }
        }
    } else {
        final AggregatorFactory[] aggs1 = arg1.getAggregators() != null ? arg1.getAggregators().values().toArray(new AggregatorFactory[0]) : null;
        final AggregatorFactory[] aggs2 = arg2.getAggregators() != null ? arg2.getAggregators().values().toArray(new AggregatorFactory[0]) : null;
        final AggregatorFactory[] merged = AggregatorFactory.mergeAggregators(Arrays.asList(aggs1, aggs2));
        if (merged != null) {
            for (AggregatorFactory aggregator : merged) {
                aggregators.put(aggregator.getName(), aggregator);
            }
        }
    }
    final TimestampSpec timestampSpec = TimestampSpec.mergeTimestampSpec(Lists.newArrayList(arg1.getTimestampSpec(), arg2.getTimestampSpec()));
    final Granularity queryGranularity = Granularity.mergeGranularities(Lists.newArrayList(arg1.getQueryGranularity(), arg2.getQueryGranularity()));
    final String mergedId;
    if (arg1.getId() != null && arg2.getId() != null && arg1.getId().equals(arg2.getId())) {
        mergedId = arg1.getId();
    } else {
        mergedId = "merged";
    }
    final Boolean rollup;
    if (arg1.isRollup() != null && arg2.isRollup() != null && arg1.isRollup().equals(arg2.isRollup())) {
        rollup = arg1.isRollup();
    } else {
        rollup = null;
    }
    return new SegmentAnalysis(
        mergedId,
        newIntervals,
        columns,
        arg1.getSize() + arg2.getSize(),
        arg1.getNumRows() + arg2.getNumRows(),
        aggregators.isEmpty() ? null : aggregators,
        timestampSpec,
        queryGranularity,
        rollup
    );
}
Also used: HashMap (java.util.HashMap), TreeMap (java.util.TreeMap), Map (java.util.Map), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), Granularity (org.apache.druid.java.util.common.granularity.Granularity), AggregatorFactoryNotMergeableException (org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException), ColumnAnalysis (org.apache.druid.query.metadata.metadata.ColumnAnalysis), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), SegmentAnalysis (org.apache.druid.query.metadata.metadata.SegmentAnalysis), Interval (org.joda.time.Interval), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
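
The TimestampSpec.mergeTimestampSpec call mirrors the mergedId logic above: specs that agree merge to the shared value, while disagreement collapses to a neutral result. A minimal sketch, assuming conflicting specs merge to null (consistent with the null-tolerant fallbacks in this method, but not verified against every Druid version):

    TimestampSpec a = new TimestampSpec("ts", "auto", null);
    TimestampSpec b = new TimestampSpec("ts", "auto", null);
    TimestampSpec c = new TimestampSpec("created", "iso", null);
    TimestampSpec.mergeTimestampSpec(Lists.newArrayList(a, b)); // equal specs: returns the shared spec
    TimestampSpec.mergeTimestampSpec(Lists.newArrayList(a, c)); // conflicting specs: assumed to return null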

Example 99 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in the project druid by druid-io.

From the class SegmentMetadataQueryRunnerFactory, method createRunner.

@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
    return new QueryRunner<SegmentAnalysis>() {

        @Override
        public Sequence<SegmentAnalysis> run(QueryPlus<SegmentAnalysis> inQ, ResponseContext responseContext) {
            SegmentMetadataQuery updatedQuery = ((SegmentMetadataQuery) inQ.getQuery()).withFinalizedAnalysisTypes(toolChest.getConfig());
            final SegmentAnalyzer analyzer = new SegmentAnalyzer(updatedQuery.getAnalysisTypes());
            final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
            final long numRows = analyzer.numRows(segment);
            long totalSize = 0;
            if (analyzer.analyzingSize()) {
                // Initialize with the size of the whitespace: 1 byte per column per row
                totalSize = analyzedColumns.size() * numRows;
            }
            Map<String, ColumnAnalysis> columns = new TreeMap<>();
            ColumnIncluderator includerator = updatedQuery.getToInclude();
            for (Map.Entry<String, ColumnAnalysis> entry : analyzedColumns.entrySet()) {
                final String columnName = entry.getKey();
                final ColumnAnalysis column = entry.getValue();
                if (!column.isError()) {
                    totalSize += column.getSize();
                }
                if (includerator.include(columnName)) {
                    columns.put(columnName, column);
                }
            }
            List<Interval> retIntervals = updatedQuery.analyzingInterval() ? Collections.singletonList(segment.getDataInterval()) : null;
            final Map<String, AggregatorFactory> aggregators;
            Metadata metadata = null;
            if (updatedQuery.hasAggregators()) {
                metadata = segment.asStorageAdapter().getMetadata();
                if (metadata != null && metadata.getAggregators() != null) {
                    aggregators = new HashMap<>();
                    for (AggregatorFactory aggregator : metadata.getAggregators()) {
                        aggregators.put(aggregator.getName(), aggregator);
                    }
                } else {
                    aggregators = null;
                }
            } else {
                aggregators = null;
            }
            final TimestampSpec timestampSpec;
            if (updatedQuery.hasTimestampSpec()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                timestampSpec = metadata != null ? metadata.getTimestampSpec() : null;
            } else {
                timestampSpec = null;
            }
            final Granularity queryGranularity;
            if (updatedQuery.hasQueryGranularity()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                queryGranularity = metadata != null ? metadata.getQueryGranularity() : null;
            } else {
                queryGranularity = null;
            }
            Boolean rollup = null;
            if (updatedQuery.hasRollup()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                rollup = metadata != null ? metadata.isRollup() : null;
                if (rollup == null) {
                    // in this case, this segment is built before no-rollup function is coded,
                    // thus it is built with rollup
                    rollup = Boolean.TRUE;
                }
            }
            return Sequences.simple(
                Collections.singletonList(
                    new SegmentAnalysis(
                        segment.getId().toString(),
                        retIntervals,
                        columns,
                        totalSize,
                        numRows,
                        aggregators,
                        timestampSpec,
                        queryGranularity,
                        rollup
                    )
                )
            );
        }
    };
}
Also used: Metadata (org.apache.druid.segment.Metadata), TreeMap (java.util.TreeMap), HashMap (java.util.HashMap), Map (java.util.Map), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), Granularity (org.apache.druid.java.util.common.granularity.Granularity), ColumnIncluderator (org.apache.druid.query.metadata.metadata.ColumnIncluderator), ConcatQueryRunner (org.apache.druid.query.ConcatQueryRunner), QueryRunner (org.apache.druid.query.QueryRunner), SegmentMetadataQuery (org.apache.druid.query.metadata.metadata.SegmentMetadataQuery), ResponseContext (org.apache.druid.query.context.ResponseContext), ColumnAnalysis (org.apache.druid.query.metadata.metadata.ColumnAnalysis), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), SegmentAnalysis (org.apache.druid.query.metadata.metadata.SegmentAnalysis), QueryPlus (org.apache.druid.query.QueryPlus), Interval (org.joda.time.Interval)
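
For context, a query that exercises this runner can be assembled with the org.apache.druid.query.Druids helper. The sketch below follows the builder shape used in Druid's tests; treat the exact method names and the datasource name as illustrative rather than authoritative:

    SegmentMetadataQuery query = Druids.newSegmentMetadataQueryBuilder()
        .dataSource("testDatasource") // illustrative datasource name
        .intervals("2011-01-12/2011-01-15")
        .analysisTypes(
            SegmentMetadataQuery.AnalysisType.SIZE,
            SegmentMetadataQuery.AnalysisType.AGGREGATORS,
            SegmentMetadataQuery.AnalysisType.TIMESTAMPSPEC
        )
        .merge(true)
        .build();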

Example 100 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in the project druid by druid-io.

From the class GroupByQueryRunnerFactoryTest, method createSegment.

private Segment createSegment() throws Exception {
    IncrementalIndex incrementalIndex = new OnheapIncrementalIndex.Builder()
        .setSimpleTestingIndexSchema(new CountAggregatorFactory("count"))
        .setConcurrentEventAdd(true)
        .setMaxRowCount(5000)
        .build();
    StringInputRowParser parser = new StringInputRowParser(
        new CSVParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags"))),
            "\t",
            ImmutableList.of("timestamp", "product", "tags"),
            false,
            0
        ),
        "UTF-8"
    );
    String[] rows = new String[]{
        "2011-01-12T00:00:00.000Z,product_1,t1",
        "2011-01-13T00:00:00.000Z,product_2,t2",
        "2011-01-14T00:00:00.000Z,product_3,t2"
    };
    for (String row : rows) {
        incrementalIndex.add(parser.parse(row));
    }
    closerRule.closeLater(incrementalIndex);
    return new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("test"));
}
Also used: CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex), IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment), CSVParseSpec (org.apache.druid.data.input.impl.CSVParseSpec), StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)
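
One detail worth calling out: the "\t" passed to CSVParseSpec is the list delimiter for multi-value dimensions, not the column delimiter (CSV columns stay comma-separated). A hypothetical row with two tags, not part of the test data above, would parse like this:

    InputRow row = parser.parse("2011-01-15T00:00:00.000Z,product_4,t1\tt2");
    row.getDimension("tags"); // ["t1", "t2"]: the tab splits the multi-value dimension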

Aggregations

TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 154 usages
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 113 usages
Test (org.junit.Test): 110 usages
DataSchema (org.apache.druid.segment.indexing.DataSchema): 49 usages
InputRow (org.apache.druid.data.input.InputRow): 47 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 41 usages
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 39 usages
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 37 usages
InputEntityReader (org.apache.druid.data.input.InputEntityReader): 33 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 32 usages
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 30 usages
JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec): 29 usages
ArrayList (java.util.ArrayList): 28 usages
CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 28 usages
StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser): 27 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 27 usages
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 25 usages
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 21 usages
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 21 usages
ArbitraryGranularitySpec (org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec): 20 usages