
Example 51 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class TestIndex, method makeRealtimeIndex.

public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup, boolean bitmap) {
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
        .withTimestampSpec(new TimestampSpec("ds", "auto", null))
        .withDimensionsSpec(bitmap ? DIMENSIONS_SPEC : DIMENSIONS_SPEC_NO_BITMAPS)
        .withVirtualColumns(VIRTUAL_COLUMNS)
        .withMetrics(METRIC_AGGS)
        .withRollup(rollup)
        .build();
    final IncrementalIndex retVal = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setMaxRowCount(10000)
        .build();
    try {
        return loadIncrementalIndex(retVal, source);
    } catch (Exception e) {
        if (rollup) {
            realtimeIndex = null;
        } else {
            noRollupRealtimeIndex = null;
        }
        throw new RuntimeException(e);
    }
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) IOException(java.io.IOException)
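For context, a caller might exercise this helper along the following lines. This is a minimal sketch, not part of the Druid source: the single tab-separated row is a stand-in (the real TestIndex sample data has many more columns), and it assumes the helper is reachable as org.apache.druid.segment.TestIndex.

import com.google.common.io.CharSource;
import org.apache.druid.segment.TestIndex;
import org.apache.druid.segment.incremental.IncrementalIndex;

public class MakeRealtimeIndexDemo {
    public static void main(String[] args) throws Exception {
        // One TSV row keyed by the "ds" timestamp column; a stand-in for the
        // full sample data that TestIndex normally loads.
        CharSource source = CharSource.wrap("2011-01-12T00:00:00.000Z\tspot\tautomotive\t100.000000\n");
        // rollup = true, bitmap indexes enabled
        IncrementalIndex index = TestIndex.makeRealtimeIndex(source, true, true);
        System.out.println("rows ingested: " + index.size());
        index.close();
    }
}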

Example 52 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class IndexGeneratorJob, method makeIncrementalIndex.

private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilities> oldCapabilities
) {
    final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(theBucket.time.getMillis())
        .withTimestampSpec(config.getSchema().getDataSchema().getTimestampSpec())
        .withDimensionsSpec(config.getSchema().getDataSchema().getDimensionsSpec())
        .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
        .withMetrics(aggs)
        .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
        .build();
    // Build the incremental index according to the appendable-index spec chosen by the user
    IncrementalIndex newIndex = tuningConfig.getAppendableIndexSpec()
        .builder()
        .setIndexSchema(indexSchema)
        .setMaxRowCount(tuningConfig.getMaxRowsInMemory())
        .setMaxBytesInMemory(tuningConfig.getMaxBytesInMemoryOrDefault())
        .build();
    if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
        newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
    }
    return newIndex;
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema)
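The same builder pattern works standalone. Below is a minimal sketch, assuming the default on-heap appendable index; the timestamp, granularity, and memory limits are arbitrary values chosen for illustration:

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;

public class AppendableIndexSketch {
    public static IncrementalIndex build() {
        // Rollup index that buckets rows to the minute and counts them.
        IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
            .withMinTimestamp(DateTimes.of("2020-01-01").getMillis())
            .withQueryGranularity(Granularities.MINUTE)
            .withMetrics(new CountAggregatorFactory("count"))
            .withRollup(true)
            .build();
        // Bound the index by both row count and estimated in-memory bytes,
        // mirroring the two limits pulled from the tuning config above.
        return new OnheapIncrementalIndex.Builder()
            .setIndexSchema(schema)
            .setMaxRowCount(75_000)
            .setMaxBytesInMemory(50_000_000L)
            .build();
    }
}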

Example 53 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class IngestSegmentFirehoseFactoryTest, method constructorFeeder.

@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder() throws IOException {
    final IndexSpec indexSpec = new IndexSpec();
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(JodaUtils.MIN_INSTANT)
        .withDimensionsSpec(ROW_PARSER)
        .withMetrics(
            new LongSumAggregatorFactory(METRIC_LONG_NAME, DIM_LONG_NAME),
            new DoubleSumAggregatorFactory(METRIC_FLOAT_NAME, DIM_FLOAT_NAME)
        )
        .build();
    final IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setMaxRowCount(MAX_ROWS * MAX_SHARD_NUMBER)
        .build();
    for (Integer i = 0; i < MAX_ROWS; ++i) {
        index.add(ROW_PARSER.parseBatch(buildRow(i.longValue())).get(0));
    }
    FileUtils.mkdirp(PERSIST_DIR);
    INDEX_MERGER_V9.persist(index, PERSIST_DIR, indexSpec, null);
    final CoordinatorClient cc = new CoordinatorClient(null, null) {

        @Override
        public Collection<DataSegment> fetchUsedSegmentsInDataSourceForIntervals(String dataSource, List<Interval> intervals) {
            return ImmutableSet.copyOf(SEGMENT_SET);
        }
    };
    SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
    EasyMock.replay(notifierFactory);
    final SegmentCacheManagerFactory slf = new SegmentCacheManagerFactory(MAPPER);
    final RetryPolicyFactory retryPolicyFactory = new RetryPolicyFactory(new RetryPolicyConfig());
    Collection<Object[]> values = new ArrayList<>();
    for (InputRowParser parser : Arrays.<InputRowParser>asList(
        ROW_PARSER,
        new MapInputRowParser(new JSONParseSpec(
            new TimestampSpec(TIME_COLUMN, "auto", null),
            DimensionsSpec.builder().setDimensionExclusions(ImmutableList.of(DIM_FLOAT_NAME, DIM_LONG_NAME)).build(),
            null,
            null,
            null
        ))
    )) {
        for (List<String> dim_names : Arrays.<List<String>>asList(null, ImmutableList.of(DIM_NAME))) {
            for (List<String> metric_names : Arrays.<List<String>>asList(null, ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME))) {
                for (Boolean wrapInCombining : Arrays.asList(false, true)) {
                    final IngestSegmentFirehoseFactory isfFactory = new IngestSegmentFirehoseFactory(
                        TASK.getDataSource(),
                        Intervals.ETERNITY,
                        null,
                        new SelectorDimFilter(DIM_NAME, DIM_VALUE, null),
                        dim_names,
                        metric_names,
                        null,
                        INDEX_IO,
                        cc,
                        slf,
                        retryPolicyFactory
                    );
                    final FirehoseFactory factory = wrapInCombining
                        ? new CombiningFirehoseFactory(ImmutableList.of(isfFactory))
                        : isfFactory;
                    values.add(new Object[] {
                        StringUtils.format(
                            "DimNames[%s]MetricNames[%s]ParserDimNames[%s]WrapInCombining[%s]",
                            dim_names == null ? "null" : "dims",
                            metric_names == null ? "null" : "metrics",
                            parser == ROW_PARSER ? "dims" : "null",
                            wrapInCombining
                        ),
                        factory,
                        parser
                    });
                }
            }
        }
    }
    return values;
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) CombiningFirehoseFactory(org.apache.druid.segment.realtime.firehose.CombiningFirehoseFactory) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) RetryPolicyConfig(org.apache.druid.indexing.common.RetryPolicyConfig) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) InputRowParser(org.apache.druid.data.input.impl.InputRowParser)
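The feeder above enumerates every combination of parser, dimension list, metric list, and wrapping flag. The same cartesian-product pattern in miniature (class and parameter names here are illustrative, not from the Druid test):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parameterized.class)
public class CartesianFeederExample {

    // Every combination of the two axes becomes one named test case;
    // element {0} of each Object[] supplies the display name.
    @Parameterized.Parameters(name = "{0}")
    public static Collection<Object[]> feed() {
        Collection<Object[]> values = new ArrayList<>();
        for (String mode : Arrays.asList("rollup", "no-rollup")) {
            for (Boolean wrap : Arrays.asList(false, true)) {
                values.add(new Object[] { mode + "-wrap[" + wrap + "]", mode, wrap });
            }
        }
        return values;
    }

    private final String mode;
    private final boolean wrap;

    public CartesianFeederExample(String name, String mode, Boolean wrap) {
        this.mode = mode;
        this.wrap = wrap;
    }

    @Test
    public void runsForEveryCombination() {
        // 2 modes x 2 wrap flags = 4 generated cases.
    }
}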

Example 54 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class GroupByStrategyV1, method processSubqueryResult.

@Override
public Sequence<ResultRow> processSubqueryResult(
    GroupByQuery subquery,
    GroupByQuery query,
    GroupByQueryResource resource,
    Sequence<ResultRow> subqueryResult,
    boolean wasQueryPushedDown
) {
    final Set<AggregatorFactory> aggs = new HashSet<>();
    // Nested group-bys work by first running the inner query and then materializing the results in an incremental
    // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
    // the aggregators for the outer query to define the column names so that the index will match the query. If
    // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
    // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
    // subsequent ones) and return an error if the aggregator types are different.
    final Set<String> dimensionNames = new HashSet<>();
    for (DimensionSpec dimension : subquery.getDimensions()) {
        dimensionNames.add(dimension.getOutputName());
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
            if (dimensionNames.contains(transferAgg.getName())) {
                // The aggregator references a column that is already one of the subquery's
                // dimensions, so the index will have that column anyway; skip it.
                continue;
            }
            if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {

                @Override
                public boolean apply(AggregatorFactory agg) {
                    return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
                }
            })) {
                throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
            }
            aggs.add(transferAgg);
        }
    }
    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
        .setAggregatorSpecs(ImmutableList.copyOf(aggs))
        .setInterval(subquery.getIntervals())
        .setPostAggregatorSpecs(new ArrayList<>())
        .build();
    final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
        .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
        .build();
    final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)),
        subquery,
        configSupplier.get(),
        subqueryResult
    );
    // Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    // is ensured by QuerySegmentSpec.
    // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    // and concatenate the results.
    final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        outerQuery,
        null,
        configSupplier.get(),
        Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<ResultRow>>() {

        @Override
        public Sequence<ResultRow> apply(Interval interval) {
            return process(outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))), new IncrementalIndexStorageAdapter(innerQueryResultIndex));
        }
    })));
    innerQueryResultIndex.close();
    return Sequences.withBaggage(outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) ArrayList(java.util.ArrayList) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)
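The collision rule described in the comment above can be reproduced in isolation: two outer aggregators of different types may not materialize the same inner column. A stripped-down sketch (it swaps the original Guava Iterables.any for a stream, but the check is the same):

import java.util.HashSet;
import java.util.Set;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class TransferAggCollisionDemo {
    public static void main(String[] args) {
        Set<AggregatorFactory> aggs = new HashSet<>();
        // Same output name "x", but two different aggregator types.
        AggregatorFactory longSum = new LongSumAggregatorFactory("x", "x");
        AggregatorFactory doubleSum = new DoubleSumAggregatorFactory("x", "x");
        for (AggregatorFactory transferAgg : new AggregatorFactory[] { longSum, doubleSum }) {
            boolean conflicting = aggs.stream().anyMatch(
                agg -> agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg));
            if (conflicting) {
                // The second factory trips this, just as GroupByStrategyV1 would.
                throw new IAE("Inner aggregator can currently only be referenced by a single type"
                    + " of outer aggregator for '%s'", transferAgg.getName());
            }
            aggs.add(transferAgg);
        }
    }
}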

Example 55 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class MapVirtualColumnGroupByTest, method setup.

@Before
public void setup() throws IOException {
    final IncrementalIndex incrementalIndex = MapVirtualColumnTestBase.generateIndex();
    final GroupByStrategySelector strategySelector = new GroupByStrategySelector(
        GroupByQueryConfig::new,
        null,
        new GroupByStrategyV2(new DruidProcessingConfig() {

        @Override
        public String getFormatString() {
            return null;
        }

        @Override
        public int intermediateComputeSizeBytes() {
            return 10 * 1024 * 1024;
        }

        @Override
        public int getNumMergeBuffers() {
            return 1;
        }

        @Override
        public int getNumThreads() {
            return 1;
        }
        },
        GroupByQueryConfig::new,
        new StupidPool<>("map-virtual-column-groupby-test", () -> ByteBuffer.allocate(1024)),
        new DefaultBlockingPool<>(() -> ByteBuffer.allocate(1024), 1),
        new DefaultObjectMapper(),
        QueryRunnerTestHelper.NOOP_QUERYWATCHER
    ));
    final GroupByQueryRunnerFactory factory = new GroupByQueryRunnerFactory(strategySelector, new GroupByQueryQueryToolChest(strategySelector));
    runner = QueryRunnerTestHelper.makeQueryRunner(
        factory,
        SegmentId.dummy("index"),
        new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("index")),
        "incremental"
    );
}
Also used : GroupByStrategySelector(org.apache.druid.query.groupby.strategy.GroupByStrategySelector) GroupByQueryRunnerFactory(org.apache.druid.query.groupby.GroupByQueryRunnerFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) GroupByStrategyV2(org.apache.druid.query.groupby.strategy.GroupByStrategyV2) DefaultBlockingPool(org.apache.druid.collections.DefaultBlockingPool) StupidPool(org.apache.druid.collections.StupidPool) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) Before(org.junit.Before)
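With the runner wired up, a test method can then issue a groupBy query against the in-memory segment. A hedged sketch of such a follow-up test, meant to sit in the same class as the setup method above: the dimension name "dim" is an assumption, since the actual columns depend on MapVirtualColumnTestBase.generateIndex().

// Hypothetical follow-up test for the same class; "dim" is an assumed column name.
@Test
public void testGroupByOverIncrementalIndexSegment() {
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource("index")
        .setInterval(Intervals.ETERNITY)
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("dim", "dim"))
        .setAggregatorSpecs(new CountAggregatorFactory("count"))
        .build();
    List<ResultRow> results = runner.run(QueryPlus.wrap(query)).toList();
    Assert.assertFalse(results.isEmpty());
}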

Aggregations

IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex)109 OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex)85 File (java.io.File)59 Test (org.junit.Test)51 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)46 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)46 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)26 IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest)26 ArrayList (java.util.ArrayList)25 IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema)25 IndexSpec (org.apache.druid.segment.IndexSpec)19 QueryableIndex (org.apache.druid.segment.QueryableIndex)19 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)17 InputRow (org.apache.druid.data.input.InputRow)15 IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment)14 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)12 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)11 IOException (java.io.IOException)10 Before (org.junit.Before)10 Interval (org.joda.time.Interval)9