use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class TestIndex method makeRealtimeIndex.
public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup, boolean bitmap) {
final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder().withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis()).withTimestampSpec(new TimestampSpec("ds", "auto", null)).withDimensionsSpec(bitmap ? DIMENSIONS_SPEC : DIMENSIONS_SPEC_NO_BITMAPS).withVirtualColumns(VIRTUAL_COLUMNS).withMetrics(METRIC_AGGS).withRollup(rollup).build();
final IncrementalIndex retVal = new OnheapIncrementalIndex.Builder().setIndexSchema(schema).setMaxRowCount(10000).build();
try {
return loadIncrementalIndex(retVal, source);
} catch (Exception e) {
if (rollup) {
realtimeIndex = null;
} else {
noRollupRealtimeIndex = null;
}
throw new RuntimeException(e);
}
}
use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class IndexGeneratorJob method makeIncrementalIndex.
private static IncrementalIndex makeIncrementalIndex(Bucket theBucket, AggregatorFactory[] aggs, HadoopDruidIndexerConfig config, Iterable<String> oldDimOrder, Map<String, ColumnCapabilities> oldCapabilities) {
final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder().withMinTimestamp(theBucket.time.getMillis()).withTimestampSpec(config.getSchema().getDataSchema().getTimestampSpec()).withDimensionsSpec(config.getSchema().getDataSchema().getDimensionsSpec()).withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity()).withMetrics(aggs).withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup()).build();
// Build the incremental-index according to the spec that was chosen by the user
IncrementalIndex newIndex = tuningConfig.getAppendableIndexSpec().builder().setIndexSchema(indexSchema).setMaxRowCount(tuningConfig.getMaxRowsInMemory()).setMaxBytesInMemory(tuningConfig.getMaxBytesInMemoryOrDefault()).build();
if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
}
return newIndex;
}
use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class IngestSegmentFirehoseFactoryTest method constructorFeeder.
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder() throws IOException {
final IndexSpec indexSpec = new IndexSpec();
final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder().withMinTimestamp(JodaUtils.MIN_INSTANT).withDimensionsSpec(ROW_PARSER).withMetrics(new LongSumAggregatorFactory(METRIC_LONG_NAME, DIM_LONG_NAME), new DoubleSumAggregatorFactory(METRIC_FLOAT_NAME, DIM_FLOAT_NAME)).build();
final IncrementalIndex index = new OnheapIncrementalIndex.Builder().setIndexSchema(schema).setMaxRowCount(MAX_ROWS * MAX_SHARD_NUMBER).build();
for (Integer i = 0; i < MAX_ROWS; ++i) {
index.add(ROW_PARSER.parseBatch(buildRow(i.longValue())).get(0));
}
FileUtils.mkdirp(PERSIST_DIR);
INDEX_MERGER_V9.persist(index, PERSIST_DIR, indexSpec, null);
final CoordinatorClient cc = new CoordinatorClient(null, null) {
@Override
public Collection<DataSegment> fetchUsedSegmentsInDataSourceForIntervals(String dataSource, List<Interval> intervals) {
return ImmutableSet.copyOf(SEGMENT_SET);
}
};
SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
EasyMock.replay(notifierFactory);
final SegmentCacheManagerFactory slf = new SegmentCacheManagerFactory(MAPPER);
final RetryPolicyFactory retryPolicyFactory = new RetryPolicyFactory(new RetryPolicyConfig());
Collection<Object[]> values = new ArrayList<>();
for (InputRowParser parser : Arrays.<InputRowParser>asList(ROW_PARSER, new MapInputRowParser(new JSONParseSpec(new TimestampSpec(TIME_COLUMN, "auto", null), DimensionsSpec.builder().setDimensionExclusions(ImmutableList.of(DIM_FLOAT_NAME, DIM_LONG_NAME)).build(), null, null, null)))) {
for (List<String> dim_names : Arrays.<List<String>>asList(null, ImmutableList.of(DIM_NAME))) {
for (List<String> metric_names : Arrays.<List<String>>asList(null, ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME))) {
for (Boolean wrapInCombining : Arrays.asList(false, true)) {
final IngestSegmentFirehoseFactory isfFactory = new IngestSegmentFirehoseFactory(TASK.getDataSource(), Intervals.ETERNITY, null, new SelectorDimFilter(DIM_NAME, DIM_VALUE, null), dim_names, metric_names, null, INDEX_IO, cc, slf, retryPolicyFactory);
final FirehoseFactory factory = wrapInCombining ? new CombiningFirehoseFactory(ImmutableList.of(isfFactory)) : isfFactory;
values.add(new Object[] { StringUtils.format("DimNames[%s]MetricNames[%s]ParserDimNames[%s]WrapInCombining[%s]", dim_names == null ? "null" : "dims", metric_names == null ? "null" : "metrics", parser == ROW_PARSER ? "dims" : "null", wrapInCombining), factory, parser });
}
}
}
}
return values;
}
use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class GroupByStrategyV1 method processSubqueryResult.
@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
final Set<AggregatorFactory> aggs = new HashSet<>();
// Nested group-bys work by first running the inner query and then materializing the results in an incremental
// index which the outer query is then run against. To build the incremental index, we use the fieldNames from
// the aggregators for the outer query to define the column names so that the index will match the query. If
// there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
// multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
// aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
// subsequent ones) and return an error if the aggregator types are different.
final Set<String> dimensionNames = new HashSet<>();
for (DimensionSpec dimension : subquery.getDimensions()) {
dimensionNames.add(dimension.getOutputName());
}
for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
if (dimensionNames.contains(transferAgg.getName())) {
// doesn't have this problem.
continue;
}
if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {
@Override
public boolean apply(AggregatorFactory agg) {
return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
}
})) {
throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
}
aggs.add(transferAgg);
}
}
// We need the inner incremental index to have all the columns required by the outer query
final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery).setAggregatorSpecs(ImmutableList.copyOf(aggs)).setInterval(subquery.getIntervals()).setPostAggregatorSpecs(new ArrayList<>()).build();
final GroupByQuery outerQuery = new GroupByQuery.Builder(query).setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec())).build();
final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)), subquery, configSupplier.get(), subqueryResult);
// Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
// is ensured by QuerySegmentSpec.
// GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
// and concatenate the results.
final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(outerQuery, null, configSupplier.get(), Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<ResultRow>>() {
@Override
public Sequence<ResultRow> apply(Interval interval) {
return process(outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))), new IncrementalIndexStorageAdapter(innerQueryResultIndex));
}
})));
innerQueryResultIndex.close();
return Sequences.withBaggage(outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class MapVirtualColumnGroupByTest method setup.
@Before
public void setup() throws IOException {
final IncrementalIndex incrementalIndex = MapVirtualColumnTestBase.generateIndex();
final GroupByStrategySelector strategySelector = new GroupByStrategySelector(GroupByQueryConfig::new, null, new GroupByStrategyV2(new DruidProcessingConfig() {
@Override
public String getFormatString() {
return null;
}
@Override
public int intermediateComputeSizeBytes() {
return 10 * 1024 * 1024;
}
@Override
public int getNumMergeBuffers() {
return 1;
}
@Override
public int getNumThreads() {
return 1;
}
}, GroupByQueryConfig::new, new StupidPool<>("map-virtual-column-groupby-test", () -> ByteBuffer.allocate(1024)), new DefaultBlockingPool<>(() -> ByteBuffer.allocate(1024), 1), new DefaultObjectMapper(), QueryRunnerTestHelper.NOOP_QUERYWATCHER));
final GroupByQueryRunnerFactory factory = new GroupByQueryRunnerFactory(strategySelector, new GroupByQueryQueryToolChest(strategySelector));
runner = QueryRunnerTestHelper.makeQueryRunner(factory, SegmentId.dummy("index"), new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("index")), "incremental");
}
Aggregations