Use of io.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class IndexMergerTest, method testNoRollupMergeWithoutDuplicateRow:
@Test
public void testNoRollupMergeWithoutDuplicateRow() throws Exception {
  // (d1, d2, d3) from only one index, and their dim values are ('empty', 'has null', 'no null')
  // (d4, d5, d6, d7, d8, d9) are from both indexes
  // d4: 'empty' join 'empty'
  // d5: 'empty' join 'has null'
  // d6: 'empty' join 'no null'
  // d7: 'has null' join 'has null'
  // d8: 'has null' join 'no null'
  // d9: 'no null' join 'no null'
  IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(0L)
      .withQueryGranularity(Granularities.NONE)
      .withMetrics(new AggregatorFactory[] { new CountAggregatorFactory("count") })
      .withRollup(false)
      .build();
  IncrementalIndex toPersistA = new OnheapIncrementalIndex(indexSchema, true, 1000);
  toPersistA.add(new MapBasedInputRow(
      1,
      Arrays.asList("d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9"),
      ImmutableMap.<String, Object>of("d1", "", "d2", "", "d3", "310", "d7", "", "d9", "910")
  ));
  toPersistA.add(new MapBasedInputRow(
      2,
      Arrays.asList("d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9"),
      ImmutableMap.<String, Object>of("d2", "210", "d3", "311", "d7", "710", "d8", "810", "d9", "911")
  ));
  IncrementalIndex toPersistB = new OnheapIncrementalIndex(indexSchema, true, 1000);
  toPersistB.add(new MapBasedInputRow(
      3,
      Arrays.asList("d4", "d5", "d6", "d7", "d8", "d9"),
      ImmutableMap.<String, Object>of("d5", "520", "d6", "620", "d7", "720", "d8", "820", "d9", "920")
  ));
  toPersistB.add(new MapBasedInputRow(
      4,
      Arrays.asList("d4", "d5", "d6", "d7", "d8", "d9"),
      ImmutableMap.<String, Object>of("d5", "", "d6", "621", "d7", "", "d8", "821", "d9", "921")
  ));
  final File tmpDirA = temporaryFolder.newFolder();
  final File tmpDirB = temporaryFolder.newFolder();
  final File tmpDirMerged = temporaryFolder.newFolder();
  QueryableIndex indexA = closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(toPersistA, tmpDirA, indexSpec)));
  QueryableIndex indexB = closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(toPersistB, tmpDirB, indexSpec)));
  final QueryableIndex merged = closer.closeLater(
      INDEX_IO.loadIndex(
          INDEX_MERGER.mergeQueryableIndex(
              Arrays.asList(indexA, indexB),
              true,
              new AggregatorFactory[] { new CountAggregatorFactory("count") },
              tmpDirMerged,
              indexSpec
          )
      )
  );
  final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
  final List<Rowboat> boatList = ImmutableList.copyOf(adapter.getRows());
  Assert.assertEquals(ImmutableList.of("d2", "d3", "d5", "d6", "d7", "d8", "d9"), ImmutableList.copyOf(adapter.getDimensionNames()));
  Assert.assertEquals(4, boatList.size());
  Assert.assertArrayEquals(new int[][] { { 0 }, { 1 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 } }, boatList.get(0).getDims());
  Assert.assertArrayEquals(new int[][] { { 1 }, { 2 }, { 0 }, { 0 }, { 1 }, { 1 }, { 1 } }, boatList.get(1).getDims());
  Assert.assertArrayEquals(new int[][] { { 0 }, { 0 }, { 1 }, { 1 }, { 2 }, { 2 }, { 2 } }, boatList.get(2).getDims());
  Assert.assertArrayEquals(new int[][] { { 0 }, { 0 }, { 0 }, { 2 }, { 0 }, { 3 }, { 3 } }, boatList.get(3).getDims());
  checkBitmapIndex(Lists.newArrayList(0, 2, 3), adapter.getBitmapIndex("d2", ""));
  checkBitmapIndex(Lists.newArrayList(1), adapter.getBitmapIndex("d2", "210"));
  checkBitmapIndex(Lists.newArrayList(2, 3), adapter.getBitmapIndex("d3", ""));
  checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("d3", "310"));
  checkBitmapIndex(Lists.newArrayList(1), adapter.getBitmapIndex("d3", "311"));
  checkBitmapIndex(Lists.newArrayList(0, 1, 3), adapter.getBitmapIndex("d5", ""));
  checkBitmapIndex(Lists.newArrayList(2), adapter.getBitmapIndex("d5", "520"));
  checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("d6", ""));
  checkBitmapIndex(Lists.newArrayList(2), adapter.getBitmapIndex("d6", "620"));
  checkBitmapIndex(Lists.newArrayList(3), adapter.getBitmapIndex("d6", "621"));
  checkBitmapIndex(Lists.newArrayList(0, 3), adapter.getBitmapIndex("d7", ""));
  checkBitmapIndex(Lists.newArrayList(1), adapter.getBitmapIndex("d7", "710"));
  checkBitmapIndex(Lists.newArrayList(2), adapter.getBitmapIndex("d7", "720"));
  checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("d8", ""));
  checkBitmapIndex(Lists.newArrayList(1), adapter.getBitmapIndex("d8", "810"));
  checkBitmapIndex(Lists.newArrayList(2), adapter.getBitmapIndex("d8", "820"));
  checkBitmapIndex(Lists.newArrayList(3), adapter.getBitmapIndex("d8", "821"));
  checkBitmapIndex(new ArrayList<Integer>(), adapter.getBitmapIndex("d9", ""));
  checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("d9", "910"));
  checkBitmapIndex(Lists.newArrayList(1), adapter.getBitmapIndex("d9", "911"));
  checkBitmapIndex(Lists.newArrayList(2), adapter.getBitmapIndex("d9", "920"));
  checkBitmapIndex(Lists.newArrayList(3), adapter.getBitmapIndex("d9", "921"));
}
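Stripped of the merge and bitmap assertions, the IncrementalIndexSchema usage this test relies on reduces to a few calls. The following is a minimal sketch against the same io.druid 0.9.x-era APIs shown above (imports are omitted as in the excerpts, and the column name is illustrative), not code taken from the project:

// A schema with rollup disabled keeps every ingested row instead of aggregating duplicates.
IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
    .withMinTimestamp(0L)
    .withQueryGranularity(Granularities.NONE)
    .withMetrics(new AggregatorFactory[] { new CountAggregatorFactory("count") })
    .withRollup(false)
    .build();
// Back the schema with an on-heap index capped at 1000 rows; the boolean enables reporting of parse exceptions.
IncrementalIndex index = new OnheapIncrementalIndex(schema, true, 1000);
// add() throws a checked IndexSizeExceededException, so the enclosing method must declare or handle it,
// just as the test method above declares 'throws Exception'.
index.add(new MapBasedInputRow(
    1,
    Arrays.asList("dim"),
    ImmutableMap.<String, Object>of("dim", "value")
));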
Use of io.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class IndexGeneratorJob, method makeIncrementalIndex:
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
) {
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
  // Derive the schema from the ingestion spec: timestamps, dimensions, query granularity, metrics, and rollup.
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();
  OnheapIncrementalIndex newIndex = new OnheapIncrementalIndex(
      indexSchema,
      !tuningConfig.isIgnoreInvalidRows(),
      tuningConfig.getRowFlushBoundary()
  );
  // Unless dimensions were configured explicitly, carry the previous index's dimension order forward.
  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }
  return newIndex;
}
Use of io.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class Sink, method makeNewCurrIndex:
private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) {
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(minTimestamp)
      .withTimestampSpec(schema.getParser())
      .withQueryGranularity(schema.getGranularitySpec().getQueryGranularity())
      .withDimensionsSpec(schema.getParser())
      .withMetrics(schema.getAggregators())
      .withRollup(schema.getGranularitySpec().isRollup())
      .build();
  final IncrementalIndex newIndex = new OnheapIncrementalIndex(indexSchema, reportParseExceptions, maxRowsInMemory);
  final FireHydrant old;
  synchronized (hydrantLock) {
    if (writable) {
      old = currHydrant;
      int newCount = 0;
      int numHydrants = hydrants.size();
      if (numHydrants > 0) {
        FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
        newCount = lastHydrant.getCount() + 1;
        if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
          // Seed the new index with the last hydrant's dimension order and column capabilities
          // so column ordering stays stable across hydrants.
          Map<String, ColumnCapabilitiesImpl> oldCapabilities;
          if (lastHydrant.hasSwapped()) {
            oldCapabilities = Maps.newHashMap();
            QueryableIndex oldIndex = lastHydrant.getSegment().asQueryableIndex();
            for (String dim : oldIndex.getAvailableDimensions()) {
              dimOrder.add(dim);
              oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumn(dim).getCapabilities());
            }
          } else {
            IncrementalIndex oldIndex = lastHydrant.getIndex();
            dimOrder.addAll(oldIndex.getDimensionOrder());
            oldCapabilities = oldIndex.getColumnCapabilities();
          }
          newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
        }
      }
      currHydrant = new FireHydrant(newIndex, newCount, getSegment().getIdentifier());
      if (old != null) {
        numRowsExcludingCurrIndex.addAndGet(old.getIndex().size());
      }
      hydrants.add(currHydrant);
    } else {
      // Oops, someone called finishWriting while we were making this new index.
      newIndex.close();
      throw new ISE("finishWriting() called during swap");
    }
  }
  return old;
}
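Both makeIncrementalIndex above and makeNewCurrIndex reuse the same continuity trick: when no explicit dimensions are configured, the dimension order and column capabilities of the previous index are loaded into the fresh one so that column ordering stays stable across index generations. A minimal sketch of that hand-off, assuming the same APIs as in the excerpts and using illustrative variable names (oldIndex, indexSchema, maxRowsInMemory):

// Capture the ordering and per-column capabilities from the index that is being retired...
List<String> dimOrder = Lists.newArrayList(oldIndex.getDimensionOrder());
Map<String, ColumnCapabilitiesImpl> oldCapabilities = oldIndex.getColumnCapabilities();
// ...and seed the replacement index with them before any rows are added.
IncrementalIndex newIndex = new OnheapIncrementalIndex(indexSchema, true, maxRowsInMemory);
if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
  newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
}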
Use of io.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTest, method constructorFeeder:
@Parameterized.Parameters(name = "{1}")
public static Collection<Object[]> constructorFeeder() throws IOException {
  final IndexSpec indexSpec = new IndexSpec();
  final HeapMemoryTaskStorage ts = new HeapMemoryTaskStorage(new TaskStorageConfig(null) {});
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withQueryGranularity(Granularities.NONE)
      .withMinTimestamp(JodaUtils.MIN_INSTANT)
      .withDimensionsSpec(ROW_PARSER)
      .withMetrics(new AggregatorFactory[] {
          new LongSumAggregatorFactory(METRIC_LONG_NAME, DIM_LONG_NAME),
          new DoubleSumAggregatorFactory(METRIC_FLOAT_NAME, DIM_FLOAT_NAME)
      })
      .build();
  final OnheapIncrementalIndex index = new OnheapIncrementalIndex(schema, true, MAX_ROWS * MAX_SHARD_NUMBER);
  for (Integer i = 0; i < MAX_ROWS; ++i) {
    index.add(ROW_PARSER.parse(buildRow(i.longValue())));
  }
  if (!persistDir.mkdirs() && !persistDir.exists()) {
    throw new IOException(String.format("Could not create directory at [%s]", persistDir.getAbsolutePath()));
  }
  INDEX_MERGER.persist(index, persistDir, indexSpec);
  final TaskLockbox tl = new TaskLockbox(ts);
  final IndexerSQLMetadataStorageCoordinator mdc = new IndexerSQLMetadataStorageCoordinator(null, null, null) {
    private final Set<DataSegment> published = Sets.newHashSet();
    private final Set<DataSegment> nuked = Sets.newHashSet();

    @Override
    public List<DataSegment> getUsedSegmentsForInterval(String dataSource, Interval interval) throws IOException {
      return ImmutableList.copyOf(segmentSet);
    }

    @Override
    public List<DataSegment> getUsedSegmentsForIntervals(String dataSource, List<Interval> interval) throws IOException {
      return ImmutableList.copyOf(segmentSet);
    }

    @Override
    public List<DataSegment> getUnusedSegmentsForInterval(String dataSource, Interval interval) {
      return ImmutableList.of();
    }

    @Override
    public Set<DataSegment> announceHistoricalSegments(Set<DataSegment> segments) {
      Set<DataSegment> added = Sets.newHashSet();
      for (final DataSegment segment : segments) {
        if (published.add(segment)) {
          added.add(segment);
        }
      }
      return ImmutableSet.copyOf(added);
    }

    @Override
    public void deleteSegments(Set<DataSegment> segments) {
      nuked.addAll(segments);
    }
  };
  final LocalTaskActionClientFactory tac = new LocalTaskActionClientFactory(
      ts,
      new TaskActionToolbox(tl, mdc, newMockEmitter(), EasyMock.createMock(SupervisorManager.class))
  );
  SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
  EasyMock.replay(notifierFactory);
  final TaskToolboxFactory taskToolboxFactory = new TaskToolboxFactory(
      new TaskConfig(tmpDir.getAbsolutePath(), null, null, 50000, null, false, null, null),
      tac,
      newMockEmitter(),
      new DataSegmentPusher() {
        @Deprecated
        @Override
        public String getPathForHadoop(String dataSource) {
          return getPathForHadoop();
        }

        @Override
        public String getPathForHadoop() {
          throw new UnsupportedOperationException();
        }

        @Override
        public DataSegment push(File file, DataSegment segment) throws IOException {
          return segment;
        }
      },
      new DataSegmentKiller() {
        @Override
        public void kill(DataSegment segments) throws SegmentLoadingException {
        }

        @Override
        public void killAll() throws IOException {
          throw new UnsupportedOperationException("not implemented");
        }
      },
      new DataSegmentMover() {
        @Override
        public DataSegment move(DataSegment dataSegment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException {
          return dataSegment;
        }
      },
      new DataSegmentArchiver() {
        @Override
        public DataSegment archive(DataSegment segment) throws SegmentLoadingException {
          return segment;
        }

        @Override
        public DataSegment restore(DataSegment segment) throws SegmentLoadingException {
          return segment;
        }
      },
      // segment announcer
      null,
      notifierFactory,
      // query runner factory conglomerate corporation unionized collective
      null,
      // query executor service
      null,
      // monitor scheduler
      null,
      new SegmentLoaderFactory(
          new SegmentLoaderLocalCacheManager(
              null,
              new SegmentLoaderConfig() {
                @Override
                public List<StorageLocationConfig> getLocations() {
                  return Lists.newArrayList();
                }
              },
              MAPPER
          )
      ),
      MAPPER,
      INDEX_MERGER,
      INDEX_IO,
      null,
      null,
      INDEX_MERGER_V9
  );
  Collection<Object[]> values = new LinkedList<>();
  for (InputRowParser parser : Arrays.<InputRowParser>asList(
      ROW_PARSER,
      new MapInputRowParser(
          new JSONParseSpec(
              new TimestampSpec(TIME_COLUMN, "auto", null),
              new DimensionsSpec(
                  DimensionsSpec.getDefaultSchemas(ImmutableList.<String>of()),
                  ImmutableList.of(DIM_FLOAT_NAME, DIM_LONG_NAME),
                  ImmutableList.<SpatialDimensionSchema>of()
              ),
              null,
              null
          )
      )
  )) {
    for (List<String> dim_names : Arrays.<List<String>>asList(null, ImmutableList.of(DIM_NAME))) {
      for (List<String> metric_names : Arrays.<List<String>>asList(null, ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME))) {
        values.add(new Object[] {
            new IngestSegmentFirehoseFactory(
                DATA_SOURCE_NAME,
                FOREVER,
                new SelectorDimFilter(DIM_NAME, DIM_VALUE, null),
                dim_names,
                metric_names,
                Guice.createInjector(new Module() {
                  @Override
                  public void configure(Binder binder) {
                    binder.bind(TaskToolboxFactory.class).toInstance(taskToolboxFactory);
                  }
                }),
                INDEX_IO
            ),
            String.format(
                "DimNames[%s]MetricNames[%s]ParserDimNames[%s]",
                dim_names == null ? "null" : "dims",
                metric_names == null ? "null" : "metrics",
                parser == ROW_PARSER ? "dims" : "null"
            ),
            parser
        });
      }
    }
  }
  return values;
}
Use of io.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTimelineTest, method persist:
private static Map<String, Object> persist(File tmpDir, InputRow... rows) {
  final File persistDir = new File(tmpDir, UUID.randomUUID().toString());
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withQueryGranularity(Granularities.NONE)
      .withMinTimestamp(JodaUtils.MIN_INSTANT)
      .withDimensionsSpec(ROW_PARSER)
      .withMetrics(new AggregatorFactory[] { new LongSumAggregatorFactory(METRICS[0], METRICS[0]) })
      .build();
  final OnheapIncrementalIndex index = new OnheapIncrementalIndex(schema, true, rows.length);
  for (InputRow row : rows) {
    try {
      index.add(row);
    } catch (IndexSizeExceededException e) {
      throw Throwables.propagate(e);
    }
  }
  try {
    INDEX_MERGER.persist(index, persistDir, new IndexSpec());
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
  return ImmutableMap.<String, Object>of("type", "local", "path", persistDir.getAbsolutePath());
}
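The map returned by persist is a "local" loadSpec. The surrounding test wires such maps into segment descriptors; the following is only a rough, hedged sketch of that wiring, not an excerpt from the test. The DataSegment.builder() fields, the interval and version strings, and someInputRow are all illustrative assumptions:

// Persist a row and describe the result as a locally loadable segment (illustrative values throughout).
Map<String, Object> loadSpec = persist(tmpDir, someInputRow);
DataSegment segment = DataSegment.builder()
    .dataSource("test_datasource")
    .interval(new Interval("2000/2001"))
    .version("v1")
    .loadSpec(loadSpec)
    .size(0)
    .build();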