Use of io.druid.segment.IndexMerger in project druid by druid-io.
From class AggregationTestHelper, method createGroupByQueryAggregationTestHelper.
public static final AggregationTestHelper createGroupByQueryAggregationTestHelper(
    List<? extends Module> jsonModulesToRegister, GroupByQueryConfig config, TemporaryFolder tempFolder)
{
  ObjectMapper mapper = new DefaultObjectMapper();
  GroupByQueryRunnerFactory factory = GroupByQueryRunnerTest.makeQueryRunnerFactory(mapper, config);
  // IndexIO with the column cache disabled (size zero).
  IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
    @Override
    public int columnCacheSizeBytes() {
      return 0;
    }
  });
  return new AggregationTestHelper(
      mapper, new IndexMerger(mapper, indexIO), indexIO, factory.getToolchest(), factory, tempFolder, jsonModulesToRegister);
}
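For orientation, here is a minimal sketch of how a test might call this factory. The empty module list, the default GroupByQueryConfig, and the JUnit @Rule wiring are assumptions, not part of the source above:

// Hypothetical JUnit test setup (not from the Druid source).
@Rule
public final TemporaryFolder tempFolder = new TemporaryFolder();

AggregationTestHelper helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
    Collections.<Module>emptyList(),  // no extra Jackson modules (assumption)
    new GroupByQueryConfig(),         // default group-by config (assumption)
    tempFolder);
// helper can now build segments on disk and run group-by queries against them.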
Use of io.druid.segment.IndexMerger in project druid by druid-io.
From class AggregationTestHelper, method createTopNQueryAggregationTestHelper.
public static final AggregationTestHelper createTopNQueryAggregationTestHelper(
    List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder)
{
  ObjectMapper mapper = new DefaultObjectMapper();
  TopNQueryQueryToolChest toolchest = new TopNQueryQueryToolChest(
      new TopNQueryConfig(), QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator());
  // Factory backed by a pool of 10 MiB heap buffers for TopN processing.
  TopNQueryRunnerFactory factory = new TopNQueryRunnerFactory(
      new StupidPool<>("TopNQueryRunnerFactory-bufferPool", new Supplier<ByteBuffer>() {
        @Override
        public ByteBuffer get() {
          return ByteBuffer.allocate(10 * 1024 * 1024);
        }
      }),
      toolchest,
      QueryRunnerTestHelper.NOOP_QUERYWATCHER);
  // IndexIO with the column cache disabled (size zero).
  IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
    @Override
    public int columnCacheSizeBytes() {
      return 0;
    }
  });
  return new AggregationTestHelper(
      mapper, new IndexMerger(mapper, indexIO), indexIO, toolchest, factory, tempFolder, jsonModulesToRegister);
}
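A similar hedged usage sketch, again assuming an empty module list and a JUnit-managed TemporaryFolder:

AggregationTestHelper helper = AggregationTestHelper.createTopNQueryAggregationTestHelper(
    Collections.<Module>emptyList(), tempFolder);
// Each pooled buffer above is ByteBuffer.allocate(10 * 1024 * 1024), i.e. 10 MiB
// on the heap, which bounds TopN processing memory in tests.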
Use of io.druid.segment.IndexMerger in project druid by druid-io.
From class YeOldePlumberSchool, method findPlumber.
@Override
public Plumber findPlumber(final DataSchema schema, final RealtimeTuningConfig config, final FireDepartmentMetrics metrics)
{
  // There can be only one.
  final Sink theSink = new Sink(
      interval, schema, config.getShardSpec(), version, config.getMaxRowsInMemory(), config.isReportParseExceptions());

  // Temporary directory to hold spilled segments.
  final File persistDir = new File(tmpSegmentDir, theSink.getSegment().getIdentifier());

  // Set of spilled segments. Will be merged at the end.
  final Set<File> spilled = Sets.newHashSet();

  // IndexMerger implementation.
  final IndexMerger theIndexMerger = config.getBuildV9Directly() ? indexMergerV9 : indexMerger;

  return new Plumber() {
    @Override
    public Object startJob() {
      return null;
    }

    @Override
    public int add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
      Sink sink = getSink(row.getTimestampFromEpoch());
      if (sink == null) {
        return -1;
      }
      final int numRows = sink.add(row);
      if (!sink.canAppendRow()) {
        persist(committerSupplier.get());
      }
      return numRows;
    }

    private Sink getSink(long timestamp) {
      if (theSink.getInterval().contains(timestamp)) {
        return theSink;
      } else {
        return null;
      }
    }

    @Override
    public <T> QueryRunner<T> getQueryRunner(Query<T> query) {
      throw new UnsupportedOperationException("Don't query me, bro.");
    }

    @Override
    public void persist(Committer committer) {
      spillIfSwappable();
      committer.run();
    }

    @Override
    public void finishJob() {
      // The segment we will upload.
      File fileToUpload = null;
      try {
        // User should have persisted everything by now.
        Preconditions.checkState(!theSink.swappable(), "All data must be persisted before finishing the job!");
        if (spilled.size() == 0) {
          throw new IllegalStateException("Nothing indexed?");
        } else if (spilled.size() == 1) {
          fileToUpload = Iterables.getOnlyElement(spilled);
        } else {
          List<QueryableIndex> indexes = Lists.newArrayList();
          for (final File oneSpill : spilled) {
            indexes.add(indexIO.loadIndex(oneSpill));
          }
          fileToUpload = new File(tmpSegmentDir, "merged");
          theIndexMerger.mergeQueryableIndex(
              indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), fileToUpload, config.getIndexSpec());
        }

        // Map the merged segment so we can extract dimensions.
        final QueryableIndex mappedSegment = indexIO.loadIndex(fileToUpload);
        final DataSegment segmentToUpload = theSink.getSegment()
            .withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions()))
            .withBinaryVersion(SegmentUtils.getVersionFromDir(fileToUpload));
        dataSegmentPusher.push(fileToUpload, segmentToUpload);
        log.info("Uploaded segment[%s]", segmentToUpload.getIdentifier());
      } catch (Exception e) {
        log.warn(e, "Failed to merge and upload");
        throw Throwables.propagate(e);
      } finally {
        try {
          if (fileToUpload != null) {
            log.info("Deleting Index File[%s]", fileToUpload);
            FileUtils.deleteDirectory(fileToUpload);
          }
        } catch (IOException e) {
          log.warn(e, "Error deleting directory[%s]", fileToUpload);
        }
      }
    }

    private void spillIfSwappable() {
      if (theSink.swappable()) {
        final FireHydrant indexToPersist = theSink.swap();
        final int rowsToPersist = indexToPersist.getIndex().size();
        final File dirToPersist = getSpillDir(indexToPersist.getCount());
        log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist);
        try {
          theIndexMerger.persist(indexToPersist.getIndex(), dirToPersist, config.getIndexSpec());
          indexToPersist.swapSegment(null);
          metrics.incrementRowOutputCount(rowsToPersist);
          spilled.add(dirToPersist);
        } catch (Exception e) {
          log.warn(e, "Failed to spill index[%d]", indexToPersist.getCount());
          throw Throwables.propagate(e);
        }
      }
    }

    private File getSpillDir(final int n) {
      return new File(persistDir, String.format("spill%d", n));
    }
  };
}
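To make the returned Plumber's contract concrete, here is a hedged sketch of the lifecycle a caller would drive; plumberSchool, schema, tuningConfig, metrics, rows, and committerSupplier are hypothetical variables, not names from the source above:

Plumber plumber = plumberSchool.findPlumber(schema, tuningConfig, metrics);
plumber.startJob();
for (InputRow row : rows) {
  // add() returns -1 for rows outside the sink's interval; otherwise it may
  // trigger an intermediate persist() once the sink can no longer append.
  plumber.add(row, committerSupplier);
}
plumber.persist(committerSupplier.get());  // spill any remaining in-memory index
plumber.finishJob();                       // merge spills, push the segment, clean up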
Use of io.druid.segment.IndexMerger in project druid by druid-io.
From class AggregationTestHelper, method createSelectQueryAggregationTestHelper.
public static final AggregationTestHelper createSelectQueryAggregationTestHelper(
    List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder)
{
  ObjectMapper mapper = new DefaultObjectMapper();
  mapper.setInjectableValues(new InjectableValues.Std().addValue(SelectQueryConfig.class, new SelectQueryConfig(true)));
  Supplier<SelectQueryConfig> configSupplier = Suppliers.ofInstance(new SelectQueryConfig(true));
  SelectQueryQueryToolChest toolchest = new SelectQueryQueryToolChest(
      new DefaultObjectMapper(), QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator(), configSupplier);
  // Reuse the toolchest built above instead of constructing a second, identical one.
  SelectQueryRunnerFactory factory = new SelectQueryRunnerFactory(
      toolchest, new SelectQueryEngine(configSupplier), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
  // IndexIO with the column cache disabled (size zero).
  IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
    @Override
    public int columnCacheSizeBytes() {
      return 0;
    }
  });
  return new AggregationTestHelper(
      mapper, new IndexMerger(mapper, indexIO), indexIO, toolchest, factory, tempFolder, jsonModulesToRegister);
}
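Usage follows the same pattern as the other factories; the empty module list is again an assumed placeholder:

AggregationTestHelper helper = AggregationTestHelper.createSelectQueryAggregationTestHelper(
    Collections.<Module>emptyList(), tempFolder);
// The helper's mapper can deserialize select queries because SelectQueryConfig
// was registered as an injectable value in the factory above.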
Use of io.druid.segment.IndexMerger in project druid by druid-io.
From class AggregationTestHelper, method createTimeseriesQueryAggregationTestHelper.
public static final AggregationTestHelper createTimeseriesQueryAggregationTestHelper(
    List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder)
{
  ObjectMapper mapper = new DefaultObjectMapper();
  TimeseriesQueryQueryToolChest toolchest = new TimeseriesQueryQueryToolChest(
      QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator());
  TimeseriesQueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(
      toolchest, new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
  // IndexIO with the column cache disabled (size zero).
  IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
    @Override
    public int columnCacheSizeBytes() {
      return 0;
    }
  });
  return new AggregationTestHelper(
      mapper, new IndexMerger(mapper, indexIO), indexIO, toolchest, factory, tempFolder, jsonModulesToRegister);
}
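One final sketch: all four factories expose the same helper surface and share the IndexIO/IndexMerger wiring, differing only in the wired-in factory and toolchest, so a timeseries test is set up the same way (module list again assumed empty):

AggregationTestHelper helper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper(
    Collections.<Module>emptyList(), tempFolder);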