use of io.druid.segment.QueryableIndex in project druid by druid-io.
the class SqlBenchmark method setup.
@Setup(Level.Trial)
public void setup() throws Exception {
tmpDir = Files.createTempDir();
log.info("Starting benchmark setup using tmpDir[%s], rows[%,d].", tmpDir, rowsPerSegment);
if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
}
final BenchmarkSchemaInfo schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic");
final BenchmarkDataGenerator dataGenerator = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + 1, schemaInfo.getDataInterval(), rowsPerSegment);
final List<InputRow> rows = Lists.newArrayList();
for (int i = 0; i < rowsPerSegment; i++) {
final InputRow row = dataGenerator.nextRow();
if (i % 20000 == 0) {
log.info("%,d/%,d rows generated.", i, rowsPerSegment);
}
rows.add(row);
}
log.info("%,d/%,d rows generated.", rows.size(), rowsPerSegment);
final PlannerConfig plannerConfig = new PlannerConfig();
final QueryRunnerFactoryConglomerate conglomerate = CalciteTests.queryRunnerFactoryConglomerate();
final QueryableIndex index = IndexBuilder.create().tmpDir(new File(tmpDir, "1")).indexMerger(TestHelper.getTestIndexMergerV9()).rows(rows).buildMMappedIndex();
this.walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add(DataSegment.builder().dataSource("foo").interval(index.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).build(), index);
final Map<String, Table> tableMap = ImmutableMap.<String, Table>of("foo", new DruidTable(new TableDataSource("foo"), RowSignature.builder().add("__time", ValueType.LONG).add("dimSequential", ValueType.STRING).add("dimZipf", ValueType.STRING).add("dimUniform", ValueType.STRING).build()));
final Schema druidSchema = new AbstractSchema() {
@Override
protected Map<String, Table> getTableMap() {
return tableMap;
}
};
plannerFactory = new PlannerFactory(Calcites.createRootSchema(druidSchema), walker, CalciteTests.createOperatorTable(), plannerConfig);
groupByQuery = GroupByQuery.builder().setDataSource("foo").setInterval(new Interval(JodaUtils.MIN_INSTANT, JodaUtils.MAX_INSTANT)).setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("dimZipf", "d0"), new DefaultDimensionSpec("dimSequential", "d1"))).setAggregatorSpecs(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("c"))).setGranularity(Granularities.ALL).build();
sqlQuery = "SELECT\n" + " dimZipf AS d0," + " dimSequential AS d1,\n" + " COUNT(*) AS c\n" + "FROM druid.foo\n" + "GROUP BY dimZipf, dimSequential";
}
use of io.druid.segment.QueryableIndex in project druid by druid-io.
the class TimeseriesBenchmark method setup.
@Setup
public void setup() throws IOException {
log.info("SETUP CALLED AT " + System.currentTimeMillis());
if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
}
executorService = Execs.multiThreaded(numSegments, "TimeseriesThreadPool");
setupQueries();
String[] schemaQuery = schemaAndQuery.split("\\.");
String schemaName = schemaQuery[0];
String queryName = schemaQuery[1];
schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
query = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
incIndexes = new ArrayList<>();
for (int i = 0; i < numSegments; i++) {
log.info("Generating rows for segment " + i);
BenchmarkDataGenerator gen = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + i, schemaInfo.getDataInterval(), rowsPerSegment);
IncrementalIndex incIndex = makeIncIndex();
for (int j = 0; j < rowsPerSegment; j++) {
InputRow row = gen.nextRow();
if (j % 10000 == 0) {
log.info(j + " rows generated.");
}
incIndex.add(row);
}
log.info(rowsPerSegment + " rows generated");
incIndexes.add(incIndex);
}
tmpDir = Files.createTempDir();
log.info("Using temp dir: " + tmpDir.getAbsolutePath());
qIndexes = new ArrayList<>();
for (int i = 0; i < numSegments; i++) {
File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec());
QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
qIndexes.add(qIndex);
}
factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
}
use of io.druid.segment.QueryableIndex in project druid by druid-io.
the class SearchBenchmark method setup.
@Setup
public void setup() throws IOException {
log.info("SETUP CALLED AT " + +System.currentTimeMillis());
if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
}
executorService = Execs.multiThreaded(numSegments, "SearchThreadPool");
setupQueries();
String[] schemaQuery = schemaAndQuery.split("\\.");
String schemaName = schemaQuery[0];
String queryName = schemaQuery[1];
schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
queryBuilder = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
queryBuilder.limit(limit);
query = queryBuilder.build();
incIndexes = new ArrayList<>();
for (int i = 0; i < numSegments; i++) {
log.info("Generating rows for segment " + i);
BenchmarkDataGenerator gen = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), System.currentTimeMillis(), schemaInfo.getDataInterval(), rowsPerSegment);
IncrementalIndex incIndex = makeIncIndex();
for (int j = 0; j < rowsPerSegment; j++) {
InputRow row = gen.nextRow();
if (j % 10000 == 0) {
log.info(j + " rows generated.");
}
incIndex.add(row);
}
incIndexes.add(incIndex);
}
tmpDir = Files.createTempDir();
log.info("Using temp dir: " + tmpDir.getAbsolutePath());
qIndexes = new ArrayList<>();
for (int i = 0; i < numSegments; i++) {
File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec());
QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
qIndexes.add(qIndex);
}
final SearchQueryConfig config = new SearchQueryConfig().withOverrides(query);
factory = new SearchQueryRunnerFactory(new SearchStrategySelector(Suppliers.ofInstance(config)), new SearchQueryQueryToolChest(config, QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
}
use of io.druid.segment.QueryableIndex in project druid by druid-io.
the class TopNBenchmark method setup.
@Setup
public void setup() throws IOException {
log.info("SETUP CALLED AT " + System.currentTimeMillis());
if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
}
executorService = Execs.multiThreaded(numSegments, "TopNThreadPool");
setupQueries();
String[] schemaQuery = schemaAndQuery.split("\\.");
String schemaName = schemaQuery[0];
String queryName = schemaQuery[1];
schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
queryBuilder = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
queryBuilder.threshold(threshold);
query = queryBuilder.build();
incIndexes = new ArrayList<>();
for (int i = 0; i < numSegments; i++) {
log.info("Generating rows for segment " + i);
BenchmarkDataGenerator gen = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + i, schemaInfo.getDataInterval(), rowsPerSegment);
IncrementalIndex incIndex = makeIncIndex();
for (int j = 0; j < rowsPerSegment; j++) {
InputRow row = gen.nextRow();
if (j % 10000 == 0) {
log.info(j + " rows generated.");
}
incIndex.add(row);
}
incIndexes.add(incIndex);
}
tmpDir = Files.createTempDir();
log.info("Using temp dir: " + tmpDir.getAbsolutePath());
qIndexes = new ArrayList<>();
for (int i = 0; i < numSegments; i++) {
File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec());
QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
qIndexes.add(qIndex);
}
factory = new TopNQueryRunnerFactory(new StupidPool<>("TopNBenchmark-compute-bufferPool", new OffheapBufferGenerator("compute", 250000000), 0, Integer.MAX_VALUE), new TopNQueryQueryToolChest(new TopNQueryConfig(), QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
}
use of io.druid.segment.QueryableIndex in project druid by druid-io.
the class YeOldePlumberSchool method findPlumber.
@Override
public Plumber findPlumber(final DataSchema schema, final RealtimeTuningConfig config, final FireDepartmentMetrics metrics) {
// There can be only one.
final Sink theSink = new Sink(interval, schema, config.getShardSpec(), version, config.getMaxRowsInMemory(), config.isReportParseExceptions());
// Temporary directory to hold spilled segments.
final File persistDir = new File(tmpSegmentDir, theSink.getSegment().getIdentifier());
// Set of spilled segments. Will be merged at the end.
final Set<File> spilled = Sets.newHashSet();
// IndexMerger implementation.
final IndexMerger theIndexMerger = config.getBuildV9Directly() ? indexMergerV9 : indexMerger;
return new Plumber() {
@Override
public Object startJob() {
return null;
}
@Override
public int add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
Sink sink = getSink(row.getTimestampFromEpoch());
if (sink == null) {
return -1;
}
final int numRows = sink.add(row);
if (!sink.canAppendRow()) {
persist(committerSupplier.get());
}
return numRows;
}
private Sink getSink(long timestamp) {
if (theSink.getInterval().contains(timestamp)) {
return theSink;
} else {
return null;
}
}
@Override
public <T> QueryRunner<T> getQueryRunner(Query<T> query) {
throw new UnsupportedOperationException("Don't query me, bro.");
}
@Override
public void persist(Committer committer) {
spillIfSwappable();
committer.run();
}
@Override
public void finishJob() {
// The segment we will upload
File fileToUpload = null;
try {
// User should have persisted everything by now.
Preconditions.checkState(!theSink.swappable(), "All data must be persisted before fininshing the job!");
if (spilled.size() == 0) {
throw new IllegalStateException("Nothing indexed?");
} else if (spilled.size() == 1) {
fileToUpload = Iterables.getOnlyElement(spilled);
} else {
List<QueryableIndex> indexes = Lists.newArrayList();
for (final File oneSpill : spilled) {
indexes.add(indexIO.loadIndex(oneSpill));
}
fileToUpload = new File(tmpSegmentDir, "merged");
theIndexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), fileToUpload, config.getIndexSpec());
}
// Map merged segment so we can extract dimensions
final QueryableIndex mappedSegment = indexIO.loadIndex(fileToUpload);
final DataSegment segmentToUpload = theSink.getSegment().withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions())).withBinaryVersion(SegmentUtils.getVersionFromDir(fileToUpload));
dataSegmentPusher.push(fileToUpload, segmentToUpload);
log.info("Uploaded segment[%s]", segmentToUpload.getIdentifier());
} catch (Exception e) {
log.warn(e, "Failed to merge and upload");
throw Throwables.propagate(e);
} finally {
try {
if (fileToUpload != null) {
log.info("Deleting Index File[%s]", fileToUpload);
FileUtils.deleteDirectory(fileToUpload);
}
} catch (IOException e) {
log.warn(e, "Error deleting directory[%s]", fileToUpload);
}
}
}
private void spillIfSwappable() {
if (theSink.swappable()) {
final FireHydrant indexToPersist = theSink.swap();
final int rowsToPersist = indexToPersist.getIndex().size();
final File dirToPersist = getSpillDir(indexToPersist.getCount());
log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist);
try {
theIndexMerger.persist(indexToPersist.getIndex(), dirToPersist, config.getIndexSpec());
indexToPersist.swapSegment(null);
metrics.incrementRowOutputCount(rowsToPersist);
spilled.add(dirToPersist);
} catch (Exception e) {
log.warn(e, "Failed to spill index[%d]", indexToPersist.getCount());
throw Throwables.propagate(e);
}
}
}
private File getSpillDir(final int n) {
return new File(persistDir, String.format("spill%d", n));
}
};
}
Aggregations