use of io.druid.segment.IndexSpec in project druid by druid-io.
the class IngestSegmentFirehoseTest method createTestIndex.
/**
 * Builds a small on-heap incremental index from nine hard-coded CSV lines and persists it
 * into {@code segmentDir} through {@code indexMerger}.
 *
 * <p>Each line has the columns {@code timestamp,host,visited}; the timestamp is parsed with the
 * {@code yyyyMMddHH} format, {@code host} is the only dimension, and a single
 * {@code LongSumAggregatorFactory} named {@code visited_sum} is fed from {@code visited}.
 *
 * @param segmentDir directory handed to {@code indexMerger.persist} as the output location
 * @throws Exception if parsing a line, adding a row, or persisting the index fails
 */
private void createTestIndex(File segmentDir) throws Exception {
  List<String> csvLines = Lists.newArrayList(
      "2014102200,host1,10",
      "2014102200,host2,20",
      "2014102200,host3,30",
      "2014102201,host1,10",
      "2014102201,host2,20",
      "2014102201,host3,30",
      "2014102202,host1,10",
      "2014102202,host2,20",
      "2014102202,host3,30"
  );

  // Assemble the parser piece by piece instead of one deeply nested expression.
  TimestampSpec timestampSpec = new TimestampSpec("timestamp", "yyyyMMddHH", null);
  DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")),
      null,
      null
  );
  CSVParseSpec parseSpec = new CSVParseSpec(
      timestampSpec,
      dimensionsSpec,
      null,
      ImmutableList.of("timestamp", "host", "visited")
  );
  StringInputRowParser rowParser = new StringInputRowParser(parseSpec, Charsets.UTF_8.toString());

  AggregatorFactory[] metrics = {new LongSumAggregatorFactory("visited_sum", "visited")};

  // Constructed outside the try: if construction fails there is nothing to close.
  final IncrementalIndex incrementalIndex =
      new OnheapIncrementalIndex(0, Granularities.NONE, metrics, true, true, true, 5000);
  try {
    for (String csvLine : csvLines) {
      incrementalIndex.add(rowParser.parse(csvLine));
    }
    indexMerger.persist(incrementalIndex, segmentDir, new IndexSpec());
  } finally {
    incrementalIndex.close();
  }
}
use of io.druid.segment.IndexSpec in project druid by druid-io.
the class GroupByTypeInterfaceBenchmark method setup.
/**
 * Trial-level JMH setup for the group-by type-interface benchmark.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>registers the {@code hyperUnique} complex-metric serde when none is registered yet;</li>
 *   <li>creates the processing executor and calls {@code setupQueries()};</li>
 *   <li>looks up the {@code "basic"} schema and its string / longFloat / long / float queries;</li>
 *   <li>generates {@code numSegments} segments of {@code rowsPerSegment} rows each, persisting and
 *       loading every one of them from a fresh temp directory;</li>
 *   <li>builds the buffer pools, configs and strategy selector, and stores the resulting
 *       {@code GroupByQueryRunnerFactory} in {@code factory}.</li>
 * </ol>
 *
 * @throws IOException declared for the row-ingestion, persist and load calls made during setup
 */
@Setup(Level.Trial)
public void setup() throws IOException {
  log.info("SETUP CALLED AT %d", System.currentTimeMillis());

  // Only register the serde when nobody has done so already.
  if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
  }

  executorService = Execs.multiThreaded(numProcessingThreads, "GroupByThreadPool[%d]");

  setupQueries();

  final String schemaName = "basic";
  schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
  stringQuery = SCHEMA_QUERY_MAP.get(schemaName).get("string");
  longFloatQuery = SCHEMA_QUERY_MAP.get(schemaName).get("longFloat");
  longQuery = SCHEMA_QUERY_MAP.get(schemaName).get("long");
  floatQuery = SCHEMA_QUERY_MAP.get(schemaName).get("float");

  final BenchmarkDataGenerator rowGenerator = new BenchmarkDataGenerator(
      schemaInfo.getColumnSchemas(),
      RNG_SEED + 1,
      schemaInfo.getDataInterval(),
      rowsPerSegment
  );

  tmpDir = Files.createTempDir();
  log.info("Using temp dir: %s", tmpDir.getAbsolutePath());

  // queryableIndexes -> numSegments worth of on-disk segments
  // anIncrementalIndex -> the last incremental index
  anIncrementalIndex = null;
  queryableIndexes = new ArrayList<>(numSegments);

  for (int segmentIdx = 0; segmentIdx < numSegments; segmentIdx++) {
    final int segmentNumber = segmentIdx + 1;
    log.info("Generating rows for segment %d/%d", segmentNumber, numSegments);

    final IncrementalIndex segmentIndex = makeIncIndex();
    int rowIdx = 0;
    while (rowIdx < rowsPerSegment) {
      final InputRow generatedRow = rowGenerator.nextRow();
      // Progress message every 20000 rows, emitted before the row is added.
      if (rowIdx % 20000 == 0) {
        log.info("%,d/%,d rows generated.", segmentIdx * rowsPerSegment + rowIdx, rowsPerSegment * numSegments);
      }
      segmentIndex.add(generatedRow);
      rowIdx++;
    }

    log.info(
        "%,d/%,d rows generated, persisting segment %d/%d.",
        segmentNumber * rowsPerSegment,
        rowsPerSegment * numSegments,
        segmentNumber,
        numSegments
    );

    final File segmentDir = new File(tmpDir, String.valueOf(segmentIdx));
    final File persistedDir = INDEX_MERGER_V9.persist(segmentIndex, segmentDir, new IndexSpec());
    queryableIndexes.add(INDEX_IO.loadIndex(persistedDir));

    // Keep the final incremental index open for later use; close all earlier ones.
    final boolean lastSegment = segmentIdx == numSegments - 1;
    if (lastSegment) {
      anIncrementalIndex = segmentIndex;
    } else {
      segmentIndex.close();
    }
  }

  final StupidPool<ByteBuffer> computePool = new StupidPool<>(
      "GroupByBenchmark-computeBufferPool",
      new OffheapBufferGenerator("compute", 250_000_000),
      0,
      Integer.MAX_VALUE
  );

  // limit of 2 is required since we simulate both historical merge and broker merge in the same process
  final BlockingPool<ByteBuffer> mergePool = new BlockingPool<>(
      new OffheapBufferGenerator("merge", 250_000_000),
      2
  );

  // Config whose strategy and initial bucket count come from this benchmark's fields.
  final GroupByQueryConfig groupByConfig = new GroupByQueryConfig() {
    @Override
    public String getDefaultStrategy() {
      return defaultStrategy;
    }

    @Override
    public int getBufferGrouperInitialBuckets() {
      return initialBuckets;
    }

    @Override
    public long getMaxOnDiskStorage() {
      return 1_000_000_000L;
    }
  };
  groupByConfig.setSingleThreaded(false);
  groupByConfig.setMaxIntermediateRows(Integer.MAX_VALUE);
  groupByConfig.setMaxResults(Integer.MAX_VALUE);

  final DruidProcessingConfig processingConfig = new DruidProcessingConfig() {
    @Override
    public int getNumThreads() {
      // Used by "v2" strategy for concurrencyHint
      return numProcessingThreads;
    }

    @Override
    public String getFormatString() {
      return null;
    }
  };

  // Build the two strategies as named steps, in the same order the nested expression would.
  final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(groupByConfig);
  final GroupByQueryEngine v1Engine = new GroupByQueryEngine(configSupplier, computePool);
  final GroupByStrategyV1 strategyV1 = new GroupByStrategyV1(
      configSupplier,
      v1Engine,
      QueryBenchmarkUtil.NOOP_QUERYWATCHER,
      computePool
  );
  final GroupByStrategyV2 strategyV2 = new GroupByStrategyV2(
      processingConfig,
      configSupplier,
      computePool,
      mergePool,
      new ObjectMapper(new SmileFactory()),
      QueryBenchmarkUtil.NOOP_QUERYWATCHER
  );
  final GroupByStrategySelector strategySelector =
      new GroupByStrategySelector(configSupplier, strategyV1, strategyV2);

  final GroupByQueryQueryToolChest toolChest = new GroupByQueryQueryToolChest(
      strategySelector,
      QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()
  );
  factory = new GroupByQueryRunnerFactory(strategySelector, toolChest);
}
use of io.druid.segment.IndexSpec in project druid by druid-io.
the class FilterPartitionBenchmark method setup.
/**
 * JMH setup for the filter-partition benchmark.
 *
 * <p>Registers the {@code hyperUnique} complex-metric serde when none is registered yet, fills a
 * fresh incremental index with {@code rowsPerSegment} generated rows, persists it to a new temp
 * directory and loads it back as {@code qIndex}, then prepares three bound filters on the time
 * column: {@code timeFilterNone}, {@code timeFilterHalf} and {@code timeFilterAll}.
 *
 * @throws IOException declared for the row-ingestion, persist and load calls made during setup
 */
@Setup
public void setup() throws IOException {
  // Values are passed as format arguments rather than concatenated into the message, so that
  // runtime data (notably the temp-dir path) is never interpreted as a format string.
  // NOTE(review): assumes log.info(String, Object...) applies String.format-style formatting.
  log.info("SETUP CALLED AT %d", System.currentTimeMillis());

  // Only register the serde when nobody has done so already.
  if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
  }

  schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schema);

  BenchmarkDataGenerator gen = new BenchmarkDataGenerator(
      schemaInfo.getColumnSchemas(),
      RNG_SEED,
      schemaInfo.getDataInterval(),
      rowsPerSegment
  );

  incIndex = makeIncIndex();
  for (int j = 0; j < rowsPerSegment; j++) {
    InputRow row = gen.nextRow();
    // Progress message every 10000 rows, emitted before the row is added.
    if (j % 10000 == 0) {
      log.info("%d rows generated.", j);
    }
    incIndex.add(row);
  }

  tmpDir = Files.createTempDir();
  log.info("Using temp dir: %s", tmpDir.getAbsolutePath());

  indexFile = INDEX_MERGER_V9.persist(incIndex, tmpDir, new IndexSpec());
  qIndex = INDEX_IO.loadIndex(indexFile);

  Interval interval = schemaInfo.getDataInterval();
  long startMillis = interval.getStartMillis();
  long endMillis = interval.getEndMillis();
  long halfEnd = (endMillis + startMillis) / 2;

  // Both bounds are Long.MAX_VALUE; going by the field name this is meant to select no rows.
  timeFilterNone = timeBoundFilter(Long.MAX_VALUE, Long.MAX_VALUE);
  // From the start of the data interval to its midpoint.
  timeFilterHalf = timeBoundFilter(startMillis, halfEnd);
  // The whole data interval.
  timeFilterAll = timeBoundFilter(startMillis, endMillis);
}

/**
 * Builds a {@code BoundFilter} over the time column whose lower and upper bounds are the decimal
 * string forms of the given millisecond values, compared with the alphanumeric comparator.
 *
 * @param lowerMillis value used as the lower bound
 * @param upperMillis value used as the upper bound
 * @return the bound filter wrapping the corresponding {@code BoundDimFilter}
 */
private BoundFilter timeBoundFilter(long lowerMillis, long upperMillis) {
  return new BoundFilter(
      new BoundDimFilter(
          Column.TIME_COLUMN_NAME,
          String.valueOf(lowerMillis),
          String.valueOf(upperMillis),
          // NOTE(review): the two flags are presumably lowerStrict/upperStrict — confirm
          // against the BoundDimFilter constructor.
          true,
          true,
          null,
          null,
          StringComparators.ALPHANUMERIC
      )
  );
}
use of io.druid.segment.IndexSpec in project druid by druid-io.
the class FilteredAggregatorBenchmark method setup.
/**
 * JMH setup for the filtered-aggregator benchmark.
 *
 * <p>Registers the {@code hyperUnique} complex-metric serde when none is registered yet, builds an
 * OR filter over {@code dimSequential}, wraps a row-count aggregator in that filter, creates one
 * incremental index with the schema's aggregators and one with the filtered aggregator, generates
 * {@code rowsPerSegment} rows (added to {@code incIndex} and remembered in {@code inputRows}),
 * persists and reloads {@code incIndex}, and finally builds the timeseries runner factory and the
 * query that uses the filtered aggregator.
 *
 * @throws IOException declared for the row-ingestion, persist and load calls made during setup
 */
@Setup
public void setup() throws IOException {
  // Values are passed as format arguments rather than concatenated into the message, so that
  // runtime data (notably the temp-dir path) is never interpreted as a format string.
  // NOTE(review): assumes log.info(String, Object...) applies String.format-style formatting.
  log.info("SETUP CALLED AT %d", System.currentTimeMillis());

  // Only register the serde when nobody has done so already.
  if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
  }

  schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schema);

  BenchmarkDataGenerator gen = new BenchmarkDataGenerator(
      schemaInfo.getColumnSchemas(),
      RNG_SEED,
      schemaInfo.getDataInterval(),
      rowsPerSegment
  );

  incIndex = makeIncIndex(schemaInfo.getAggsArray());

  // OR of five different filter types, all targeting the dimSequential column.
  filter = new OrDimFilter(
      Arrays.asList(
          new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC),
          new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getEnabledInstance()),
          new RegexDimFilter("dimSequential", "X", null),
          new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null),
          new InDimFilter("dimSequential", Arrays.asList("X"), null)
      )
  );

  filteredMetrics = new AggregatorFactory[1];
  filteredMetrics[0] = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
  // Created here but not filled by this method; only incIndex receives the generated rows below.
  incIndexFilteredAgg = makeIncIndex(filteredMetrics);

  inputRows = new ArrayList<>();
  for (int j = 0; j < rowsPerSegment; j++) {
    InputRow row = gen.nextRow();
    // Progress message every 10000 rows, emitted before the row is added.
    if (j % 10000 == 0) {
      log.info("%d rows generated.", j);
    }
    incIndex.add(row);
    inputRows.add(row);
  }

  tmpDir = Files.createTempDir();
  log.info("Using temp dir: %s", tmpDir.getAbsolutePath());

  indexFile = INDEX_MERGER_V9.persist(incIndex, tmpDir, new IndexSpec());
  qIndex = INDEX_IO.loadIndex(indexFile);

  factory = new TimeseriesQueryRunnerFactory(
      new TimeseriesQueryQueryToolChest(QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()),
      new TimeseriesQueryEngine(),
      QueryBenchmarkUtil.NOOP_QUERYWATCHER
  );

  // The query interval always comes from the "basic" schema, independent of the schema parameter.
  BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
  QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));

  List<AggregatorFactory> queryAggs = new ArrayList<>();
  queryAggs.add(filteredMetrics[0]);

  query = Druids.newTimeseriesQueryBuilder()
                .dataSource("blah")
                .granularity(Granularities.ALL)
                .intervals(intervalSpec)
                .aggregators(queryAggs)
                .descending(false)
                .build();
}
use of io.druid.segment.IndexSpec in project druid by druid-io.
the class TaskSerdeTest method testSegmentConvertSerde.
/**
 * Round-trips a {@code ConvertSegmentTask} through {@code jsonMapper} and checks that the
 * deserialized task is a {@code ConvertSegmentTask} with the same data source, interval,
 * bitmap serde factory class, dimension compression, metric compression, force flag and segment.
 *
 * @throws IOException if JSON serialization or deserialization fails
 */
@Test
public void testSegmentConvertSerde() throws IOException {
  final DataSegment segment = new DataSegment(
      "dataSource",
      Interval.parse("1990-01-01/1999-12-31"),
      "version",
      ImmutableMap.<String, Object>of(),
      ImmutableList.of("dim1", "dim2"),
      ImmutableList.of("metric1", "metric2"),
      NoneShardSpec.instance(),
      0,
      12345L
  );

  // Non-default index spec so the round trip has something specific to preserve.
  final IndexSpec indexSpec = new IndexSpec(
      new RoaringBitmapSerdeFactory(null),
      CompressedObjectStrategy.CompressionStrategy.LZF,
      CompressedObjectStrategy.CompressionStrategy.UNCOMPRESSED,
      CompressionFactory.LongEncodingStrategy.LONGS
  );
  final ConvertSegmentTask originalTask = ConvertSegmentTask.create(segment, indexSpec, false, true, null);

  final String serialized = jsonMapper.writeValueAsString(originalTask);
  final Task deserialized = jsonMapper.readValue(serialized, Task.class);

  Assert.assertTrue(deserialized instanceof ConvertSegmentTask);
  final ConvertSegmentTask roundTripped = (ConvertSegmentTask) deserialized;

  Assert.assertEquals(originalTask.getDataSource(), roundTripped.getDataSource());
  Assert.assertEquals(originalTask.getInterval(), roundTripped.getInterval());

  final IndexSpec expectedSpec = originalTask.getIndexSpec();
  final IndexSpec actualSpec = roundTripped.getIndexSpec();
  // The bitmap serde factories are compared by class name only.
  Assert.assertEquals(
      expectedSpec.getBitmapSerdeFactory().getClass().getCanonicalName(),
      actualSpec.getBitmapSerdeFactory().getClass().getCanonicalName()
  );
  Assert.assertEquals(expectedSpec.getDimensionCompression(), actualSpec.getDimensionCompression());
  Assert.assertEquals(expectedSpec.getMetricCompression(), actualSpec.getMetricCompression());

  Assert.assertEquals(false, roundTripped.isForce());
  Assert.assertEquals(segment, roundTripped.getSegment());
}
Aggregations