Example 26 with IndexSpec

Use of io.druid.segment.IndexSpec in project druid by druid-io.

In the class IndexPersistBenchmark, the method persistV9:

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void persistV9(Blackhole blackhole) throws Exception {
    File tmpDir = Files.createTempDir();
    log.info("Using temp dir: " + tmpDir.getAbsolutePath());
    try {
        File indexFile = INDEX_MERGER_V9.persist(incIndex, tmpDir, new IndexSpec());
        blackhole.consume(indexFile);
    } finally {
        FileUtils.deleteDirectory(tmpDir);
    }
}
Also used: IndexSpec(io.druid.segment.IndexSpec) File(java.io.File) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
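The no-argument IndexSpec() above persists with Druid's defaults (historically concise bitmaps and LZ4 compression for dimension and metric columns). A benchmark that wants to measure an alternative encoding can pass an explicit spec instead. The following is a minimal sketch, not code from the benchmark, and it assumes the four-argument IndexSpec constructor and the RoaringBitmapSerdeFactory present in this Druid lineage:

// Hedged sketch: persist with Roaring bitmaps and explicit compression choices.
// Assumes the four-argument IndexSpec constructor (Druid 0.9.2+ era).
IndexSpec roaringSpec = new IndexSpec(
    // null lets the serde factory use its default for compressRunOnSerialization
    new RoaringBitmapSerdeFactory(null),
    CompressedObjectStrategy.CompressionStrategy.LZ4,
    CompressedObjectStrategy.CompressionStrategy.LZ4,
    CompressionFactory.LongEncodingStrategy.LONGS);
File indexFile = INDEX_MERGER_V9.persist(incIndex, tmpDir, roaringSpec);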

Example 27 with IndexSpec

Use of io.druid.segment.IndexSpec in project druid by druid-io.

In the class GroupByBenchmark, the method setup:

@Setup(Level.Trial)
public void setup() throws IOException {
    log.info("SETUP CALLED AT " + +System.currentTimeMillis());
    if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
        ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
    }
    executorService = Execs.multiThreaded(numProcessingThreads, "GroupByThreadPool[%d]");
    setupQueries();
    String[] schemaQuery = schemaAndQuery.split("\\.");
    String schemaName = schemaQuery[0];
    String queryName = schemaQuery[1];
    schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
    query = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
    final BenchmarkDataGenerator dataGenerator = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + 1, schemaInfo.getDataInterval(), rowsPerSegment);
    tmpDir = Files.createTempDir();
    log.info("Using temp dir: %s", tmpDir.getAbsolutePath());
    // queryableIndexes   -> numSegments worth of on-disk segments
    // anIncrementalIndex -> the last incremental index
    anIncrementalIndex = null;
    queryableIndexes = new ArrayList<>(numSegments);
    for (int i = 0; i < numSegments; i++) {
        log.info("Generating rows for segment %d/%d", i + 1, numSegments);
        final IncrementalIndex index = makeIncIndex(schemaInfo.isWithRollup());
        for (int j = 0; j < rowsPerSegment; j++) {
            final InputRow row = dataGenerator.nextRow();
            if (j % 20000 == 0) {
                log.info("%,d/%,d rows generated.", i * rowsPerSegment + j, rowsPerSegment * numSegments);
            }
            index.add(row);
        }
        log.info("%,d/%,d rows generated, persisting segment %d/%d.", (i + 1) * rowsPerSegment, rowsPerSegment * numSegments, i + 1, numSegments);
        final File file = INDEX_MERGER_V9.persist(index, new File(tmpDir, String.valueOf(i)), new IndexSpec());
        queryableIndexes.add(INDEX_IO.loadIndex(file));
        if (i == numSegments - 1) {
            anIncrementalIndex = index;
        } else {
            index.close();
        }
    }
    StupidPool<ByteBuffer> bufferPool = new StupidPool<>("GroupByBenchmark-computeBufferPool", new OffheapBufferGenerator("compute", 250_000_000), 0, Integer.MAX_VALUE);
    // limit of 2 is required since we simulate both historical merge and broker merge in the same process
    BlockingPool<ByteBuffer> mergePool = new BlockingPool<>(new OffheapBufferGenerator("merge", 250_000_000), 2);
    final GroupByQueryConfig config = new GroupByQueryConfig() {

        @Override
        public String getDefaultStrategy() {
            return defaultStrategy;
        }

        @Override
        public int getBufferGrouperInitialBuckets() {
            return initialBuckets;
        }

        @Override
        public long getMaxOnDiskStorage() {
            return 1_000_000_000L;
        }
    };
    config.setSingleThreaded(false);
    config.setMaxIntermediateRows(Integer.MAX_VALUE);
    config.setMaxResults(Integer.MAX_VALUE);
    DruidProcessingConfig druidProcessingConfig = new DruidProcessingConfig() {

        @Override
        public int getNumThreads() {
            // Used by "v2" strategy for concurrencyHint
            return numProcessingThreads;
        }

        @Override
        public String getFormatString() {
            return null;
        }
    };
    final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
    final GroupByStrategySelector strategySelector = new GroupByStrategySelector(configSupplier, new GroupByStrategyV1(configSupplier, new GroupByQueryEngine(configSupplier, bufferPool), QueryBenchmarkUtil.NOOP_QUERYWATCHER, bufferPool), new GroupByStrategyV2(druidProcessingConfig, configSupplier, bufferPool, mergePool, new ObjectMapper(new SmileFactory()), QueryBenchmarkUtil.NOOP_QUERYWATCHER));
    factory = new GroupByQueryRunnerFactory(strategySelector, new GroupByQueryQueryToolChest(strategySelector, QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()));
}
Also used: GroupByStrategySelector(io.druid.query.groupby.strategy.GroupByStrategySelector) IndexSpec(io.druid.segment.IndexSpec) BenchmarkDataGenerator(io.druid.benchmark.datagen.BenchmarkDataGenerator) HyperUniquesSerde(io.druid.query.aggregation.hyperloglog.HyperUniquesSerde) GroupByQueryQueryToolChest(io.druid.query.groupby.GroupByQueryQueryToolChest) GroupByStrategyV1(io.druid.query.groupby.strategy.GroupByStrategyV1) GroupByStrategyV2(io.druid.query.groupby.strategy.GroupByStrategyV2) GroupByQueryEngine(io.druid.query.groupby.GroupByQueryEngine) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryRunnerFactory(io.druid.query.groupby.GroupByQueryRunnerFactory) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) ByteBuffer(java.nio.ByteBuffer) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) OffheapBufferGenerator(io.druid.offheap.OffheapBufferGenerator) InputRow(io.druid.data.input.InputRow) BlockingPool(io.druid.collections.BlockingPool) StupidPool(io.druid.collections.StupidPool) DruidProcessingConfig(io.druid.query.DruidProcessingConfig) File(java.io.File) Setup(org.openjdk.jmh.annotations.Setup)
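The schemaAndQuery string split on "." above is a JMH benchmark parameter, so setup() runs once per schema/query pair. A hypothetical declaration of that field (the listed values are illustrative, not the benchmark's full parameter set):

// Hypothetical JMH parameter feeding the schemaAndQuery.split("\\.") call above;
// JMH re-runs the trial for each listed value.
@Param({ "basic.A", "basic.nested" })
private String schemaAndQuery;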

Example 28 with IndexSpec

Use of io.druid.segment.IndexSpec in project druid by druid-io.

In the class SelectBenchmark, the method setup:

@Setup
public void setup() throws IOException {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());
    if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
        ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
    }
    executorService = Execs.multiThreaded(numSegments, "SelectThreadPool");
    setupQueries();
    String[] schemaQuery = schemaAndQuery.split("\\.");
    String schemaName = schemaQuery[0];
    String queryName = schemaQuery[1];
    schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
    queryBuilder = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
    queryBuilder.pagingSpec(PagingSpec.newSpec(pagingThreshold));
    query = queryBuilder.build();
    incIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
        BenchmarkDataGenerator gen = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + i, schemaInfo.getDataInterval(), rowsPerSegment);
        IncrementalIndex incIndex = makeIncIndex();
        for (int j = 0; j < rowsPerSegment; j++) {
            InputRow row = gen.nextRow();
            if (j % 10000 == 0) {
                log.info(j + " rows generated.");
            }
            incIndex.add(row);
        }
        incIndexes.add(incIndex);
    }
    tmpDir = Files.createTempDir();
    log.info("Using temp dir: " + tmpDir.getAbsolutePath());
    qIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
        File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec());
        QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
        qIndexes.add(qIndex);
    }
    final Supplier<SelectQueryConfig> selectConfigSupplier = Suppliers.ofInstance(new SelectQueryConfig(true));
    factory = new SelectQueryRunnerFactory(new SelectQueryQueryToolChest(JSON_MAPPER, QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator(), selectConfigSupplier), new SelectQueryEngine(selectConfigSupplier), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
}
Also used: IndexSpec(io.druid.segment.IndexSpec) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) BenchmarkDataGenerator(io.druid.benchmark.datagen.BenchmarkDataGenerator) HyperUniquesSerde(io.druid.query.aggregation.hyperloglog.HyperUniquesSerde) SelectQueryRunnerFactory(io.druid.query.select.SelectQueryRunnerFactory) SelectQueryConfig(io.druid.query.select.SelectQueryConfig) SelectQueryQueryToolChest(io.druid.query.select.SelectQueryQueryToolChest) SelectQueryEngine(io.druid.query.select.SelectQueryEngine) QueryableIndex(io.druid.segment.QueryableIndex) InputRow(io.druid.data.input.InputRow) File(java.io.File) Setup(org.openjdk.jmh.annotations.Setup)
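PagingSpec.newSpec(pagingThreshold) above only seeds the first page of the select query. Walking a full result set means advancing the per-segment offsets carried in each result. A sketch of that step, assuming SelectQuery.withPagingSpec(...) and SelectResultValue.getPagingIdentifiers() behave as they do in this Druid lineage (Map and HashMap from java.util):

// Hedged sketch: derive the next page of a select query from the previous result.
private SelectQuery nextPage(SelectQuery query, SelectResultValue prevResult, int threshold) {
    Map<String, Integer> nextOffsets = new HashMap<>();
    for (Map.Entry<String, Integer> entry : prevResult.getPagingIdentifiers().entrySet()) {
        // move each segment's offset past the last row already returned
        nextOffsets.put(entry.getKey(), entry.getValue() + 1);
    }
    return query.withPagingSpec(new PagingSpec(nextOffsets, threshold));
}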

Example 29 with IndexSpec

Use of io.druid.segment.IndexSpec in project druid by druid-io.

In the class MultiValuedDimensionTest, the method setupClass:

@BeforeClass
public static void setupClass() throws Exception {
    incrementalIndex = new OnheapIncrementalIndex(0, Granularities.NONE, new AggregatorFactory[] { new CountAggregatorFactory("count") }, true, true, true, 5000);
    StringInputRowParser parser = new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags")), null, null), "\t", ImmutableList.of("timestamp", "product", "tags")), "UTF-8");
    String[] rows = new String[] { "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3", "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5", "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7", "2011-01-14T00:00:00.000Z,product_4" };
    for (String row : rows) {
        incrementalIndex.add(parser.parse(row));
    }
    persistedSegmentDir = Files.createTempDir();
    TestHelper.getTestIndexMerger().persist(incrementalIndex, persistedSegmentDir, new IndexSpec());
    queryableIndex = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDir);
}
Also used: IndexSpec(io.druid.segment.IndexSpec) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) CSVParseSpec(io.druid.data.input.impl.CSVParseSpec) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) BeforeClass(org.junit.BeforeClass)
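With the segment loaded, the multi-valued "tags" column makes each row contribute one group per tag value, so product_1's single row counts toward t1, t2, and t3. A minimal sketch of such a groupBy, assuming the standard GroupByQuery builder from this Druid lineage; the data source name and interval are illustrative:

// Hedged sketch: group on the multi-valued "tags" dimension built above.
GroupByQuery query = GroupByQuery
    .builder()
    .setDataSource("xx")
    .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
    .setGranularity(Granularities.ALL)
    .setDimensions(ImmutableList.<DimensionSpec>of(new DefaultDimensionSpec("tags", "tags")))
    .setAggregatorSpecs(ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("count")))
    .build();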

Example 30 with IndexSpec

Use of io.druid.segment.IndexSpec in project druid by druid-io.

In the class AggregationTestHelper, the method createIndex:

public void createIndex(Iterator rows, InputRowParser parser, final AggregatorFactory[] metrics, File outDir, long minTimestamp, Granularity gran, boolean deserializeComplexMetrics, int maxRowCount) throws Exception {
    IncrementalIndex index = null;
    List<File> toMerge = new ArrayList<>();
    try {
        index = new OnheapIncrementalIndex(minTimestamp, gran, metrics, deserializeComplexMetrics, true, true, maxRowCount);
        while (rows.hasNext()) {
            Object row = rows.next();
            if (!index.canAppendRow()) {
                File tmp = tempFolder.newFolder();
                toMerge.add(tmp);
                indexMerger.persist(index, tmp, new IndexSpec());
                index.close();
                index = new OnheapIncrementalIndex(minTimestamp, gran, metrics, deserializeComplexMetrics, true, true, maxRowCount);
            }
            if (row instanceof String && parser instanceof StringInputRowParser) {
                //Note: this is required because StringInputRowParser is InputRowParser<ByteBuffer> as opposed to
                //InputRowsParser<String>
                index.add(((StringInputRowParser) parser).parse((String) row));
            } else {
                index.add(parser.parse(row));
            }
        }
        if (toMerge.size() > 0) {
            File tmp = tempFolder.newFolder();
            toMerge.add(tmp);
            indexMerger.persist(index, tmp, new IndexSpec());
            List<QueryableIndex> indexes = new ArrayList<>(toMerge.size());
            for (File file : toMerge) {
                indexes.add(indexIO.loadIndex(file));
            }
            indexMerger.mergeQueryableIndex(indexes, true, metrics, outDir, new IndexSpec());
            for (QueryableIndex qi : indexes) {
                qi.close();
            }
        } else {
            indexMerger.persist(index, outDir, new IndexSpec());
        }
    } finally {
        if (index != null) {
            index.close();
        }
    }
}
Also used: IndexSpec(io.druid.segment.IndexSpec) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) QueryableIndex(io.druid.segment.QueryableIndex) ArrayList(java.util.ArrayList) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) File(java.io.File)
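A sketch of how a caller might drive createIndex; the helper instance, row iterator, parser, and output directory are placeholders supplied by the surrounding test, and maxRowCount is kept small on purpose so the spill-and-merge branch above is exercised:

// Hedged sketch: invoke createIndex with a deliberately small maxRowCount.
helper.createIndex(
    rowsIterator,  // placeholder Iterator over raw rows
    parser,        // placeholder InputRowParser matching the row format
    new AggregatorFactory[] { new CountAggregatorFactory("count") },
    outDir,        // placeholder output directory for the merged segment
    0,             // minTimestamp
    Granularities.NONE,
    true,          // deserializeComplexMetrics
    5000);         // maxRowCount: small, to force periodic persists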

Aggregations

IndexSpec (io.druid.segment.IndexSpec): 37 usages
File (java.io.File): 21 usages
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 15 usages
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 13 usages
InputRow (io.druid.data.input.InputRow): 11 usages
Test (org.junit.Test): 11 usages
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator): 10 usages
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde): 10 usages
QueryableIndex (io.druid.segment.QueryableIndex): 10 usages
Setup (org.openjdk.jmh.annotations.Setup): 10 usages
Period (org.joda.time.Period): 7 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 6 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 5 usages
StupidPool (io.druid.collections.StupidPool): 4 usages
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 4 usages
OffheapBufferGenerator (io.druid.offheap.OffheapBufferGenerator): 4 usages
QueryableIndexSegment (io.druid.segment.QueryableIndexSegment): 4 usages
RoaringBitmapSerdeFactory (io.druid.segment.data.RoaringBitmapSerdeFactory): 4 usages
DataSegment (io.druid.timeline.DataSegment): 4 usages
IOException (java.io.IOException): 4 usages