
Example 16 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From the class DoubleStorageTest, the method buildIndex:

private static QueryableIndex buildIndex(String storeDoubleAsFloat) throws IOException {
    String oldValue = System.getProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY);
    System.setProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY, storeDoubleAsFloat);
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(DateTimes.of("2011-01-13T00:00:00.000Z").getMillis())
        .withDimensionsSpec(ROW_PARSER)
        .withMetrics(new DoubleSumAggregatorFactory(DIM_FLOAT_NAME, DIM_FLOAT_NAME))
        .build();
    final IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setMaxRowCount(MAX_ROWS)
        .build();
    getStreamOfEvents().forEach(o -> {
        try {
            index.add(ROW_PARSER.parseBatch((Map<String, Object>) o).get(0));
        } catch (IndexSizeExceededException e) {
            throw new RuntimeException(e);
        }
    });
    if (oldValue == null) {
        System.clearProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY);
    } else {
        System.setProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY, oldValue);
    }
    File someTmpFile = File.createTempFile("billy", "yay");
    // Replace the temp file with a directory of the same name.
    someTmpFile.delete();
    FileUtils.mkdirp(someTmpFile);
    INDEX_MERGER_V9.persist(index, someTmpFile, new IndexSpec(), null);
    // This delete() fails silently: File.delete() cannot remove a non-empty
    // directory, so the persisted segment files remain loadable below.
    someTmpFile.delete();
    return INDEX_IO.loadIndex(someTmpFile);
}
Also used: IndexSpec(org.apache.druid.segment.IndexSpec) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) File(java.io.File) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
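
A minimal sketch of the same schema-then-index builder pattern in isolation, assuming only the Druid APIs already used above; the class name, the dimension name ("dim"), and the metric names ("valueSum", "value") are hypothetical, not taken from the test:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.IndexSizeExceededException;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;

public class IncrementalIndexSketch {

    // Hypothetical helper: builds a tiny on-heap incremental index with one row.
    public static IncrementalIndex buildSmallIndex() throws IndexSizeExceededException {
        // Schema first: the minimum timestamp plus the metrics to maintain per row.
        IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
            .withMinTimestamp(DateTimes.of("2011-01-13T00:00:00.000Z").getMillis())
            .withMetrics(new DoubleSumAggregatorFactory("valueSum", "value"))
            .build();
        // Then the index itself, bounded by a maximum row count.
        IncrementalIndex index = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(schema)
            .setMaxRowCount(10_000)
            .build();
        index.add(new MapBasedInputRow(
            DateTimes.of("2011-01-13T01:00:00.000Z").getMillis(),
            ImmutableList.of("dim"),
            ImmutableMap.of("dim", "a", "value", 1.0)
        ));
        return index;
    }
}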

Example 17 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From the class GroupByQueryHelper, the method createIndexAccumulatorPair:

public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config
) {
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final Granularity gran = query.getGranularity();
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    final boolean combine = subquery == null;
    long granTimeStart = timeStart;
    if (!(Granularities.ALL.equals(gran))) {
        granTimeStart = gran.bucketStart(timeStart);
    }
    final List<AggregatorFactory> aggs;
    if (combine) {
        aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {

            @Override
            public AggregatorFactory apply(AggregatorFactory input) {
                return input.getCombiningFactory();
            }
        });
    } else {
        aggs = query.getAggregatorSpecs();
    }
    final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    });
    final IncrementalIndex index;
    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    for (DimensionSpec dimension : query.getDimensions()) {
        dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
    }
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(new DimensionsSpec(dimensionSchemas))
        .withMetrics(aggs.toArray(new AggregatorFactory[0]))
        .withQueryGranularity(gran)
        .withMinTimestamp(granTimeStart)
        .build();
    final AppendableIndexBuilder indexBuilder;
    if (query.getContextValue("useOffheap", false)) {
        throw new UnsupportedOperationException(
            "The 'useOffheap' option is no longer available for groupBy v1. Please move to the newer groupBy engine, "
            + "which always operates off-heap, by removing any custom 'druid.query.groupBy.defaultStrategy' runtime "
            + "properties and 'groupByStrategy' query context parameters that you have set."
        );
    } else {
        indexBuilder = new OnheapIncrementalIndex.Builder();
    }
    index = indexBuilder
        .setIndexSchema(indexSchema)
        .setDeserializeComplexMetrics(false)
        .setConcurrentEventAdd(true)
        .setSortFacts(sortResults)
        .setMaxRowCount(querySpecificConfig.getMaxResults())
        .build();
    Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            final MapBasedRow mapBasedRow;
            if (in instanceof MapBasedRow) {
                mapBasedRow = (MapBasedRow) in;
            } else if (in instanceof ResultRow) {
                final ResultRow row = (ResultRow) in;
                mapBasedRow = row.toMapBasedRow(combine ? query : subquery);
            } else {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            try {
                accumulated.add(new MapBasedInputRow(mapBasedRow.getTimestamp(), dimensions, mapBasedRow.getEvent()));
            } catch (IndexSizeExceededException e) {
                throw new ResourceLimitExceededException(e.getMessage());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, accumulator);
}
Also used: Accumulator(org.apache.druid.java.util.common.guava.Accumulator) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) AppendableIndexBuilder(org.apache.druid.segment.incremental.AppendableIndexBuilder) ArrayList(java.util.ArrayList) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) Granularity(org.apache.druid.java.util.common.granularity.Granularity) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) ISE(org.apache.druid.java.util.common.ISE) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) Pair(org.apache.druid.java.util.common.Pair) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
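
The returned pair is meant to be folded over a sequence of rows. A hedged sketch of how a caller might consume it, assuming a Sequence<ResultRow> produced elsewhere; the class and method names here are hypothetical:

import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.guava.Accumulator;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryHelper;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.segment.incremental.IncrementalIndex;

public class AccumulatorSketch {

    // Hypothetical caller: folds every result row into the incremental index.
    public static IncrementalIndex accumulateRows(
        GroupByQuery query,
        GroupByQueryConfig config,
        Sequence<ResultRow> rows
    ) {
        // Passing subquery == null means combine == true in the method above,
        // so the index is built with combining aggregator factories.
        Pair<IncrementalIndex, Accumulator<IncrementalIndex, ResultRow>> pair =
            GroupByQueryHelper.createIndexAccumulatorPair(query, null, config);
        // Sequence.accumulate() feeds each row to the accumulator, which turns
        // the ResultRow into a MapBasedInputRow and adds it to the index.
        return rows.accumulate(pair.lhs, pair.rhs);
    }
}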

Example 18 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From the class SegmentGenerator, the method generate:

public QueryableIndex generate(
    final DataSegment dataSegment,
    final GeneratorSchemaInfo schemaInfo,
    final Granularity granularity,
    final int numRows
) {
    // In case we need to generate hyperUniques.
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    final String dataHash = Hashing.sha256()
        .newHasher()
        .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
        .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
        .putString(granularity.toString(), StandardCharsets.UTF_8)
        .putInt(numRows)
        .hash()
        .toString();
    final File outDir = new File(getSegmentDir(dataSegment.getId(), dataHash), "merged");
    if (outDir.exists()) {
        try {
            log.info("Found segment with hash[%s] cached in directory[%s].", dataHash, outDir);
            return TestHelper.getTestIndexIO().loadIndex(outDir);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
    log.info("Writing segment with hash[%s] to directory[%s].", dataHash, outDir);
    final DataGenerator dataGenerator = new DataGenerator(
        schemaInfo.getColumnSchemas(),
        // Use the segment identifier's hashCode as the seed.
        dataSegment.getId().hashCode(),
        schemaInfo.getDataInterval(),
        numRows
    );
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(schemaInfo.getDimensionsSpec())
        .withMetrics(schemaInfo.getAggsArray())
        .withRollup(schemaInfo.isWithRollup())
        .withQueryGranularity(granularity)
        .build();
    final List<InputRow> rows = new ArrayList<>();
    final List<QueryableIndex> indexes = new ArrayList<>();
    for (int i = 0; i < numRows; i++) {
        final InputRow row = dataGenerator.nextRow();
        rows.add(row);
        if ((i + 1) % 20000 == 0) {
            log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
        }
        if (rows.size() % MAX_ROWS_IN_MEMORY == 0) {
            indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema));
            rows.clear();
        }
    }
    log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);
    if (rows.size() > 0) {
        indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema));
        rows.clear();
    }
    final QueryableIndex retVal;
    if (indexes.isEmpty()) {
        throw new ISE("No rows to index?");
    } else {
        try {
            final IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
            retVal = TestHelper.getTestIndexIO().loadIndex(
                TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance())
                    .mergeQueryableIndex(
                        indexes,
                        false,
                        schemaInfo.getAggs()
                            .stream()
                            .map(AggregatorFactory::getCombiningFactory)
                            .toArray(AggregatorFactory[]::new),
                        null,
                        outDir,
                        indexSpec,
                        indexSpec,
                        new BaseProgressIndicator(),
                        null,
                        -1
                    )
            );
            for (QueryableIndex index : indexes) {
                index.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
    log.info("Finished writing segment[%s] to[%s]", dataSegment, outDir);
    return retVal;
}
Also used: IndexSpec(org.apache.druid.segment.IndexSpec) ArrayList(java.util.ArrayList) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) IOException(java.io.IOException) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRow(org.apache.druid.data.input.InputRow) ISE(org.apache.druid.java.util.common.ISE) File(java.io.File) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator)
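
A hedged usage sketch for generate(), built only from the types imported above and in the next example; the datasource name, interval, version, schema key, and row count are illustrative:

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;

public class SegmentGeneratorSketch {

    public static void main(String[] args) throws Exception {
        // An illustrative segment identifier; its id feeds the cache hash and the data seed.
        DataSegment segment = DataSegment.builder()
            .dataSource("test-datasource")
            .interval(Intervals.of("2000-01-01/2000-01-02"))
            .version("1")
            .shardSpec(new LinearShardSpec(0))
            .size(0)
            .build();
        // "basic" is one of the predefined benchmark schemas.
        GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
        // SegmentGenerator is Closeable; closing it removes its temp directory.
        try (SegmentGenerator generator = new SegmentGenerator()) {
            QueryableIndex index = generator.generate(segment, schemaInfo, Granularities.HOUR, 10_000);
            System.out.println("generated " + index.getNumRows() + " rows");
            index.close();
        }
    }
}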

Example 19 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From the class ExpressionSelectorsTest, the method test_incrementalIndexStringSelector:

@Test
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException {
    // This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
    // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
    // exception trying to make an optimized string expression selector that was not appropriate to use for the
    // underlying dimension selector.
    // This occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so those
    // conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details.
    IncrementalIndexSchema schema = new IncrementalIndexSchema(
        0,
        new TimestampSpec("time", "millis", DateTimes.nowUtc()),
        Granularities.NONE,
        VirtualColumns.EMPTY,
        DimensionsSpec.EMPTY,
        new AggregatorFactory[]{new CountAggregatorFactory("count")},
        true
    );
    IncrementalIndex index = new OnheapIncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).build();
    index.add(new MapBasedInputRow(DateTimes.nowUtc().getMillis(), ImmutableList.of("x"), ImmutableMap.of("x", "foo")));
    index.add(new MapBasedInputRow(DateTimes.nowUtc().plusMillis(1000).getMillis(), ImmutableList.of("y"), ImmutableMap.of("y", "foo")));
    IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
    Sequence<Cursor> cursors = adapter.makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null);
    int rowsProcessed = cursors.map(cursor -> {
        DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(cursor.getColumnSelectorFactory(), Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()), null);
        DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(cursor.getColumnSelectorFactory(), Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()), null);
        int rowCount = 0;
        while (!cursor.isDone()) {
            Object x = xExprSelector.getObject();
            Object y = yExprSelector.getObject();
            List<String> expectedFoo = Collections.singletonList("foofoo");
            List<String> expectedNull = NullHandling.replaceWithDefault() ? Collections.singletonList("foo") : Collections.singletonList(null);
            if (rowCount == 0) {
                Assert.assertEquals(expectedFoo, x);
                Assert.assertEquals(expectedNull, y);
            } else {
                Assert.assertEquals(expectedNull, x);
                Assert.assertEquals(expectedFoo, y);
            }
            rowCount++;
            cursor.advance();
        }
        return rowCount;
    }).accumulate(0, (in, acc) -> in + acc);
    Assert.assertEquals(2, rowsProcessed);
}
Also used: SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) StorageAdapter(org.apache.druid.segment.StorageAdapter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GeneratorBasicSchemas(org.apache.druid.segment.generator.GeneratorBasicSchemas) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) Expr(org.apache.druid.math.expr.Expr) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) AfterClass(org.junit.AfterClass) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) ExprEval(org.apache.druid.math.expr.ExprEval) BaseSingleValueDimensionSelector(org.apache.druid.segment.BaseSingleValueDimensionSelector) TestObjectColumnSelector(org.apache.druid.segment.TestObjectColumnSelector) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BeforeClass(org.junit.BeforeClass) Intervals(org.apache.druid.java.util.common.Intervals) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) Supplier(com.google.common.base.Supplier) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Parser(org.apache.druid.math.expr.Parser) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) DimensionSelector(org.apache.druid.segment.DimensionSelector) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) Assert(org.junit.Assert) CloseableUtils(org.apache.druid.utils.CloseableUtils) Collections(java.util.Collections)
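
The test calls the positional IncrementalIndexSchema constructor directly. For comparison, a sketch of what should be the equivalent Builder form for the same arguments, assuming the same imports as the test above:

    IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(0)
        .withTimestampSpec(new TimestampSpec("time", "millis", DateTimes.nowUtc()))
        .withQueryGranularity(Granularities.NONE)
        .withVirtualColumns(VirtualColumns.EMPTY)
        .withDimensionsSpec(DimensionsSpec.EMPTY)
        .withMetrics(new CountAggregatorFactory("count"))
        .withRollup(true)
        .build();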

Aggregations

IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema): 19 uses
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 16 uses
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 14 uses
File (java.io.File): 10 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 8 uses
Test (org.junit.Test): 8 uses
ArrayList (java.util.ArrayList): 7 uses
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 7 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 7 uses
IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest): 6 uses
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 5 uses
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 5 uses
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 5 uses
StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema): 4 uses
IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException): 4 uses
IOException (java.io.IOException): 3 uses
List (java.util.List): 3 uses
InputRow (org.apache.druid.data.input.InputRow): 3 uses
ISE (org.apache.druid.java.util.common.ISE): 3 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 3 uses