Example 96 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class IndexMergerTestBase, method testAddMetrics.

@Test
public void testAddMetrics() throws IOException {
    // index1 carries only metric "A"; index2 carries metrics "A" and "C".
    IncrementalIndex index1 = IncrementalIndexTest.createIndex(
        new AggregatorFactory[] { new LongSumAggregatorFactory("A", "A") });
    closer.closeLater(index1);
    long timestamp = System.currentTimeMillis();
    index1.add(new MapBasedInputRow(timestamp, Arrays.asList("dim1", "dim2"),
        ImmutableMap.of("dim1", "1", "dim2", "2", "A", 5)));
    IncrementalIndex index2 = IncrementalIndexTest.createIndex(new AggregatorFactory[] {
        new LongSumAggregatorFactory("A", "A"), new LongSumAggregatorFactory("C", "C") });
    index2.add(new MapBasedInputRow(timestamp, Arrays.asList("dim1", "dim2"),
        ImmutableMap.of("dim1", "1", "dim2", "2", "A", 5, "C", 6)));
    closer.closeLater(index2);
    Interval interval = new Interval(DateTimes.EPOCH, DateTimes.nowUtc());
    RoaringBitmapFactory factory = new RoaringBitmapFactory();
    List<IndexableAdapter> toMerge = Arrays.asList(
        new IncrementalIndexAdapter(interval, index1, factory),
        new IncrementalIndexAdapter(interval, index2, factory));
    final File tmpDirMerged = temporaryFolder.newFolder();
    // Merging with both aggregators must surface the union of the input metric sets.
    File merged = indexMerger.merge(toMerge, true, new AggregatorFactory[] {
        new LongSumAggregatorFactory("A", "A"), new LongSumAggregatorFactory("C", "C") },
        tmpDirMerged, indexSpec, -1);
    final QueryableIndexStorageAdapter adapter =
        new QueryableIndexStorageAdapter(closer.closeLater(indexIO.loadIndex(merged)));
    Assert.assertEquals(ImmutableSet.of("A", "C"), ImmutableSet.copyOf(adapter.getAvailableMetrics()));
}
Also used : IncrementalIndexAdapter(org.apache.druid.segment.incremental.IncrementalIndexAdapter) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) File(java.io.File) Interval(org.joda.time.Interval) IncrementalIndexTest(org.apache.druid.segment.data.IncrementalIndexTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
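The IncrementalIndexTest.createIndex helper hides how the index is built. As a point of reference, here is a minimal sketch of an equivalent on-heap build, using the same IncrementalIndexSchema.Builder and OnheapIncrementalIndex.Builder APIs that appear in Example 98 below; the max-row-count value is an arbitrary assumption for this sketch:

// A sketch of what createIndex(...) plausibly does for the single-metric case:
// build an on-heap incremental index whose only metric is a long sum over "A".
IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
    .withMetrics(new LongSumAggregatorFactory("A", "A"))
    .build();
IncrementalIndex index = new OnheapIncrementalIndex.Builder()
    .setIndexSchema(schema)
    .setMaxRowCount(1000) // arbitrary cap, chosen for this sketch
    .build();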

Example 97 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class QueryableIndexIndexableAdapterTest, method testGetBitmapIndex.

@Test
public void testGetBitmapIndex() throws Exception {
    final long timestamp = System.currentTimeMillis();
    IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
    IncrementalIndexTest.populateIndex(timestamp, toPersist);
    final File tempDir = temporaryFolder.newFolder();
    QueryableIndex index = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, INDEX_SPEC, null)));
    IndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
    String dimension = "dim1";
    // null is added to all dimensions that have values
    @SuppressWarnings("UnusedAssignment")
    BitmapValues bitmapValues = adapter.getBitmapValues(dimension, 0);
    try (CloseableIndexed<String> dimValueLookup = adapter.getDimValueLookup(dimension)) {
        for (int i = 0; i < dimValueLookup.size(); i++) {
            bitmapValues = adapter.getBitmapValues(dimension, i);
            Assert.assertEquals(1, bitmapValues.size());
        }
    }
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) BitmapValues(org.apache.druid.segment.data.BitmapValues) File(java.io.File) Test(org.junit.Test) IncrementalIndexTest(org.apache.druid.segment.data.IncrementalIndexTest)
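The assertion above only checks the size of each BitmapValues. If the row ids themselves are needed, a small helper along these lines would do; this is a sketch that assumes BitmapValues exposes an org.roaringbitmap.IntIterator via iterator():

// Materialize the row ids held by a BitmapValues into a list.
static List<Integer> toRowIds(BitmapValues values) {
    List<Integer> rowIds = new ArrayList<>(values.size());
    org.roaringbitmap.IntIterator it = values.iterator();
    while (it.hasNext()) {
        rowIds.add(it.next());
    }
    return rowIds;
}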

Example 98 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class ExpressionSelectorsTest, method test_incrementalIndexStringSelector.

@Test
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException {
    // This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
    // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
    // exception when trying to make an optimized string expression selector that was not appropriate for the
    // underlying dimension selector.
    // The bug occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so
    // those conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details.
    IncrementalIndexSchema schema = new IncrementalIndexSchema(
        0, new TimestampSpec("time", "millis", DateTimes.nowUtc()), Granularities.NONE,
        VirtualColumns.EMPTY, DimensionsSpec.EMPTY,
        new AggregatorFactory[] { new CountAggregatorFactory("count") }, true);
    IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setMaxRowCount(100).setIndexSchema(schema).build();
    // Two schemaless rows with disjoint dimensions: "x" appears only in the first row, "y" only in the second.
    index.add(new MapBasedInputRow(DateTimes.nowUtc().getMillis(), ImmutableList.of("x"), ImmutableMap.of("x", "foo")));
    index.add(new MapBasedInputRow(DateTimes.nowUtc().plusMillis(1000).getMillis(), ImmutableList.of("y"), ImmutableMap.of("y", "foo")));
    IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
    Sequence<Cursor> cursors = adapter.makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null);
    int rowsProcessed = cursors.map(cursor -> {
        DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(cursor.getColumnSelectorFactory(), Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()), null);
        DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(cursor.getColumnSelectorFactory(), Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()), null);
        int rowCount = 0;
        while (!cursor.isDone()) {
            Object x = xExprSelector.getObject();
            Object y = yExprSelector.getObject();
            List<String> expectedFoo = Collections.singletonList("foofoo");
            List<String> expectedNull = NullHandling.replaceWithDefault() ? Collections.singletonList("foo") : Collections.singletonList(null);
            if (rowCount == 0) {
                Assert.assertEquals(expectedFoo, x);
                Assert.assertEquals(expectedNull, y);
            } else {
                Assert.assertEquals(expectedNull, x);
                Assert.assertEquals(expectedFoo, y);
            }
            rowCount++;
            cursor.advance();
        }
        return rowCount;
    }).accumulate(0, (in, acc) -> in + acc);
    Assert.assertEquals(2, rowsProcessed);
}
Also used : SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) StorageAdapter(org.apache.druid.segment.StorageAdapter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GeneratorBasicSchemas(org.apache.druid.segment.generator.GeneratorBasicSchemas) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) Expr(org.apache.druid.math.expr.Expr) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) AfterClass(org.junit.AfterClass) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) ExprEval(org.apache.druid.math.expr.ExprEval) BaseSingleValueDimensionSelector(org.apache.druid.segment.BaseSingleValueDimensionSelector) TestObjectColumnSelector(org.apache.druid.segment.TestObjectColumnSelector) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BeforeClass(org.junit.BeforeClass) Intervals(org.apache.druid.java.util.common.Intervals) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) Supplier(com.google.common.base.Supplier) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Parser(org.apache.druid.math.expr.Parser) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) DimensionSelector(org.apache.druid.segment.DimensionSelector) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) Assert(org.junit.Assert) CloseableUtils(org.apache.druid.utils.CloseableUtils) Collections(java.util.Collections)
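The selectors above exercise the expression through cursor plumbing, but the expression itself can be evaluated standalone. A sketch; InputBindings.forMap is an assumption about the binding helper (older Druid versions exposed Parser.withMap instead):

// Parse and evaluate concat(x, 'foo') against a simple key/value binding.
Expr expr = Parser.parse("concat(x, 'foo')", ExprMacroTable.nil());
ExprEval result = expr.eval(InputBindings.forMap(ImmutableMap.of("x", "bar")));
// result.asString() yields "barfoo"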

Example 99 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class InputSourceSampler, method sample.

public SamplerResponse sample(
    final InputSource inputSource,
    // inputFormat can be null only if inputSource.needsFormat() returns false or a parser is specified
    @Nullable final InputFormat inputFormat,
    @Nullable final DataSchema dataSchema,
    @Nullable final SamplerConfig samplerConfig) {
    Preconditions.checkNotNull(inputSource, "inputSource required");
    if (inputSource.needsFormat()) {
        Preconditions.checkNotNull(inputFormat, "inputFormat required");
    }
    final DataSchema nonNullDataSchema = dataSchema == null ? DEFAULT_DATA_SCHEMA : dataSchema;
    final SamplerConfig nonNullSamplerConfig = samplerConfig == null ? SamplerConfig.empty() : samplerConfig;
    final Closer closer = Closer.create();
    final File tempDir = FileUtils.createTempDir();
    closer.register(() -> FileUtils.deleteDirectory(tempDir));
    try {
        final InputSourceReader reader = buildReader(nonNullSamplerConfig, nonNullDataSchema, inputSource, inputFormat, tempDir);
        try (final CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample();
            final IncrementalIndex index = buildIncrementalIndex(nonNullSamplerConfig, nonNullDataSchema);
            // registering the outer Closer here ensures the temp directory is deleted on exit
            final Closer closer1 = closer) {
            List<SamplerResponseRow> responseRows = new ArrayList<>(nonNullSamplerConfig.getNumRows());
            int numRowsIndexed = 0;
            while (responseRows.size() < nonNullSamplerConfig.getNumRows() && iterator.hasNext()) {
                final InputRowListPlusRawValues inputRowListPlusRawValues = iterator.next();
                final List<Map<String, Object>> rawColumnsList = inputRowListPlusRawValues.getRawValuesList();
                final ParseException parseException = inputRowListPlusRawValues.getParseException();
                if (parseException != null) {
                    if (rawColumnsList != null) {
                        // add all rows to response
                        responseRows.addAll(rawColumnsList.stream().map(rawColumns -> new SamplerResponseRow(rawColumns, null, true, parseException.getMessage())).collect(Collectors.toList()));
                    } else {
                        // no data parsed, add one response row
                        responseRows.add(new SamplerResponseRow(null, null, true, parseException.getMessage()));
                    }
                    continue;
                }
                List<InputRow> inputRows = inputRowListPlusRawValues.getInputRows();
                if (inputRows == null) {
                    continue;
                }
                for (int i = 0; i < inputRows.size(); i++) {
                    // InputRowListPlusRawValues guarantees the size of rawColumnsList and inputRows are the same
                    Map<String, Object> rawColumns = rawColumnsList == null ? null : rawColumnsList.get(i);
                    InputRow row = inputRows.get(i);
                    // keep the index of the row to be added to responseRows for further use
                    final int rowIndex = responseRows.size();
                    IncrementalIndexAddResult addResult = index.add(new SamplerInputRow(row, rowIndex), true);
                    if (addResult.hasParseException()) {
                        responseRows.add(new SamplerResponseRow(rawColumns, null, true, addResult.getParseException().getMessage()));
                    } else {
                        // store the raw value; will be merged with the data from the IncrementalIndex later
                        responseRows.add(new SamplerResponseRow(rawColumns, null, null, null));
                        numRowsIndexed++;
                    }
                }
            }
            final List<String> columnNames = index.getColumnNames();
            columnNames.remove(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
            for (Row row : index) {
                Map<String, Object> parsed = new LinkedHashMap<>();
                parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
                columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
                Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
                if (sortKey != null) {
                    responseRows.set(sortKey.intValue(), responseRows.get(sortKey.intValue()).withParsed(parsed));
                }
            }
            // trim responseRows so it does not exceed the requested number of rows
            if (responseRows.size() > nonNullSamplerConfig.getNumRows()) {
                responseRows = responseRows.subList(0, nonNullSamplerConfig.getNumRows());
            }
            int numRowsRead = responseRows.size();
            return new SamplerResponse(numRowsRead, numRowsIndexed, responseRows.stream().filter(Objects::nonNull).filter(x -> x.getParsed() != null || x.isUnparseable() != null).collect(Collectors.toList()));
        }
    } catch (Exception e) {
        throw new SamplerException(e, "Failed to sample data: %s", e.getMessage());
    }
}
Also used : ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Closer(org.apache.druid.java.util.common.io.Closer) InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) ParseException(org.apache.druid.java.util.common.parsers.ParseException) DataSchema(org.apache.druid.segment.indexing.DataSchema) InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimedShutoffInputSourceReader(org.apache.druid.data.input.impl.TimedShutoffInputSourceReader) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) InputRow(org.apache.druid.data.input.InputRow) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Row(org.apache.druid.data.input.Row) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
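For context, here is a hypothetical invocation of sample(); the no-arg InputSourceSampler constructor and the JsonInputFormat argument list are assumptions that vary across Druid versions:

// Sample two inline JSON rows; dataSchema and samplerConfig may be null,
// in which case the defaults shown in the method body are substituted.
InputSourceSampler sampler = new InputSourceSampler();
SamplerResponse response = sampler.sample(
    new InlineInputSource("{\"t\":\"2020-01-01\",\"x\":\"foo\"}\n{\"t\":\"2020-01-01\",\"x\":\"bar\"}"),
    new JsonInputFormat(null, null, null),
    null,  // dataSchema: DEFAULT_DATA_SCHEMA is substituted
    null); // samplerConfig: SamplerConfig.empty() is substituted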

Example 100 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class IncrementalIndexTest, method testConcurrentAdd.

@Test
public void testConcurrentAdd() throws Exception {
    final IncrementalIndex index = indexCreator.createIndex((Object) DEFAULT_AGGREGATOR_FACTORIES);
    final int threadCount = 10;
    final int elementsPerThread = 200;
    final int dimensionCount = 5;
    ExecutorService executor = Executors.newFixedThreadPool(threadCount);
    final long timestamp = System.currentTimeMillis();
    final CountDownLatch latch = new CountDownLatch(threadCount);
    for (int j = 0; j < threadCount; j++) {
        executor.submit(new Runnable() {

            @Override
            public void run() {
                try {
                    for (int i = 0; i < elementsPerThread; i++) {
                        index.add(getRow(timestamp + i, i, dimensionCount));
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
                latch.countDown();
            }
        });
    }
    Assert.assertTrue(latch.await(60, TimeUnit.SECONDS));
    boolean isRollup = index.isRollup();
    Assert.assertEquals(dimensionCount, index.getDimensionNames().size());
    // With rollup, the identical rows added by each thread collapse into one row per timestamp.
    Assert.assertEquals(elementsPerThread * (isRollup ? 1 : threadCount), index.size());
    Iterator<Row> iterator = index.iterator();
    int curr = 0;
    while (iterator.hasNext()) {
        Row row = iterator.next();
        Assert.assertEquals(timestamp + (isRollup ? curr : curr / threadCount), row.getTimestampFromEpoch());
        Assert.assertEquals(isRollup ? threadCount : 1, row.getMetric("count").intValue());
        curr++;
    }
    Assert.assertEquals(elementsPerThread * (isRollup ? 1 : threadCount), curr);
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) ExecutorService(java.util.concurrent.ExecutorService) Row(org.apache.druid.data.input.Row) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) CountDownLatch(java.util.concurrent.CountDownLatch) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) ExecutionException(java.util.concurrent.ExecutionException) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
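The getRow helper belongs to the surrounding test class. A plausible sketch of it follows, with the dimension naming as an assumption: because every thread builds the identical row for a given i, the rollup branch can collapse the input to elementsPerThread rows with count == threadCount.

// Build a row with dimensionCount dimensions; identical rows across threads
// are what allow rollup to merge them into one aggregated row per timestamp.
private static MapBasedInputRow getRow(long timestamp, int rowID, int dimensionCount) {
    List<String> dimensions = new ArrayList<>(dimensionCount);
    Map<String, Object> event = new HashMap<>();
    for (int i = 0; i < dimensionCount; i++) {
        String dim = "Dim_" + i;
        dimensions.add(dim);
        event.put(dim, dim + rowID);
    }
    return new MapBasedInputRow(timestamp, dimensions, event);
}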

Aggregations

IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 109 usages
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 85 usages
File (java.io.File): 59 usages
Test (org.junit.Test): 51 usages
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 46 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 46 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 26 usages
IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest): 26 usages
ArrayList (java.util.ArrayList): 25 usages
IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema): 25 usages
IndexSpec (org.apache.druid.segment.IndexSpec): 19 usages
QueryableIndex (org.apache.druid.segment.QueryableIndex): 19 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 17 usages
InputRow (org.apache.druid.data.input.InputRow): 15 usages
IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 14 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 12 usages
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 11 usages
IOException (java.io.IOException): 10 usages
Before (org.junit.Before): 10 usages
Interval (org.joda.time.Interval): 9 usages