Search in sources :

Example 36 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in the project druid by druid-io.

From the class BatchDeltaIngestionTest, method testIngestion.

/**
 * Runs an {@link IndexGeneratorJob} for {@code config}, then reads the published segment back
 * and hands its rows to {@code verifyRows}.
 *
 * <p>Sequence: run the job, rename the index files, delete the intermediate path (and assert it
 * no longer exists), locate {@code index.zip} under the expected segment folder, unzip it into a
 * fresh temporary folder, load it as a {@link QueryableIndex}, and drain an
 * {@link IngestSegmentFirehose} windowed to {@code windowedDataSegment.getInterval()}.
 *
 * @param config                indexer configuration used for the job and for locating its output
 * @param expectedRowsGenerated rows passed to {@code verifyRows} as the expectation
 * @param windowedDataSegment   supplies the interval the storage adapter is windowed to
 * @param expectedDimensions    dimension names given to the firehose and to {@code verifyRows}
 * @param expectedMetrics       metric names given to the firehose and to {@code verifyRows}
 * @throws Exception if any step of the job, file handling, or segment loading fails
 */
private void testIngestion(HadoopDruidIndexerConfig config, List<ImmutableMap<String, Object>> expectedRowsGenerated, WindowedDataSegment windowedDataSegment, List<String> expectedDimensions, List<String> expectedMetrics) throws Exception {
    IndexGeneratorJob job = new IndexGeneratorJob(config);
    Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
    List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths = IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
    JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
    JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
    File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
    Assert.assertFalse(workingPath.exists());
    File segmentFolder = new File(StringUtils.format("%s/%s/%s_%s/%s/0", config.getSchema().getIOConfig().getSegmentOutputPath(), config.getSchema().getDataSchema().getDataSource(), INTERVAL_FULL.getStart().toString(), INTERVAL_FULL.getEnd().toString(), config.getSchema().getTuningConfig().getVersion()));
    Assert.assertTrue(segmentFolder.exists());
    File indexZip = new File(segmentFolder, "index.zip");
    Assert.assertTrue(indexZip.exists());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(indexZip, tmpUnzippedSegmentDir);
    // Both the loaded index and the firehose are closeable; release them even when an
    // assertion fails so the unzipped segment files are not left open.
    try (QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir)) {
        StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
        try (Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())), TransformSpec.NONE, expectedDimensions, expectedMetrics, null)) {
            List<InputRow> rows = new ArrayList<>();
            while (firehose.hasMore()) {
                rows.add(firehose.nextRow());
            }
            // Verify while the segment is still open: row values may reference data owned by
            // the index (NOTE(review): conservative assumption - confirm if moving this out).
            verifyRows(expectedRowsGenerated, rows, expectedDimensions, expectedMetrics);
        }
    }
}
Also used : IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Firehose(org.apache.druid.data.input.Firehose) ArrayList(java.util.ArrayList) StorageAdapter(org.apache.druid.segment.StorageAdapter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter)

Example 37 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in the project druid by druid-io.

From the class BaseFilterTest, method makeConstructors.

/**
 * Builds the parameter sets for the filter tests: one entry for every combination of bitmap
 * serde factory, segment write-out medium factory, index "finisher", {@code cnf} flag and
 * {@code optimize} flag.
 *
 * <p>Each entry is an {@code Object[]} of
 * {@code {testName, indexBuilder, finisher, cnf, optimize}}, where {@code finisher} turns an
 * {@link IndexBuilder} into a {@link StorageAdapter} paired with the {@link Closeable} that
 * releases it.
 *
 * @return the list of parameter arrays, in nested-loop order
 */
public static Collection<Object[]> makeConstructors() {
    final Map<String, BitmapSerdeFactory> bitmapFactoriesByName = ImmutableMap.of("concise", new ConciseBitmapSerdeFactory(), "roaring", new RoaringBitmapSerdeFactory(true));
    final Map<String, SegmentWriteOutMediumFactory> writeOutFactoriesByName = ImmutableMap.of("tmpFile segment write-out medium", TmpFileSegmentWriteOutMediumFactory.instance(), "off-heap memory segment write-out medium", OffHeapMemorySegmentWriteOutMediumFactory.instance());

    // Finishers are registered one at a time; the builder keeps insertion order.
    final ImmutableMap.Builder<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finisherRegistry = ImmutableMap.builder();
    finisherRegistry.put("incremental", builder -> {
        final IncrementalIndex incremental = builder.buildIncrementalIndex();
        return Pair.of(new IncrementalIndexStorageAdapter(incremental), incremental);
    });
    finisherRegistry.put("mmapped", builder -> {
        final QueryableIndex mapped = builder.buildMMappedIndex();
        return Pair.of(new QueryableIndexStorageAdapter(mapped), mapped);
    });
    finisherRegistry.put("mmappedMerged", builder -> {
        final QueryableIndex merged = builder.buildMMappedMergedIndex();
        return Pair.of(new QueryableIndexStorageAdapter(merged), merged);
    });
    // The row-based finishers are paired with a no-op Closeable.
    finisherRegistry.put("rowBasedWithoutTypeSignature", builder -> {
        return Pair.of(builder.buildRowBasedSegmentWithoutTypeSignature().asStorageAdapter(), () -> {
        });
    });
    finisherRegistry.put("rowBasedWithTypeSignature", builder -> {
        return Pair.of(builder.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> {
        });
    });
    final Map<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finishersByName = finisherRegistry.build();

    final boolean[] flagValues = { false, true };
    final List<Object[]> parameterSets = new ArrayList<>();
    for (Map.Entry<String, BitmapSerdeFactory> bitmapEntry : bitmapFactoriesByName.entrySet()) {
        final String bitmapName = bitmapEntry.getKey();
        final BitmapSerdeFactory bitmapFactory = bitmapEntry.getValue();
        for (Map.Entry<String, SegmentWriteOutMediumFactory> writeOutEntry : writeOutFactoriesByName.entrySet()) {
            final String writeOutName = writeOutEntry.getKey();
            final SegmentWriteOutMediumFactory writeOutFactory = writeOutEntry.getValue();
            for (Map.Entry<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finisher : finishersByName.entrySet()) {
                for (boolean cnf : flagValues) {
                    for (boolean optimize : flagValues) {
                        final String testName = StringUtils.format("bitmaps[%s], indexMerger[%s], finisher[%s], cnf[%s], optimize[%s]", bitmapName, writeOutName, finisher.getKey(), cnf, optimize);
                        // A fresh builder (and IndexSpec) per parameter set.
                        final IndexSpec indexSpec = new IndexSpec(bitmapFactory, null, null, null);
                        final IndexBuilder indexBuilder = IndexBuilder.create().schema(DEFAULT_INDEX_SCHEMA).indexSpec(indexSpec).segmentWriteOutMediumFactory(writeOutFactory);
                        parameterSets.add(new Object[] { testName, indexBuilder, finisher.getValue(), cnf, optimize });
                    }
                }
            }
        }
    }
    return parameterSets;
}
Also used : Arrays(java.util.Arrays) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) RowAdapters(org.apache.druid.segment.RowAdapters) IndexSpec(org.apache.druid.segment.IndexSpec) ExprType(org.apache.druid.math.expr.ExprType) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) IndexedInts(org.apache.druid.segment.data.IndexedInts) StorageAdapter(org.apache.druid.segment.StorageAdapter) TmpFileSegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory) ByteBuffer(java.nio.ByteBuffer) Pair(org.apache.druid.java.util.common.Pair) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) ExpressionType(org.apache.druid.math.expr.ExpressionType) Expr(org.apache.druid.math.expr.Expr) Map(java.util.Map) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) OffHeapMemorySegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory) Parameterized(org.junit.runners.Parameterized) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) Function(com.google.common.base.Function) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) IndexBuilder(org.apache.druid.segment.IndexBuilder) VectorObjectSelector(org.apache.druid.segment.vector.VectorObjectSelector) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) 
TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) VectorValueSelector(org.apache.druid.segment.vector.VectorValueSelector) VectorColumnSelectorFactory(org.apache.druid.segment.vector.VectorColumnSelectorFactory) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DimFilter(org.apache.druid.query.filter.DimFilter) BitmapIndexSelector(org.apache.druid.query.filter.BitmapIndexSelector) Iterables(com.google.common.collect.Iterables) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) Intervals(org.apache.druid.java.util.common.Intervals) FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) Parser(org.apache.druid.math.expr.Parser) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) HashMap(java.util.HashMap) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) ArrayList(java.util.ArrayList) VectorCursor(org.apache.druid.segment.vector.VectorCursor) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) VectorValueMatcher(org.apache.druid.query.filter.vector.VectorValueMatcher) RowBasedStorageAdapter(org.apache.druid.segment.RowBasedStorageAdapter) DimensionSelector(org.apache.druid.segment.DimensionSelector) VectorAggregator(org.apache.druid.query.aggregation.VectorAggregator) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) Sequences(org.apache.druid.java.util.common.guava.Sequences) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) 
SingleValueDimensionVectorSelector(org.apache.druid.segment.vector.SingleValueDimensionVectorSelector) Nullable(javax.annotation.Nullable) Before(org.junit.Before) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) BitmapResultFactory(org.apache.druid.query.BitmapResultFactory) ColumnInspector(org.apache.druid.segment.ColumnInspector) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Aggregator(org.apache.druid.query.aggregation.Aggregator) Maps(com.google.common.collect.Maps) ColumnSelector(org.apache.druid.segment.ColumnSelector) Granularities(org.apache.druid.java.util.common.granularity.Granularities) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Rule(org.junit.Rule) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) Closeable(java.io.Closeable) ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) Filter(org.apache.druid.query.filter.Filter) IndexSpec(org.apache.druid.segment.IndexSpec) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) StorageAdapter(org.apache.druid.segment.StorageAdapter) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) RowBasedStorageAdapter(org.apache.druid.segment.RowBasedStorageAdapter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) TmpFileSegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory) 
OffHeapMemorySegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) Function(com.google.common.base.Function) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) IndexBuilder(org.apache.druid.segment.IndexBuilder) QueryableIndex(org.apache.druid.segment.QueryableIndex) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory)

Example 38 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in the project druid by druid-io.

From the class IngestSegmentFirehoseTest, method testReadFromIndexAndWriteAnotherIndex.

/**
 * Exercises a "reindexing" round trip through {@link IngestSegmentFirehose}.
 *
 * <p>A test index is written to a temporary folder and loaded. Its rows are read through a
 * firehose that lists the same windowed adapter twice, and every row is added to a new
 * {@link IncrementalIndex}. The test expects 18 rows read and 9 rows in the new index, then
 * checks the new index's dimensions and metrics and runs a spatial filter over it that must
 * yield exactly one row.
 *
 * @throws Exception if creating, loading, or reading the index fails
 */
@Test
public void testReadFromIndexAndWriteAnotherIndex() throws Exception {
    // Tests a "reindexing" use case that is a common use of ingestSegment.
    File segmentDir = tempFolder.newFolder();
    createTestIndex(segmentDir);
    try (final QueryableIndex qi = indexIO.loadIndex(segmentDir);
        final IncrementalIndex index = new OnheapIncrementalIndex.Builder().setIndexSchema(new IncrementalIndexSchema.Builder().withDimensionsSpec(DIMENSIONS_SPEC_REINDEX).withMetrics(AGGREGATORS_REINDEX.toArray(new AggregatorFactory[0])).build()).setMaxRowCount(5000).build()) {
        final StorageAdapter sa = new QueryableIndexStorageAdapter(qi);
        final WindowedStorageAdapter wsa = new WindowedStorageAdapter(sa, sa.getInterval());
        int count = 0;
        // The firehose is closeable; try-with-resources releases it even if an assertion fails.
        try (final IngestSegmentFirehose firehose = new IngestSegmentFirehose(ImmutableList.of(wsa, wsa), TransformSpec.NONE, ImmutableList.of("host", "spatial"), ImmutableList.of("visited_sum", "unique_hosts"), null)) {
            while (firehose.hasMore()) {
                final InputRow row = firehose.nextRow();
                Assert.assertNotNull(row);
                // Only the very first row is checked field by field.
                if (count == 0) {
                    Assert.assertEquals(DateTimes.of("2014-10-22T00Z"), row.getTimestamp());
                    Assert.assertEquals("host1", row.getRaw("host"));
                    Assert.assertEquals("0,1", row.getRaw("spatial"));
                    Assert.assertEquals(10L, row.getRaw("visited_sum"));
                    Assert.assertEquals(1.0d, ((HyperLogLogCollector) row.getRaw("unique_hosts")).estimateCardinality(), 0.1);
                }
                count++;
                index.add(row);
            }
        }
        // The adapter was listed twice, so 18 rows are read; the index is expected to hold 9
        // (presumably the duplicated rows roll up - confirm against the index schema).
        Assert.assertEquals(18, count);
        // Check the index
        Assert.assertEquals(9, index.size());
        final IncrementalIndexStorageAdapter queryable = new IncrementalIndexStorageAdapter(index);
        Assert.assertEquals(2, queryable.getAvailableDimensions().size());
        Assert.assertEquals("host", queryable.getAvailableDimensions().get(0));
        Assert.assertEquals("spatial", queryable.getAvailableDimensions().get(1));
        Assert.assertEquals(ImmutableList.of("visited_sum", "unique_hosts"), queryable.getAvailableMetrics());
        // Do a spatial filter
        try (final IngestSegmentFirehose firehose2 = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(queryable, Intervals.of("2000/3000"))), TransformSpec.NONE, ImmutableList.of("host", "spatial"), ImmutableList.of("visited_sum", "unique_hosts"), new SpatialDimFilter("spatial", new RadiusBound(new float[] { 1, 0 }, 0.1f)))) {
            final InputRow row = firehose2.nextRow();
            // Exactly one row is expected to match the filter.
            Assert.assertFalse(firehose2.hasMore());
            Assert.assertEquals(DateTimes.of("2014-10-22T00Z"), row.getTimestamp());
            Assert.assertEquals("host2", row.getRaw("host"));
            Assert.assertEquals("1,0", row.getRaw("spatial"));
            Assert.assertEquals(40L, row.getRaw("visited_sum"));
            Assert.assertEquals(1.0d, ((HyperLogLogCollector) row.getRaw("unique_hosts")).estimateCardinality(), 0.1);
        }
    }
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) StorageAdapter(org.apache.druid.segment.StorageAdapter) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SpatialDimFilter(org.apache.druid.query.filter.SpatialDimFilter) RadiusBound(org.apache.druid.collections.spatial.search.RadiusBound) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRow(org.apache.druid.data.input.InputRow) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) File(java.io.File) Test(org.junit.Test)

Example 39 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in the project druid by druid-io.

From the class FireHydrantTest, method testGetSegmentForQueryButNotAbleToAcquireReferences.

/**
 * Checks that {@code hydrant.getSegmentForQuery} returns an empty {@link Optional} when the
 * mapped {@link SegmentReference} refuses to hand out references, and that the hydrant
 * segment's reference count is 0 both before and after the call.
 */
@Test
public void testGetSegmentForQueryButNotAbleToAcquireReferences() {
    final ReferenceCountingSegment hydrantSegment = hydrant.getHydrantSegment();
    Assert.assertEquals(0, hydrantSegment.getNumReferences());

    // Delegates to incrementalIndexSegment for everything except acquireReferences(),
    // which always comes back empty.
    final SegmentReference neverAcquirable = new SegmentReference() {

        @Override
        public SegmentId getId() {
            return incrementalIndexSegment.getId();
        }

        @Override
        public Interval getDataInterval() {
            return incrementalIndexSegment.getDataInterval();
        }

        @Nullable
        @Override
        public QueryableIndex asQueryableIndex() {
            return incrementalIndexSegment.asQueryableIndex();
        }

        @Override
        public StorageAdapter asStorageAdapter() {
            return incrementalIndexSegment.asStorageAdapter();
        }

        @Override
        public Optional<Closeable> acquireReferences() {
            return Optional.empty();
        }

        @Override
        public void close() {
            incrementalIndexSegment.close();
        }
    };

    final Optional<Pair<SegmentReference, Closeable>> queryResult = hydrant.getSegmentForQuery(unused -> neverAcquirable);
    Assert.assertFalse(queryResult.isPresent());
    Assert.assertEquals(0, hydrantSegment.getNumReferences());
}
Also used : ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) Optional(java.util.Optional) SegmentId(org.apache.druid.timeline.SegmentId) QueryableIndex(org.apache.druid.segment.QueryableIndex) SegmentReference(org.apache.druid.segment.SegmentReference) StorageAdapter(org.apache.druid.segment.StorageAdapter) Nullable(javax.annotation.Nullable) Pair(org.apache.druid.java.util.common.Pair) Interval(org.joda.time.Interval) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

StorageAdapter (org.apache.druid.segment.StorageAdapter) 39, Cursor (org.apache.druid.segment.Cursor) 22, QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter) 22, Test (org.junit.Test) 16, Benchmark (org.openjdk.jmh.annotations.Benchmark) 14, BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode) 14, OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit) 14, Filter (org.apache.druid.query.filter.Filter) 13, DimensionSelector (org.apache.druid.segment.DimensionSelector) 11, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 11, DimFilter (org.apache.druid.query.filter.DimFilter) 10, SelectorFilter (org.apache.druid.segment.filter.SelectorFilter) 10, Interval (org.joda.time.Interval) 10, List (java.util.List) 9, AndDimFilter (org.apache.druid.query.filter.AndDimFilter) 9, BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter) 9, OrDimFilter (org.apache.druid.query.filter.OrDimFilter) 9, SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter) 9, ColumnSelectorFactory (org.apache.druid.segment.ColumnSelectorFactory) 8, Filters (org.apache.druid.segment.filter.Filters) 8