Search in sources :

Example 6 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class FilterPartitionBenchmark method readWithExFnPostFilter.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readWithExFnPostFilter(Blackhole blackhole) throws Exception {
    Filter filter = new NoBitmapSelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN).toFilter();
    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
    Sequence<Cursor> cursors = makeCursors(sa, filter);
    Sequence<List<String>> stringListSeq = readCursors(cursors, blackhole);
    List<String> strings = Sequences.toList(Sequences.limit(stringListSeq, 1), Lists.<List<String>>newArrayList()).get(0);
    for (String st : strings) {
        blackhole.consume(st);
    }
}
Also used : AndFilter(io.druid.segment.filter.AndFilter) BoundFilter(io.druid.segment.filter.BoundFilter) DimensionPredicateFilter(io.druid.segment.filter.DimensionPredicateFilter) BoundDimFilter(io.druid.query.filter.BoundDimFilter) SelectorFilter(io.druid.segment.filter.SelectorFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) DimFilter(io.druid.query.filter.DimFilter) Filter(io.druid.query.filter.Filter) OrFilter(io.druid.segment.filter.OrFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) StorageAdapter(io.druid.segment.StorageAdapter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) List(java.util.List) ArrayList(java.util.ArrayList) Cursor(io.druid.segment.Cursor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 7 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class IngestSegmentFirehoseFactory method connect.

@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    if (taskToolbox == null) {
        // Noop Task is just used to create the toolbox and list segments.
        taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
    }
    try {
        final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
        final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
        for (DataSegment segment : usedSegments) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
        final List<String> dims;
        if (dimensions != null) {
            dims = dimensions;
        } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
            dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
        } else {
            Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
                    return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getDimensions();
                        }
                    }));
                }
            })));
            dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
        }
        final List<String> metricsList;
        if (metrics != null) {
            metricsList = metrics;
        } else {
            Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
                    return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getMetrics();
                        }
                    }));
                }
            })));
            metricsList = Lists.newArrayList(metricsSet);
        }
        final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {

            @Override
            public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {

                    @Override
                    public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                        final DataSegment segment = input.getObject();
                        try {
                            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });
            }
        })));
        return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } catch (SegmentLoadingException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) Set(java.util.Set) NoopTask(io.druid.indexing.common.task.NoopTask) DataSegment(io.druid.timeline.DataSegment) Function(com.google.common.base.Function) PartitionChunk(io.druid.timeline.partition.PartitionChunk) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) File(java.io.File)

Example 8 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class DatasourceRecordReader method initialize.

@Override
public void initialize(InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException {
    spec = readAndVerifyDatasourceIngestionSpec(context.getConfiguration(), HadoopDruidIndexerConfig.JSON_MAPPER);
    List<WindowedDataSegment> segments = ((DatasourceInputSplit) split).getSegments();
    List<WindowedStorageAdapter> adapters = Lists.transform(segments, new Function<WindowedDataSegment, WindowedStorageAdapter>() {

        @Override
        public WindowedStorageAdapter apply(WindowedDataSegment segment) {
            try {
                logger.info("Getting storage path for segment [%s]", segment.getSegment().getIdentifier());
                Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));
                logger.info("Fetch segment files from [%s]", path);
                File dir = Files.createTempDir();
                tmpSegmentDirs.add(dir);
                logger.info("Locally storing fetched segment at [%s]", dir);
                JobHelper.unzipNoGuava(path, context.getConfiguration(), dir, context);
                logger.info("finished fetching segment files");
                QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
                indexes.add(index);
                numRows += index.getNumRows();
                return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(index), segment.getInterval());
            } catch (IOException ex) {
                throw Throwables.propagate(ex);
            }
        }
    });
    firehose = new IngestSegmentFirehose(adapters, spec.getDimensions(), spec.getMetrics(), spec.getFilter(), spec.getGranularity());
}
Also used : Path(org.apache.hadoop.fs.Path) IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) QueryableIndex(io.druid.segment.QueryableIndex) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) File(java.io.File)

Example 9 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class BatchDeltaIngestionTest method testIngestion.

private void testIngestion(HadoopDruidIndexerConfig config, List<ImmutableMap<String, Object>> expectedRowsGenerated, WindowedDataSegment windowedDataSegment) throws Exception {
    IndexGeneratorJob job = new IndexGeneratorJob(config);
    JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
    File segmentFolder = new File(String.format("%s/%s/%s_%s/%s/0", config.getSchema().getIOConfig().getSegmentOutputPath(), config.getSchema().getDataSchema().getDataSource(), INTERVAL_FULL.getStart().toString(), INTERVAL_FULL.getEnd().toString(), config.getSchema().getTuningConfig().getVersion()));
    Assert.assertTrue(segmentFolder.exists());
    File descriptor = new File(segmentFolder, "descriptor.json");
    File indexZip = new File(segmentFolder, "index.zip");
    Assert.assertTrue(descriptor.exists());
    Assert.assertTrue(indexZip.exists());
    DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
    Assert.assertEquals("website", dataSegment.getDataSource());
    Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
    Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
    Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
    Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
    Assert.assertEquals("host", dataSegment.getDimensions().get(0));
    Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
    Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
    Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
    HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
    Assert.assertEquals(0, spec.getPartitionNum());
    Assert.assertEquals(1, spec.getPartitions());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
    QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null, Granularities.NONE);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRowsGenerated, rows);
}
Also used : HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) Firehose(io.druid.data.input.Firehose) IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) StorageAdapter(io.druid.segment.StorageAdapter) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) DataSegment(io.druid.timeline.DataSegment) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) LocalDataSegmentPuller(io.druid.segment.loading.LocalDataSegmentPuller) QueryableIndex(io.druid.segment.QueryableIndex) InputRow(io.druid.data.input.InputRow) File(java.io.File) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter)

Example 10 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class BaseFilterTest method makeConstructors.

public static Collection<Object[]> makeConstructors() {
    final List<Object[]> constructors = Lists.newArrayList();
    final Map<String, BitmapSerdeFactory> bitmapSerdeFactories = ImmutableMap.<String, BitmapSerdeFactory>of("concise", new ConciseBitmapSerdeFactory(), "roaring", new RoaringBitmapSerdeFactory(true));
    final Map<String, IndexMerger> indexMergers = ImmutableMap.<String, IndexMerger>of("IndexMerger", TestHelper.getTestIndexMerger(), "IndexMergerV9", TestHelper.getTestIndexMergerV9());
    final Map<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finishers = ImmutableMap.of("incremental", new Function<IndexBuilder, Pair<StorageAdapter, Closeable>>() {

        @Override
        public Pair<StorageAdapter, Closeable> apply(IndexBuilder input) {
            final IncrementalIndex index = input.buildIncrementalIndex();
            return Pair.<StorageAdapter, Closeable>of(new IncrementalIndexStorageAdapter(index), new Closeable() {

                @Override
                public void close() throws IOException {
                    index.close();
                }
            });
        }
    }, "mmapped", new Function<IndexBuilder, Pair<StorageAdapter, Closeable>>() {

        @Override
        public Pair<StorageAdapter, Closeable> apply(IndexBuilder input) {
            final QueryableIndex index = input.buildMMappedIndex();
            return Pair.<StorageAdapter, Closeable>of(new QueryableIndexStorageAdapter(index), new Closeable() {

                @Override
                public void close() throws IOException {
                    index.close();
                }
            });
        }
    }, "mmappedMerged", new Function<IndexBuilder, Pair<StorageAdapter, Closeable>>() {

        @Override
        public Pair<StorageAdapter, Closeable> apply(IndexBuilder input) {
            final QueryableIndex index = input.buildMMappedMergedIndex();
            return Pair.<StorageAdapter, Closeable>of(new QueryableIndexStorageAdapter(index), new Closeable() {

                @Override
                public void close() throws IOException {
                    index.close();
                }
            });
        }
    });
    for (Map.Entry<String, BitmapSerdeFactory> bitmapSerdeFactoryEntry : bitmapSerdeFactories.entrySet()) {
        for (Map.Entry<String, IndexMerger> indexMergerEntry : indexMergers.entrySet()) {
            for (Map.Entry<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finisherEntry : finishers.entrySet()) {
                for (boolean cnf : ImmutableList.of(false, true)) {
                    for (boolean optimize : ImmutableList.of(false, true)) {
                        final String testName = String.format("bitmaps[%s], indexMerger[%s], finisher[%s], optimize[%s]", bitmapSerdeFactoryEntry.getKey(), indexMergerEntry.getKey(), finisherEntry.getKey(), optimize);
                        final IndexBuilder indexBuilder = IndexBuilder.create().indexSpec(new IndexSpec(bitmapSerdeFactoryEntry.getValue(), null, null, null)).indexMerger(indexMergerEntry.getValue());
                        constructors.add(new Object[] { testName, indexBuilder, finisherEntry.getValue(), cnf, optimize });
                    }
                }
            }
        }
    }
    return constructors;
}
Also used : IndexSpec(io.druid.segment.IndexSpec) Closeable(java.io.Closeable) IncrementalIndexStorageAdapter(io.druid.segment.incremental.IncrementalIndexStorageAdapter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) StorageAdapter(io.druid.segment.StorageAdapter) Function(com.google.common.base.Function) RoaringBitmapSerdeFactory(io.druid.segment.data.RoaringBitmapSerdeFactory) ConciseBitmapSerdeFactory(io.druid.segment.data.ConciseBitmapSerdeFactory) Pair(io.druid.java.util.common.Pair) IndexMerger(io.druid.segment.IndexMerger) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) IndexBuilder(io.druid.segment.IndexBuilder) QueryableIndex(io.druid.segment.QueryableIndex) IncrementalIndexStorageAdapter(io.druid.segment.incremental.IncrementalIndexStorageAdapter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) RoaringBitmapSerdeFactory(io.druid.segment.data.RoaringBitmapSerdeFactory) ConciseBitmapSerdeFactory(io.druid.segment.data.ConciseBitmapSerdeFactory) BitmapSerdeFactory(io.druid.segment.data.BitmapSerdeFactory)

Aggregations

QueryableIndexStorageAdapter (io.druid.segment.QueryableIndexStorageAdapter)20 StorageAdapter (io.druid.segment.StorageAdapter)16 Cursor (io.druid.segment.Cursor)14 ArrayList (java.util.ArrayList)13 List (java.util.List)13 Benchmark (org.openjdk.jmh.annotations.Benchmark)13 BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode)13 OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit)13 DimFilter (io.druid.query.filter.DimFilter)9 AndDimFilter (io.druid.query.filter.AndDimFilter)8 BoundDimFilter (io.druid.query.filter.BoundDimFilter)8 OrDimFilter (io.druid.query.filter.OrDimFilter)8 SelectorDimFilter (io.druid.query.filter.SelectorDimFilter)8 Filter (io.druid.query.filter.Filter)6 QueryableIndex (io.druid.segment.QueryableIndex)5 AndFilter (io.druid.segment.filter.AndFilter)5 BoundFilter (io.druid.segment.filter.BoundFilter)5 DimensionPredicateFilter (io.druid.segment.filter.DimensionPredicateFilter)5 OrFilter (io.druid.segment.filter.OrFilter)5 SelectorFilter (io.druid.segment.filter.SelectorFilter)5