Search in sources :

Example 1 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class IngestSegmentFirehoseFactory method connect.

@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    if (taskToolbox == null) {
        // Noop Task is just used to create the toolbox and list segments.
        taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
    }
    try {
        final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
        final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
        for (DataSegment segment : usedSegments) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
        final List<String> dims;
        if (dimensions != null) {
            dims = dimensions;
        } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
            dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
        } else {
            Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
                    return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getDimensions();
                        }
                    }));
                }
            })));
            dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
        }
        final List<String> metricsList;
        if (metrics != null) {
            metricsList = metrics;
        } else {
            Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
                    return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getMetrics();
                        }
                    }));
                }
            })));
            metricsList = Lists.newArrayList(metricsSet);
        }
        final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {

            @Override
            public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {

                    @Override
                    public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                        final DataSegment segment = input.getObject();
                        try {
                            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });
            }
        })));
        return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } catch (SegmentLoadingException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) Set(java.util.Set) NoopTask(io.druid.indexing.common.task.NoopTask) DataSegment(io.druid.timeline.DataSegment) Function(com.google.common.base.Function) PartitionChunk(io.druid.timeline.partition.PartitionChunk) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) File(java.io.File)

Example 2 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class IngestSegmentFirehoseTest method testSanity.

@Test
public void testSanity() throws Exception {
    File segmentDir = tempFolder.newFolder();
    createTestIndex(segmentDir);
    QueryableIndex qi = null;
    try {
        qi = indexIO.loadIndex(segmentDir);
        StorageAdapter sa = new QueryableIndexStorageAdapter(qi);
        WindowedStorageAdapter wsa = new WindowedStorageAdapter(sa, sa.getInterval());
        IngestSegmentFirehose firehose = new IngestSegmentFirehose(ImmutableList.of(wsa, wsa), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null, Granularities.NONE);
        int count = 0;
        while (firehose.hasMore()) {
            firehose.nextRow();
            count++;
        }
        Assert.assertEquals(18, count);
    } finally {
        if (qi != null) {
            qi.close();
        }
    }
}
Also used : QueryableIndex(io.druid.segment.QueryableIndex) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) StorageAdapter(io.druid.segment.StorageAdapter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) File(java.io.File) Test(org.junit.Test)

Example 3 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class DumpSegment method runDump.

private void runDump(final Injector injector, final QueryableIndex index) throws IOException {
    final ObjectMapper objectMapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
    final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    final List<String> columnNames = getColumnsToInclude(index);
    final DimFilter filter = filterJson != null ? objectMapper.readValue(filterJson, DimFilter.class) : null;
    final Sequence<Cursor> cursors = adapter.makeCursors(Filters.toFilter(filter), index.getDataInterval().withChronology(ISOChronology.getInstanceUTC()), VirtualColumns.EMPTY, Granularities.ALL, false);
    withOutputStream(new Function<OutputStream, Object>() {

        @Override
        public Object apply(final OutputStream out) {
            final Sequence<Object> sequence = Sequences.map(cursors, new Function<Cursor, Object>() {

                @Override
                public Object apply(Cursor cursor) {
                    final List<ObjectColumnSelector> selectors = Lists.newArrayList();
                    for (String columnName : columnNames) {
                        selectors.add(makeSelector(columnName, index.getColumn(columnName), cursor));
                    }
                    while (!cursor.isDone()) {
                        final Map<String, Object> row = Maps.newLinkedHashMap();
                        for (int i = 0; i < columnNames.size(); i++) {
                            final String columnName = columnNames.get(i);
                            final Object value = selectors.get(i).get();
                            if (timeISO8601 && columnNames.get(i).equals(Column.TIME_COLUMN_NAME)) {
                                row.put(columnName, new DateTime(value, DateTimeZone.UTC).toString());
                            } else {
                                row.put(columnName, value);
                            }
                        }
                        try {
                            out.write(objectMapper.writeValueAsBytes(row));
                            out.write('\n');
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                        cursor.advance();
                    }
                    return null;
                }
            });
            evaluateSequenceForSideEffects(sequence);
            return null;
        }
    });
}
Also used : OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) Json(io.druid.guice.annotations.Json) IOException(java.io.IOException) Sequence(io.druid.java.util.common.guava.Sequence) Cursor(io.druid.segment.Cursor) DateTime(org.joda.time.DateTime) Function(com.google.common.base.Function) DimFilter(io.druid.query.filter.DimFilter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ObjectColumnSelector(io.druid.segment.ObjectColumnSelector)

Example 4 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class FilterPartitionBenchmark method timeFilterHalf.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void timeFilterHalf(Blackhole blackhole) throws Exception {
    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
    Sequence<Cursor> cursors = makeCursors(sa, timeFilterHalf);
    Sequence<List<Long>> longListSeq = readCursorsLong(cursors, blackhole);
    List<Long> strings = Sequences.toList(Sequences.limit(longListSeq, 1), Lists.<List<Long>>newArrayList()).get(0);
    for (Long st : strings) {
        blackhole.consume(st);
    }
}
Also used : QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) StorageAdapter(io.druid.segment.StorageAdapter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) List(java.util.List) ArrayList(java.util.ArrayList) Cursor(io.druid.segment.Cursor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 5 with QueryableIndexStorageAdapter

use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class FilterPartitionBenchmark method stringRead.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void stringRead(Blackhole blackhole) throws Exception {
    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
    Sequence<Cursor> cursors = makeCursors(sa, null);
    Sequence<List<String>> stringListSeq = readCursors(cursors, blackhole);
    List<String> strings = Sequences.toList(Sequences.limit(stringListSeq, 1), Lists.<List<String>>newArrayList()).get(0);
    for (String st : strings) {
        blackhole.consume(st);
    }
}
Also used : QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) StorageAdapter(io.druid.segment.StorageAdapter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) List(java.util.List) ArrayList(java.util.ArrayList) Cursor(io.druid.segment.Cursor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Aggregations

QueryableIndexStorageAdapter (io.druid.segment.QueryableIndexStorageAdapter)19 StorageAdapter (io.druid.segment.StorageAdapter)16 Cursor (io.druid.segment.Cursor)14 ArrayList (java.util.ArrayList)13 List (java.util.List)13 Benchmark (org.openjdk.jmh.annotations.Benchmark)13 BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode)13 OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit)13 DimFilter (io.druid.query.filter.DimFilter)9 AndDimFilter (io.druid.query.filter.AndDimFilter)8 BoundDimFilter (io.druid.query.filter.BoundDimFilter)8 OrDimFilter (io.druid.query.filter.OrDimFilter)8 SelectorDimFilter (io.druid.query.filter.SelectorDimFilter)8 Filter (io.druid.query.filter.Filter)6 AndFilter (io.druid.segment.filter.AndFilter)6 BoundFilter (io.druid.segment.filter.BoundFilter)6 DimensionPredicateFilter (io.druid.segment.filter.DimensionPredicateFilter)6 OrFilter (io.druid.segment.filter.OrFilter)6 SelectorFilter (io.druid.segment.filter.SelectorFilter)6 QueryableIndex (io.druid.segment.QueryableIndex)4