Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
The class IngestSegmentFirehoseFactory, method connect.
@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException
{
  log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
  if (taskToolbox == null) {
    // Noop Task is just used to create the toolbox and list segments.
    taskToolbox = injector.getInstance(TaskToolboxFactory.class)
                          .build(new NoopTask("reingest", 0, 0, null, null, null));
  }
  try {
    final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient()
                                                      .submit(new SegmentListUsedAction(dataSource, interval, null));
    final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
    VersionedIntervalTimeline<String, DataSegment> timeline =
        new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
    for (DataSegment segment : usedSegments) {
      timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
    final List<String> dims;
    if (dimensions != null) {
      dims = dimensions;
    } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
      dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
    } else {
      Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(
          timeLineSegments,
          new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
          {
            @Override
            public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder)
            {
              return Iterables.concat(Iterables.transform(
                  timelineObjectHolder.getObject(),
                  new Function<PartitionChunk<DataSegment>, Iterable<String>>()
                  {
                    @Override
                    public Iterable<String> apply(PartitionChunk<DataSegment> input)
                    {
                      return input.getObject().getDimensions();
                    }
                  }
              ));
            }
          }
      )));
      dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
    }
    final List<String> metricsList;
    if (metrics != null) {
      metricsList = metrics;
    } else {
      Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(
          timeLineSegments,
          new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
          {
            @Override
            public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input)
            {
              return Iterables.concat(Iterables.transform(
                  input.getObject(),
                  new Function<PartitionChunk<DataSegment>, Iterable<String>>()
                  {
                    @Override
                    public Iterable<String> apply(PartitionChunk<DataSegment> input)
                    {
                      return input.getObject().getMetrics();
                    }
                  }
              ));
            }
          }
      )));
      metricsList = Lists.newArrayList(metricsSet);
    }
    final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(
        timeLineSegments,
        new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>()
        {
          @Override
          public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder)
          {
            return Iterables.transform(
                holder.getObject(),
                new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>()
                {
                  @Override
                  public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input)
                  {
                    final DataSegment segment = input.getObject();
                    try {
                      return new WindowedStorageAdapter(
                          new QueryableIndexStorageAdapter(
                              indexIO.loadIndex(
                                  Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier())
                              )
                          ),
                          holder.getInterval()
                      );
                    } catch (IOException e) {
                      throw Throwables.propagate(e);
                    }
                  }
                }
            );
          }
        }
    )));
    return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
  } catch (IOException e) {
    throw Throwables.propagate(e);
  } catch (SegmentLoadingException e) {
    throw Throwables.propagate(e);
  }
}
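On Java 8, the nested Guava Function chains above collapse to a pair of flatMaps. A minimal sketch of the dimension-collection step, assuming the same names (timeLineSegments, inputRowParser) are in scope and java.util.stream is available:

// Union of dimension names across every visible segment chunk, minus the
// parser's exclusions; equivalent to the Guava Function chains above.
Set<String> dimSet = timeLineSegments.stream()
    .flatMap(holder -> StreamSupport.stream(holder.getObject().spliterator(), false))
    .flatMap(chunk -> chunk.getObject().getDimensions().stream())
    .collect(Collectors.toSet());
dimSet.removeAll(inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions());
List<String> dims = new ArrayList<>(dimSet);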
Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
The class IngestSegmentFirehoseTest, method testSanity.
@Test
public void testSanity() throws Exception
{
  File segmentDir = tempFolder.newFolder();
  createTestIndex(segmentDir);
  QueryableIndex qi = null;
  try {
    qi = indexIO.loadIndex(segmentDir);
    StorageAdapter sa = new QueryableIndexStorageAdapter(qi);
    WindowedStorageAdapter wsa = new WindowedStorageAdapter(sa, sa.getInterval());
    IngestSegmentFirehose firehose = new IngestSegmentFirehose(
        ImmutableList.of(wsa, wsa),
        ImmutableList.of("host"),
        ImmutableList.of("visited_sum", "unique_hosts"),
        null,
        Granularities.NONE
    );
    int count = 0;
    while (firehose.hasMore()) {
      firehose.nextRow();
      count++;
    }
    Assert.assertEquals(18, count);
  } finally {
    if (qi != null) {
      qi.close();
    }
  }
}
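The same test body reads more simply with try-with-resources. This is a hedged sketch: it assumes QueryableIndex implements Closeable (the excerpt only shows that it exposes close()); if it does not, keep the explicit try/finally form above.

// Sketch only: assumes QueryableIndex is Closeable. The adapter is passed
// twice, so the firehose yields every row twice, hence the count of 18.
try (QueryableIndex qi = indexIO.loadIndex(segmentDir)) {
  StorageAdapter sa = new QueryableIndexStorageAdapter(qi);
  WindowedStorageAdapter wsa = new WindowedStorageAdapter(sa, sa.getInterval());
  IngestSegmentFirehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(wsa, wsa),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      Granularities.NONE
  );
  int count = 0;
  while (firehose.hasMore()) {
    firehose.nextRow();
    count++;
  }
  Assert.assertEquals(18, count);
}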
Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
The class DumpSegment, method runDump.
private void runDump(final Injector injector, final QueryableIndex index) throws IOException
{
  final ObjectMapper objectMapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  final List<String> columnNames = getColumnsToInclude(index);
  final DimFilter filter = filterJson != null ? objectMapper.readValue(filterJson, DimFilter.class) : null;
  final Sequence<Cursor> cursors = adapter.makeCursors(
      Filters.toFilter(filter),
      index.getDataInterval().withChronology(ISOChronology.getInstanceUTC()),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false
  );
  withOutputStream(new Function<OutputStream, Object>()
  {
    @Override
    public Object apply(final OutputStream out)
    {
      final Sequence<Object> sequence = Sequences.map(cursors, new Function<Cursor, Object>()
      {
        @Override
        public Object apply(Cursor cursor)
        {
          final List<ObjectColumnSelector> selectors = Lists.newArrayList();
          for (String columnName : columnNames) {
            selectors.add(makeSelector(columnName, index.getColumn(columnName), cursor));
          }
          while (!cursor.isDone()) {
            final Map<String, Object> row = Maps.newLinkedHashMap();
            for (int i = 0; i < columnNames.size(); i++) {
              final String columnName = columnNames.get(i);
              final Object value = selectors.get(i).get();
              if (timeISO8601 && columnNames.get(i).equals(Column.TIME_COLUMN_NAME)) {
                row.put(columnName, new DateTime(value, DateTimeZone.UTC).toString());
              } else {
                row.put(columnName, value);
              }
            }
            try {
              out.write(objectMapper.writeValueAsBytes(row));
              out.write('\n');
            } catch (IOException e) {
              throw Throwables.propagate(e);
            }
            cursor.advance();
          }
          return null;
        }
      });
      evaluateSequenceForSideEffects(sequence);
      return null;
    }
  });
}
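Stripped of the JSON output plumbing, runDump shows the core read path for a QueryableIndexStorageAdapter: build the adapter over a loaded index, ask for cursors, and advance each cursor until isDone(). A distilled sketch, where row counting stands in for real per-row work (with Granularities.ALL the sequence carries a single cursor for the whole interval):

// `index` is a loaded QueryableIndex, as in runDump above.
QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
Sequence<Cursor> cursors = adapter.makeCursors(
    null,                   // no filter: scan every row
    index.getDataInterval(),
    VirtualColumns.EMPTY,
    Granularities.ALL,      // one cursor spanning the whole interval
    false                   // ascending time order
);
Sequence<Integer> rowCounts = Sequences.map(cursors, new Function<Cursor, Integer>()
{
  @Override
  public Integer apply(Cursor cursor)
  {
    int rows = 0;
    while (!cursor.isDone()) {
      rows++;
      cursor.advance();
    }
    return rows;
  }
});
// Sequences are lazy: nothing is read until the sequence is consumed.
List<Integer> counts = Sequences.toList(rowCounts, Lists.<Integer>newArrayList());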
Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
The class FilterPartitionBenchmark, method timeFilterHalf.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void timeFilterHalf(Blackhole blackhole) throws Exception
{
  StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
  Sequence<Cursor> cursors = makeCursors(sa, timeFilterHalf);
  Sequence<List<Long>> longListSeq = readCursorsLong(cursors, blackhole);
  List<Long> longs = Sequences.toList(Sequences.limit(longListSeq, 1), Lists.<List<Long>>newArrayList()).get(0);
  for (Long val : longs) {
    blackhole.consume(val);
  }
}
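The readCursorsLong helper is not part of this excerpt. A hedged sketch of a plausible shape for it, assuming the era's ColumnSelectorFactory API (Cursor.makeLongColumnSelector) and a hypothetical metric column name:

// Sketch only: the real readCursorsLong body is not shown above, and
// "sumLongSequential" is an assumed column name, not confirmed by the excerpt.
private Sequence<List<Long>> readCursorsLong(Sequence<Cursor> cursors, final Blackhole blackhole)
{
  return Sequences.map(cursors, new Function<Cursor, List<Long>>()
  {
    @Override
    public List<Long> apply(Cursor cursor)
    {
      List<Long> longs = Lists.newArrayList();
      LongColumnSelector selector = cursor.makeLongColumnSelector("sumLongSequential");
      while (!cursor.isDone()) {
        longs.add(selector.get());
        cursor.advance();
      }
      return longs;
    }
  });
}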
Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
The class FilterPartitionBenchmark, method stringRead.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void stringRead(Blackhole blackhole) throws Exception
{
  StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
  Sequence<Cursor> cursors = makeCursors(sa, null);
  Sequence<List<String>> stringListSeq = readCursors(cursors, blackhole);
  List<String> strings = Sequences.toList(Sequences.limit(stringListSeq, 1), Lists.<List<String>>newArrayList()).get(0);
  for (String st : strings) {
    blackhole.consume(st);
  }
}
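Likewise, readCursors is only referenced here. A hedged sketch of a string-reading counterpart, using the same era's DimensionSelector API; the dimension name is again an assumption:

// Sketch only: the real readCursors body is not shown above, and
// "dimSequential" is an assumed dimension name, not confirmed by the excerpt.
private Sequence<List<String>> readCursors(Sequence<Cursor> cursors, final Blackhole blackhole)
{
  return Sequences.map(cursors, new Function<Cursor, List<String>>()
  {
    @Override
    public List<String> apply(Cursor cursor)
    {
      List<String> strings = Lists.newArrayList();
      DimensionSelector selector = cursor.makeDimensionSelector(new DefaultDimensionSpec("dimSequential", null));
      while (!cursor.isDone()) {
        IndexedInts row = selector.getRow();
        for (int i = 0; i < row.size(); i++) {
          strings.add(selector.lookupName(row.get(i)));
        }
        cursor.advance();
      }
      return strings;
    }
  });
}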