Use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
Class ExpressionVectorSelectorBenchmark, method scan:
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void scan(Blackhole blackhole)
{
  // Register the expression as a virtual column "v" so both scan paths read it the same way.
  final VirtualColumns virtualColumns = VirtualColumns.create(
      ImmutableList.of(
          new ExpressionVirtualColumn("v", expression, ExpressionType.toColumnType(outputType), TestExprMacroTable.INSTANCE)
      )
  );
  if (vectorize) {
    // Vectorized read path: values are consumed in batches of 512.
    VectorCursor cursor = new QueryableIndexStorageAdapter(index).makeVectorCursor(
        null,
        index.getDataInterval(),
        virtualColumns,
        false,
        512,
        null
    );
    if (outputType.isNumeric()) {
      VectorValueSelector selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
      if (outputType.is(ExprType.DOUBLE)) {
        while (!cursor.isDone()) {
          blackhole.consume(selector.getDoubleVector());
          blackhole.consume(selector.getNullVector());
          cursor.advance();
        }
      } else {
        while (!cursor.isDone()) {
          blackhole.consume(selector.getLongVector());
          blackhole.consume(selector.getNullVector());
          cursor.advance();
        }
      }
      closer.register(cursor);
    }
  } else {
    // Non-vectorized read path: one row at a time through a ColumnValueSelector.
    Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
        null,
        index.getDataInterval(),
        virtualColumns,
        Granularities.ALL,
        false,
        null
    );
    int rowCount = cursors.map(cursor -> {
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      int rows = 0;
      while (!cursor.isDone()) {
        blackhole.consume(selector.getObject());
        rows++;
        cursor.advance();
      }
      return rows;
    }).accumulate(0, (acc, in) -> acc + in);
    blackhole.consume(rowCount);
  }
}
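The vectorized branch above only consumes numeric output types. As a hedged sketch (not part of the benchmark itself), a string-typed virtual column could be read from the same vector cursor through a single-value dimension vector selector; the selector calls below follow Druid's vector selector API as I understand it and should be treated as an assumption.

// Hedged sketch: consuming a string-typed virtual column "v" on the same VectorCursor.
SingleValueDimensionVectorSelector strSelector =
    cursor.getColumnSelectorFactory().makeSingleValueDimensionSelector(DefaultDimensionSpec.of("v"));
while (!cursor.isDone()) {
  // Each batch is a vector of dictionary ids; look each id up to get the string value.
  final int[] rowVector = strSelector.getRowVector();
  for (int i = 0; i < cursor.getCurrentVectorSize(); i++) {
    blackhole.consume(strSelector.lookupName(rowVector[i]));
  }
  cursor.advance();
}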
Use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
Class IngestSegmentFirehoseFactory, method connect:
@Override
public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException
{
  log.debug("Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", dataSource, interval, segmentIds);
  final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = getTimeline();
  // Download all segments locally.
  // Note: this requires enough local storage space to fit all of the segments, even though
  // IngestSegmentFirehose iterates over the segments in series. We may want to change this
  // to download files lazily, perhaps sharing code with PrefetchableTextFilesFirehoseFactory.
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  Map<DataSegment, File> segmentFileMap = Maps.newLinkedHashMap();
  for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
    for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
      final DataSegment segment = chunk.getObject();
      segmentFileMap.computeIfAbsent(segment, k -> {
        try {
          return segmentCacheManager.getSegmentFiles(segment);
        }
        catch (SegmentLoadingException e) {
          throw new RuntimeException(e);
        }
      });
    }
  }
  // Determine the dimensions and metrics to re-ingest from the segments' own schemas.
  final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(
      dimensions,
      inputRowParser.getParseSpec().getDimensionsSpec(),
      timeLineSegments
  );
  final List<String> metricsList = metrics == null
      ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments)
      : metrics;
  // Wrap each downloaded segment in a QueryableIndexStorageAdapter, windowed to its timeline interval.
  final List<WindowedStorageAdapter> adapters = Lists.newArrayList(
      Iterables.concat(
          Iterables.transform(
              timeLineSegments,
              new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>()
              {
                @Override
                public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder)
                {
                  return Iterables.transform(
                      holder.getObject(),
                      new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>()
                      {
                        @Override
                        public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input)
                        {
                          final DataSegment segment = input.getObject();
                          try {
                            return new WindowedStorageAdapter(
                                new QueryableIndexStorageAdapter(
                                    indexIO.loadIndex(
                                        Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId())
                                    )
                                ),
                                holder.getInterval()
                            );
                          }
                          catch (IOException e) {
                            throw new RuntimeException(e);
                          }
                        }
                      }
                  );
                }
              }
          )
      )
  );
  final TransformSpec transformSpec = TransformSpec.fromInputRowParser(inputRowParser);
  return new IngestSegmentFirehose(adapters, transformSpec, dims, metricsList, dimFilter);
}
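The nested Guava transforms above can be hard to follow. As a hedged sketch under the same assumptions (the indexIO and segmentFileMap variables are in scope, and java.util.stream imports are available), the adapter list could also be built with Java streams; this is an illustrative alternative, not the project's code.

// Hedged sketch: the same WindowedStorageAdapter list built with Java streams.
final List<WindowedStorageAdapter> adapters = timeLineSegments
    .stream()
    .flatMap(holder -> StreamSupport.stream(holder.getObject().spliterator(), false)
        .map(chunk -> {
          final DataSegment segment = chunk.getObject();
          try {
            final File segmentDir = Preconditions.checkNotNull(
                segmentFileMap.get(segment),
                "File for segment %s", segment.getId()
            );
            // Load the persisted index and window it to the holder's interval.
            return new WindowedStorageAdapter(
                new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentDir)),
                holder.getInterval()
            );
          }
          catch (IOException e) {
            throw new RuntimeException(e);
          }
        }))
    .collect(Collectors.toList());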
Use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
Class DruidSegmentReader, method intermediateRowIterator:
@Override
protected CloseableIterator<Map<String, Object>> intermediateRowIterator() throws IOException
{
  // Fetch the segment file locally and open it as a windowed storage adapter over the requested interval.
  final CleanableFile segmentFile = source.fetch(temporaryDirectory, null);
  final WindowedStorageAdapter storageAdapter = new WindowedStorageAdapter(
      new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile.file())),
      source.getIntervalFilter()
  );
  final Sequence<Cursor> cursors = storageAdapter.getAdapter().makeCursors(
      Filters.toFilter(dimFilter),
      storageAdapter.getInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );
  // Retain order of columns from the original segments. Useful for preserving dimension order if we're in
  // schemaless mode.
  final Set<String> columnsToRead = Sets.newLinkedHashSet(
      Iterables.filter(storageAdapter.getAdapter().getRowSignature().getColumnNames(), columnsFilter::apply)
  );
  final Sequence<Map<String, Object>> sequence = Sequences.concat(
      Sequences.map(cursors, cursor -> cursorToSequence(cursor, columnsToRead))
  );
  return makeCloseableIteratorFromSequenceAndSegmentFile(sequence, segmentFile);
}
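The cursorToSequence helper is private to DruidSegmentReader and is not shown here. As a hedged sketch of the general idea only (the real helper may differ), a cursor can be materialized into row maps by creating one ColumnValueSelector per selected column and reading each selector once per row:

// Hedged sketch: materializing a Cursor into Map<String, Object> rows, one entry per selected column.
private static List<Map<String, Object>> readCursor(Cursor cursor, Set<String> columnsToRead)
{
  final List<String> columns = new ArrayList<>(columnsToRead);
  final List<ColumnValueSelector> selectors = new ArrayList<>();
  for (String column : columns) {
    selectors.add(cursor.getColumnSelectorFactory().makeColumnValueSelector(column));
  }
  final List<Map<String, Object>> rows = new ArrayList<>();
  while (!cursor.isDone()) {
    final Map<String, Object> row = new LinkedHashMap<>();
    for (int i = 0; i < columns.size(); i++) {
      row.put(columns.get(i), selectors.get(i).getObject());
    }
    rows.add(row);
    cursor.advance();
  }
  return rows;
}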
Use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
Class IndexTaskTest, method testNumShardsAndPartitionDimensionsProvided:
@Test
public void testNumShardsAndPartitionDimensionsProvided() throws Exception
{
  final File tmpDir = temporaryFolder.newFolder();
  final File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("2014-01-01T00:00:10Z,a,1\n");
    writer.write("2014-01-01T01:00:20Z,b,1\n");
    writer.write("2014-01-01T02:00:30Z,c,1\n");
  }
  // Hash-partition on "dim" into two shards.
  final IndexTask indexTask = new IndexTask(
      null,
      null,
      createDefaultIngestionSpec(
          jsonMapper,
          tmpDir,
          null,
          null,
          createTuningConfigWithPartitionsSpec(new HashedPartitionsSpec(null, 2, ImmutableList.of("dim")), true),
          false,
          false
      ),
      null
  );
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(2, segments.size());
  for (DataSegment segment : segments) {
    Assert.assertEquals(DATASOURCE, segment.getDataSource());
    Assert.assertEquals(Intervals.of("2014/P1D"), segment.getInterval());
    Assert.assertEquals(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
    final HashBasedNumberedShardSpec hashBasedNumberedShardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
    Assert.assertEquals(HashPartitionFunction.MURMUR3_32_ABS, hashBasedNumberedShardSpec.getPartitionFunction());
    // Read the segment back through a QueryableIndexStorageAdapter and recompute the hash bucket of "dim".
    final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
    final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
        new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
        segment.getInterval()
    );
    final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(
        null,
        segment.getInterval(),
        VirtualColumns.EMPTY,
        Granularities.ALL,
        false,
        null
    );
    final List<Integer> hashes = cursorSequence.map(cursor -> {
      final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
      final int hash = HashPartitionFunction.MURMUR3_32_ABS.hash(
          HashBasedNumberedShardSpec.serializeGroupKey(jsonMapper, Collections.singletonList(selector.getObject())),
          hashBasedNumberedShardSpec.getNumBuckets()
      );
      cursor.advance();
      return hash;
    }).toList();
    // All rows in a single segment must fall into the same hash bucket.
    Assert.assertTrue(hashes.stream().allMatch(h -> h.intValue() == hashes.get(0)));
  }
}
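For intuition about what the assertion checks, the same bucket computation can be done directly on the input dimension values, without a cursor. This is a hedged illustration, not part of the test; the numBuckets value of 2 is assumed to match the HashedPartitionsSpec above.

// Hedged sketch: computing the expected hash bucket for each input "dim" value.
final int numBuckets = 2;
for (String dim : Arrays.asList("a", "b", "c")) {
  final int bucket = HashPartitionFunction.MURMUR3_32_ABS.hash(
      HashBasedNumberedShardSpec.serializeGroupKey(jsonMapper, Collections.singletonList((Object) dim)),
      numBuckets
  );
  // Rows that land in the same bucket are written to the same segment.
  System.out.println(dim + " -> bucket " + bucket);
}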
Use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
Class StringLastTimeseriesQueryTest, method testTimeseriesQuery:
@Test
public void testTimeseriesQuery()
{
  TimeseriesQueryEngine engine = new TimeseriesQueryEngine();
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .aggregators(
          ImmutableList.of(
              new StringLastAggregatorFactory("nonfolding", CLIENT_TYPE, null, 1024),
              new StringLastAggregatorFactory("folding", LAST_CLIENT_TYPE, null, 1024),
              new StringLastAggregatorFactory("nonexistent", "nonexistent", null, 1024),
              new StringLastAggregatorFactory("numeric", "cnt", null, 1024)
          )
      )
      .build();
  List<Result<TimeseriesResultValue>> expectedResults = Collections.singletonList(
      new Result<>(
          TIME1,
          new TimeseriesResultValue(
              ImmutableMap.<String, Object>builder()
                  .put("nonfolding", new SerializablePairLongString(TIME2.getMillis(), "android"))
                  .put("folding", new SerializablePairLongString(TIME2.getMillis(), "android"))
                  .put("nonexistent", new SerializablePairLongString(DateTimes.MIN.getMillis(), null))
                  .put("numeric", new SerializablePairLongString(DateTimes.MIN.getMillis(), null))
                  .build()
          )
      )
  );
  // Run the same query against both the in-memory incremental index and the persisted queryable index.
  final Iterable<Result<TimeseriesResultValue>> iiResults =
      engine.process(query, new IncrementalIndexStorageAdapter(incrementalIndex)).toList();
  final Iterable<Result<TimeseriesResultValue>> qiResults =
      engine.process(query, new QueryableIndexStorageAdapter(queryableIndex)).toList();
  TestHelper.assertExpectedResults(expectedResults, iiResults, "incremental index");
  TestHelper.assertExpectedResults(expectedResults, qiResults, "queryable index");
}
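With ALL granularity the query produces a single result row, so individual aggregates can be inspected directly from the materialized results. A hedged sketch (assuming TimeseriesResultValue.getMetric returns the aggregated object by name):

// Hedged sketch: inspecting one aggregated value from the queryable-index results.
final Result<TimeseriesResultValue> onlyRow = Iterables.getOnlyElement(qiResults);
final SerializablePairLongString lastClientType =
    (SerializablePairLongString) onlyRow.getValue().getMetric("nonfolding");
// lhs is the timestamp of the "last" row, rhs is the string value it carried.
Assert.assertEquals("android", lastClientType.rhs);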