Use of io.druid.segment.StorageAdapter in project druid by druid-io.
In class FilterPartitionBenchmark, method readWithExFnPostFilter:
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readWithExFnPostFilter(Blackhole blackhole) throws Exception {
  Filter filter = new NoBitmapSelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN).toFilter();
  StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
  Sequence<Cursor> cursors = makeCursors(sa, filter);
  Sequence<List<String>> stringListSeq = readCursors(cursors, blackhole);
  List<String> strings = Sequences.toList(Sequences.limit(stringListSeq, 1), Lists.<List<String>>newArrayList()).get(0);
  for (String st : strings) {
    blackhole.consume(st);
  }
}
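The helpers makeCursors and readCursors belong to the benchmark class and are not shown above. A minimal sketch of what they might look like, assuming the benchmark holds a schemaInfo field describing the data interval and reads the "dimSequential" dimension; the bodies below are illustrative rather than the exact benchmark code:

private Sequence<Cursor> makeCursors(StorageAdapter sa, Filter filter) {
  // StorageAdapter.makeCursors produces one Cursor per granularity bucket over the interval.
  return sa.makeCursors(filter, schemaInfo.getDataInterval(), VirtualColumns.EMPTY, Granularities.ALL, false);
}

private Sequence<List<String>> readCursors(Sequence<Cursor> cursors, final Blackhole blackhole) {
  return Sequences.map(cursors, new Function<Cursor, List<String>>() {
    @Override
    public List<String> apply(Cursor cursor) {
      // Read every row of the "dimSequential" dimension through a DimensionSelector.
      List<String> strings = new ArrayList<>();
      DimensionSelector selector = cursor.makeDimensionSelector(new DefaultDimensionSpec("dimSequential", null));
      while (!cursor.isDone()) {
        IndexedInts row = selector.getRow();
        for (int i = 0; i < row.size(); i++) {
          strings.add(selector.lookupName(row.get(i)));
        }
        cursor.advance();
      }
      return strings;
    }
  });
}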
Use of io.druid.segment.StorageAdapter in project druid by druid-io.
In class UseIndexesStrategy, method getExecutionPlan:
@Override
public List<SearchQueryExecutor> getExecutionPlan(SearchQuery query, Segment segment) {
  // "filter" and "interval" are fields of the enclosing strategy (not shown in this snippet).
  final ImmutableList.Builder<SearchQueryExecutor> builder = ImmutableList.builder();
  final QueryableIndex index = segment.asQueryableIndex();
  final StorageAdapter adapter = segment.asStorageAdapter();
  final List<DimensionSpec> searchDims = getDimsToSearch(adapter.getAvailableDimensions(), query.getDimensions());
  if (index != null) {
    // pair of bitmap-supported dims and non-bitmap-supported dims
    final Pair<List<DimensionSpec>, List<DimensionSpec>> pair = partitionDimensionList(adapter, searchDims);
    final List<DimensionSpec> bitmapSuppDims = pair.lhs;
    final List<DimensionSpec> nonBitmapSuppDims = pair.rhs;
    if (bitmapSuppDims.size() > 0) {
      final BitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(index.getBitmapFactoryForDimensions(), VirtualColumns.EMPTY, index);
      // Use the index-only executor only when there is no filter or the filter supports bitmap indexes.
      // One possible optimization is generating a bitmap index from the non-bitmap-support filter, and then
      // using it to compute the filtered result by intersecting bitmaps.
      if (filter == null || filter.supportsBitmapIndex(selector)) {
        final ImmutableBitmap timeFilteredBitmap = makeTimeFilteredBitmap(index, segment, filter, interval);
        builder.add(new IndexOnlyExecutor(query, segment, timeFilteredBitmap, bitmapSuppDims));
      } else {
        // Fall back to cursor-based execution strategy
        nonBitmapSuppDims.addAll(bitmapSuppDims);
      }
    }
    if (nonBitmapSuppDims.size() > 0) {
      builder.add(new CursorBasedExecutor(query, segment, filter, interval, nonBitmapSuppDims));
    }
  } else {
    builder.add(new CursorBasedExecutor(query, segment, filter, interval, searchDims));
  }
  return builder.build();
}
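The partitioning step relies on the column capabilities exposed by the StorageAdapter. A rough sketch of what partitionDimensionList could look like, assuming it classifies each dimension via ColumnCapabilities.hasBitmapIndexes(); this illustrates the idea rather than reproducing the exact Druid implementation:

// Split the search dimensions into those backed by a bitmap index and those that are not.
static Pair<List<DimensionSpec>, List<DimensionSpec>> partitionDimensionList(StorageAdapter adapter, List<DimensionSpec> dimensions) {
  final List<DimensionSpec> bitmapDims = Lists.newArrayList();
  final List<DimensionSpec> nonBitmapDims = Lists.newArrayList();
  for (DimensionSpec spec : dimensions) {
    // Columns missing from the adapter are skipped; everything else is classified by its capabilities.
    final ColumnCapabilities capabilities = adapter.getColumnCapabilities(spec.getDimension());
    if (capabilities == null) {
      continue;
    }
    if (capabilities.hasBitmapIndexes()) {
      bitmapDims.add(spec);
    } else {
      nonBitmapDims.add(spec);
    }
  }
  return new Pair<>(bitmapDims, nonBitmapDims);
}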
Use of io.druid.segment.StorageAdapter in project druid by druid-io.
In class SelectQueryEngine, method process:
public Sequence<Result<SelectResultValue>> process(final SelectQuery query, final Segment segment) {
  final StorageAdapter adapter = segment.asStorageAdapter();
  if (adapter == null) {
    throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
  }
  // at the point where this code is called, only one datasource should exist.
  String dataSource = Iterables.getOnlyElement(query.getDataSource().getNames());
  final Iterable<DimensionSpec> dims;
  if (query.getDimensions() == null || query.getDimensions().isEmpty()) {
    dims = DefaultDimensionSpec.toSpec(adapter.getAvailableDimensions());
  } else {
    dims = query.getDimensions();
  }
  final Iterable<String> metrics;
  if (query.getMetrics() == null || query.getMetrics().isEmpty()) {
    metrics = adapter.getAvailableMetrics();
  } else {
    metrics = query.getMetrics();
  }
  List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
  Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
  // the segment identifier should be rewritten with the given interval
  final String segmentId = DataSegmentUtils.withInterval(dataSource, segment.getIdentifier(), intervals.get(0));
  final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
  return QueryRunnerHelper.makeCursorBasedQuery(
      adapter,
      query.getQuerySegmentSpec().getIntervals(),
      filter,
      query.getVirtualColumns(),
      query.isDescending(),
      query.getGranularity(),
      new Function<Cursor, Result<SelectResultValue>>() {
        @Override
        public Result<SelectResultValue> apply(Cursor cursor) {
          final SelectResultValueBuilder builder = new SelectResultValueBuilder(cursor.getTime(), query.getPagingSpec(), query.isDescending());
          final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME);
          final List<ColumnSelectorPlus<SelectColumnSelectorStrategy>> selectorPlusList = Arrays.asList(
              DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, Lists.newArrayList(dims), cursor)
          );
          for (DimensionSpec dimSpec : dims) {
            builder.addDimension(dimSpec.getOutputName());
          }
          final Map<String, ObjectColumnSelector> metSelectors = Maps.newHashMap();
          for (String metric : metrics) {
            final ObjectColumnSelector metricSelector = cursor.makeObjectColumnSelector(metric);
            metSelectors.put(metric, metricSelector);
            builder.addMetric(metric);
          }
          final PagingOffset offset = query.getPagingOffset(segmentId);
          cursor.advanceTo(offset.startDelta());
          int lastOffset = offset.startOffset();
          for (; !cursor.isDone() && offset.hasNext(); cursor.advance(), offset.next()) {
            final Map<String, Object> theEvent = singleEvent(EventHolder.timestampKey, timestampColumnSelector, selectorPlusList, metSelectors);
            builder.addEntry(new EventHolder(segmentId, lastOffset = offset.current(), theEvent));
          }
          builder.finished(segmentId, lastOffset);
          return builder.build();
        }
      }
  );
}
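The returned Sequence is lazy: the cursor-based query only runs when the Sequence is consumed. A minimal usage sketch, assuming an engine, query, and segment have already been constructed (their setup is outside this snippet):

final Sequence<Result<SelectResultValue>> results = engine.process(query, segment);
// Materialize the lazy Sequence into a list of per-granularity-bucket results.
final List<Result<SelectResultValue>> materialized =
    Sequences.toList(results, Lists.<Result<SelectResultValue>>newArrayList());
for (Result<SelectResultValue> result : materialized) {
  // Each result carries the bucket timestamp plus the events selected from this segment.
  System.out.println(result.getTimestamp() + " -> " + result.getValue().getEvents());
}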
Use of io.druid.segment.StorageAdapter in project druid by druid-io.
In class BatchDeltaIngestionTest, method testIngestion:
private void testIngestion(HadoopDruidIndexerConfig config, List<ImmutableMap<String, Object>> expectedRowsGenerated, WindowedDataSegment windowedDataSegment) throws Exception {
  IndexGeneratorJob job = new IndexGeneratorJob(config);
  JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
  File segmentFolder = new File(
      String.format(
          "%s/%s/%s_%s/%s/0",
          config.getSchema().getIOConfig().getSegmentOutputPath(),
          config.getSchema().getDataSchema().getDataSource(),
          INTERVAL_FULL.getStart().toString(),
          INTERVAL_FULL.getEnd().toString(),
          config.getSchema().getTuningConfig().getVersion()
      )
  );
  Assert.assertTrue(segmentFolder.exists());
  File descriptor = new File(segmentFolder, "descriptor.json");
  File indexZip = new File(segmentFolder, "index.zip");
  Assert.assertTrue(descriptor.exists());
  Assert.assertTrue(indexZip.exists());
  DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
  Assert.assertEquals("website", dataSegment.getDataSource());
  Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
  Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
  Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
  Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
  Assert.assertEquals("host", dataSegment.getDimensions().get(0));
  Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
  Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
  Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
  HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
  Assert.assertEquals(0, spec.getPartitionNum());
  Assert.assertEquals(1, spec.getPartitions());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
  QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      Granularities.NONE
  );
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRowsGenerated, rows);
}
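verifyRows is a private helper of the test class and is not shown. A simplified sketch of the kind of row-by-row comparison it might perform, assuming each expected map carries a "time" DateTime, a "host" dimension, and a "visited_sum" metric (the real helper also checks the unique_hosts sketch); the body below is illustrative only:

private void verifyRows(List<ImmutableMap<String, Object>> expectedRows, List<InputRow> actualRows) {
  Assert.assertEquals(expectedRows.size(), actualRows.size());
  for (int i = 0; i < expectedRows.size(); i++) {
    final Map<String, Object> expected = expectedRows.get(i);
    final InputRow actual = actualRows.get(i);
    // Compare the timestamp, the "host" dimension values, and the aggregated "visited_sum" metric.
    Assert.assertEquals(expected.get("time"), actual.getTimestamp());
    Assert.assertEquals(expected.get("host"), actual.getDimension("host"));
    Assert.assertEquals(((Number) expected.get("visited_sum")).longValue(), actual.getLongMetric("visited_sum"));
  }
}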
Use of io.druid.segment.StorageAdapter in project druid by druid-io.
In class SegmentAnalyzer, method analyze:
public Map<String, ColumnAnalysis> analyze(Segment segment) {
  Preconditions.checkNotNull(segment, "segment");
  // index is null for incremental-index-based segments, but storageAdapter is always available
  final QueryableIndex index = segment.asQueryableIndex();
  final StorageAdapter storageAdapter = segment.asStorageAdapter();
  // get length and column names from storageAdapter
  final int length = storageAdapter.getNumRows();
  final Set<String> columnNames = Sets.newHashSet();
  Iterables.addAll(columnNames, storageAdapter.getAvailableDimensions());
  Iterables.addAll(columnNames, storageAdapter.getAvailableMetrics());
  Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
  for (String columnName : columnNames) {
    final Column column = index == null ? null : index.getColumn(columnName);
    final ColumnCapabilities capabilities = column != null ? column.getCapabilities() : storageAdapter.getColumnCapabilities(columnName);
    final ColumnAnalysis analysis;
    final ValueType type = capabilities.getType();
    switch (type) {
      case LONG:
        analysis = analyzeNumericColumn(capabilities, length, Longs.BYTES);
        break;
      case FLOAT:
        analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
        break;
      case STRING:
        if (index != null) {
          analysis = analyzeStringColumn(capabilities, column);
        } else {
          analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
        }
        break;
      case COMPLEX:
        analysis = analyzeComplexColumn(capabilities, column, storageAdapter.getColumnTypeName(columnName));
        break;
      default:
        log.warn("Unknown column type[%s].", type);
        analysis = ColumnAnalysis.error(String.format("unknown_type_%s", type));
    }
    columns.put(columnName, analysis);
  }
  // Add the time column too
  ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(Column.TIME_COLUMN_NAME);
  if (timeCapabilities == null) {
    timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
  }
  columns.put(Column.TIME_COLUMN_NAME, analyzeNumericColumn(timeCapabilities, length, NUM_BYTES_IN_TIMESTAMP));
  return columns;
}
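The numeric branches estimate column size from the row count and a fixed per-row byte width. A rough sketch of what analyzeNumericColumn might compute, assuming it rejects multi-valued columns and multiplies row count by bytes per row; the ColumnAnalysis constructor arguments here are assumed, so treat this as an illustration rather than the exact implementation:

private ColumnAnalysis analyzeNumericColumn(ColumnCapabilities capabilities, int numRows, int sizePerRow) {
  if (capabilities.hasMultipleValues()) {
    // A numeric column is not expected to be multi-valued.
    return ColumnAnalysis.error("multi_value");
  }
  // Rough size estimate: one fixed-width value per row.
  final long size = ((long) numRows) * sizePerRow;
  return new ColumnAnalysis(capabilities.getType().name(), capabilities.hasMultipleValues(), size, null, null, null, null);
}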