Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.
The class SegmentAnalyzer, method analyze.
/**
 * Produces a {@link ColumnAnalysis} for every column named in the segment's row signature.
 *
 * <p>Numeric columns (LONG, FLOAT, DOUBLE) are analyzed from the row count and a per-row byte
 * width; STRING and COMPLEX columns are analyzed through the queryable index when the segment
 * has one. Any other type is logged and reported as an error analysis.
 *
 * @param segment the segment to analyze; must not be null
 * @return the analyses keyed by column name, held in a {@link TreeMap}
 */
public Map<String, ColumnAnalysis> analyze(Segment segment) {
  Preconditions.checkNotNull(segment, "segment");

  // The queryable index is null for incremental-index-based segments, whereas the storage
  // adapter is always available.
  final QueryableIndex queryableIndex = segment.asQueryableIndex();
  final StorageAdapter adapter = segment.asStorageAdapter();

  // Both the row count and the column names are taken from the storage adapter.
  final int numRows = adapter.getNumRows();
  final Map<String, ColumnAnalysis> analysisByColumn = new TreeMap<>();

  for (String columnName : adapter.getRowSignature().getColumnNames()) {
    // See javadocs for getSnapshotColumnCapabilities for a discussion of why incremental-index
    // adapters are handled separately.
    final ColumnCapabilities caps = adapter instanceof IncrementalIndexStorageAdapter
        ? ((IncrementalIndexStorageAdapter) adapter).getSnapshotColumnCapabilities(columnName)
        : adapter.getColumnCapabilities(columnName);

    final ColumnAnalysis result;
    switch (caps.getType()) {
      case LONG: {
        // The time column uses its own per-row width; every other long column uses Long.BYTES.
        final int longWidth;
        if (ColumnHolder.TIME_COLUMN_NAME.equals(columnName)) {
          longWidth = NUM_BYTES_IN_TIMESTAMP;
        } else {
          longWidth = Long.BYTES;
        }
        result = analyzeNumericColumn(caps, numRows, longWidth);
        break;
      }
      case FLOAT: {
        result = analyzeNumericColumn(caps, numRows, NUM_BYTES_IN_TEXT_FLOAT);
        break;
      }
      case DOUBLE: {
        result = analyzeNumericColumn(caps, numRows, Double.BYTES);
        break;
      }
      case STRING: {
        // Without a queryable index, fall back to analyzing through the storage adapter.
        result = queryableIndex == null
            ? analyzeStringColumn(caps, adapter, columnName)
            : analyzeStringColumn(caps, queryableIndex.getColumnHolder(columnName));
        break;
      }
      case COMPLEX: {
        // The column holder is only obtainable from a queryable index; otherwise pass null.
        ColumnHolder complexHolder = null;
        if (queryableIndex != null) {
          complexHolder = queryableIndex.getColumnHolder(columnName);
        }
        result = analyzeComplexColumn(caps, complexHolder);
        break;
      }
      default: {
        log.warn("Unknown column type[%s].", caps.asTypeString());
        result = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", caps.asTypeString()));
        break;
      }
    }
    analysisByColumn.put(columnName, result);
  }
  return analysisByColumn;
}
Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.
The class ScanQueryEngine, method process.
/**
 * Runs a scan query against a single segment and returns its rows as a sequence of batched
 * {@link ScanResultValue}s.
 *
 * <p>Returns an empty sequence immediately when the row count already recorded in the response
 * context has reached {@code query.getScanRowsLimit()} and the query's time order is NONE.
 * Otherwise it resolves the output column list, opens cursors over the single query interval,
 * and for each cursor produces an iterator that emits one {@link ScanResultValue} per batch of
 * at most {@code query.getBatchSize()} rows, never exceeding the computed row limit.
 *
 * @param query           the scan query; {@code query.isLegacy()} must be non-null and the query
 *                        segment spec must contain exactly one interval
 * @param segment         the segment to scan; its storage adapter must be non-null
 * @param responseContext read for the current row scan count and timeout time, and updated
 *                        with the rows scanned and the timeout time as batches are produced
 * @return a sequence of result batches, each carrying the segment id, the output column names
 *         and the batch's events
 * @throws ISE                      if the segment's storage adapter is null
 * @throws NullPointerException     if {@code query.isLegacy()} is null
 * @throws IllegalArgumentException if the query does not have exactly one interval
 * @throws QueryTimeoutException    from the returned iterators, when the query has a timeout and
 *                                  the current time has reached the timeout time
 * @throws UOE                      from the returned iterators, when the result format is neither
 *                                  the compacted-list nor the list format
 */
public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final ResponseContext responseContext) {
// "legacy" should be non-null due to toolChest.mergeResults
final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "Expected non-null 'legacy' parameter");
// Short-circuit: with no time ordering, nothing more is needed once the recorded row count
// has reached the scan row limit.
final Long numScannedRows = responseContext.getRowScanCount();
if (numScannedRows != null && numScannedRows >= query.getScanRowsLimit() && query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
return Sequences.empty();
}
// Timeout bookkeeping: timeoutAt comes from the response context, and start is the wall-clock
// time at which this call began (used below when the timeout time is written back).
final boolean hasTimeout = QueryContexts.hasTimeout(query);
final Long timeoutAt = responseContext.getTimeoutTime();
final long start = System.currentTimeMillis();
final StorageAdapter adapter = segment.asStorageAdapter();
if (adapter == null) {
throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
}
// Resolve the output columns, in the order they will appear in every result batch.
final List<String> allColumns = new ArrayList<>();
if (query.getColumns() != null && !query.getColumns().isEmpty()) {
// Explicit column list: in legacy mode the legacy timestamp key is prepended when the caller
// did not ask for it.
if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
allColumns.add(LEGACY_TIMESTAMP_KEY);
}
// Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
// the compactedList form easier to use.
allColumns.addAll(query.getColumns());
} else {
// No explicit column list: use the time key (legacy key in legacy mode), then the virtual
// column output names, then the adapter's dimensions and metrics. The linked hash set drops
// duplicates while keeping first-seen order.
final Set<String> availableColumns = Sets.newLinkedHashSet(Iterables.concat(Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : ColumnHolder.TIME_COLUMN_NAME), Iterables.transform(Arrays.asList(query.getVirtualColumns().getVirtualColumns()), VirtualColumn::getOutputName), adapter.getAvailableDimensions(), adapter.getAvailableMetrics()));
allColumns.addAll(availableColumns);
if (legacy) {
// In legacy mode the time column is exposed only under the legacy timestamp key.
allColumns.remove(ColumnHolder.TIME_COLUMN_NAME);
}
}
final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
final SegmentId segmentId = segment.getId();
final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));
// If the row count is not set, set it to 0, else do nothing.
responseContext.addRowScanCount(0);
// Row budget for this call, as computed by calculateRemainingScanRowsLimit from the query and
// the response context.
final long limit = calculateRemainingScanRowsLimit(query, responseContext);
// Cursors are requested in descending order when the time order is DESCENDING, or when the
// time order is NONE and the query itself is flagged descending. Each cursor is mapped to a
// sequence whose iterator batches that cursor's rows.
return Sequences.concat(adapter.makeCursors(filter, intervals.get(0), query.getVirtualColumns(), Granularities.ALL, query.getTimeOrder().equals(ScanQuery.Order.DESCENDING) || (query.getTimeOrder().equals(ScanQuery.Order.NONE) && query.isDescending()), null).map(cursor -> new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {
@Override
public Iterator<ScanResultValue> make() {
// Build one value selector per output column, index-aligned with allColumns.
final List<BaseObjectColumnValueSelector> columnSelectors = new ArrayList<>(allColumns.size());
for (String column : allColumns) {
final BaseObjectColumnValueSelector selector;
if (legacy && LEGACY_TIMESTAMP_KEY.equals(column)) {
// The legacy timestamp key is read from the time column.
selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
} else {
selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
}
columnSelectors.add(selector);
}
final int batchSize = query.getBatchSize();
return new Iterator<ScanResultValue>() {
// Number of rows this iterator has read from the cursor so far.
private long offset = 0;
@Override
public boolean hasNext() {
return !cursor.isDone() && offset < limit;
}
@Override
public ScanResultValue next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
// Check the timeout before reading the next batch.
if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
}
final long lastOffset = offset;
final Object events;
final ScanQuery.ResultFormat resultFormat = query.getResultFormat();
if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
events = rowsToCompactedList();
} else if (ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) {
events = rowsToList();
} else {
throw new UOE("resultFormat[%s] is not supported", resultFormat.toString());
}
// Record how many rows this batch consumed (the helpers above advance offset).
responseContext.addRowScanCount(offset - lastOffset);
if (hasTimeout) {
// NOTE(review): timeoutAt is compared directly against System.currentTimeMillis() in the
// check above, yet here the time elapsed since 'start' is subtracted from it before it is
// stored back. Confirm which representation (absolute deadline vs. remaining time)
// downstream readers of the response context expect.
responseContext.putTimeoutTime(timeoutAt - (System.currentTimeMillis() - start));
}
return new ScanResultValue(segmentId.toString(), allColumns, events);
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
// Reads up to batchSize rows (bounded by limit) as lists of values ordered like allColumns,
// advancing the cursor and offset once per row.
private List<List<Object>> rowsToCompactedList() {
final List<List<Object>> events = new ArrayList<>(batchSize);
final long iterLimit = Math.min(limit, offset + batchSize);
for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
final List<Object> theEvent = new ArrayList<>(allColumns.size());
for (int j = 0; j < allColumns.size(); j++) {
theEvent.add(getColumnValue(j));
}
events.add(theEvent);
}
return events;
}
// Reads up to batchSize rows (bounded by limit) as insertion-ordered maps from column name to
// value, advancing the cursor and offset once per row.
private List<Map<String, Object>> rowsToList() {
List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
final long iterLimit = Math.min(limit, offset + batchSize);
for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
final Map<String, Object> theEvent = new LinkedHashMap<>();
for (int j = 0; j < allColumns.size(); j++) {
theEvent.put(allColumns.get(j), getColumnValue(j));
}
events.add(theEvent);
}
return events;
}
// Returns the current row's value for the i-th output column.
private Object getColumnValue(int i) {
final BaseObjectColumnValueSelector selector = columnSelectors.get(i);
final Object value;
if (legacy && allColumns.get(i).equals(LEGACY_TIMESTAMP_KEY)) {
// Legacy timestamps are emitted as the result of DateTimes.utc on the long time value.
// NOTE(review): unlike the branch below, this branch dereferences selector without a null
// check.
value = DateTimes.utc((long) selector.getObject());
} else {
// A null selector yields a null value.
value = selector == null ? null : selector.getObject();
}
return value;
}
};
}
@Override
public void cleanup(Iterator<ScanResultValue> iterFromMake) {
// Intentionally empty: nothing is done here when the iterator is released.
}
})));
}
Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.
The class CursorOnlyStrategy, method getExecutionPlan.
/**
 * Builds the execution plan for a search query over one segment: a single cursor-based
 * executor.
 *
 * @param query   the search query whose dimensions are to be searched
 * @param segment the segment to search
 * @return an immutable one-element list holding a {@code CursorBasedExecutor}
 */
@Override
public List<SearchQueryExecutor> getExecutionPlan(SearchQuery query, Segment segment) {
  // Resolve the dimensions to search from the segment's available dimensions and the
  // dimensions named by the query.
  final List<DimensionSpec> searchDimensions = getDimsToSearch(
      segment.asStorageAdapter().getAvailableDimensions(),
      query.getDimensions()
  );
  final SearchQueryExecutor cursorExecutor =
      new CursorBasedExecutor(query, segment, filter, interval, searchDimensions);
  return ImmutableList.of(cursorExecutor);
}
Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.
The class TopNMetricSpecOptimizationsTest, method testShouldNotOptimizeLexicographic.
/**
 * Checks that {@code computeStartEnd} still returns the full range {@code [0, cardinality]}
 * after {@code ignoreAfterThreshold()} has been called on the array provider, for a query whose
 * interval is one hour and a fake storage adapter built from a one-day pair of timestamps.
 */
@Test
public void testShouldNotOptimizeLexicographic() {
  // query interval is smaller than segment interval, no filters, can ignoreAfterThreshold
  int cardinality = 1234;
  int threshold = 4;
  TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension(QueryRunnerTestHelper.MARKET_DIMENSION)
      .metric(QueryRunnerTestHelper.INDEX_METRIC)
      .threshold(threshold)
      .intervals("2018-05-30T00:00:00Z/2018-05-30T01:00:00Z")
      .aggregators(AGGS)
      .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT)
      .build();

  StorageAdapter adapter = makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-31T00:00:00Z", cardinality);
  DimensionSelector dimSelector = makeFakeDimSelector(cardinality);

  BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest =
      new BaseTopNAlgorithm.AggregatorArrayProvider(dimSelector, query, cardinality, adapter);
  arrayProviderToTest.ignoreAfterThreshold();

  Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality);
  // Integer.valueOf replaces the deprecated Integer(int) constructor; assertEquals still
  // compares the boxed values with equals().
  Assert.assertEquals(Integer.valueOf(0), thePair.lhs);
  Assert.assertEquals(Integer.valueOf(cardinality), thePair.rhs);
}
Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.
The class TopNMetricSpecOptimizationsTest, method testAgainShouldNotOptimizeLexicographic.
/**
 * Checks that {@code computeStartEnd} returns the full range {@code [0, cardinality]} when
 * {@code ignoreAfterThreshold()} has not been called, for a query whose interval is one day and
 * a fake storage adapter built from a one-hour pair of timestamps.
 */
@Test
public void testAgainShouldNotOptimizeLexicographic() {
  // query interval is larger than segment interval, no filters, can NOT ignoreAfterThreshold
  int cardinality = 1234;
  int threshold = 4;
  TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension(QueryRunnerTestHelper.MARKET_DIMENSION)
      .metric(QueryRunnerTestHelper.INDEX_METRIC)
      .threshold(threshold)
      .intervals("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z")
      .aggregators(AGGS)
      .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT)
      .build();

  StorageAdapter adapter = makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality);
  DimensionSelector dimSelector = makeFakeDimSelector(cardinality);

  BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest =
      new BaseTopNAlgorithm.AggregatorArrayProvider(dimSelector, query, cardinality, adapter);

  Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality);
  // Integer.valueOf replaces the deprecated Integer(int) constructor; assertEquals still
  // compares the boxed values with equals().
  Assert.assertEquals(Integer.valueOf(0), thePair.lhs);
  Assert.assertEquals(Integer.valueOf(cardinality), thePair.rhs);
}
Aggregations