Example 26 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.

The class SegmentAnalyzer, method analyze.

public Map<String, ColumnAnalysis> analyze(Segment segment) {
    Preconditions.checkNotNull(segment, "segment");
    // index is null for incremental-index-based segments, but storageAdapter is always available
    final QueryableIndex index = segment.asQueryableIndex();
    final StorageAdapter storageAdapter = segment.asStorageAdapter();
    // get length and column names from storageAdapter
    final int length = storageAdapter.getNumRows();
    Map<String, ColumnAnalysis> columns = new TreeMap<>();
    final RowSignature rowSignature = storageAdapter.getRowSignature();
    for (String columnName : rowSignature.getColumnNames()) {
        final ColumnCapabilities capabilities;
        if (storageAdapter instanceof IncrementalIndexStorageAdapter) {
            // See javadocs for getSnapshotColumnCapabilities for a discussion of why we need to do this.
            capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName);
        } else {
            capabilities = storageAdapter.getColumnCapabilities(columnName);
        }
        final ColumnAnalysis analysis;
        switch(capabilities.getType()) {
            case LONG:
                final int bytesPerRow = ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES;
                analysis = analyzeNumericColumn(capabilities, length, bytesPerRow);
                break;
            case FLOAT:
                analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
                break;
            case DOUBLE:
                analysis = analyzeNumericColumn(capabilities, length, Double.BYTES);
                break;
            case STRING:
                if (index != null) {
                    analysis = analyzeStringColumn(capabilities, index.getColumnHolder(columnName));
                } else {
                    analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
                }
                break;
            case COMPLEX:
                final ColumnHolder columnHolder = index != null ? index.getColumnHolder(columnName) : null;
                analysis = analyzeComplexColumn(capabilities, columnHolder);
                break;
            default:
                log.warn("Unknown column type[%s].", capabilities.asTypeString());
                analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", capabilities.asTypeString()));
        }
        columns.put(columnName, analysis);
    }
    return columns;
}
Also used : ColumnHolder(org.apache.druid.segment.column.ColumnHolder) QueryableIndex(org.apache.druid.segment.QueryableIndex) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) StorageAdapter(org.apache.druid.segment.StorageAdapter) TreeMap(java.util.TreeMap) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
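For orientation, here is a minimal usage sketch. It is an assumption for illustration, not part of the project source: the caller supplies the Segment, and the analysis types chosen are arbitrary.

import java.util.EnumSet;
import java.util.Map;
import org.apache.druid.query.metadata.SegmentAnalyzer;
import org.apache.druid.query.metadata.metadata.ColumnAnalysis;
import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery.AnalysisType;
import org.apache.druid.segment.Segment;

public class SegmentAnalyzerSketch {
    // `segment` is supplied by the caller (e.g. a QueryableIndexSegment wrapping
    // an on-disk index); SIZE and CARDINALITY are illustrative choices.
    public static void printColumnAnalyses(Segment segment) {
        SegmentAnalyzer analyzer = new SegmentAnalyzer(
            EnumSet.of(AnalysisType.SIZE, AnalysisType.CARDINALITY));
        Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
        analyses.forEach((column, analysis) ->
            System.out.printf("%s: type=%s size=%d%n",
                column, analysis.getType(), analysis.getSize()));
    }
}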

Example 27 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.

The class ScanQueryEngine, method process.

public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final ResponseContext responseContext) {
    // "legacy" should be non-null due to toolChest.mergeResults
    final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "Expected non-null 'legacy' parameter");
    final Long numScannedRows = responseContext.getRowScanCount();
    if (numScannedRows != null && numScannedRows >= query.getScanRowsLimit() && query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
        return Sequences.empty();
    }
    final boolean hasTimeout = QueryContexts.hasTimeout(query);
    final Long timeoutAt = responseContext.getTimeoutTime();
    final long start = System.currentTimeMillis();
    final StorageAdapter adapter = segment.asStorageAdapter();
    if (adapter == null) {
        throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    final List<String> allColumns = new ArrayList<>();
    if (query.getColumns() != null && !query.getColumns().isEmpty()) {
        if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
            allColumns.add(LEGACY_TIMESTAMP_KEY);
        }
        // Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
        // the compactedList form easier to use.
        allColumns.addAll(query.getColumns());
    } else {
        final Set<String> availableColumns = Sets.newLinkedHashSet(
            Iterables.concat(
                Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : ColumnHolder.TIME_COLUMN_NAME),
                Iterables.transform(
                    Arrays.asList(query.getVirtualColumns().getVirtualColumns()),
                    VirtualColumn::getOutputName
                ),
                adapter.getAvailableDimensions(),
                adapter.getAvailableMetrics()
            )
        );
        allColumns.addAll(availableColumns);
        if (legacy) {
            allColumns.remove(ColumnHolder.TIME_COLUMN_NAME);
        }
    }
    final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
    Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
    final SegmentId segmentId = segment.getId();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));
    // If the row count is not set, set it to 0, else do nothing.
    responseContext.addRowScanCount(0);
    final long limit = calculateRemainingScanRowsLimit(query, responseContext);
    return Sequences.concat(adapter.makeCursors(
        filter,
        intervals.get(0),
        query.getVirtualColumns(),
        Granularities.ALL,
        query.getTimeOrder().equals(ScanQuery.Order.DESCENDING)
            || (query.getTimeOrder().equals(ScanQuery.Order.NONE) && query.isDescending()),
        null
    ).map(cursor -> new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {

        @Override
        public Iterator<ScanResultValue> make() {
            final List<BaseObjectColumnValueSelector> columnSelectors = new ArrayList<>(allColumns.size());
            for (String column : allColumns) {
                final BaseObjectColumnValueSelector selector;
                if (legacy && LEGACY_TIMESTAMP_KEY.equals(column)) {
                    selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
                } else {
                    selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
                }
                columnSelectors.add(selector);
            }
            final int batchSize = query.getBatchSize();
            return new Iterator<ScanResultValue>() {

                private long offset = 0;

                @Override
                public boolean hasNext() {
                    return !cursor.isDone() && offset < limit;
                }

                @Override
                public ScanResultValue next() {
                    if (!hasNext()) {
                        throw new NoSuchElementException();
                    }
                    if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
                        throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
                    }
                    final long lastOffset = offset;
                    final Object events;
                    final ScanQuery.ResultFormat resultFormat = query.getResultFormat();
                    if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                        events = rowsToCompactedList();
                    } else if (ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) {
                        events = rowsToList();
                    } else {
                        throw new UOE("resultFormat[%s] is not supported", resultFormat.toString());
                    }
                    responseContext.addRowScanCount(offset - lastOffset);
                    if (hasTimeout) {
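                        // Charge the elapsed scan time against the recorded timeout deadline.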
                        responseContext.putTimeoutTime(timeoutAt - (System.currentTimeMillis() - start));
                    }
                    return new ScanResultValue(segmentId.toString(), allColumns, events);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }

                private List<List<Object>> rowsToCompactedList() {
                    final List<List<Object>> events = new ArrayList<>(batchSize);
                    final long iterLimit = Math.min(limit, offset + batchSize);
                    for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final List<Object> theEvent = new ArrayList<>(allColumns.size());
                        for (int j = 0; j < allColumns.size(); j++) {
                            theEvent.add(getColumnValue(j));
                        }
                        events.add(theEvent);
                    }
                    return events;
                }

                private List<Map<String, Object>> rowsToList() {
                    List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                    final long iterLimit = Math.min(limit, offset + batchSize);
                    for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final Map<String, Object> theEvent = new LinkedHashMap<>();
                        for (int j = 0; j < allColumns.size(); j++) {
                            theEvent.put(allColumns.get(j), getColumnValue(j));
                        }
                        events.add(theEvent);
                    }
                    return events;
                }

                private Object getColumnValue(int i) {
                    final BaseObjectColumnValueSelector selector = columnSelectors.get(i);
                    final Object value;
                    if (legacy && allColumns.get(i).equals(LEGACY_TIMESTAMP_KEY)) {
                        value = DateTimes.utc((long) selector.getObject());
                    } else {
                        value = selector == null ? null : selector.getObject();
                    }
                    return value;
                }
            };
        }

        @Override
        public void cleanup(Iterator<ScanResultValue> iterFromMake) {
        }
    })));
}
Also used : Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) StorageAdapter(org.apache.druid.segment.StorageAdapter) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) Map(java.util.Map) UOE(org.apache.druid.java.util.common.UOE) NoSuchElementException(java.util.NoSuchElementException) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) Sequences(org.apache.druid.java.util.common.guava.Sequences) Segment(org.apache.druid.segment.Segment) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Iterator(java.util.Iterator) ResponseContext(org.apache.druid.query.context.ResponseContext) VirtualColumn(org.apache.druid.segment.VirtualColumn) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Sets(com.google.common.collect.Sets) QueryContexts(org.apache.druid.query.QueryContexts) Granularities(org.apache.druid.java.util.common.granularity.Granularities) List(java.util.List) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) Preconditions(com.google.common.base.Preconditions) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) SegmentId(org.apache.druid.timeline.SegmentId) Filters(org.apache.druid.segment.filter.Filters) Collections(java.util.Collections) Filter(org.apache.druid.query.filter.Filter)
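For orientation, a minimal driving sketch. This is an assumption for illustration, not project source: the datasource, interval, and columns are placeholders, and `segment` plus `responseContext` come from the caller. Note the explicit legacy(false), since process() requires a non-null legacy flag.

import java.util.Collections;
import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.Druids;
import org.apache.druid.query.context.ResponseContext;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.scan.ScanQueryEngine;
import org.apache.druid.query.scan.ScanResultValue;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.Segment;

public class ScanQueryEngineSketch {
    public static List<ScanResultValue> scan(Segment segment, ResponseContext responseContext) {
        ScanQuery query = Druids.newScanQueryBuilder()
            .dataSource("wikipedia") // placeholder datasource
            .intervals(new MultipleIntervalSegmentSpec(
                Collections.singletonList(Intervals.of("2018-01-01/2018-01-02")))) // exactly one interval, as process() asserts
            .columns("page", "user") // explicit columns; an empty list would mean "all"
            .legacy(false) // must be non-null, per the Preconditions check above
            .limit(100)
            .build();
        Sequence<ScanResultValue> results = new ScanQueryEngine().process(query, segment, responseContext);
        // Materialize the lazy sequence; each ScanResultValue carries up to batchSize rows.
        return results.toList();
    }
}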

Example 28 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.

The class CursorOnlyStrategy, method getExecutionPlan.

@Override
public List<SearchQueryExecutor> getExecutionPlan(SearchQuery query, Segment segment) {
    final StorageAdapter adapter = segment.asStorageAdapter();
    final List<DimensionSpec> dimensionSpecs = getDimsToSearch(adapter.getAvailableDimensions(), query.getDimensions());
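    // `filter` and `interval` below are fields inherited from the SearchStrategy
    // base class, initialized from the query; they are not shown in this excerpt.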
    return ImmutableList.of(new CursorBasedExecutor(query, segment, filter, interval, dimensionSpecs));
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) StorageAdapter(org.apache.druid.segment.StorageAdapter)
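In practice the strategy is obtained through SearchStrategySelector rather than constructed directly. The sketch below assumes that wiring and placeholder query values; depending on configuration the selector may return a strategy other than CursorOnlyStrategy.

import com.google.common.base.Suppliers;
import java.util.Collections;
import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.Druids;
import org.apache.druid.query.search.SearchQuery;
import org.apache.druid.query.search.SearchQueryConfig;
import org.apache.druid.query.search.SearchQueryExecutor;
import org.apache.druid.query.search.SearchStrategy;
import org.apache.druid.query.search.SearchStrategySelector;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.Segment;

public class SearchStrategySketch {
    public static List<SearchQueryExecutor> plan(Segment segment) {
        SearchQuery query = Druids.newSearchQueryBuilder()
            .dataSource("wikipedia") // placeholder datasource
            .intervals(new MultipleIntervalSegmentSpec(
                Collections.singletonList(Intervals.of("2018-01-01/2018-01-02"))))
            .query("druid") // contains-style search spec
            .build();
        SearchStrategy strategy =
            new SearchStrategySelector(Suppliers.ofInstance(new SearchQueryConfig())).strategize(query);
        return strategy.getExecutionPlan(query, segment);
    }
}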

Example 29 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.

The class TopNMetricSpecOptimizationsTest, method testShouldNotOptimizeLexicographic.

@Test
public void testShouldNotOptimizeLexicographic() {
    // The query interval (one hour) is smaller than the segment interval (one day),
    // so even though ignoreAfterThreshold() is invoked below, the optimization must not apply.
    int cardinality = 1234;
    int threshold = 4;
    TopNQuery query = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .granularity(QueryRunnerTestHelper.ALL_GRAN)
        .dimension(QueryRunnerTestHelper.MARKET_DIMENSION)
        .metric(QueryRunnerTestHelper.INDEX_METRIC)
        .threshold(threshold)
        .intervals("2018-05-30T00:00:00Z/2018-05-30T01:00:00Z")
        .aggregators(AGGS)
        .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT)
        .build();
    StorageAdapter adapter = makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-31T00:00:00Z", cardinality);
    DimensionSelector dimSelector = makeFakeDimSelector(cardinality);
    BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider(dimSelector, query, cardinality, adapter);
    arrayProviderToTest.ignoreAfterThreshold();
    Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality);
    Assert.assertEquals(Integer.valueOf(0), thePair.lhs);
    Assert.assertEquals(Integer.valueOf(cardinality), thePair.rhs);
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) StorageAdapter(org.apache.druid.segment.StorageAdapter) Test(org.junit.Test)
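The expectation follows from interval containment: the one-hour query interval does not cover the one-day segment interval, so the ignoreAfterThreshold() opt-in cannot take effect. A small self-contained illustration (class name is ours):

import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;

public class ContainmentSketch {
    public static void main(String[] args) {
        Interval queryInterval = Intervals.of("2018-05-30T00:00:00Z/2018-05-30T01:00:00Z");
        Interval segmentInterval = Intervals.of("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z");
        // The TopN shortcut requires the query to cover the whole segment;
        // here it does not, so computeStartEnd falls back to (0, cardinality).
        System.out.println(queryInterval.contains(segmentInterval)); // false
    }
}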

Example 30 with StorageAdapter

Use of org.apache.druid.segment.StorageAdapter in project druid by druid-io.

The class TopNMetricSpecOptimizationsTest, method testAgainShouldNotOptimizeLexicographic.

@Test
public void testAgainShouldNotOptimizeLexicographic() {
    // The query interval (one day) covers the segment interval (one hour), but
    // ignoreAfterThreshold() is never invoked, so the optimization still must not apply.
    int cardinality = 1234;
    int threshold = 4;
    TopNQuery query = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .granularity(QueryRunnerTestHelper.ALL_GRAN)
        .dimension(QueryRunnerTestHelper.MARKET_DIMENSION)
        .metric(QueryRunnerTestHelper.INDEX_METRIC)
        .threshold(threshold)
        .intervals("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z")
        .aggregators(AGGS)
        .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT)
        .build();
    StorageAdapter adapter = makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality);
    DimensionSelector dimSelector = makeFakeDimSelector(cardinality);
    BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider(dimSelector, query, cardinality, adapter);
    Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality);
    Assert.assertEquals(Integer.valueOf(0), thePair.lhs);
    Assert.assertEquals(Integer.valueOf(cardinality), thePair.rhs);
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) StorageAdapter(org.apache.druid.segment.StorageAdapter) Test(org.junit.Test)
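Taken together, Examples 29 and 30 pin down both preconditions of the shortcut: the ignoreAfterThreshold() opt-in and full interval coverage. The sketch below paraphrases the guard the two expectations imply; it is an assumption for illustration, not Druid's actual implementation, and every field name is invented.

import org.apache.druid.java.util.common.Pair;
import org.apache.druid.segment.StorageAdapter;
import org.joda.time.Interval;

public class StartEndSketch {
    private final boolean ignoreAfterThreshold; // opt-in flag (never set in Example 30)
    private final Interval queryInterval;
    private final StorageAdapter adapter;
    private final boolean hasFilter;
    private final int threshold;

    public StartEndSketch(boolean ignoreAfterThreshold, Interval queryInterval,
                          StorageAdapter adapter, boolean hasFilter, int threshold) {
        this.ignoreAfterThreshold = ignoreAfterThreshold;
        this.queryInterval = queryInterval;
        this.adapter = adapter;
        this.hasFilter = hasFilter;
        this.threshold = threshold;
    }

    public Pair<Integer, Integer> computeStartEnd(int cardinality) {
        // Pruning needs BOTH the opt-in (absent in Example 30) and full interval
        // coverage (absent in Example 29); if either is missing, scan everything.
        if (ignoreAfterThreshold && !hasFilter && queryInterval.contains(adapter.getInterval())) {
            return new Pair<>(0, Math.min(threshold, cardinality));
        }
        return new Pair<>(0, cardinality);
    }
}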

Aggregations

StorageAdapter (org.apache.druid.segment.StorageAdapter) 39
Cursor (org.apache.druid.segment.Cursor) 22
QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter) 22
Test (org.junit.Test) 16
Benchmark (org.openjdk.jmh.annotations.Benchmark) 14
BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode) 14
OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit) 14
Filter (org.apache.druid.query.filter.Filter) 13
DimensionSelector (org.apache.druid.segment.DimensionSelector) 11
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 11
DimFilter (org.apache.druid.query.filter.DimFilter) 10
SelectorFilter (org.apache.druid.segment.filter.SelectorFilter) 10
Interval (org.joda.time.Interval) 10
List (java.util.List) 9
AndDimFilter (org.apache.druid.query.filter.AndDimFilter) 9
BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter) 9
OrDimFilter (org.apache.druid.query.filter.OrDimFilter) 9
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter) 9
ColumnSelectorFactory (org.apache.druid.segment.ColumnSelectorFactory) 8
Filters (org.apache.druid.segment.filter.Filters) 8