Example 21 with RowSignature

Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

From the class ResultRow, method toMap.

/**
 * Returns a Map representation of the data in this row. Does not include the timestamp.
 */
public Map<String, Object> toMap(final GroupByQuery query) {
    final RowSignature signature = query.getResultRowSignature();
    final Map<String, Object> map = new HashMap<>();
    for (int i = query.getResultRowDimensionStart(); i < row.length; i++) {
        final String columnName = signature.getColumnName(i);
        map.put(columnName, row[i]);
    }
    return map;
}
Also used : HashMap(java.util.HashMap) RowSignature(org.apache.druid.segment.column.RowSignature)
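
A hedged usage sketch (not from the source): assuming "query" is the GroupByQuery and "results" is the List<ResultRow> it produced, each row can be turned into a column-name-keyed map with toMap.

// "query" and "results" are hypothetical names; assumes java.util.stream.Collectors is imported.
final List<Map<String, Object>> asMaps = results.stream()
    .map(resultRow -> resultRow.toMap(query))
    .collect(Collectors.toList());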

Example 22 with RowSignature

Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

From the class RowBasedGrouperHelper, method createResultRowBasedColumnSelectorFactory.

/**
 * Creates a {@link ColumnSelectorFactory} that can read rows which originate as results of the provided "query".
 *
 * @param query        a groupBy query
 * @param supplier     supplier of result rows from the query
 * @param finalization whether the column capabilities reported by this factory should reflect finalized types
 */
public static ColumnSelectorFactory createResultRowBasedColumnSelectorFactory(final GroupByQuery query, final Supplier<ResultRow> supplier, final RowSignature.Finalization finalization) {
    final RowSignature signature = query.getResultRowSignature(finalization);
    final RowAdapter<ResultRow> adapter = new RowAdapter<ResultRow>() {

        @Override
        public ToLongFunction<ResultRow> timestampFunction() {
            if (query.getResultRowHasTimestamp()) {
                return row -> row.getLong(0);
            } else {
                final long timestamp = query.getUniversalTimestamp().getMillis();
                return row -> timestamp;
            }
        }

        @Override
        public Function<ResultRow, Object> columnFunction(final String columnName) {
            final int columnIndex = signature.indexOf(columnName);
            if (columnIndex < 0) {
                return row -> null;
            } else {
                return row -> row.get(columnIndex);
            }
        }
    };
    // Decorate "signature" so that it returns hasMultipleValues = false. (groupBy does not return multiple values.)
    final ColumnInspector decoratedSignature = new ColumnInspector() {

        @Nullable
        @Override
        public ColumnCapabilities getColumnCapabilities(String column) {
            final ColumnCapabilities baseCapabilities = signature.getColumnCapabilities(column);
            if (baseCapabilities == null || baseCapabilities.hasMultipleValues().isFalse()) {
                return baseCapabilities;
            } else {
                return ColumnCapabilitiesImpl.copyOf(baseCapabilities).setHasMultipleValues(false);
            }
        }
    };
    return RowBasedColumnSelectorFactory.create(adapter, supplier::get, decoratedSignature, false);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) IntArrayUtils(org.apache.druid.common.utils.IntArrayUtils) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) IndexedInts(org.apache.druid.segment.data.IndexedInts) ByteBuffer(java.nio.ByteBuffer) Pair(org.apache.druid.java.util.common.Pair) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnSelectorStrategyFactory(org.apache.druid.query.dimension.ColumnSelectorStrategyFactory) JsonValue(com.fasterxml.jackson.annotation.JsonValue) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) BufferComparator(org.apache.druid.query.groupby.epinephelinae.Grouper.BufferComparator) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) IAE(org.apache.druid.java.util.common.IAE) ToLongFunction(java.util.function.ToLongFunction) Longs(com.google.common.primitives.Longs) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ResultRow(org.apache.druid.query.groupby.ResultRow) Predicate(java.util.function.Predicate) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) Collectors(java.util.stream.Collectors) List(java.util.List) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BaseDoubleColumnValueSelector(org.apache.druid.segment.BaseDoubleColumnValueSelector) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Accumulator(org.apache.druid.java.util.common.guava.Accumulator) IntStream(java.util.stream.IntStream) ColumnSelectorPlus(org.apache.druid.query.ColumnSelectorPlus) ComparableList(org.apache.druid.segment.data.ComparableList) Supplier(com.google.common.base.Supplier) BaseQuery(org.apache.druid.query.BaseQuery) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) ColumnSelectorStrategy(org.apache.druid.query.dimension.ColumnSelectorStrategy) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) DimensionSelector(org.apache.druid.segment.DimensionSelector) Nullable(javax.annotation.Nullable) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) ColumnInspector(org.apache.druid.segment.ColumnInspector) StringComparator(org.apache.druid.query.ordering.StringComparator) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) DateTime(org.joda.time.DateTime) 
Ints(com.google.common.primitives.Ints) BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) Closeable(java.io.Closeable) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) BitSet(java.util.BitSet) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) Comparator(java.util.Comparator) Filters(org.apache.druid.segment.filter.Filters) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) Filter(org.apache.druid.query.filter.Filter) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnInspector(org.apache.druid.segment.ColumnInspector) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
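
A hedged usage sketch (names are illustrative, not from the source): feed result rows to the factory through a SettableSupplier and read a column via a ColumnValueSelector. The finalization mode and the column name "m0" are assumptions.

// "query" is the GroupByQuery and "someResultRow" one of its result rows (both assumed).
final SettableSupplier<ResultRow> rowSupplier = new SettableSupplier<>();
final ColumnSelectorFactory factory = RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory(
    query,
    rowSupplier,
    RowSignature.Finalization.NO
);
final ColumnValueSelector<?> selector = factory.makeColumnValueSelector("m0");
rowSupplier.set(someResultRow);
final Object value = selector.getObject();  // value of "m0" in the currently supplied row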

Example 23 with RowSignature

Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

From the class QuantileSqlAggregator, method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0)));
    if (input == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final String histogramName = StringUtils.format("%s:agg", name);
    final RexNode probabilityArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
    if (!probabilityArg.isA(SqlKind.LITERAL)) {
        // Probability must be a literal in order to plan.
        return null;
    }
    final float probability = ((Number) RexLiteral.value(probabilityArg)).floatValue();
    final int resolution;
    if (aggregateCall.getArgList().size() >= 3) {
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
        if (!resolutionArg.isA(SqlKind.LITERAL)) {
            // Resolution must be a literal in order to plan.
            return null;
        }
        resolution = ((Number) RexLiteral.value(resolutionArg)).intValue();
    } else {
        resolution = ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE;
    }
    final int numBuckets = ApproximateHistogram.DEFAULT_BUCKET_SIZE;
    final float lowerLimit = Float.NEGATIVE_INFINITY;
    final float upperLimit = Float.POSITIVE_INFINITY;
    // Look for existing matching aggregatorFactory.
    for (final Aggregation existing : existingAggregations) {
        for (AggregatorFactory factory : existing.getAggregatorFactories()) {
            if (factory instanceof ApproximateHistogramAggregatorFactory) {
                final ApproximateHistogramAggregatorFactory theFactory = (ApproximateHistogramAggregatorFactory) factory;
                // Check input for equivalence.
                final boolean inputMatches;
                final DruidExpression virtualInput = virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields()).stream().findFirst().orElse(null);
                if (virtualInput == null) {
                    inputMatches = input.isDirectColumnAccess() && input.getDirectColumn().equals(theFactory.getFieldName());
                } else {
                    inputMatches = virtualInput.equals(input);
                }
                final boolean matches = inputMatches && theFactory.getResolution() == resolution && theFactory.getNumBuckets() == numBuckets && theFactory.getLowerLimit() == lowerLimit && theFactory.getUpperLimit() == upperLimit;
                if (matches) {
                    // Found existing one. Use this.
                    return Aggregation.create(ImmutableList.of(), new QuantilePostAggregator(name, factory.getName(), probability));
                }
            }
        }
    }
    // No existing match found. Create a new one.
    if (input.isDirectColumnAccess()) {
        if (rowSignature.getColumnType(input.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
            aggregatorFactory = new ApproximateHistogramFoldingAggregatorFactory(histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false);
        } else {
            aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false);
        }
    } else {
        final String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
        aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, virtualColumnName, resolution, numBuckets, lowerLimit, upperLimit, false);
    }
    return Aggregation.create(ImmutableList.of(aggregatorFactory), new QuantilePostAggregator(name, histogramName, probability));
}
Also used : Project(org.apache.calcite.rel.core.Project) SqlAggregator(org.apache.druid.sql.calcite.aggregation.SqlAggregator) ReturnTypes(org.apache.calcite.sql.type.ReturnTypes) QuantilePostAggregator(org.apache.druid.query.aggregation.histogram.QuantilePostAggregator) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) ApproximateHistogram(org.apache.druid.query.aggregation.histogram.ApproximateHistogram) ImmutableList(com.google.common.collect.ImmutableList) RexNode(org.apache.calcite.rex.RexNode) VirtualColumnRegistry(org.apache.druid.sql.calcite.rel.VirtualColumnRegistry) PlannerContext(org.apache.druid.sql.calcite.planner.PlannerContext) Nullable(javax.annotation.Nullable) SqlKind(org.apache.calcite.sql.SqlKind) SqlTypeFamily(org.apache.calcite.sql.type.SqlTypeFamily) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) RexBuilder(org.apache.calcite.rex.RexBuilder) RexLiteral(org.apache.calcite.rex.RexLiteral) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SqlFunctionCategory(org.apache.calcite.sql.SqlFunctionCategory) ApproximateHistogramFoldingAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) ValueType(org.apache.druid.segment.column.ValueType) Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) List(java.util.List) Aggregations(org.apache.druid.sql.calcite.aggregation.Aggregations) RowSignature(org.apache.druid.segment.column.RowSignature) OperandTypes(org.apache.calcite.sql.type.OperandTypes) ColumnType(org.apache.druid.segment.column.ColumnType) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) Expressions(org.apache.druid.sql.calcite.expression.Expressions) ApproximateHistogramFoldingAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) QuantilePostAggregator(org.apache.druid.query.aggregation.histogram.QuantilePostAggregator) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ApproximateHistogramFoldingAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)
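
For orientation, a hedged sketch of the native aggregation this planner code builds in the direct-column case, mirroring the constructor calls above. The output name "a0", the column "latency", and the probability 0.95f are illustrative; the defaults correspond to a call that passes only the probability.

// Illustrative only; mirrors the snippet's constructor calls for direct column access.
final AggregatorFactory histogram = new ApproximateHistogramAggregatorFactory(
    "a0:agg",                                     // histogramName = StringUtils.format("%s:agg", name)
    "latency",                                    // hypothetical direct column
    ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE,  // resolution (no third SQL argument supplied)
    ApproximateHistogram.DEFAULT_BUCKET_SIZE,     // numBuckets
    Float.NEGATIVE_INFINITY,                      // lowerLimit
    Float.POSITIVE_INFINITY,                      // upperLimit
    false
);
final Aggregation aggregation = Aggregation.create(
    ImmutableList.of(histogram),
    new QuantilePostAggregator("a0", "a0:agg", 0.95f)
);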

Example 24 with RowSignature

Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

From the class SegmentAnalyzer, method analyze.

public Map<String, ColumnAnalysis> analyze(Segment segment) {
    Preconditions.checkNotNull(segment, "segment");
    // index is null for incremental-index-based segments, but storageAdapter is always available
    final QueryableIndex index = segment.asQueryableIndex();
    final StorageAdapter storageAdapter = segment.asStorageAdapter();
    // get length and column names from storageAdapter
    final int length = storageAdapter.getNumRows();
    Map<String, ColumnAnalysis> columns = new TreeMap<>();
    final RowSignature rowSignature = storageAdapter.getRowSignature();
    for (String columnName : rowSignature.getColumnNames()) {
        final ColumnCapabilities capabilities;
        if (storageAdapter instanceof IncrementalIndexStorageAdapter) {
            // See javadocs for getSnapshotColumnCapabilities for a discussion of why we need to do this.
            capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName);
        } else {
            capabilities = storageAdapter.getColumnCapabilities(columnName);
        }
        final ColumnAnalysis analysis;
        switch(capabilities.getType()) {
            case LONG:
                final int bytesPerRow = ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES;
                analysis = analyzeNumericColumn(capabilities, length, bytesPerRow);
                break;
            case FLOAT:
                analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
                break;
            case DOUBLE:
                analysis = analyzeNumericColumn(capabilities, length, Double.BYTES);
                break;
            case STRING:
                if (index != null) {
                    analysis = analyzeStringColumn(capabilities, index.getColumnHolder(columnName));
                } else {
                    analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
                }
                break;
            case COMPLEX:
                final ColumnHolder columnHolder = index != null ? index.getColumnHolder(columnName) : null;
                analysis = analyzeComplexColumn(capabilities, columnHolder);
                break;
            default:
                log.warn("Unknown column type[%s].", capabilities.asTypeString());
                analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", capabilities.asTypeString()));
        }
        columns.put(columnName, analysis);
    }
    return columns;
}
Also used : ColumnHolder(org.apache.druid.segment.column.ColumnHolder) QueryableIndex(org.apache.druid.segment.QueryableIndex) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) StorageAdapter(org.apache.druid.segment.StorageAdapter) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) TreeMap(java.util.TreeMap) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
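
A hedged calling sketch (the chosen analysis types and the EnumSet-based constructor are assumptions, as is how the segment is obtained): build a SegmentAnalyzer, analyze a segment, and inspect each column's reported type.

// "segment" is an org.apache.druid.segment.Segment obtained elsewhere (assumed);
// assumes java.util.EnumSet and SegmentMetadataQuery are imported.
final SegmentAnalyzer analyzer = new SegmentAnalyzer(
    EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE, SegmentMetadataQuery.AnalysisType.CARDINALITY)
);
final Map<String, ColumnAnalysis> columns = analyzer.analyze(segment);
columns.forEach((name, analysis) -> System.out.println(name + " -> " + analysis.getType()));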

Example 25 with RowSignature

Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

From the class InlineDataSourceTest, method test_serde_untyped.

@Test
public void test_serde_untyped() throws Exception {
    // Create a row signature with no types set.
    final RowSignature.Builder builder = RowSignature.builder();
    for (String columnName : expectedRowSignature.getColumnNames()) {
        builder.add(columnName, null);
    }
    final RowSignature untypedSignature = builder.build();
    final InlineDataSource untypedDataSource = InlineDataSource.fromIterable(rows, untypedSignature);
    final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
    final InlineDataSource deserialized = (InlineDataSource) jsonMapper.readValue(jsonMapper.writeValueAsString(untypedDataSource), DataSource.class);
    Assert.assertEquals(untypedDataSource.getColumnNames(), deserialized.getColumnNames());
    Assert.assertEquals(untypedDataSource.getColumnTypes(), deserialized.getColumnTypes());
    Assert.assertEquals(untypedDataSource.getRowSignature(), deserialized.getRowSignature());
    Assert.assertNull(deserialized.getColumnTypes());
    assertRowsEqual(listDataSource.getRows(), deserialized.getRows());
}
Also used : RowSignature(org.apache.druid.segment.column.RowSignature) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)
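
For contrast with the untyped signature built in the test, a hedged sketch of a fully typed signature (column names and types are illustrative; "rows" is the same Iterable of Object[] used above).

// Illustrative typed signature; compare with the untyped builder loop in the test.
final RowSignature typedSignature = RowSignature.builder()
    .add("dim", ColumnType.STRING)
    .add("cnt", ColumnType.LONG)
    .build();
final InlineDataSource typedDataSource = InlineDataSource.fromIterable(rows, typedSignature);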

Aggregations

Classes most often used together with RowSignature in these usages (occurrence counts):

RowSignature (org.apache.druid.segment.column.RowSignature): 46
ColumnType (org.apache.druid.segment.column.ColumnType): 17
List (java.util.List): 14
Test (org.junit.Test): 13
Collectors (java.util.stream.Collectors): 12
Nullable (javax.annotation.Nullable): 11
PlannerContext (org.apache.druid.sql.calcite.planner.PlannerContext): 11
DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression): 10
Expressions (org.apache.druid.sql.calcite.expression.Expressions): 10
Project (org.apache.calcite.rel.core.Project): 9
RexLiteral (org.apache.calcite.rex.RexLiteral): 9
RexNode (org.apache.calcite.rex.RexNode): 9
SqlKind (org.apache.calcite.sql.SqlKind): 9
ISE (org.apache.druid.java.util.common.ISE): 9
Aggregation (org.apache.druid.sql.calcite.aggregation.Aggregation): 9
ArrayList (java.util.ArrayList): 8
StringUtils (org.apache.druid.java.util.common.StringUtils): 8
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 8
ImmutableList (com.google.common.collect.ImmutableList): 6
ImmutableSet (com.google.common.collect.ImmutableSet): 6