Example 21 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io.

The class SegmentAnalyzer, method analyze.

public Map<String, ColumnAnalysis> analyze(Segment segment) {
    Preconditions.checkNotNull(segment, "segment");
    // index is null for incremental-index-based segments, but storageAdapter is always available
    final QueryableIndex index = segment.asQueryableIndex();
    final StorageAdapter storageAdapter = segment.asStorageAdapter();
    // get length and column names from storageAdapter
    final int length = storageAdapter.getNumRows();
    final Set<String> columnNames = Sets.newHashSet();
    Iterables.addAll(columnNames, storageAdapter.getAvailableDimensions());
    Iterables.addAll(columnNames, storageAdapter.getAvailableMetrics());
    Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
    for (String columnName : columnNames) {
        final Column column = index == null ? null : index.getColumn(columnName);
        final ColumnCapabilities capabilities = column != null ? column.getCapabilities() : storageAdapter.getColumnCapabilities(columnName);
        final ColumnAnalysis analysis;
        final ValueType type = capabilities.getType();
        switch (type) {
            case LONG:
                analysis = analyzeNumericColumn(capabilities, length, Longs.BYTES);
                break;
            case FLOAT:
                analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
                break;
            case STRING:
                if (index != null) {
                    analysis = analyzeStringColumn(capabilities, column);
                } else {
                    analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
                }
                break;
            case COMPLEX:
                analysis = analyzeComplexColumn(capabilities, column, storageAdapter.getColumnTypeName(columnName));
                break;
            default:
                log.warn("Unknown column type[%s].", type);
                analysis = ColumnAnalysis.error(String.format("unknown_type_%s", type));
        }
        columns.put(columnName, analysis);
    }
    // Add time column too
    ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(Column.TIME_COLUMN_NAME);
    if (timeCapabilities == null) {
        timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
    }
    columns.put(Column.TIME_COLUMN_NAME, analyzeNumericColumn(timeCapabilities, length, NUM_BYTES_IN_TIMESTAMP));
    return columns;
}
Also used: ComplexColumn (io.druid.segment.column.ComplexColumn), Column (io.druid.segment.column.Column), ValueType (io.druid.segment.column.ValueType), QueryableIndex (io.druid.segment.QueryableIndex), ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), StorageAdapter (io.druid.segment.StorageAdapter), ColumnCapabilities (io.druid.segment.column.ColumnCapabilities), ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl)
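
The per-type dispatch above is the heart of the analyzer: fixed-width types get a simple length-times-size estimate, while STRING and COMPLEX columns need a real scan. A minimal, self-contained sketch of that pattern follows; the class name and the byte sizes are illustrative assumptions, not Druid's actual constants (note the original uses NUM_BYTES_IN_TEXT_FLOAT rather than the primitive width).

import io.druid.segment.column.ValueType;

// Illustrative sketch only: mirrors the switch-on-ValueType dispatch in analyze().
public class ValueTypeSizeSketch {
    static long estimatedBytesPerRow(ValueType type) {
        switch (type) {
            case LONG:
                return Long.BYTES;  // fixed width: 8 bytes per row
            case FLOAT:
                return Float.BYTES; // fixed width: 4 bytes per row (assumed here)
            case STRING:
            case COMPLEX:
                return -1;          // variable width: requires scanning the column
            default:
                throw new IllegalArgumentException("Unknown column type: " + type);
        }
    }
}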

Example 22 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io.

The class GroupByQueryHelper, method rowSignatureFor.

/**
   * Returns types for fields that will appear in the Rows output from "query". Useful for feeding them into
   * {@link RowBasedColumnSelectorFactory}.
   *
   * @param query groupBy query
   *
   * @return row types
   */
public static Map<String, ValueType> rowSignatureFor(final GroupByQuery query) {
    final ImmutableMap.Builder<String, ValueType> types = ImmutableMap.builder();
    for (DimensionSpec dimensionSpec : query.getDimensions()) {
        types.put(dimensionSpec.getOutputName(), dimensionSpec.getOutputType());
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        final String typeName = aggregatorFactory.getTypeName();
        final ValueType valueType = typeName != null ? Enums.getIfPresent(ValueType.class, typeName.toUpperCase()).orNull() : null;
        if (valueType != null) {
            types.put(aggregatorFactory.getName(), valueType);
        }
    }
    // Don't include post-aggregators since we don't know what types they are.
    return types.build();
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) ValueType(io.druid.segment.column.ValueType) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) ImmutableMap(com.google.common.collect.ImmutableMap)
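
The lenient Enums.getIfPresent lookup is what keeps unknown aggregator type names from breaking the signature: they map to null and are simply omitted. A minimal sketch of that behavior follows; the helper class and the sample type names are assumptions for illustration.

import com.google.common.base.Enums;
import io.druid.segment.column.ValueType;

// Illustrative sketch: lenient type-name parsing, as in rowSignatureFor.
public class TypeNameParsingSketch {
    static ValueType parseOrNull(String typeName) {
        // getIfPresent returns an absent Optional instead of throwing on unknown names
        return typeName == null
               ? null
               : Enums.getIfPresent(ValueType.class, typeName.toUpperCase()).orNull();
    }

    public static void main(String[] args) {
        System.out.println(parseOrNull("long"));        // LONG
        System.out.println(parseOrNull("hyperUnique")); // null: omitted from the signature
    }
}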

Example 23 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io.

The class RowBasedGrouperHelper, method getValueSuppliersForDimensions.

@SuppressWarnings("unchecked")
private static Supplier<Comparable>[] getValueSuppliersForDimensions(final ColumnSelectorFactory columnSelectorFactory, final List<DimensionSpec> dimensions, final Map<String, ValueType> rawInputRowSignature) {
    final Supplier[] inputRawSuppliers = new Supplier[dimensions.size()];
    for (int i = 0; i < dimensions.size(); i++) {
        final ColumnValueSelector selector = DimensionHandlerUtils.getColumnValueSelectorFromDimensionSpec(dimensions.get(i), columnSelectorFactory);
        ValueType type = rawInputRowSignature.get(dimensions.get(i).getDimension());
        if (type == null) {
            // Subquery post-aggs aren't added to the rowSignature (see rowSignatureFor() in GroupByQueryHelper) because
            // their types aren't known, so default to String handling.
            type = ValueType.STRING;
        }
        switch (type) {
            case STRING:
                inputRawSuppliers[i] = new Supplier<Comparable>() {

                    @Override
                    public Comparable get() {
                        final String value;
                        IndexedInts index = ((DimensionSelector) selector).getRow();
                        value = index.size() == 0 ? "" : ((DimensionSelector) selector).lookupName(index.get(0));
                        return Strings.nullToEmpty(value);
                    }
                };
                break;
            case LONG:
                inputRawSuppliers[i] = new Supplier<Comparable>() {

                    @Override
                    public Comparable get() {
                        return ((LongColumnSelector) selector).get();
                    }
                };
                break;
            case FLOAT:
                inputRawSuppliers[i] = new Supplier<Comparable>() {

                    @Override
                    public Comparable get() {
                        return ((FloatColumnSelector) selector).get();
                    }
                };
                break;
            default:
                throw new IAE("invalid type: [%s]", type);
        }
    }
    return inputRawSuppliers;
}
Also used: ValueType (io.druid.segment.column.ValueType), IndexedInts (io.druid.segment.data.IndexedInts), Supplier (com.google.common.base.Supplier), IAE (io.druid.java.util.common.IAE), ColumnValueSelector (io.druid.segment.ColumnValueSelector)
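
The null check on the signature lookup is the important detail: a dimension absent from rawInputRowSignature (such as a subquery post-aggregator) silently falls back to STRING handling. The following sketch shows that fallback in isolation; the map contents and class name are hypothetical.

import java.util.HashMap;
import java.util.Map;
import io.druid.segment.column.ValueType;

// Illustrative sketch of the STRING fallback in getValueSuppliersForDimensions.
public class SignatureFallbackSketch {
    public static void main(String[] args) {
        Map<String, ValueType> signature = new HashMap<>();
        signature.put("count", ValueType.LONG); // hypothetical signature entry

        // A known dimension keeps its declared type; an unknown one defaults to STRING.
        ValueType known = signature.getOrDefault("count", ValueType.STRING);
        ValueType unknown = signature.getOrDefault("postAgg", ValueType.STRING);
        System.out.println(known + " / " + unknown); // LONG / STRING
    }
}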

Example 24 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io.

The class RowBasedGrouperHelper, method createGrouperAccumulatorPair.

/**
   * If isInputRaw is true, transformations such as timestamp truncation and extraction functions have not yet
   * been applied to the input rows (for example, in a nested query where an extraction function in the outer
   * query applies to a field of the inner query); this method must apply those transformations.
   */
public static Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> createGrouperAccumulatorPair(final GroupByQuery query, final boolean isInputRaw, final Map<String, ValueType> rawInputRowSignature, final GroupByQueryConfig config, final Supplier<ByteBuffer> bufferSupplier, final int concurrencyHint, final LimitedTemporaryStorage temporaryStorage, final ObjectMapper spillMapper, final AggregatorFactory[] aggregatorFactories) {
    // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
    Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");
    final List<ValueType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null;
    final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(includeTimestamp, query.getContextSortByDimsFirst(), query.getDimensions().size(), querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint), valueTypes);
    final ThreadLocal<Row> columnSelectorRow = new ThreadLocal<>();
    final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(RowBasedColumnSelectorFactory.create(columnSelectorRow, rawInputRowSignature));
    final Grouper<RowBasedKey> grouper;
    if (concurrencyHint == -1) {
        grouper = new SpillingGrouper<>(bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories, querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, true);
    } else {
        grouper = new ConcurrentGrouper<>(bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories, querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, concurrencyHint);
    }
    final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
    final ValueExtractFunction valueExtractFn = makeValueExtractFunction(query, isInputRaw, includeTimestamp, columnSelectorFactory, rawInputRowSignature, valueTypes);
    final Accumulator<Grouper<RowBasedKey>, Row> accumulator = new Accumulator<Grouper<RowBasedKey>, Row>() {

        @Override
        public Grouper<RowBasedKey> accumulate(final Grouper<RowBasedKey> theGrouper, final Row row) {
            BaseQuery.checkInterrupted();
            if (theGrouper == null) {
                // Pass-through null returns without doing more work.
                return null;
            }
            if (!theGrouper.isInitialized()) {
                theGrouper.init();
            }
            columnSelectorRow.set(row);
            final Comparable[] key = new Comparable[keySize];
            valueExtractFn.apply(row, key);
            final boolean didAggregate = theGrouper.aggregate(new RowBasedKey(key));
            if (!didAggregate) {
                // null return means grouping resources were exhausted.
                return null;
            }
            columnSelectorRow.set(null);
            return theGrouper;
        }
    };
    return new Pair<>(grouper, accumulator);
}
Also used: Accumulator (io.druid.java.util.common.guava.Accumulator), RowBasedColumnSelectorFactory (io.druid.query.groupby.RowBasedColumnSelectorFactory), ColumnSelectorFactory (io.druid.segment.ColumnSelectorFactory), ValueType (io.druid.segment.column.ValueType), GroupByQueryConfig (io.druid.query.groupby.GroupByQueryConfig), Row (io.druid.data.input.Row), MapBasedRow (io.druid.data.input.MapBasedRow), Pair (io.druid.java.util.common.Pair)
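
Note the accumulator's contract: returning null signals that grouping resources are exhausted, and callers must propagate the null rather than keep folding. Below is a minimal, Druid-free sketch of that short-circuiting fold; all names are hypothetical and the budget check stands in for the grouper's real capacity logic.

import java.util.Arrays;
import java.util.List;
import java.util.function.BiFunction;

// Illustrative sketch of null-propagating accumulation: a null return stops the
// fold, mirroring "grouping resources were exhausted" in the accumulator above.
public class NullStopFoldSketch {
    static <A, T> A fold(A initial, List<T> items, BiFunction<A, T, A> step) {
        A acc = initial;
        for (T item : items) {
            acc = step.apply(acc, item);
            if (acc == null) {
                return null; // short-circuit, as the grouper accumulator does
            }
        }
        return acc;
    }

    public static void main(String[] args) {
        // Sums until the running total would exceed 10, then signals exhaustion with null.
        Integer result = fold(0, Arrays.asList(3, 4, 5), (acc, x) -> acc + x > 10 ? null : acc + x);
        System.out.println(result); // null: 3 + 4 + 5 exceeds the budget
    }
}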

Aggregations

ValueType (io.druid.segment.column.ValueType): 24 usages
ISE (io.druid.java.util.common.ISE): 8 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 5 usages
DimensionSpec (io.druid.query.dimension.DimensionSpec): 4 usages
ColumnCapabilities (io.druid.segment.column.ColumnCapabilities): 4 usages
Closer (com.google.common.io.Closer): 3 usages
Column (io.druid.segment.column.Column): 3 usages
ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl): 3 usages
ComplexColumn (io.druid.segment.column.ComplexColumn): 3 usages
Function (com.google.common.base.Function): 2 usages
Supplier (com.google.common.base.Supplier): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
Row (io.druid.data.input.Row): 2 usages
IAE (io.druid.java.util.common.IAE): 2 usages
ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis): 2 usages
ColumnDescriptor (io.druid.segment.column.ColumnDescriptor): 2 usages
DictionaryEncodedColumn (io.druid.segment.column.DictionaryEncodedColumn): 2 usages
GenericColumn (io.druid.segment.column.GenericColumn): 2 usages
IndexedFloatsGenericColumn (io.druid.segment.column.IndexedFloatsGenericColumn): 2 usages
IndexedLongsGenericColumn (io.druid.segment.column.IndexedLongsGenericColumn): 2 usages