Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class DefaultLimitSpec, method build():
@Override
public Function<Sequence<Row>, Sequence<Row>> build(
    List<DimensionSpec> dimensions,
    List<AggregatorFactory> aggs,
    List<PostAggregator> postAggs
) {
  // Can avoid re-sorting if the natural ordering is good enough.
  boolean sortingNeeded = false;

  if (dimensions.size() < columns.size()) {
    sortingNeeded = true;
  }

  final Set<String> aggAndPostAggNames = Sets.newHashSet();
  for (AggregatorFactory agg : aggs) {
    aggAndPostAggNames.add(agg.getName());
  }
  for (PostAggregator postAgg : postAggs) {
    aggAndPostAggNames.add(postAgg.getName());
  }

  if (!sortingNeeded) {
    for (int i = 0; i < columns.size(); i++) {
      final OrderByColumnSpec columnSpec = columns.get(i);

      if (aggAndPostAggNames.contains(columnSpec.getDimension())) {
        sortingNeeded = true;
        break;
      }

      final ValueType columnType = getOrderByType(columnSpec, dimensions);
      final StringComparator naturalComparator;
      if (columnType == ValueType.STRING) {
        naturalComparator = StringComparators.LEXICOGRAPHIC;
      } else if (columnType == ValueType.LONG || columnType == ValueType.FLOAT) {
        naturalComparator = StringComparators.NUMERIC;
      } else {
        sortingNeeded = true;
        break;
      }

      if (columnSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING
          || !columnSpec.getDimensionComparator().equals(naturalComparator)
          || !columnSpec.getDimension().equals(dimensions.get(i).getOutputName())) {
        sortingNeeded = true;
        break;
      }
    }
  }

  if (!sortingNeeded) {
    return limit == Integer.MAX_VALUE ? Functions.<Sequence<Row>>identity() : new LimitingFn(limit);
  }

  // Materialize the Comparator first for fast-fail error checking.
  final Ordering<Row> ordering = makeComparator(dimensions, aggs, postAggs);

  if (limit == Integer.MAX_VALUE) {
    return new SortingFn(ordering);
  } else {
    return new TopNFunction(ordering, limit);
  }
}
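The pay-off in build() is the short-circuit: when every orderBy column lines up positionally with a grouping dimension, sorts ascending under its type's natural comparator, and does not name an aggregator or post-aggregator, the rows already arrive in order and only the limit needs applying. Below is a minimal standalone sketch of that decision; ValueType, Direction, and ColumnSpec are simplified stand-ins for the Druid classes, and the sketch omits the comparator-equality and aggregator-name checks of the real method.

import java.util.Arrays;
import java.util.List;

// Standalone sketch of the "natural ordering is good enough" check.
// The types here are stand-ins, not Druid's real API.
public class NaturalOrderingCheck {
  enum ValueType { STRING, LONG, FLOAT, COMPLEX }
  enum Direction { ASCENDING, DESCENDING }

  static class ColumnSpec {
    final String dimension;
    final ValueType type;
    final Direction direction;

    ColumnSpec(String dimension, ValueType type, Direction direction) {
      this.dimension = dimension;
      this.type = type;
      this.direction = direction;
    }
  }

  // Returns true if rows must be re-sorted; false if the order produced by the
  // grouping dimensions already satisfies every orderBy column.
  static boolean sortingNeeded(List<String> groupingDims, List<ColumnSpec> orderBy) {
    if (groupingDims.size() < orderBy.size()) {
      return true; // orderBy mentions more columns than we grouped on
    }
    for (int i = 0; i < orderBy.size(); i++) {
      ColumnSpec spec = orderBy.get(i);
      boolean naturallyComparable = spec.type == ValueType.STRING
          || spec.type == ValueType.LONG
          || spec.type == ValueType.FLOAT;
      // Only an ascending, naturally comparable column that lines up
      // positionally with a grouping dimension preserves the natural order.
      if (!naturallyComparable
          || spec.direction != Direction.ASCENDING
          || !spec.dimension.equals(groupingDims.get(i))) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    List<ColumnSpec> orderBy =
        Arrays.asList(new ColumnSpec("country", ValueType.STRING, Direction.ASCENDING));
    System.out.println(sortingNeeded(Arrays.asList("country", "city"), orderBy)); // false
    System.out.println(sortingNeeded(Arrays.asList("city"), orderBy));            // true
  }
}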
Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class SegmentAnalyzer, method analyze():
public Map<String, ColumnAnalysis> analyze(Segment segment) {
  Preconditions.checkNotNull(segment, "segment");

  // index is null for incremental-index-based segments, but storageAdapter is always available
  final QueryableIndex index = segment.asQueryableIndex();
  final StorageAdapter storageAdapter = segment.asStorageAdapter();

  // get length and column names from storageAdapter
  final int length = storageAdapter.getNumRows();
  final Set<String> columnNames = Sets.newHashSet();
  Iterables.addAll(columnNames, storageAdapter.getAvailableDimensions());
  Iterables.addAll(columnNames, storageAdapter.getAvailableMetrics());

  Map<String, ColumnAnalysis> columns = Maps.newTreeMap();

  for (String columnName : columnNames) {
    final Column column = index == null ? null : index.getColumn(columnName);
    final ColumnCapabilities capabilities =
        column != null ? column.getCapabilities() : storageAdapter.getColumnCapabilities(columnName);

    final ColumnAnalysis analysis;
    final ValueType type = capabilities.getType();
    switch (type) {
      case LONG:
        analysis = analyzeNumericColumn(capabilities, length, Longs.BYTES);
        break;
      case FLOAT:
        analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
        break;
      case STRING:
        if (index != null) {
          analysis = analyzeStringColumn(capabilities, column);
        } else {
          analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
        }
        break;
      case COMPLEX:
        analysis = analyzeComplexColumn(capabilities, column, storageAdapter.getColumnTypeName(columnName));
        break;
      default:
        log.warn("Unknown column type[%s].", type);
        analysis = ColumnAnalysis.error(String.format("unknown_type_%s", type));
    }

    columns.put(columnName, analysis);
  }

  // Add time column too
  ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(Column.TIME_COLUMN_NAME);
  if (timeCapabilities == null) {
    timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
  }
  columns.put(
      Column.TIME_COLUMN_NAME,
      analyzeNumericColumn(timeCapabilities, length, NUM_BYTES_IN_TIMESTAMP)
  );

  return columns;
}
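analyzeNumericColumn itself is not shown here; the switch only hints that fixed-width numeric columns get a flat per-row byte estimate (Longs.BYTES for longs, NUM_BYTES_IN_TEXT_FLOAT for floats), while string and complex columns need per-value inspection. The following is a hypothetical sketch of such a flat estimate, with assumed widths; it is not Druid's actual implementation.

// Hypothetical sketch of a flat per-type size estimate. The widths (8 bytes
// per long, a fixed assumed text width per float) are guesses read off the
// constants used above, not values from the Druid source.
public class ColumnSizeSketch {
  enum ValueType { LONG, FLOAT, STRING, COMPLEX }

  static long estimateBytes(ValueType type, int numRows) {
    switch (type) {
      case LONG:
        return (long) numRows * 8L;   // fixed-width longs: rows * 8
      case FLOAT:
        return (long) numRows * 10L;  // assumed per-row text width for floats
      default:
        // STRING and COMPLEX need per-value inspection, as the
        // analyzeStringColumn/analyzeComplexColumn branches above suggest.
        throw new UnsupportedOperationException("no flat estimate for " + type);
    }
  }

  public static void main(String[] args) {
    System.out.println(estimateBytes(ValueType.LONG, 1_000_000)); // 8000000
  }
}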
Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class GroupByQueryHelper, method rowSignatureFor():
/**
* Returns types for fields that will appear in the Rows output from "query". Useful for feeding them into
* {@link RowBasedColumnSelectorFactory}.
*
* @param query groupBy query
*
* @return row types
*/
public static Map<String, ValueType> rowSignatureFor(final GroupByQuery query) {
  final ImmutableMap.Builder<String, ValueType> types = ImmutableMap.builder();

  for (DimensionSpec dimensionSpec : query.getDimensions()) {
    types.put(dimensionSpec.getOutputName(), dimensionSpec.getOutputType());
  }

  for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
    final String typeName = aggregatorFactory.getTypeName();
    final ValueType valueType =
        typeName != null ? Enums.getIfPresent(ValueType.class, typeName.toUpperCase()).orNull() : null;
    if (valueType != null) {
      types.put(aggregatorFactory.getName(), valueType);
    }
  }

  // Don't include post-aggregators since we don't know what types they are.
  return types.build();
}
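The Enums.getIfPresent call is the key null-safety idiom here: unlike ValueType.valueOf, Guava's Enums.getIfPresent returns an absent Optional instead of throwing when an aggregator reports a type name outside the enum (a complex sketch type, say), so the entry is simply skipped. A self-contained demonstration of the idiom, using Guava and a local stand-in for ValueType:

import com.google.common.base.Enums;
import com.google.common.base.Optional;

// Demonstrates resolving a free-form type name to an enum constant without
// risking the IllegalArgumentException that Enum.valueOf would throw.
// ValueType here is a stand-in for io.druid.segment.column.ValueType.
public class TypeNameResolution {
  enum ValueType { STRING, LONG, FLOAT, COMPLEX }

  static ValueType resolve(String typeName) {
    if (typeName == null) {
      return null;
    }
    Optional<ValueType> maybe = Enums.getIfPresent(ValueType.class, typeName.toUpperCase());
    return maybe.orNull(); // null when there is no matching constant
  }

  public static void main(String[] args) {
    System.out.println(resolve("long"));        // LONG
    System.out.println(resolve("hyperUnique")); // null: caller skips the entry
  }
}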
Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class RowBasedGrouperHelper, method getValueSuppliersForDimensions():
@SuppressWarnings("unchecked")
private static Supplier<Comparable>[] getValueSuppliersForDimensions(
    final ColumnSelectorFactory columnSelectorFactory,
    final List<DimensionSpec> dimensions,
    final Map<String, ValueType> rawInputRowSignature
) {
  final Supplier[] inputRawSuppliers = new Supplier[dimensions.size()];

  for (int i = 0; i < dimensions.size(); i++) {
    final ColumnValueSelector selector =
        DimensionHandlerUtils.getColumnValueSelectorFromDimensionSpec(dimensions.get(i), columnSelectorFactory);

    ValueType type = rawInputRowSignature.get(dimensions.get(i).getDimension());
    if (type == null) {
      // Subquery post-aggs aren't added to the rowSignature (see rowSignatureFor() in GroupByQueryHelper)
      // because their types aren't known, so default to String handling.
      type = ValueType.STRING;
    }

    switch (type) {
      case STRING:
        inputRawSuppliers[i] = new Supplier<Comparable>() {
          @Override
          public Comparable get() {
            final String value;
            IndexedInts index = ((DimensionSelector) selector).getRow();
            value = index.size() == 0 ? "" : ((DimensionSelector) selector).lookupName(index.get(0));
            return Strings.nullToEmpty(value);
          }
        };
        break;
      case LONG:
        inputRawSuppliers[i] = new Supplier<Comparable>() {
          @Override
          public Comparable get() {
            return ((LongColumnSelector) selector).get();
          }
        };
        break;
      case FLOAT:
        inputRawSuppliers[i] = new Supplier<Comparable>() {
          @Override
          public Comparable get() {
            return ((FloatColumnSelector) selector).get();
          }
        };
        break;
      default:
        throw new IAE("invalid type: [%s]", type);
    }
  }

  return inputRawSuppliers;
}
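The method builds one supplier per dimension so the per-row hot path is a single virtual call with no type dispatch; the switch happens once, up front. The same pattern reads more compactly with Java 8 lambdas, sketched below against simplified stand-ins for Druid's selector interfaces (the real code uses Guava's Supplier and anonymous classes, not java.util.function.Supplier).

import java.util.function.Supplier;

// Standalone sketch of the per-type supplier pattern with lambdas. The
// selector interfaces are stand-ins for DimensionSelector, LongColumnSelector,
// and FloatColumnSelector, not Druid's real API.
public class ValueSupplierSketch {
  enum ValueType { STRING, LONG, FLOAT }

  interface StringSelector { String get(); }
  interface LongSelector { long get(); }
  interface FloatSelector { float get(); }

  @SuppressWarnings("rawtypes")
  static Supplier<Comparable> supplierFor(ValueType type, Object selector) {
    switch (type) {
      case STRING:
        // Null-safe: a missing value becomes "", mirroring Strings.nullToEmpty above.
        return () -> {
          String value = ((StringSelector) selector).get();
          return value == null ? "" : value;
        };
      case LONG:
        return () -> ((LongSelector) selector).get();  // boxes to Long
      case FLOAT:
        return () -> ((FloatSelector) selector).get(); // boxes to Float
      default:
        throw new IllegalArgumentException("invalid type: [" + type + "]");
    }
  }

  public static void main(String[] args) {
    LongSelector selector = () -> 42L;
    System.out.println(supplierFor(ValueType.LONG, selector).get()); // 42
  }
}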
Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class RowBasedGrouperHelper, method createGrouperAccumulatorPair():
/**
 * If isInputRaw is true, transformations such as timestamp truncation and extraction functions have not
 * yet been applied to the input rows (for example, in a nested query where an extraction function in the
 * outer query is applied to a field of the inner query), and this method must apply those transformations.
 */
public static Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> createGrouperAccumulatorPair(
    final GroupByQuery query, final boolean isInputRaw,
    final Map<String, ValueType> rawInputRowSignature, final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier, final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage, final ObjectMapper spillMapper,
    final AggregatorFactory[] aggregatorFactories
) {
  // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
  Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");

  final List<ValueType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null;

  final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(
      includeTimestamp, query.getContextSortByDimsFirst(), query.getDimensions().size(),
      querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint),
      valueTypes
  );

  final ThreadLocal<Row> columnSelectorRow = new ThreadLocal<>();
  final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(
      RowBasedColumnSelectorFactory.create(columnSelectorRow, rawInputRowSignature)
  );

  final Grouper<RowBasedKey> grouper;
  if (concurrencyHint == -1) {
    grouper = new SpillingGrouper<>(
        bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, true
    );
  } else {
    grouper = new ConcurrentGrouper<>(
        bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, concurrencyHint
    );
  }

  final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
  final ValueExtractFunction valueExtractFn = makeValueExtractFunction(
      query, isInputRaw, includeTimestamp, columnSelectorFactory, rawInputRowSignature, valueTypes
  );

  final Accumulator<Grouper<RowBasedKey>, Row> accumulator = new Accumulator<Grouper<RowBasedKey>, Row>() {
    @Override
    public Grouper<RowBasedKey> accumulate(final Grouper<RowBasedKey> theGrouper, final Row row) {
      BaseQuery.checkInterrupted();

      if (theGrouper == null) {
        // Pass-through null returns without doing more work.
        return null;
      }

      if (!theGrouper.isInitialized()) {
        theGrouper.init();
      }

      columnSelectorRow.set(row);

      final Comparable[] key = new Comparable[keySize];
      valueExtractFn.apply(row, key);

      final boolean didAggregate = theGrouper.aggregate(new RowBasedKey(key));
      if (!didAggregate) {
        // null return means grouping resources were exhausted.
        return null;
      }
      columnSelectorRow.set(null);

      return theGrouper;
    }
  };

  return new Pair<>(grouper, accumulator);
}
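The accumulator encodes its failure modes in its return value: an incoming null flows through untouched, and null is returned when the grouper runs out of resources, so the driving loop (Druid's Sequence.accumulate in practice) can stop early. A standalone sketch of that contract with simplified stand-in types:

import java.util.Arrays;
import java.util.List;

// Sketch of the accumulator contract: fold keys into a grouper and signal
// resource exhaustion by returning null. Grouper here is a one-method
// stand-in, not Druid's interface.
public class AccumulatorSketch {
  interface Grouper<T> {
    boolean aggregate(T key); // false: grouping resources are exhausted
  }

  // Returns the grouper, or null as soon as any key cannot be aggregated.
  static <T> Grouper<T> accumulate(Grouper<T> grouper, List<T> keys) {
    for (T key : keys) {
      if (grouper == null) {
        return null; // pass an earlier failure through without more work
      }
      if (!grouper.aggregate(key)) {
        return null; // exhausted: the caller reports or fails the query
      }
    }
    return grouper;
  }

  public static void main(String[] args) {
    Grouper<String> unlimited = key -> true;
    Grouper<String> full = key -> false;
    System.out.println(accumulate(unlimited, Arrays.asList("a", "b")) != null); // true
    System.out.println(accumulate(full, Arrays.asList("a")) == null);           // true
  }
}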