Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io: the class GroupByBinaryFnV2, method apply().
@Override
@Nullable
public ResultRow apply(@Nullable final ResultRow arg1, @Nullable final ResultRow arg2)
{
  if (arg1 == null) {
    return arg2;
  } else if (arg2 == null) {
    return arg1;
  }

  final ResultRow newResult = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());

  // Add timestamp.
  if (query.getResultRowHasTimestamp()) {
    newResult.set(0, adjustTimestamp(arg1));
  }

  // Add dimensions.
  final int dimensionStart = query.getResultRowDimensionStart();
  final List<DimensionSpec> dimensions = query.getDimensions();
  for (int i = 0; i < dimensions.size(); i++) {
    final int rowIndex = dimensionStart + i;
    newResult.set(rowIndex, arg1.get(rowIndex));
  }

  // Add aggregations.
  final int aggregatorStart = query.getResultRowAggregatorStart();
  final List<AggregatorFactory> aggregatorSpecs = query.getAggregatorSpecs();
  for (int i = 0; i < aggregatorSpecs.size(); i++) {
    final AggregatorFactory aggregatorFactory = aggregatorSpecs.get(i);
    final int rowIndex = aggregatorStart + i;
    newResult.set(rowIndex, aggregatorFactory.combine(arg1.get(rowIndex), arg2.get(rowIndex)));
  }

  return newResult;
}
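For context, this binary function merges two partial result rows that share the same group key: dimensions are copied from the left-hand row and each aggregator is combined pairwise. Below is a minimal, hedged sketch of invoking it directly; the "wikipedia" datasource, the "country" dimension, and the "rows" aggregator are illustrative assumptions, not part of the source above.

// A hedged sketch (not from the Druid source): merging two partial rows with GroupByBinaryFnV2.
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.epinephelinae.GroupByBinaryFnV2;

public class GroupByBinaryFnV2Sketch
{
  public static void main(String[] args)
  {
    final GroupByQuery query = GroupByQuery.builder()
        .setDataSource("wikipedia")
        .setInterval("2000/2001")
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("country", "country"))
        .setAggregatorSpecs(new LongSumAggregatorFactory("rows", "rows"))
        .build();

    // With Granularities.ALL there is no timestamp column, so each row is [dimension, aggregator].
    final GroupByBinaryFnV2 mergeFn = new GroupByBinaryFnV2(query);
    final ResultRow merged = mergeFn.apply(ResultRow.of("US", 10L), ResultRow.of("US", 32L));
    System.out.println(merged); // dimension taken from the left row, "rows" combined to 42
  }
}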
Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io: the class GroupByQueryHelper, method createIndexAccumulatorPair().
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config
)
{
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final Granularity gran = query.getGranularity();
  final long timeStart = query.getIntervals().get(0).getStartMillis();
  final boolean combine = subquery == null;

  long granTimeStart = timeStart;
  if (!(Granularities.ALL.equals(gran))) {
    granTimeStart = gran.bucketStart(timeStart);
  }

  final List<AggregatorFactory> aggs;

  if (combine) {
    aggs = Lists.transform(
        query.getAggregatorSpecs(),
        new Function<AggregatorFactory, AggregatorFactory>()
        {
          @Override
          public AggregatorFactory apply(AggregatorFactory input)
          {
            return input.getCombiningFactory();
          }
        }
    );
  } else {
    aggs = query.getAggregatorSpecs();
  }

  final List<String> dimensions = Lists.transform(
      query.getDimensions(),
      new Function<DimensionSpec, String>()
      {
        @Override
        public String apply(DimensionSpec input)
        {
          return input.getOutputName();
        }
      }
  );

  final IncrementalIndex index;
  final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);

  // All groupBy dimensions are strings, for now.
  final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
  for (DimensionSpec dimension : query.getDimensions()) {
    dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
  }

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(new DimensionsSpec(dimensionSchemas))
      .withMetrics(aggs.toArray(new AggregatorFactory[0]))
      .withQueryGranularity(gran)
      .withMinTimestamp(granTimeStart)
      .build();

  final AppendableIndexBuilder indexBuilder;
  if (query.getContextValue("useOffheap", false)) {
    throw new UnsupportedOperationException(
        "The 'useOffheap' option is no longer available for groupBy v1. Please move to the newer groupBy engine, "
        + "which always operates off-heap, by removing any custom 'druid.query.groupBy.defaultStrategy' runtime "
        + "properties and 'groupByStrategy' query context parameters that you have set."
    );
  } else {
    indexBuilder = new OnheapIncrementalIndex.Builder();
  }

  index = indexBuilder
      .setIndexSchema(indexSchema)
      .setDeserializeComplexMetrics(false)
      .setConcurrentEventAdd(true)
      .setSortFacts(sortResults)
      .setMaxRowCount(querySpecificConfig.getMaxResults())
      .build();

  Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>()
  {
    @Override
    public IncrementalIndex accumulate(IncrementalIndex accumulated, T in)
    {
      final MapBasedRow mapBasedRow;

      if (in instanceof MapBasedRow) {
        mapBasedRow = (MapBasedRow) in;
      } else if (in instanceof ResultRow) {
        final ResultRow row = (ResultRow) in;
        mapBasedRow = row.toMapBasedRow(combine ? query : subquery);
      } else {
        throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
      }

      try {
        accumulated.add(new MapBasedInputRow(mapBasedRow.getTimestamp(), dimensions, mapBasedRow.getEvent()));
      } catch (IndexSizeExceededException e) {
        throw new ResourceLimitExceededException(e.getMessage());
      }

      return accumulated;
    }
  };

  return new Pair<>(index, accumulator);
}
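For context, the returned pair is meant to be fed to Sequence.accumulate(): the accumulator converts each input row to a MapBasedInputRow and adds it to the IncrementalIndex. A hedged usage sketch follows; it reuses the hypothetical query from the earlier sketch, passes a null subquery to select the combining path, and uses a default GroupByQueryConfig as a stand-in for the real runtime config (imports elided).

// A hedged sketch (assumed names): accumulating a sequence of ResultRows into the index.
final Pair<IncrementalIndex, Accumulator<IncrementalIndex, ResultRow>> pair =
    GroupByQueryHelper.createIndexAccumulatorPair(query, null, new GroupByQueryConfig());

final Sequence<ResultRow> rows =
    Sequences.simple(ImmutableList.of(ResultRow.of("US", 10L), ResultRow.of("CA", 5L)));

// Each row is folded into pair.lhs (the IncrementalIndex) by pair.rhs (the Accumulator).
final IncrementalIndex index = rows.accumulate(pair.lhs, pair.rhs);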
Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io: the class DefaultLimitSpec, method makeComparator().
private Ordering<ResultRow> makeComparator(
    RowSignature rowSignature,
    boolean hasTimestamp,
    List<DimensionSpec> dimensions,
    List<AggregatorFactory> aggs,
    List<PostAggregator> postAggs,
    boolean sortByDimsFirst
)
{
  final Ordering<ResultRow> timeOrdering;

  if (hasTimestamp) {
    timeOrdering = new Ordering<ResultRow>()
    {
      @Override
      public int compare(ResultRow left, ResultRow right)
      {
        return Longs.compare(left.getLong(0), right.getLong(0));
      }
    };
  } else {
    timeOrdering = null;
  }

  Map<String, DimensionSpec> dimensionsMap = new HashMap<>();
  for (DimensionSpec spec : dimensions) {
    dimensionsMap.put(spec.getOutputName(), spec);
  }

  Map<String, AggregatorFactory> aggregatorsMap = new HashMap<>();
  for (final AggregatorFactory agg : aggs) {
    aggregatorsMap.put(agg.getName(), agg);
  }

  Map<String, PostAggregator> postAggregatorsMap = new HashMap<>();
  for (PostAggregator postAgg : postAggs) {
    postAggregatorsMap.put(postAgg.getName(), postAgg);
  }

  Ordering<ResultRow> ordering = null;
  for (OrderByColumnSpec columnSpec : columns) {
    String columnName = columnSpec.getDimension();
    Ordering<ResultRow> nextOrdering = null;
    final int columnIndex = rowSignature.indexOf(columnName);

    if (columnIndex >= 0) {
      if (postAggregatorsMap.containsKey(columnName)) {
        //noinspection unchecked
        nextOrdering = metricOrdering(columnIndex, postAggregatorsMap.get(columnName).getComparator());
      } else if (aggregatorsMap.containsKey(columnName)) {
        //noinspection unchecked
        nextOrdering = metricOrdering(columnIndex, aggregatorsMap.get(columnName).getComparator());
      } else if (dimensionsMap.containsKey(columnName)) {
        Optional<DimensionSpec> dimensionSpec = dimensions.stream()
                                                          .filter(ds -> ds.getOutputName().equals(columnName))
                                                          .findFirst();
        if (!dimensionSpec.isPresent()) {
          throw new ISE("Could not find the dimension spec for ordering column %s", columnName);
        }
        nextOrdering = dimensionOrdering(
            columnIndex,
            dimensionSpec.get().getOutputType(),
            columnSpec.getDimensionComparator()
        );
      }
    }

    if (nextOrdering == null) {
      throw new ISE("Unknown column in order clause[%s]", columnSpec);
    }

    if (columnSpec.getDirection() == OrderByColumnSpec.Direction.DESCENDING) {
      nextOrdering = nextOrdering.reverse();
    }

    ordering = ordering == null ? nextOrdering : ordering.compound(nextOrdering);
  }

  if (ordering == null) {
    ordering = timeOrdering;
  } else if (timeOrdering != null) {
    ordering = sortByDimsFirst ? ordering.compound(timeOrdering) : timeOrdering.compound(ordering);
  }

  //noinspection unchecked
  return ordering != null ? ordering : (Ordering) Ordering.allEqual();
}
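For context, makeComparator() is private; callers reach it through DefaultLimitSpec.build(), which returns a function that sorts and limits a sequence of result rows. A hedged sketch follows; the order-by column name "rows", the limit of 10, and the in-scope "query" (the GroupByQuery carrying this limit spec) and "results" sequence are illustrative assumptions (imports elided).

// A hedged sketch (assumed names): order by an aggregator column, descending, top 10.
final DefaultLimitSpec limitSpec = new DefaultLimitSpec(
    ImmutableList.of(new OrderByColumnSpec("rows", OrderByColumnSpec.Direction.DESCENDING)),
    10
);

// build() applies the compound Ordering assembled by makeComparator() above.
final Function<Sequence<ResultRow>, Sequence<ResultRow>> limitFn = limitSpec.build(query);
final Sequence<ResultRow> limited = limitFn.apply(results);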
Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io: the class QueryableIndexVectorColumnSelectorFactory, method makeSingleValueDimensionSelector().
@Override
public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final DimensionSpec dimensionSpec)
{
  if (!dimensionSpec.canVectorize()) {
    throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec);
  }

  Function<DimensionSpec, SingleValueDimensionVectorSelector> mappingFunction = spec -> {
    if (virtualColumns.exists(spec.getDimension())) {
      SingleValueDimensionVectorSelector dimensionSelector =
          virtualColumns.makeSingleValueDimensionVectorSelector(dimensionSpec, index, offset);
      if (dimensionSelector == null) {
        return virtualColumns.makeSingleValueDimensionVectorSelector(dimensionSpec, this);
      } else {
        return dimensionSelector;
      }
    }

    final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
    if (holder == null
        || !holder.getCapabilities().isDictionaryEncoded().isTrue()
        || !holder.getCapabilities().is(ValueType.STRING)) {
      // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls.
      return NilVectorSelector.create(offset);
    }

    if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
      // Asking for a single-value dimension selector on a multi-value column gets you an error.
      throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension());
    }

    @SuppressWarnings("unchecked")
    final DictionaryEncodedColumn<String> dictionaryEncodedColumn =
        (DictionaryEncodedColumn<String>) getCachedColumn(spec.getDimension());

    // dictionaryEncodedColumn is not null because of the holder null check above.
    assert dictionaryEncodedColumn != null;
    final SingleValueDimensionVectorSelector selector =
        dictionaryEncodedColumn.makeSingleValueDimensionVectorSelector(offset);

    return spec.decorate(selector);
  };

  // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through
  // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above.
  SingleValueDimensionVectorSelector selector = singleValueDimensionSelectorCache.get(dimensionSpec);
  if (selector == null) {
    selector = mappingFunction.apply(dimensionSpec);
    singleValueDimensionSelectorCache.put(dimensionSpec, selector);
  }

  return selector;
}
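For context, callers obtain this factory from a vectorized cursor over a queryable index. A hedged sketch of reading a single-value string column through the selector follows; the "cursor" (an assumed org.apache.druid.segment.vector.VectorCursor) and the "country" column name are assumptions, and imports are elided.

// A hedged sketch (assumed names): iterate a vectorized cursor and decode a
// dictionary-encoded single-value string column one vector at a time.
final SingleValueDimensionVectorSelector selector =
    cursor.getColumnSelectorFactory().makeSingleValueDimensionSelector(DefaultDimensionSpec.of("country"));

while (!cursor.isDone()) {
  final int[] dictionaryIds = selector.getRowVector(); // one dictionary id per row in the vector
  for (int i = 0; i < cursor.getCurrentVectorSize(); i++) {
    System.out.println(selector.lookupName(dictionaryIds[i])); // decode via the column dictionary
  }
  cursor.advance();
}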
Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io: the class QueryableIndexVectorColumnSelectorFactory, method makeMultiValueDimensionSelector().
@Override
public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final DimensionSpec dimensionSpec)
{
  if (!dimensionSpec.canVectorize()) {
    throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec);
  }

  Function<DimensionSpec, MultiValueDimensionVectorSelector> mappingFunction = spec -> {
    if (virtualColumns.exists(spec.getDimension())) {
      MultiValueDimensionVectorSelector dimensionSelector =
          virtualColumns.makeMultiValueDimensionVectorSelector(dimensionSpec, index, offset);
      if (dimensionSelector == null) {
        return virtualColumns.makeMultiValueDimensionVectorSelector(dimensionSpec, this);
      } else {
        return dimensionSelector;
      }
    }

    final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
    if (holder == null
        || holder.getCapabilities().isDictionaryEncoded().isFalse()
        || !holder.getCapabilities().is(ValueType.STRING)
        || holder.getCapabilities().hasMultipleValues().isFalse()) {
      throw new ISE(
          "Column[%s] is not a multi-value string column, do not ask for a multi-value selector",
          spec.getDimension()
      );
    }

    @SuppressWarnings("unchecked")
    final DictionaryEncodedColumn<String> dictionaryEncodedColumn =
        (DictionaryEncodedColumn<String>) getCachedColumn(spec.getDimension());

    // dictionaryEncodedColumn is not null because of the holder null check above.
    assert dictionaryEncodedColumn != null;
    final MultiValueDimensionVectorSelector selector =
        dictionaryEncodedColumn.makeMultiValueDimensionVectorSelector(offset);

    return spec.decorate(selector);
  };

  // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through
  // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above.
  MultiValueDimensionVectorSelector selector = multiValueDimensionSelectorCache.get(dimensionSpec);
  if (selector == null) {
    selector = mappingFunction.apply(dimensionSpec);
    multiValueDimensionSelectorCache.put(dimensionSpec, selector);
  }

  return selector;
}
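For context, the multi-value selector differs from the single-value one in that each row in the vector yields an IndexedInts of dictionary ids rather than a single id. A hedged sketch follows; the assumed "cursor" and the "tags" column name are illustrative, and imports are elided.

// A hedged sketch (assumed names): decode a multi-value string column, where each
// row in the vector holds zero or more dictionary ids.
final MultiValueDimensionVectorSelector selector =
    cursor.getColumnSelectorFactory().makeMultiValueDimensionSelector(DefaultDimensionSpec.of("tags"));

while (!cursor.isDone()) {
  final IndexedInts[] rows = selector.getRowVector();
  for (int i = 0; i < cursor.getCurrentVectorSize(); i++) {
    for (int j = 0; j < rows[i].size(); j++) {
      System.out.println(selector.lookupName(rows[i].get(j)));
    }
  }
  cursor.advance();
}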