Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class DruidQueryBuilder, method toTopNQuery.
/**
 * Return this query as a TopN query, or null if this query is not compatible with TopN.
 *
 * @param dataSource         data source to query
 * @param sourceRowSignature row signature of the dataSource
 * @param context            query context
 * @param maxTopNLimit       maxTopNLimit from a PlannerConfig
 * @param useApproximateTopN useApproximateTopN from a PlannerConfig
 *
 * @return query, or null if this query cannot be run as a TopN query
 */
public TopNQuery toTopNQuery(
    final DataSource dataSource,
    final RowSignature sourceRowSignature,
    final Map<String, Object> context,
    final int maxTopNLimit,
    final boolean useApproximateTopN
)
{
  // Must have GROUP BY one column, ORDER BY zero or one column, limit less than maxTopNLimit, and no HAVING.
  final boolean topNOk = grouping != null
                         && grouping.getDimensions().size() == 1
                         && limitSpec != null
                         && (limitSpec.getColumns().size() <= 1 && limitSpec.getLimit() <= maxTopNLimit)
                         && having == null;
  if (!topNOk) {
    return null;
  }

  final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions());
  final OrderByColumnSpec limitColumn;
  if (limitSpec.getColumns().isEmpty()) {
    limitColumn = new OrderByColumnSpec(
        dimensionSpec.getOutputName(),
        OrderByColumnSpec.Direction.ASCENDING,
        StringComparators.LEXICOGRAPHIC
    );
  } else {
    limitColumn = Iterables.getOnlyElement(limitSpec.getColumns());
  }

  final TopNMetricSpec topNMetricSpec;
  if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {
    // ORDER BY the grouped dimension. DimensionTopNMetricSpec is exact; always return it even if useApproximateTopN is false.
    final DimensionTopNMetricSpec baseMetricSpec = new DimensionTopNMetricSpec(null, limitColumn.getDimensionComparator());
    topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING
                     ? baseMetricSpec
                     : new InvertedTopNMetricSpec(baseMetricSpec);
  } else if (useApproximateTopN) {
    // ORDER BY a metric. NumericTopNMetricSpec is approximate, so it is only used when useApproximateTopN is true.
    final NumericTopNMetricSpec baseMetricSpec = new NumericTopNMetricSpec(limitColumn.getDimension());
    topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING
                     ? new InvertedTopNMetricSpec(baseMetricSpec)
                     : baseMetricSpec;
  } else {
    return null;
  }

  final Filtration filtration = Filtration.create(filter).optimize(sourceRowSignature);
  return new TopNQuery(
      dataSource,
      VirtualColumns.EMPTY,
      dimensionSpec,
      topNMetricSpec,
      limitSpec.getLimit(),
      filtration.getQuerySegmentSpec(),
      filtration.getDimFilter(),
      Granularities.ALL,
      grouping.getAggregatorFactories(),
      grouping.getPostAggregators(),
      context
  );
}
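The ORDER BY column determines which TopNMetricSpec is produced. A minimal sketch of that branch in isolation, using only the metric-spec classes referenced above (the metric name "cnt" is hypothetical):

// ORDER BY the grouped dimension, descending: DimensionTopNMetricSpec is exact,
// so it is usable regardless of useApproximateTopN. Descending order wraps it.
TopNMetricSpec byDimensionDesc = new InvertedTopNMetricSpec(
    new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)
);

// ORDER BY a metric ("cnt"), descending: NumericTopNMetricSpec sorts descending
// by default, so no inversion is needed; ascending order would wrap it instead.
TopNMetricSpec byMetricDesc = new NumericTopNMetricSpec("cnt");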
Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class RowBasedGrouperHelper, method makeValueConvertFunctions.
@SuppressWarnings("unchecked")
private static Function<Comparable, Comparable>[] makeValueConvertFunctions(
    final Map<String, ValueType> rawInputRowSignature,
    final List<DimensionSpec> dimensions
)
{
  final List<ValueType> valueTypes = Lists.newArrayListWithCapacity(dimensions.size());
  for (DimensionSpec dimensionSpec : dimensions) {
    // The signature map is keyed by column name, so look up the underlying
    // dimension name rather than the DimensionSpec object itself.
    final ValueType valueType = rawInputRowSignature.get(dimensionSpec.getDimension());
    valueTypes.add(valueType == null ? ValueType.STRING : valueType);
  }
  return makeValueConvertFunctions(valueTypes);
}
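Because the signature map is keyed by String, passing the DimensionSpec itself would always miss and silently default every dimension to STRING; the lookup has to go through the dimension name. A hypothetical illustration of the failure mode (the column names are made up):

// Hypothetical signature and dimension spec.
Map<String, ValueType> signature = ImmutableMap.of("countryCode", ValueType.LONG);
DimensionSpec spec = new DefaultDimensionSpec("countryCode", "country");

signature.get(spec);                 // null: a DimensionSpec never equals a String key
signature.get(spec.getDimension());  // ValueType.LONG, as intended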
Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class GroupByQueryHelper, method createIndexAccumulatorPair.
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(
    final GroupByQuery query,
    final GroupByQueryConfig config,
    StupidPool<ByteBuffer> bufferPool,
    final boolean combine
)
{
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final Granularity gran = query.getGranularity();
  final long timeStart = query.getIntervals().get(0).getStartMillis();

  long granTimeStart = timeStart;
  if (!(Granularities.ALL.equals(gran))) {
    granTimeStart = gran.bucketStart(new DateTime(timeStart)).getMillis();
  }

  final List<AggregatorFactory> aggs;
  if (combine) {
    aggs = Lists.transform(
        query.getAggregatorSpecs(),
        new Function<AggregatorFactory, AggregatorFactory>()
        {
          @Override
          public AggregatorFactory apply(AggregatorFactory input)
          {
            return input.getCombiningFactory();
          }
        }
    );
  } else {
    aggs = query.getAggregatorSpecs();
  }

  final List<String> dimensions = Lists.transform(
      query.getDimensions(),
      new Function<DimensionSpec, String>()
      {
        @Override
        public String apply(DimensionSpec input)
        {
          return input.getOutputName();
        }
      }
  );

  final IncrementalIndex index;
  final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);

  // All groupBy dimensions are strings, for now.
  final List<DimensionSchema> dimensionSchemas = Lists.newArrayList();
  for (DimensionSpec dimension : query.getDimensions()) {
    dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
  }

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(new DimensionsSpec(dimensionSchemas, null, null))
      .withMetrics(aggs.toArray(new AggregatorFactory[aggs.size()]))
      .withQueryGranularity(gran)
      .withMinTimestamp(granTimeStart)
      .build();

  if (query.getContextValue("useOffheap", false)) {
    index = new OffheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults(), bufferPool);
  } else {
    index = new OnheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults());
  }

  Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>()
  {
    @Override
    public IncrementalIndex accumulate(IncrementalIndex accumulated, T in)
    {
      if (in instanceof MapBasedRow) {
        try {
          MapBasedRow row = (MapBasedRow) in;
          accumulated.add(new MapBasedInputRow(row.getTimestamp(), dimensions, row.getEvent()));
        }
        catch (IndexSizeExceededException e) {
          throw new ResourceLimitExceededException(e.getMessage());
        }
      } else {
        throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
      }
      return accumulated;
    }
  };

  return new Pair<>(index, accumulator);
}
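A minimal usage sketch, assuming a Druid Sequence of result rows (the variable names rows, query, config, and bufferPool are hypothetical): the pair's left side is the empty index, and the right side folds each row into it.

// Hypothetical usage: fold a sequence of result rows into the incremental index.
final Pair<IncrementalIndex, Accumulator<IncrementalIndex, Row>> pair =
    GroupByQueryHelper.createIndexAccumulatorPair(query, config, bufferPool, true);

// Sequence.accumulate() feeds every row through the accumulator, which throws
// ResourceLimitExceededException once the index exceeds maxResults.
final IncrementalIndex index = rows.accumulate(pair.lhs, pair.rhs);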
Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class GroupByQueryQueryToolChest, method getCacheStrategy.
@Override
public CacheStrategy<Row, Object, GroupByQuery> getCacheStrategy(final GroupByQuery query)
{
  return new CacheStrategy<Row, Object, GroupByQuery>()
  {
    private static final byte CACHE_STRATEGY_VERSION = 0x1;
    private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();
    private final List<DimensionSpec> dims = query.getDimensions();

    @Override
    public boolean isCacheable(GroupByQuery query, boolean willMergeRunners)
    {
      return strategySelector.strategize(query).isCacheable(willMergeRunners);
    }

    @Override
    public byte[] computeCacheKey(GroupByQuery query)
    {
      return new CacheKeyBuilder(GROUPBY_QUERY)
          .appendByte(CACHE_STRATEGY_VERSION)
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimFilter())
          .appendCacheablesIgnoringOrder(query.getAggregatorSpecs())
          .appendCacheablesIgnoringOrder(query.getDimensions())
          .appendCacheable(query.getVirtualColumns())
          .build();
    }

    @Override
    public TypeReference<Object> getCacheObjectClazz()
    {
      return OBJECT_TYPE_REFERENCE;
    }

    @Override
    public Function<Row, Object> prepareForCache()
    {
      return new Function<Row, Object>()
      {
        @Override
        public Object apply(Row input)
        {
          if (input instanceof MapBasedRow) {
            final MapBasedRow row = (MapBasedRow) input;
            final List<Object> retVal = Lists.newArrayListWithCapacity(1 + dims.size() + aggs.size());
            retVal.add(row.getTimestamp().getMillis());
            Map<String, Object> event = row.getEvent();
            for (DimensionSpec dim : dims) {
              retVal.add(event.get(dim.getOutputName()));
            }
            for (AggregatorFactory agg : aggs) {
              retVal.add(event.get(agg.getName()));
            }
            return retVal;
          }
          throw new ISE("Don't know how to cache input rows of type[%s]", input.getClass());
        }
      };
    }

    @Override
    public Function<Object, Row> pullFromCache()
    {
      return new Function<Object, Row>()
      {
        private final Granularity granularity = query.getGranularity();

        @Override
        public Row apply(Object input)
        {
          Iterator<Object> results = ((List<Object>) input).iterator();
          DateTime timestamp = granularity.toDateTime(((Number) results.next()).longValue());

          Map<String, Object> event = Maps.newLinkedHashMap();
          Iterator<DimensionSpec> dimsIter = dims.iterator();
          while (dimsIter.hasNext() && results.hasNext()) {
            final DimensionSpec dimension = dimsIter.next();
            event.put(dimension.getOutputName(), results.next());
          }

          Iterator<AggregatorFactory> aggsIter = aggs.iterator();
          while (aggsIter.hasNext() && results.hasNext()) {
            final AggregatorFactory factory = aggsIter.next();
            event.put(factory.getName(), factory.deserialize(results.next()));
          }

          if (dimsIter.hasNext() || aggsIter.hasNext() || results.hasNext()) {
            throw new ISE(
                "Found left over objects while reading from cache!! dimsIter[%s] aggsIter[%s] results[%s]",
                dimsIter.hasNext(),
                aggsIter.hasNext(),
                results.hasNext()
            );
          }

          return new MapBasedRow(timestamp, event);
        }
      };
    }
  };
}
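The cached value is a flat list in a fixed order: timestamp millis first, then one entry per dimension, then one per aggregator; pullFromCache consumes the list in the same order and insists that nothing is left over. A hypothetical round trip for a query grouped on "country" with an aggregator "cnt" (toolChest, query, and row are assumed to exist):

// Hypothetical round trip through the cache strategy.
CacheStrategy<Row, Object, GroupByQuery> strategy = toolChest.getCacheStrategy(query);

// prepareForCache() flattens the row into [timestampMillis, dimValues..., aggValues...]:
Object cached = strategy.prepareForCache().apply(row);   // e.g. [1483228800000L, "SE", 42L]

// pullFromCache() walks the list in the same order and rebuilds the MapBasedRow:
Row restored = strategy.pullFromCache().apply(cached);   // event: {country=SE, cnt=42}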
Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class GroupByQueryQueryToolChest, method makePostComputeManipulatorFn.
@Override
public Function<Row, Row> makePostComputeManipulatorFn(final GroupByQuery query, final MetricManipulationFn fn)
{
  final Set<String> optimizedDims = ImmutableSet.copyOf(
      Iterables.transform(
          extractionsToRewrite(query),
          new Function<DimensionSpec, String>()
          {
            @Override
            public String apply(DimensionSpec input)
            {
              return input.getOutputName();
            }
          }
      )
  );

  final Function<Row, Row> preCompute = makePreComputeManipulatorFn(query, fn);
  if (optimizedDims.isEmpty()) {
    return preCompute;
  }

  // If we have optimizations that can be done at this level, we apply them here.
  final Map<String, ExtractionFn> extractionFnMap = new HashMap<>();
  for (DimensionSpec dimensionSpec : query.getDimensions()) {
    final String dimension = dimensionSpec.getOutputName();
    if (optimizedDims.contains(dimension)) {
      extractionFnMap.put(dimension, dimensionSpec.getExtractionFn());
    }
  }

  return new Function<Row, Row>()
  {
    @Nullable
    @Override
    public Row apply(Row input)
    {
      Row preRow = preCompute.apply(input);
      if (preRow instanceof MapBasedRow) {
        MapBasedRow preMapRow = (MapBasedRow) preRow;
        Map<String, Object> event = Maps.newHashMap(preMapRow.getEvent());
        for (String dim : optimizedDims) {
          final Object eventVal = event.get(dim);
          event.put(dim, extractionFnMap.get(dim).apply(eventVal));
        }
        return new MapBasedRow(preMapRow.getTimestamp(), event);
      } else {
        return preRow;
      }
    }
  };
}
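The point of the rewrite is that the historicals can group on the raw column values, and the extraction function runs here once per result row instead of once per input row. A minimal sketch of that deferred application, assuming a substring extraction on a hypothetical "country_iso" dimension:

// Hypothetical: the query grouped on raw "country_iso" values, and the
// extraction function was deferred to the post-compute step above.
ExtractionFn extractionFn = new SubstringDimExtractionFn(0, 2);

Map<String, Object> event = Maps.newHashMap();
event.put("country_iso", "SE-01");

// The manipulator overwrites the raw value with the extracted one: "SE-01" -> "SE".
event.put("country_iso", extractionFn.apply(event.get("country_iso")));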