Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class GroupByRules, method applyLimit.
/**
* Applies a sort to an aggregating druidRel, as a LimitSpec. Do not call this method unless
* {@link #canApplyLimit(DruidRel)} returns true.
*
* @return new rel, or null if the sort cannot be applied
*/
private static DruidRel applyLimit(final DruidRel druidRel, final Sort sort) {
  Preconditions.checkState(canApplyLimit(druidRel), "Cannot applyLimit.");
  final Grouping grouping = druidRel.getQueryBuilder().getGrouping();
  final DefaultLimitSpec limitSpec = toLimitSpec(druidRel.getQueryBuilder().getRowOrder(), sort);
  if (limitSpec == null) {
    return null;
  }
  final List<OrderByColumnSpec> orderBys = limitSpec.getColumns();
  final List<DimensionSpec> newDimensions = Lists.newArrayList(grouping.getDimensions());
  // Reorder dimensions, maybe, to allow groupBy to consider pushing down sorting (see DefaultLimitSpec).
  if (!orderBys.isEmpty()) {
    final Map<String, Integer> dimensionOrderByOutputName = Maps.newHashMap();
    for (int i = 0; i < newDimensions.size(); i++) {
      dimensionOrderByOutputName.put(newDimensions.get(i).getOutputName(), i);
    }
    for (int i = 0; i < orderBys.size(); i++) {
      final OrderByColumnSpec orderBy = orderBys.get(i);
      final Integer dimensionOrder = dimensionOrderByOutputName.get(orderBy.getDimension());
      if (dimensionOrder != null
          && dimensionOrder != i
          && orderBy.getDirection() == OrderByColumnSpec.Direction.ASCENDING
          && orderBy.getDimensionComparator().equals(StringComparators.LEXICOGRAPHIC)) {
        final DimensionSpec tmp = newDimensions.get(i);
        newDimensions.set(i, newDimensions.get(dimensionOrder));
        newDimensions.set(dimensionOrder, tmp);
        dimensionOrderByOutputName.put(newDimensions.get(i).getOutputName(), i);
        dimensionOrderByOutputName.put(newDimensions.get(dimensionOrder).getOutputName(), dimensionOrder);
      }
    }
  }
  if (!orderBys.isEmpty() || limitSpec.getLimit() < Integer.MAX_VALUE) {
    return druidRel.withQueryBuilder(
        druidRel.getQueryBuilder()
                .withAdjustedGrouping(
                    Grouping.create(newDimensions, grouping.getAggregations()),
                    druidRel.getQueryBuilder().getRowType(),
                    druidRel.getQueryBuilder().getRowOrder()
                )
                .withLimitSpec(limitSpec)
    );
  } else {
    return druidRel;
  }
}
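For context, here is a minimal standalone sketch, not taken from the Druid source, of the inputs applyLimit works with: a list of grouping DimensionSpecs and a DefaultLimitSpec whose ORDER BY column refers to a dimension's output name. The column names ("country", "city", "d0", "d1") and the driver class are made up for illustration. With the spec below, the method would swap d0 and d1 so the lexicographic ascending ORDER BY column becomes the first grouping dimension, letting the groupBy engine push the sort down.

import com.google.common.collect.ImmutableList;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.dimension.DimensionSpec;
import io.druid.query.groupby.orderby.DefaultLimitSpec;
import io.druid.query.groupby.orderby.OrderByColumnSpec;
import io.druid.query.ordering.StringComparators;

import java.util.List;

public class LimitSpecSketch {
  public static void main(String[] args) {
    // GROUP BY country, city ... ORDER BY city ASC LIMIT 10 (illustrative column names)
    List<DimensionSpec> dimensions = ImmutableList.<DimensionSpec>of(
        new DefaultDimensionSpec("country", "d0"),
        new DefaultDimensionSpec("city", "d1")
    );
    DefaultLimitSpec limitSpec = new DefaultLimitSpec(
        ImmutableList.of(
            new OrderByColumnSpec("d1", OrderByColumnSpec.Direction.ASCENDING, StringComparators.LEXICOGRAPHIC)
        ),
        10
    );
    // applyLimit would reorder "dimensions" so d1 comes first, since it is the first
    // ascending, lexicographic ORDER BY column.
    System.out.println(dimensions + " ordered by " + limitSpec.getColumns().get(0).getDimension());
  }
}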
Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class ApproxCountDistinctSqlAggregator, method toDruidAggregation.
@Override
public Aggregation toDruidAggregation(
    final String name,
    final RowSignature rowSignature,
    final DruidOperatorTable operatorTable,
    final PlannerContext plannerContext,
    final List<Aggregation> existingAggregations,
    final Project project,
    final AggregateCall aggregateCall,
    final DimFilter filter
) {
  final RexNode rexNode = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
  final RowExtraction rex = Expressions.toRowExtraction(operatorTable, plannerContext, rowSignature.getRowOrder(), rexNode);
  if (rex == null) {
    return null;
  }
  final AggregatorFactory aggregatorFactory;
  if (rowSignature.getColumnType(rex.getColumn()) == ValueType.COMPLEX) {
    aggregatorFactory = new HyperUniquesAggregatorFactory(name, rex.getColumn());
  } else {
    final SqlTypeName sqlTypeName = rexNode.getType().getSqlTypeName();
    final ValueType outputType = Calcites.getValueTypeForSqlTypeName(sqlTypeName);
    if (outputType == null) {
      throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlTypeName, name);
    }
    final DimensionSpec dimensionSpec = rex.toDimensionSpec(rowSignature, null, ValueType.STRING);
    if (dimensionSpec == null) {
      return null;
    }
    aggregatorFactory = new CardinalityAggregatorFactory(name, ImmutableList.of(dimensionSpec), false);
  }
  return Aggregation.createFinalizable(
      ImmutableList.<AggregatorFactory>of(aggregatorFactory),
      null,
      new PostAggregatorFactory() {
        @Override
        public PostAggregator factorize(String outputName) {
          return new HyperUniqueFinalizingPostAggregator(outputName, name);
        }
      }
  ).filter(filter);
}
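As a hedged illustration of the string-typed branch above, the snippet below shows the kind of cardinality aggregator that ends up wrapping the DimensionSpec for an APPROX_COUNT_DISTINCT over a string column. The column name "user_id", the aggregator name "a0", and the driver class are made up; only the constructor shapes mirror the code above.

import com.google.common.collect.ImmutableList;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.dimension.DimensionSpec;

public class ApproxCountDistinctSketch {
  public static void main(String[] args) {
    DimensionSpec dimensionSpec = new DefaultDimensionSpec("user_id", "user_id");
    // byRow = false: estimate the number of distinct dimension values, not distinct rows.
    AggregatorFactory factory =
        new CardinalityAggregatorFactory("a0", ImmutableList.of(dimensionSpec), false);
    System.out.println(factory.getName());
  }
}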
Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class DruidQueryBuilder, method toTopNQuery.
/**
* Return this query as a TopN query, or null if this query is not compatible with TopN.
*
* @param dataSource data source to query
* @param sourceRowSignature row signature of the dataSource
* @param context query context
* @param maxTopNLimit maxTopNLimit from a PlannerConfig
* @param useApproximateTopN from a PlannerConfig
*
* @return query or null
*/
public TopNQuery toTopNQuery(
    final DataSource dataSource,
    final RowSignature sourceRowSignature,
    final Map<String, Object> context,
    final int maxTopNLimit,
    final boolean useApproximateTopN
) {
  // Must have GROUP BY one column, ORDER BY zero or one column, limit less than maxTopNLimit, and no HAVING.
  final boolean topNOk = grouping != null
      && grouping.getDimensions().size() == 1
      && limitSpec != null
      && (limitSpec.getColumns().size() <= 1 && limitSpec.getLimit() <= maxTopNLimit)
      && having == null;
  if (!topNOk) {
    return null;
  }
  final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions());
  final OrderByColumnSpec limitColumn;
  if (limitSpec.getColumns().isEmpty()) {
    limitColumn = new OrderByColumnSpec(dimensionSpec.getOutputName(), OrderByColumnSpec.Direction.ASCENDING, StringComparators.LEXICOGRAPHIC);
  } else {
    limitColumn = Iterables.getOnlyElement(limitSpec.getColumns());
  }
  final TopNMetricSpec topNMetricSpec;
  if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {
    // DimensionTopNMetricSpec is exact; always return it even if allowApproximate is false.
    final DimensionTopNMetricSpec baseMetricSpec = new DimensionTopNMetricSpec(null, limitColumn.getDimensionComparator());
    topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING
        ? baseMetricSpec
        : new InvertedTopNMetricSpec(baseMetricSpec);
  } else if (useApproximateTopN) {
    // ORDER BY metric
    final NumericTopNMetricSpec baseMetricSpec = new NumericTopNMetricSpec(limitColumn.getDimension());
    topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING
        ? new InvertedTopNMetricSpec(baseMetricSpec)
        : baseMetricSpec;
  } else {
    return null;
  }
  final Filtration filtration = Filtration.create(filter).optimize(sourceRowSignature);
  return new TopNQuery(
      dataSource,
      VirtualColumns.EMPTY,
      Iterables.getOnlyElement(grouping.getDimensions()),
      topNMetricSpec,
      limitSpec.getLimit(),
      filtration.getQuerySegmentSpec(),
      filtration.getDimFilter(),
      Granularities.ALL,
      grouping.getAggregatorFactories(),
      grouping.getPostAggregators(),
      context
  );
}
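A small sketch of the metric-spec decision in isolation (illustrative only; the metric name "cnt" and the driver class are made up): ordering by the single grouping dimension yields an exact DimensionTopNMetricSpec, inverted for descending order, while ordering by a metric falls back to the approximate NumericTopNMetricSpec.

import io.druid.query.ordering.StringComparators;
import io.druid.query.topn.DimensionTopNMetricSpec;
import io.druid.query.topn.InvertedTopNMetricSpec;
import io.druid.query.topn.NumericTopNMetricSpec;
import io.druid.query.topn.TopNMetricSpec;

public class TopNMetricSpecSketch {
  public static void main(String[] args) {
    // ORDER BY dim ASC -> exact lexicographic ordering on the dimension values.
    TopNMetricSpec byDimensionAsc =
        new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC);
    // ORDER BY dim DESC -> invert the dimension ordering.
    TopNMetricSpec byDimensionDesc = new InvertedTopNMetricSpec(byDimensionAsc);
    // ORDER BY cnt DESC -> numeric (approximate) ordering on a metric named "cnt".
    TopNMetricSpec byMetricDesc = new NumericTopNMetricSpec("cnt");
    System.out.println(byDimensionDesc + " / " + byMetricDesc);
  }
}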
Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class ScanQueryEngine, method process.
public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final Map<String, Object> responseContext) {
  if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) != null) {
    int count = (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT);
    if (count >= query.getLimit()) {
      return Sequences.empty();
    }
  }
  final Long timeoutAt = (long) responseContext.get(ScanQueryRunnerFactory.CTX_TIMEOUT_AT);
  final long start = System.currentTimeMillis();
  final StorageAdapter adapter = segment.asStorageAdapter();
  if (adapter == null) {
    throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
  }
  List<String> allDims = Lists.newLinkedList(adapter.getAvailableDimensions());
  List<String> allMetrics = Lists.newLinkedList(adapter.getAvailableMetrics());
  final List<String> allColumns = Lists.newLinkedList();
  if (query.getColumns() != null && !query.getColumns().isEmpty()) {
    if (!query.getColumns().contains(ScanResultValue.timestampKey)) {
      allColumns.add(ScanResultValue.timestampKey);
    }
    allColumns.addAll(query.getColumns());
    allDims.retainAll(query.getColumns());
    allMetrics.retainAll(query.getColumns());
  } else {
    if (!allDims.contains(ScanResultValue.timestampKey)) {
      allColumns.add(ScanResultValue.timestampKey);
    }
    allColumns.addAll(allDims);
    allColumns.addAll(allMetrics);
  }
  final List<DimensionSpec> dims = DefaultDimensionSpec.toSpec(allDims);
  final List<String> metrics = allMetrics;
  final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
  Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
  final String segmentId = segment.getIdentifier();
  final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
  if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) == null) {
    responseContext.put(ScanQueryRunnerFactory.CTX_COUNT, 0);
  }
  final int limit = query.getLimit() - (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT);
  return Sequences.concat(Sequences.map(
      adapter.makeCursors(filter, intervals.get(0), VirtualColumns.EMPTY, Granularities.ALL, query.isDescending()),
      new Function<Cursor, Sequence<ScanResultValue>>() {
        @Override
        public Sequence<ScanResultValue> apply(final Cursor cursor) {
          return new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {
            @Override
            public Iterator<ScanResultValue> make() {
              final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME);
              final List<ColumnSelectorPlus<SelectQueryEngine.SelectColumnSelectorStrategy>> selectorPlusList = Arrays.asList(
                  DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, Lists.newArrayList(dims), cursor)
              );
              final Map<String, ObjectColumnSelector> metSelectors = Maps.newHashMap();
              for (String metric : metrics) {
                final ObjectColumnSelector metricSelector = cursor.makeObjectColumnSelector(metric);
                metSelectors.put(metric, metricSelector);
              }
              final int batchSize = query.getBatchSize();
              return new Iterator<ScanResultValue>() {
                private int offset = 0;

                @Override
                public boolean hasNext() {
                  return !cursor.isDone() && offset < limit;
                }

                @Override
                public ScanResultValue next() {
                  if (System.currentTimeMillis() >= timeoutAt) {
                    throw new QueryInterruptedException(new TimeoutException());
                  }
                  int lastOffset = offset;
                  Object events = null;
                  String resultFormat = query.getResultFormat();
                  if (ScanQuery.RESULT_FORMAT_VALUE_VECTOR.equals(resultFormat)) {
                    throw new UnsupportedOperationException("valueVector is not supported now");
                  } else if (ScanQuery.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                    events = rowsToCompactedList();
                  } else {
                    events = rowsToList();
                  }
                  responseContext.put(ScanQueryRunnerFactory.CTX_COUNT, (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) + (offset - lastOffset));
                  responseContext.put(ScanQueryRunnerFactory.CTX_TIMEOUT_AT, timeoutAt - (System.currentTimeMillis() - start));
                  return new ScanResultValue(segmentId, allColumns, events);
                }

                @Override
                public void remove() {
                  throw new UnsupportedOperationException();
                }

                private Object rowsToCompactedList() {
                  return Lists.transform((List<Map<String, Object>>) rowsToList(), new Function<Map<String, Object>, Object>() {
                    @Override
                    public Object apply(Map<String, Object> input) {
                      List eventValues = Lists.newArrayListWithExpectedSize(allColumns.size());
                      for (String expectedColumn : allColumns) {
                        eventValues.add(input.get(expectedColumn));
                      }
                      return eventValues;
                    }
                  });
                }

                private Object rowsToList() {
                  List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                  for (int i = 0; !cursor.isDone() && i < batchSize && offset < limit; cursor.advance(), i++, offset++) {
                    final Map<String, Object> theEvent = SelectQueryEngine.singleEvent(ScanResultValue.timestampKey, timestampColumnSelector, selectorPlusList, metSelectors);
                    events.add(theEvent);
                  }
                  return events;
                }

                private Object rowsToValueVector() {
                  // only support list now, we can support ValueVector or Arrow in future
                  return rowsToList();
                }
              };
            }

            @Override
            public void cleanup(Iterator<ScanResultValue> iterFromMake) {
            }
          });
        }
      }
  ));
}
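The dims list above comes from DefaultDimensionSpec.toSpec, which turns plain column names into pass-through DimensionSpecs whose output name equals the column name. A minimal sketch, with made-up column names and assuming the same overload used above (a list of names):

import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.dimension.DimensionSpec;

import java.util.Arrays;
import java.util.List;

public class ScanDimensionsSketch {
  public static void main(String[] args) {
    // Column names here are illustrative.
    List<DimensionSpec> dims = DefaultDimensionSpec.toSpec(Arrays.asList("page", "country", "user"));
    for (DimensionSpec dim : dims) {
      // Pass-through specs: no extraction function, output name == input column.
      System.out.println(dim.getDimension() + " -> " + dim.getOutputName());
    }
  }
}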
Use of io.druid.query.dimension.DimensionSpec in project druid by druid-io.
The class CachingClusteredClientTest, method testGroupByCaching.
@Test
public void testGroupByCaching() throws Exception {
  List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder()
      .addAll(AGGS)
      .add(new HyperUniquesAggregatorFactory("uniques", "uniques"))
      .build();
  final HashFunction hashFn = Hashing.murmur3_128();
  GroupByQuery.Builder builder = new GroupByQuery.Builder()
      .setDataSource(DATA_SOURCE)
      .setQuerySegmentSpec(SEG_SPEC)
      .setDimFilter(DIM_FILTER)
      .setGranularity(GRANULARITY)
      .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
      .setAggregatorSpecs(aggsWithUniques)
      .setPostAggregatorSpecs(POST_AGGS)
      .setContext(CONTEXT);
  final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
  collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
  collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
  testQueryCaching(
      client,
      builder.build(),
      new Interval("2011-01-01/2011-01-02"),
      makeGroupByResults(new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)),
      new Interval("2011-01-02/2011-01-03"),
      makeGroupByResults(new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      ),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      )
  );
  QueryRunner runner = new FinalizeResultsQueryRunner(client, GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest());
  HashMap<String, Object> context = new HashMap<String, Object>();
  TestHelper.assertExpectedObjects(
      makeGroupByResults(
          new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      ),
      runner.run(builder.setInterval("2011-01-05/2011-01-10").build(), context),
      ""
  );
}
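For reference, a trimmed-down sketch of the builder pattern used in this test, reduced to the DimensionSpec part. The data source, interval, and aggregator name are made up, the driver class is not part of Druid, and the import paths (in particular Granularities) assume the same io.druid package layout as the snippets above.

import java.util.Arrays;

import com.google.common.collect.ImmutableList;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.dimension.DimensionSpec;
import io.druid.query.groupby.GroupByQuery;

public class GroupByDimensionSketch {
  public static void main(String[] args) {
    // Group on column "a" with its output also named "a", mirroring the test above.
    GroupByQuery query = new GroupByQuery.Builder()
        .setDataSource("testDatasource")
        .setInterval("2011-01-01/2011-01-02")
        .setGranularity(Granularities.ALL)
        .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
        .setAggregatorSpecs(ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("rows")))
        .build();
    System.out.println(query.getDimensions());
  }
}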