use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
the class DruidPlanner method planWithDruidConvention.
/**
 * Plans a {@link RelNode} that is directly translatable to a native Druid query and wraps the outcome in a
 * {@link PlannerResult}.
 *
 * When {@code explain} is non-null the plan explanation is returned. Otherwise the result carries a supplier
 * that, when invoked, performs an authorization sanity check and then runs the query.
 */
private PlannerResult planWithDruidConvention(final RelRoot root, @Nullable final SqlExplain explain, @Nullable final SqlInsert insert) throws ValidationException, RelConversionException {
  final RelRoot limitedRoot = possiblyWrapRootWithOuterLimitFromContext(root);

  // The query maker is built from the original root and registered on the planner context before transforming.
  final QueryMaker maker = buildQueryMaker(root, insert);
  plannerContext.setQueryMaker(maker);

  final RelNode withParameters = rewriteRelDynamicParameters(limitedRoot.rel);
  final DruidRel<?> druidRel = (DruidRel<?>) planner.transform(
      Rules.DRUID_CONVENTION_RULES,
      planner.getEmptyTraitSet().replace(DruidConvention.instance()).plus(root.collation),
      withParameters
  );

  if (explain != null) {
    return planExplanation(druidRel, explain, true);
  }

  final Supplier<Sequence<Object[]>> lazyResults = () -> {
    // Sanity check: gather the READ resource actions recorded on the planner context.
    final Set<ResourceAction> reads = plannerContext
        .getResourceActions()
        .stream()
        .filter(resourceAction -> resourceAction.getAction() == Action.READ)
        .collect(Collectors.toSet());

    // Passes when "no reads" coincides with "no data sources", or when there are at least as many READ actions
    // as data source names.
    // NOTE(review): the original inline comment was truncated; it mentioned tables being replaced "with
    // InlineDataSource of empty rows" -- confirm the intended rationale against upstream.
    final boolean emptinessAgrees = reads.isEmpty() == druidRel.getDataSourceNames().isEmpty();
    Preconditions.checkState(
        emptinessAgrees || reads.size() >= druidRel.getDataSourceNames().size(),
        "Authorization sanity check failed"
    );

    return druidRel.runQuery();
  };

  return new PlannerResult(lazyResults, maker.getResultType());
}
use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
the class DruidPlanner method planWithBindableConvention.
/**
 * Construct a {@link PlannerResult} for a fall-back 'bindable' rel, for things that are not directly translatable
 * to native Druid queries such as system tables and just a general purpose (but definitely not optimized) fall-back.
 *
 * See {@link #planWithDruidConvention} which will handle things which are directly translatable
 * to native Druid queries.
 *
 * @param root    root of the relational tree to plan
 * @param explain when non-null, the plan explanation is returned instead of a result supplier
 * @return the explanation, or a result whose rows are produced by binding the planned rel once the supplier is invoked
 * @throws RelConversionException propagated from planning
 */
private PlannerResult planWithBindableConvention(final RelRoot root, @Nullable final SqlExplain explain) throws RelConversionException {
  BindableRel bindableRel = (BindableRel) planner.transform(
      Rules.BINDABLE_CONVENTION_RULES,
      planner.getEmptyTraitSet().replace(BindableConvention.INSTANCE).plus(root.collation),
      root.rel
  );

  if (!root.isRefTrivial()) {
    // Add a projection on top to accommodate root.fields.
    final List<RexNode> projects = new ArrayList<>();
    final RexBuilder rexBuilder = bindableRel.getCluster().getRexBuilder();
    for (int field : Pair.left(root.fields)) {
      projects.add(rexBuilder.makeInputRef(bindableRel, field));
    }
    bindableRel = new Bindables.BindableProject(bindableRel.getCluster(), bindableRel.getTraitSet(), bindableRel, projects, root.validatedRowType);
  }

  if (explain != null) {
    return planExplanation(bindableRel, explain, false);
  } else {
    // Lambdas can only capture effectively-final locals, and bindableRel may have been reassigned above.
    final BindableRel theRel = bindableRel;
    final DataContext dataContext = plannerContext.createDataContext((JavaTypeFactory) planner.getTypeFactory(), plannerContext.getParameters());
    final Supplier<Sequence<Object[]>> resultsSupplier = () -> {
      final Enumerable<?> enumerable = theRel.bind(dataContext);
      final Enumerator<?> enumerator = enumerable.enumerator();
      // enumerator::close is registered as the baggage of the returned sequence.
      return Sequences.withBaggage(new BaseSequence<>(new BaseSequence.IteratorMaker<Object[], EnumeratorIterator<Object[]>>() {
        @Override
        public EnumeratorIterator<Object[]> make() {
          return new EnumeratorIterator<>(new Iterator<Object[]>() {
            // True once moveNext() has been called for the row that next() has not yet handed out.
            private boolean advanced = false;
            // Result of the most recent moveNext() call; only meaningful while "advanced" is true.
            private boolean rowAvailable = false;

            @Override
            public boolean hasNext() {
              // Advance the enumerator at most once per row so that repeated hasNext() calls do not skip rows.
              if (!advanced) {
                rowAvailable = enumerator.moveNext();
                advanced = true;
              }
              return rowAvailable;
            }

            @Override
            public Object[] next() {
              if (!hasNext()) {
                throw new java.util.NoSuchElementException();
              }
              // Consume the pending row; the following hasNext() will advance the enumerator again.
              advanced = false;
              return (Object[]) enumerator.current();
            }
          });
        }

        @Override
        public void cleanup(EnumeratorIterator<Object[]> iterFromMake) {
          // Nothing to do per iterator; the enumerator is closed through the sequence baggage.
        }
      }), enumerator::close);
    };
    return new PlannerResult(resultsSupplier, root.validatedRowType);
  }
}
use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
the class IndexTaskTest method testTransformSpec.
/**
 * Ingests three CSV rows through a transform spec that filters on dim = "b" and adds four expression transforms,
 * then reads the single resulting segment back and checks the transformed column values and segment metadata.
 */
@Test
public void testTransformSpec() throws Exception {
  final File workDir = temporaryFolder.newFolder();
  final File inputFile = File.createTempFile("druid", "index", workDir);
  try (BufferedWriter out = Files.newWriter(inputFile, StandardCharsets.UTF_8)) {
    out.write("2014-01-01T00:00:10Z,a,an|array,1|2|3,1\n");
    out.write("2014-01-01T01:00:20Z,b,another|array,3|4,1\n");
    out.write("2014-01-01T02:00:30Z,c,and|another,0|1,1\n");
  }

  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(
          Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "dimt", "dimtarray1", "dimtarray2", "dimtnum_array")
      )
  );
  final List<String> columns = Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "val");
  final String listDelimiter = "|";

  // Only rows with dim = "b" pass the filter; four derived columns are added by expression.
  final TransformSpec transformSpec = new TransformSpec(
      new SelectorDimFilter("dim", "b", null),
      ImmutableList.of(
          new ExpressionTransform("dimt", "concat(dim,dim)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtarray1", "array(dim, dim)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtarray2", "map(d -> concat(d, 'foo'), dim_array)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtnum_array", "map(d -> d + 3, dim_num_array)", ExprMacroTable.nil())
      )
  );
  final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, false);

  // Build the ingestion spec through either the InputFormat API or the parse-spec API.
  final IndexIngestionSpec ingestionSpec = useInputFormatApi
      ? createIngestionSpec(jsonMapper, workDir, DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, new CsvInputFormat(columns, listDelimiter, null, false, 0), transformSpec, null, tuningConfig, false, false)
      : createIngestionSpec(jsonMapper, workDir, new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0), transformSpec, null, tuningConfig, false, false);

  final IndexTask task = new IndexTask(null, null, ingestionSpec, null);
  Assert.assertEquals(task.getId(), task.getGroupId());

  final List<DataSegment> segments = runTask(task).rhs;
  Assert.assertEquals(1, segments.size());
  final DataSegment segment = segments.get(0);

  final File segmentDir = segmentCacheManager.getSegmentFiles(segment);
  final WindowedStorageAdapter windowedAdapter = new WindowedStorageAdapter(
      new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentDir)),
      segment.getInterval()
  );
  final Sequence<Cursor> cursors = windowedAdapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);

  final String[] transformedColumns = {"dimt", "dimtarray1", "dimtarray2", "dimtnum_array"};
  final List<Map<String, Object>> transforms = cursors.map(cursor -> {
    // Create every selector first, then read the values at the cursor's current position.
    final DimensionSelector[] selectors = new DimensionSelector[transformedColumns.length];
    for (int i = 0; i < transformedColumns.length; i++) {
      selectors[i] = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec(transformedColumns[i], transformedColumns[i]));
    }
    final Map<String, Object> values = new HashMap<>();
    for (int i = 0; i < transformedColumns.length; i++) {
      values.put(transformedColumns[i], selectors[i].defaultGetObject());
    }
    cursor.advance();
    return values;
  }).toList();

  Assert.assertEquals(1, transforms.size());
  final Map<String, Object> transformed = transforms.get(0);
  Assert.assertEquals("bb", transformed.get("dimt"));
  Assert.assertEquals(ImmutableList.of("b", "b"), transformed.get("dimtarray1"));
  Assert.assertEquals(ImmutableList.of("anotherfoo", "arrayfoo"), transformed.get("dimtarray2"));
  Assert.assertEquals(ImmutableList.of("6.0", "7.0"), transformed.get("dimtnum_array"));

  Assert.assertEquals(DATASOURCE, segment.getDataSource());
  Assert.assertEquals(Intervals.of("2014/P1D"), segment.getInterval());
  Assert.assertEquals(NumberedShardSpec.class, segment.getShardSpec().getClass());
  Assert.assertEquals(0, segment.getShardSpec().getPartitionNum());
}
use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
the class GroupByStrategyV1 method processSubqueryResult.
/**
 * Runs the outer {@code query} against the results of {@code subquery}: the subquery rows are materialized in an
 * incremental index, the outer query is processed against that index once per interval, and the post-aggregated,
 * post-processed outer rows are returned with the outer index attached as baggage.
 *
 * The inner index is always closed before this method returns, including when building the outer index fails.
 *
 * @throws IAE if two different outer aggregators require an inner column of the same name
 */
@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
  final Set<AggregatorFactory> aggs = new HashSet<>();
  // Nested group-bys work by first running the inner query and then materializing the results in an incremental
  // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
  // the aggregators for the outer query to define the column names so that the index will match the query. If
  // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
  // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
  // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
  // subsequent ones) and return an error if the aggregator types are different.
  final Set<String> dimensionNames = new HashSet<>();
  for (DimensionSpec dimension : subquery.getDimensions()) {
    dimensionNames.add(dimension.getOutputName());
  }
  for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
    for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
      if (dimensionNames.contains(transferAgg.getName())) {
        // The required column shares its name with one of the subquery's output dimensions, so no transfer
        // aggregator is registered for it.
        continue;
      }
      // Reject a same-named aggregator that is not equal to one already collected.
      if (Iterables.any(aggs, agg -> agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg))) {
        throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
      }
      aggs.add(transferAgg);
    }
  }

  // We need the inner incremental index to have all the columns required by the outer query
  final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery).setAggregatorSpecs(ImmutableList.copyOf(aggs)).setInterval(subquery.getIntervals()).setPostAggregatorSpecs(new ArrayList<>()).build();
  final GroupByQuery outerQuery = new GroupByQuery.Builder(query).setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec())).build();
  final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)), subquery, configSupplier.get(), subqueryResult);

  final IncrementalIndex outerQueryResultIndex;
  try {
    // Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    // is ensured by QuerySegmentSpec.
    // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    // and concatenate the results.
    outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(outerQuery, null, configSupplier.get(), Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<ResultRow>>() {
      @Override
      public Sequence<ResultRow> apply(Interval interval) {
        return process(outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))), new IncrementalIndexStorageAdapter(innerQueryResultIndex));
      }
    })));
  } finally {
    // Release the inner index whether or not the outer index was built, so a failure above does not leak it.
    innerQueryResultIndex.close();
  }

  return Sequences.withBaggage(outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
the class GroupByStrategyV2 method processSubqueryResult.
/**
 * Processes the rows of {@code subqueryResult} through {@link GroupByRowProcessor} and returns the merged results,
 * with the processor's result supplier attached as baggage. If anything fails while the sequence is being set up,
 * the result supplier (when one was already created) is closed before the failure is rethrown.
 */
@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
  // Declared outside the "try" so the catch block can close it if something goes wrong
  // while creating the sequence. Stays null until the row processor has produced a supplier.
  GroupByRowProcessor.ResultSupplier closeOnFailure = null;
  try {
    // If the query was pushed down, filters would have been applied downstream, so skip it here.
    final GroupByQuery effectiveQuery = wasQueryPushedDown
        ? query.withDimFilter(null).withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY))
        : query;
    final GroupByQuery subqueryForProcessing = wasQueryPushedDown ? effectiveQuery : subquery;

    final GroupByRowProcessor.ResultSupplier supplier = GroupByRowProcessor.process(
        effectiveQuery,
        subqueryForProcessing,
        subqueryResult,
        configSupplier.get(),
        resource,
        spillMapper,
        processingConfig.getTmpDir(),
        processingConfig.intermediateComputeSizeBytes()
    );
    closeOnFailure = supplier;

    return Sequences.withBaggage(
        mergeResults((queryPlus, responseContext) -> supplier.results(null), query, null),
        supplier
    );
  } catch (Throwable t) {
    throw CloseableUtils.closeAndWrapInCatch(t, closeOnFailure);
  }
}
Aggregations