Use of io.crate.planner.node.dql.Collect in the crate project by crate.
Class CopyStatementPlanner, method planCopyFrom.
/**
 * Plans a {@code COPY FROM} statement.
 *
 * <p>Copy from has two "modes":
 * <ol>
 *   <li>Non-partitioned tables, or partitioned tables with a partition ident: the import goes
 *       into a single es index, so the raw source is collected and imported as is.</li>
 *   <li>Partitioned tables without a partition ident: the document is collected and partitioned
 *       by its values, the partitioned-by columns are excluded from the document, and the
 *       target es index is determined by the partition-by value.</li>
 * </ol>
 *
 * @param analysis the analyzed COPY FROM statement (table, partition ident, settings, uri)
 * @param context  planner context providing the job id and the next execution phase id
 * @return the file collect plan, passed through {@code Merge.ensureOnHandler} together with a
 *         {@code MergeCountProjection}
 */
public Plan planCopyFrom(CopyFromAnalyzedStatement analysis, Planner.Context context) {
    DocTableInfo table = analysis.table();
    int clusteredByPrimaryKeyIdx = table.primaryKey().indexOf(table.clusteredBy());

    String partitionIdent = analysis.partitionIdent();
    List<String> partitionedByNames;
    List<BytesRef> partitionValues;
    if (partitionIdent != null) {
        assert table.isPartitioned() : "table must be partitioned if partitionIdent is set";
        // a concrete partition is targeted -> the raw source can be indexed into that es index
        partitionValues = PartitionName.decodeIdent(partitionIdent);
        partitionedByNames = Collections.emptyList();
    } else if (table.isPartitioned()) {
        partitionValues = ImmutableList.of();
        partitionedByNames = Lists.newArrayList(Lists.transform(table.partitionedBy(), ColumnIdent::fqn));
    } else {
        partitionValues = ImmutableList.of();
        partitionedByNames = Collections.emptyList();
    }

    // Snapshot of the partition column names handed to the projection. It has to be taken
    // BEFORE the primary key columns are removed from partitionedByNames further below.
    String[] partitionColumnNames = partitionedByNames.isEmpty()
        ? null
        : partitionedByNames.toArray(new String[0]);

    SourceIndexWriterProjection writerProjection = new SourceIndexWriterProjection(
        table.ident(),
        partitionIdent,
        table.getReference(DocSysColumns.RAW),
        table.primaryKey(),
        table.partitionedBy(),
        partitionValues,
        table.clusteredBy(),
        clusteredByPrimaryKeyIdx,
        analysis.settings(),
        null,
        partitionColumnNames,
        // autoCreateIndices
        table.isPartitioned());
    List<Projection> projections = Collections.<Projection>singletonList(writerProjection);

    // partition columns that are also primary key columns are already collected via the primary key
    partitionedByNames.removeAll(Lists.transform(table.primaryKey(), ColumnIdent::fqn));

    // capacity hint only: primary keys + remaining partition columns + _raw/_doc (+ clustered by)
    int expectedSymbols = table.primaryKey().size() + partitionedByNames.size() + 1;
    if (clusteredByPrimaryKeyIdx == -1) {
        expectedSymbols++;
    }
    List<Symbol> toCollect = new ArrayList<>(expectedSymbols);

    // primary key columns come first
    for (ColumnIdent pkColumn : table.primaryKey()) {
        toCollect.add(table.getReference(pkColumn));
    }

    // then the partition columns which are not part of the primary key;
    // generated columns are collected through their generated expression
    Set<Reference> generatedColumnSources = new HashSet<>();
    for (String columnName : partitionedByNames) {
        Reference columnRef = table.getReference(ColumnIdent.fromPath(columnName));
        Symbol symbolToCollect = columnRef;
        if (columnRef instanceof GeneratedReference) {
            GeneratedReference generated = (GeneratedReference) columnRef;
            symbolToCollect = generated.generatedExpression();
            generatedColumnSources.addAll(generated.referencedReferences());
        }
        toCollect.add(symbolToCollect);
    }

    // then the clusteredBy column, unless it is part of the primary key, absent or the _id column
    boolean clusteredByOutsidePrimaryKey = clusteredByPrimaryKeyIdx == -1;
    if (clusteredByOutsidePrimaryKey
        && table.clusteredBy() != null
        && !DocSysColumns.ID.equals(table.clusteredBy())) {
        toCollect.add(table.getReference(table.clusteredBy()));
    }

    // then _doc (partitioned table without partition ident) or _raw (everything else)
    boolean partitionByValues = table.isPartitioned() && partitionIdent == null;
    if (partitionByValues) {
        toCollect.add(table.getReference(DocSysColumns.DOC));
    } else {
        toCollect.add(table.getReference(DocSysColumns.RAW));
    }

    // finally the columns referenced by generated partition columns, if not collected already
    for (Reference sourceRef : generatedColumnSources) {
        if (!toCollect.contains(sourceRef)) {
            toCollect.add(sourceRef);
        }
    }

    DiscoveryNodes allNodes = clusterService.state().nodes();
    FileUriCollectPhase collectPhase = new FileUriCollectPhase(
        context.jobId(),
        context.nextExecutionPhaseId(),
        "copyFrom",
        getExecutionNodes(
            allNodes,
            analysis.settings().getAsInt("num_readers", allNodes.getSize()),
            analysis.nodePredicate()),
        analysis.uri(),
        toCollect,
        projections,
        analysis.settings().get("compression", null),
        analysis.settings().getAsBoolean("shared", null));
    Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, 1, null);
    return Merge.ensureOnHandler(collect, context, Collections.singletonList(MergeCountProjection.INSTANCE));
}
Use of io.crate.planner.node.dql.Collect in the crate project by crate.
Class TableFunction, method build.
/**
 * Builds the {@link Collect} plan for this table function.
 *
 * <p>Every function argument is evaluated up front and wrapped in a {@link Literal}. The symbols
 * to collect and the where clause have parameters and sub-query results bound and are then
 * normalized before they are handed to the {@link TableFunctionCollectPhase}, which is placed on
 * the handler node.
 *
 * <p>{@code planHints}, {@code projectionBuilder}, {@code limit}, {@code offset}, {@code order}
 * and {@code pageSizeHint} are not used by this implementation.
 *
 * @param plannerContext  supplies job id, phase id, handler node, node and transaction context
 * @param params          parameter values used when evaluating and binding symbols
 * @param subQueryResults sub-query results used when evaluating and binding symbols
 * @return a {@link Collect} plan wrapping the table function collect phase
 */
@Override
public ExecutionPlan build(PlannerContext plannerContext,
                           Set<PlanHint> planHints,
                           ProjectionBuilder projectionBuilder,
                           int limit,
                           int offset,
                           @Nullable OrderBy order,
                           @Nullable Integer pageSizeHint,
                           Row params,
                           SubQueryResults subQueryResults) {
    var txnCtx = plannerContext.transactionContext();
    var nodeCtx = plannerContext.nodeContext();

    List<Symbol> argumentSymbols = relation.function().arguments();
    ArrayList<Literal<?>> functionArguments = new ArrayList<>(argumentSymbols.size());

    EvaluatingNormalizer normalizer = new EvaluatingNormalizer(nodeCtx, RowGranularity.CLUSTER, null, relation);
    var bindAndNormalize = new SubQueryAndParamBinder(params, subQueryResults)
        .andThen(symbol -> normalizer.normalize(symbol, txnCtx));

    // It's not possible to use columns as argument to a table function,
    // so it's safe to evaluate at this point.
    for (Symbol argument : argumentSymbols) {
        Object value = SymbolEvaluator.evaluate(txnCtx, nodeCtx, argument, params, subQueryResults);
        functionArguments.add(Literal.ofUnchecked(argument.valueType(), value));
    }

    TableFunctionCollectPhase collectPhase = new TableFunctionCollectPhase(
        plannerContext.jobId(),
        plannerContext.nextExecutionPhaseId(),
        plannerContext.handlerNode(),
        relation.functionImplementation(),
        functionArguments,
        Lists2.map(toCollect, bindAndNormalize),
        bindAndNormalize.apply(where.queryOrFallback()));
    return new Collect(collectPhase, TopN.NO_LIMIT, 0, toCollect.size(), TopN.NO_LIMIT, null);
}
Use of io.crate.planner.node.dql.Collect in the crate project by crate.
Class GroupByPlannerTest, method testNestedGroupByAggregation.
/**
 * A group-by on top of a grouped sub-select over {@code sys.nodes} is planned as a single
 * collect on one node, with both group projections running in {@code ITER_FINAL} mode.
 */
@Test
public void testNestedGroupByAggregation() throws Exception {
    var executor = SQLExecutor.builder(clusterService, 2, RandomizedTest.getRandom(), List.of()).build();
    Collect plan = executor.plan("select count(*) from (" + " select max(load['1']) as maxLoad, hostname " + " from sys.nodes " + " group by hostname having max(load['1']) > 50) as nodes " + "group by hostname");

    assertThat("would require merge if more than 1 nodeIds", plan.nodeIds().size(), is(1));

    CollectPhase phase = plan.collectPhase();
    var projections = phase.projections();
    assertThat(
        projections,
        contains(
            instanceOf(GroupProjection.class),
            instanceOf(FilterProjection.class),
            instanceOf(EvalProjection.class),
            instanceOf(GroupProjection.class),
            instanceOf(EvalProjection.class)));

    // inner group by (index 0) and outer group by (index 3)
    GroupProjection innerGroupBy = (GroupProjection) projections.get(0);
    assertThat(innerGroupBy.mode(), is(AggregateMode.ITER_FINAL));
    GroupProjection outerGroupBy = (GroupProjection) projections.get(3);
    assertThat(outerGroupBy.mode(), is(AggregateMode.ITER_FINAL));
}
Use of io.crate.planner.node.dql.Collect in the crate project by crate.
Class GroupByPlannerTest, method testGroupByOnClusteredByColumnPartitionedOnePartition.
/**
 * Grouping by the clustered-by column of a partitioned table: with a single partition hit the
 * merge phase carries no projections, with more than one partition hit the plan is a merge on
 * top of another merge.
 */
@Test
public void testGroupByOnClusteredByColumnPartitionedOnePartition() throws Exception {
    RelationName clusteredParted = new RelationName("doc", "clustered_parted");
    var firstPartition = new PartitionName(clusteredParted, singletonList("1395874800000")).asIndexName();
    var secondPartition = new PartitionName(clusteredParted, singletonList("1395961200000")).asIndexName();
    var executor = SQLExecutor.builder(clusterService, 2, RandomizedTest.getRandom(), List.of())
        .addPartitionedTable(
            "create table doc.clustered_parted (" + " id integer," + " date timestamp with time zone," + " city string" + ") clustered by (city) partitioned by (date) ",
            firstPartition,
            secondPartition)
        .build();

    // only one partition hit
    Merge singlePartitionPlan = executor.plan("select count(*), city from clustered_parted where date=1395874800000 group by city");
    Collect singlePartitionCollect = (Collect) singlePartitionPlan.subPlan();
    assertThat(
        singlePartitionCollect.collectPhase().projections(),
        contains(instanceOf(GroupProjection.class), instanceOf(EvalProjection.class)));
    assertThat(singlePartitionCollect.collectPhase().projections().get(0), instanceOf(GroupProjection.class));
    assertThat(singlePartitionPlan.mergePhase().projections().size(), is(0));

    // > 1 partition hit
    ExecutionPlan multiPartitionPlan = executor.plan("select count(*), city from clustered_parted where date=1395874800000 or date=1395961200000 group by city");
    assertThat(multiPartitionPlan, instanceOf(Merge.class));
    assertThat(((Merge) multiPartitionPlan).subPlan(), instanceOf(Merge.class));
}
Use of io.crate.planner.node.dql.Collect in the crate project by crate.
Class InsertFromSubQueryPlannerTest, method test_insert_from_subquery_with_order_by_symbols_match_collect_symbols.
/**
 * Ensures that order by symbols may also be rewritten to source lookup refs if the collect
 * symbols are rewritten: the order-by symbols of the collect phase must equal its collect symbols.
 */
@Test
public void test_insert_from_subquery_with_order_by_symbols_match_collect_symbols() {
    Merge handlerMerge = e.plan("insert into target (id, name) " + "select id, name from users order by id, name");
    RoutedCollectPhase routedPhase = (RoutedCollectPhase) ((Collect) handlerMerge.subPlan()).collectPhase();
    assertThat(routedPhase.orderBy().orderBySymbols(), is(routedPhase.toCollect()));
}
Aggregations