Search in sources :

Example 36 with Collect

use of io.crate.planner.node.dql.Collect in project crate by crate.

the class CopyStatementPlanner method planCopyFrom.

public Plan planCopyFrom(CopyFromAnalyzedStatement analysis, Planner.Context context) {
    /**
         * copy from has two "modes":
         *
         * 1: non-partitioned tables or partitioned tables with partition ident --> import into single es index
         *    -> collect raw source and import as is
         *
         * 2: partitioned table without partition ident
         *    -> collect document and partition by values
         *    -> exclude partitioned by columns from document
         *    -> insert into es index (partition determined by partition by value)
         */
    DocTableInfo table = analysis.table();
    int clusteredByPrimaryKeyIdx = table.primaryKey().indexOf(analysis.table().clusteredBy());
    List<String> partitionedByNames;
    String partitionIdent = null;
    List<BytesRef> partitionValues;
    if (analysis.partitionIdent() == null) {
        if (table.isPartitioned()) {
            partitionedByNames = Lists.newArrayList(Lists.transform(table.partitionedBy(), ColumnIdent::fqn));
        } else {
            partitionedByNames = Collections.emptyList();
        }
        partitionValues = ImmutableList.of();
    } else {
        assert table.isPartitioned() : "table must be partitioned if partitionIdent is set";
        // partitionIdent is present -> possible to index raw source into concrete es index
        partitionValues = PartitionName.decodeIdent(analysis.partitionIdent());
        partitionIdent = analysis.partitionIdent();
        partitionedByNames = Collections.emptyList();
    }
    SourceIndexWriterProjection sourceIndexWriterProjection = new SourceIndexWriterProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), table.primaryKey(), table.partitionedBy(), partitionValues, table.clusteredBy(), clusteredByPrimaryKeyIdx, analysis.settings(), null, partitionedByNames.size() > 0 ? partitionedByNames.toArray(new String[partitionedByNames.size()]) : null, // autoCreateIndices
    table.isPartitioned());
    List<Projection> projections = Collections.<Projection>singletonList(sourceIndexWriterProjection);
    partitionedByNames.removeAll(Lists.transform(table.primaryKey(), ColumnIdent::fqn));
    int referencesSize = table.primaryKey().size() + partitionedByNames.size() + 1;
    referencesSize = clusteredByPrimaryKeyIdx == -1 ? referencesSize + 1 : referencesSize;
    List<Symbol> toCollect = new ArrayList<>(referencesSize);
    // add primaryKey columns
    for (ColumnIdent primaryKey : table.primaryKey()) {
        toCollect.add(table.getReference(primaryKey));
    }
    // add partitioned columns (if not part of primaryKey)
    Set<Reference> referencedReferences = new HashSet<>();
    for (String partitionedColumn : partitionedByNames) {
        Reference reference = table.getReference(ColumnIdent.fromPath(partitionedColumn));
        Symbol symbol;
        if (reference instanceof GeneratedReference) {
            symbol = ((GeneratedReference) reference).generatedExpression();
            referencedReferences.addAll(((GeneratedReference) reference).referencedReferences());
        } else {
            symbol = reference;
        }
        toCollect.add(symbol);
    }
    // add clusteredBy column (if not part of primaryKey)
    if (clusteredByPrimaryKeyIdx == -1 && table.clusteredBy() != null && !DocSysColumns.ID.equals(table.clusteredBy())) {
        toCollect.add(table.getReference(table.clusteredBy()));
    }
    // add _raw or _doc
    if (table.isPartitioned() && analysis.partitionIdent() == null) {
        toCollect.add(table.getReference(DocSysColumns.DOC));
    } else {
        toCollect.add(table.getReference(DocSysColumns.RAW));
    }
    // add columns referenced by generated columns which are used as partitioned by column
    for (Reference reference : referencedReferences) {
        if (!toCollect.contains(reference)) {
            toCollect.add(reference);
        }
    }
    DiscoveryNodes allNodes = clusterService.state().nodes();
    FileUriCollectPhase collectPhase = new FileUriCollectPhase(context.jobId(), context.nextExecutionPhaseId(), "copyFrom", getExecutionNodes(allNodes, analysis.settings().getAsInt("num_readers", allNodes.getSize()), analysis.nodePredicate()), analysis.uri(), toCollect, projections, analysis.settings().get("compression", null), analysis.settings().getAsBoolean("shared", null));
    Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, 1, null);
    return Merge.ensureOnHandler(collect, context, Collections.singletonList(MergeCountProjection.INSTANCE));
}
Also used : DocTableInfo(io.crate.metadata.doc.DocTableInfo) GeneratedReference(io.crate.metadata.GeneratedReference) Collect(io.crate.planner.node.dql.Collect) Symbol(io.crate.analyze.symbol.Symbol) GeneratedReference(io.crate.metadata.GeneratedReference) Reference(io.crate.metadata.Reference) SourceIndexWriterProjection(io.crate.planner.projection.SourceIndexWriterProjection) WriterProjection(io.crate.planner.projection.WriterProjection) MergeCountProjection(io.crate.planner.projection.MergeCountProjection) SourceIndexWriterProjection(io.crate.planner.projection.SourceIndexWriterProjection) Projection(io.crate.planner.projection.Projection) FileUriCollectPhase(io.crate.planner.node.dql.FileUriCollectPhase) ColumnIdent(io.crate.metadata.ColumnIdent) BytesRef(org.apache.lucene.util.BytesRef) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes)

Example 37 with Collect

use of io.crate.planner.node.dql.Collect in project crate by crate.

the class TableFunction method build.

@Override
public ExecutionPlan build(PlannerContext plannerContext, Set<PlanHint> planHints, ProjectionBuilder projectionBuilder, int limit, int offset, @Nullable OrderBy order, @Nullable Integer pageSizeHint, Row params, SubQueryResults subQueryResults) {
    List<Symbol> args = relation.function().arguments();
    ArrayList<Literal<?>> functionArguments = new ArrayList<>(args.size());
    EvaluatingNormalizer normalizer = new EvaluatingNormalizer(plannerContext.nodeContext(), RowGranularity.CLUSTER, null, relation);
    var binder = new SubQueryAndParamBinder(params, subQueryResults).andThen(x -> normalizer.normalize(x, plannerContext.transactionContext()));
    for (Symbol arg : args) {
        // It's not possible to use columns as argument to a table function, so it's safe to evaluate at this point.
        functionArguments.add(Literal.ofUnchecked(arg.valueType(), SymbolEvaluator.evaluate(plannerContext.transactionContext(), plannerContext.nodeContext(), arg, params, subQueryResults)));
    }
    TableFunctionCollectPhase collectPhase = new TableFunctionCollectPhase(plannerContext.jobId(), plannerContext.nextExecutionPhaseId(), plannerContext.handlerNode(), relation.functionImplementation(), functionArguments, Lists2.map(toCollect, binder), binder.apply(where.queryOrFallback()));
    return new Collect(collectPhase, TopN.NO_LIMIT, 0, toCollect.size(), TopN.NO_LIMIT, null);
}
Also used : EvaluatingNormalizer(io.crate.expression.eval.EvaluatingNormalizer) Collect(io.crate.planner.node.dql.Collect) SelectSymbol(io.crate.expression.symbol.SelectSymbol) Symbol(io.crate.expression.symbol.Symbol) Literal(io.crate.expression.symbol.Literal) ArrayList(java.util.ArrayList) TableFunctionCollectPhase(io.crate.execution.dsl.phases.TableFunctionCollectPhase)

Example 38 with Collect

use of io.crate.planner.node.dql.Collect in project crate by crate.

the class GroupByPlannerTest method testNestedGroupByAggregation.

@Test
public void testNestedGroupByAggregation() throws Exception {
    var e = SQLExecutor.builder(clusterService, 2, RandomizedTest.getRandom(), List.of()).build();
    Collect collect = e.plan("select count(*) from (" + "  select max(load['1']) as maxLoad, hostname " + "  from sys.nodes " + "  group by hostname having max(load['1']) > 50) as nodes " + "group by hostname");
    assertThat("would require merge if more than 1 nodeIds", collect.nodeIds().size(), is(1));
    CollectPhase collectPhase = collect.collectPhase();
    assertThat(collectPhase.projections(), contains(instanceOf(GroupProjection.class), instanceOf(FilterProjection.class), instanceOf(EvalProjection.class), instanceOf(GroupProjection.class), instanceOf(EvalProjection.class)));
    Projection firstGroupProjection = collectPhase.projections().get(0);
    assertThat(((GroupProjection) firstGroupProjection).mode(), is(AggregateMode.ITER_FINAL));
    Projection secondGroupProjection = collectPhase.projections().get(3);
    assertThat(((GroupProjection) secondGroupProjection).mode(), is(AggregateMode.ITER_FINAL));
}
Also used : Collect(io.crate.planner.node.dql.Collect) OrderedTopNProjection(io.crate.execution.dsl.projection.OrderedTopNProjection) GroupProjection(io.crate.execution.dsl.projection.GroupProjection) FilterProjection(io.crate.execution.dsl.projection.FilterProjection) Projection(io.crate.execution.dsl.projection.Projection) TopNProjection(io.crate.execution.dsl.projection.TopNProjection) EvalProjection(io.crate.execution.dsl.projection.EvalProjection) CollectPhase(io.crate.execution.dsl.phases.CollectPhase) RoutedCollectPhase(io.crate.execution.dsl.phases.RoutedCollectPhase) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test) RandomizedTest(com.carrotsearch.randomizedtesting.RandomizedTest)

Example 39 with Collect

use of io.crate.planner.node.dql.Collect in project crate by crate.

the class GroupByPlannerTest method testGroupByOnClusteredByColumnPartitionedOnePartition.

@Test
public void testGroupByOnClusteredByColumnPartitionedOnePartition() throws Exception {
    var e = SQLExecutor.builder(clusterService, 2, RandomizedTest.getRandom(), List.of()).addPartitionedTable("create table doc.clustered_parted (" + "   id integer," + "   date timestamp with time zone," + "   city string" + ") clustered by (city) partitioned by (date) ", new PartitionName(new RelationName("doc", "clustered_parted"), singletonList("1395874800000")).asIndexName(), new PartitionName(new RelationName("doc", "clustered_parted"), singletonList("1395961200000")).asIndexName()).build();
    // only one partition hit
    Merge optimizedPlan = e.plan("select count(*), city from clustered_parted where date=1395874800000 group by city");
    Collect collect = (Collect) optimizedPlan.subPlan();
    assertThat(collect.collectPhase().projections(), contains(instanceOf(GroupProjection.class), instanceOf(EvalProjection.class)));
    assertThat(collect.collectPhase().projections().get(0), instanceOf(GroupProjection.class));
    assertThat(optimizedPlan.mergePhase().projections().size(), is(0));
    // > 1 partition hit
    ExecutionPlan executionPlan = e.plan("select count(*), city from clustered_parted where date=1395874800000 or date=1395961200000 group by city");
    assertThat(executionPlan, instanceOf(Merge.class));
    assertThat(((Merge) executionPlan).subPlan(), instanceOf(Merge.class));
}
Also used : PartitionName(io.crate.metadata.PartitionName) ExecutionPlan(io.crate.planner.ExecutionPlan) Merge(io.crate.planner.Merge) Collect(io.crate.planner.node.dql.Collect) RelationName(io.crate.metadata.RelationName) GroupProjection(io.crate.execution.dsl.projection.GroupProjection) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test) RandomizedTest(com.carrotsearch.randomizedtesting.RandomizedTest)

Example 40 with Collect

use of io.crate.planner.node.dql.Collect in project crate by crate.

the class InsertFromSubQueryPlannerTest method test_insert_from_subquery_with_order_by_symbols_match_collect_symbols.

@Test
public void test_insert_from_subquery_with_order_by_symbols_match_collect_symbols() {
    // Ensures that order by symbols may also be rewritten to source lookup refs if collect symbols are rewritten
    Merge localMerge = e.plan("insert into target (id, name) " + "select id, name from users order by id, name");
    Collect collect = (Collect) localMerge.subPlan();
    RoutedCollectPhase collectPhase = (RoutedCollectPhase) collect.collectPhase();
    assertThat(collectPhase.orderBy().orderBySymbols(), is(collectPhase.toCollect()));
}
Also used : Merge(io.crate.planner.Merge) Collect(io.crate.planner.node.dql.Collect) RoutedCollectPhase(io.crate.execution.dsl.phases.RoutedCollectPhase) Test(org.junit.Test) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) RandomizedTest(com.carrotsearch.randomizedtesting.RandomizedTest)

Aggregations

Collect (io.crate.planner.node.dql.Collect)66 Test (org.junit.Test)57 CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest)55 RoutedCollectPhase (io.crate.execution.dsl.phases.RoutedCollectPhase)31 RandomizedTest (com.carrotsearch.randomizedtesting.RandomizedTest)27 GroupProjection (io.crate.execution.dsl.projection.GroupProjection)18 EvalProjection (io.crate.execution.dsl.projection.EvalProjection)16 Merge (io.crate.planner.Merge)16 MergePhase (io.crate.execution.dsl.phases.MergePhase)14 Projection (io.crate.execution.dsl.projection.Projection)12 Reference (io.crate.metadata.Reference)11 FilterProjection (io.crate.execution.dsl.projection.FilterProjection)10 TopNProjection (io.crate.execution.dsl.projection.TopNProjection)10 OrderedTopNProjection (io.crate.execution.dsl.projection.OrderedTopNProjection)9 CollectPhase (io.crate.execution.dsl.phases.CollectPhase)8 Symbol (io.crate.expression.symbol.Symbol)8 AggregationProjection (io.crate.execution.dsl.projection.AggregationProjection)6 QueryThenFetch (io.crate.planner.node.dql.QueryThenFetch)6 ColumnIndexWriterProjection (io.crate.execution.dsl.projection.ColumnIndexWriterProjection)5 InputColumn (io.crate.expression.symbol.InputColumn)5