Examples with Collect - io.crate.planner.node.dql.Collect

Example 46 with Collect

Use of io.crate.planner.node.dql.Collect in the crate project (by crate).

In class CopyFromPlan, method planCopyFromExecution:

/**
 * Builds the execution plan for a COPY FROM statement.
 *
 * <p>The analyzed statement is first bound with {@code params} and {@code subQueryResults}.
 * A {@link FileUriCollectPhase} is then created for the bound URI, wrapped in a {@link Collect},
 * and a {@link SourceIndexWriterProjection} (or its return-summary variant) is added to it.
 * The result is passed through {@code Merge.ensureOnHandler}.
 *
 * <p>The return-summary projection is used when {@code copyFrom} is an
 * {@link AnalyzedCopyFromReturnSummary} or when the {@code fail_fast} setting is true; in that
 * case three extra symbols (source URI, source URI failure, line number) are collected as well.
 *
 * @param copyFrom         the analyzed COPY FROM statement to bind and plan
 * @param allNodes         cluster nodes; used to pick the execution nodes and as the default for {@code num_readers}
 * @param context          supplies the transaction/node context, the job id and the next execution phase id
 * @param params           parameter row handed to {@code bind}
 * @param subQueryResults  sub-query results handed to {@code bind}
 * @return the plan returned by {@code Merge.ensureOnHandler} for the collect node
 */
public static ExecutionPlan planCopyFromExecution(AnalyzedCopyFrom copyFrom, DiscoveryNodes allNodes, PlannerContext context, Row params, SubQueryResults subQueryResults) {
    var boundedCopyFrom = bind(copyFrom, context.transactionContext(), context.nodeContext(), params, subQueryResults);
    /*
     * Create a plan that reads json-objects-lines from a file
     * and then executes upsert requests to index the data
     */
    DocTableInfo table = boundedCopyFrom.tableInfo();
    String partitionIdent = boundedCopyFrom.partitionIdent();
    // Both lists stay empty unless the partition handling below fills one of them.
    List<String> partitionedByNames = Collections.emptyList();
    List<String> partitionValues = Collections.emptyList();
    if (partitionIdent == null) {
        if (table.isPartitioned()) {
            // No explicit partition given: remember the partition column names; they become the "excludes" below.
            partitionedByNames = Lists2.map(table.partitionedBy(), ColumnIdent::fqn);
        }
    } else {
        assert table.isPartitioned() : "table must be partitioned if partitionIdent is set";
        // partitionIdent is present -> possible to index raw source into concrete es index
        partitionValues = PartitionName.decodeIdent(partitionIdent);
    }
    // need to exclude _id columns; they're auto generated and won't be available in the files being imported
    ColumnIdent clusteredBy = table.clusteredBy();
    if (DocSysColumns.ID.equals(clusteredBy)) {
        clusteredBy = null;
    }
    List<Reference> primaryKeyRefs = table.primaryKey().stream().filter(r -> !r.equals(DocSysColumns.ID)).map(table::getReference).collect(Collectors.toList());
    // toCollect is appended to below, so the returned list has to be mutable.
    List<Symbol> toCollect = getSymbolsRequiredForShardIdCalc(primaryKeyRefs, table.partitionedByColumns(), clusteredBy == null ? null : table.getReference(clusteredBy));
    Reference rawOrDoc = rawOrDoc(table, partitionIdent);
    // Capture the index BEFORE adding, so rawOrDocIdx is the position rawOrDoc occupies in toCollect.
    final int rawOrDocIdx = toCollect.size();
    toCollect.add(rawOrDoc);
    // excludes is null (not an empty array) when there are no partition column names.
    String[] excludes = partitionedByNames.size() > 0 ? partitionedByNames.toArray(new String[0]) : null;
    // Built from toCollect as it is at this point; more symbols may be appended to the same list further down.
    // NOTE(review): whether SourceSymbols snapshots the list or keeps a live view is not visible here -- confirm.
    InputColumns.SourceSymbols sourceSymbols = new InputColumns.SourceSymbols(toCollect);
    Symbol clusteredByInputCol = null;
    if (clusteredBy != null) {
        clusteredByInputCol = InputColumns.create(table.getReference(clusteredBy), sourceSymbols);
    }
    SourceIndexWriterProjection sourceIndexWriterProjection;
    List<? extends Symbol> projectionOutputs = AbstractIndexWriterProjection.OUTPUTS;
    // The summary flag is derived from the un-bound statement type; fail_fast comes from the bound settings.
    boolean returnSummary = copyFrom instanceof AnalyzedCopyFromReturnSummary;
    boolean failFast = boundedCopyFrom.settings().getAsBoolean("fail_fast", false);
    if (returnSummary || failFast) {
        // Each InputColumn index is read from toCollect.size() right before the matching add(),
        // so it points at the symbol appended on the following line. Keep these pairs in order.
        final InputColumn sourceUriSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriExpression.getReferenceForRelation(table.ident()));
        final InputColumn sourceUriFailureSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriFailureExpression.getReferenceForRelation(table.ident()));
        final InputColumn lineNumberSymbol = new InputColumn(toCollect.size(), DataTypes.LONG);
        toCollect.add(SourceLineNumberExpression.getReferenceForRelation(table.ident()));
        if (returnSummary) {
            // Only the RETURN SUMMARY variant replaces the default projection outputs.
            List<? extends Symbol> fields = ((AnalyzedCopyFromReturnSummary) copyFrom).outputs();
            projectionOutputs = InputColumns.create(fields, new InputColumns.SourceSymbols(fields));
        }
        // NOTE(review): the trailing "autoCreateIndices" comment appears to label the
        // table.isPartitioned() argument on the continuation line -- confirm against the constructor.
        sourceIndexWriterProjection = new SourceIndexWriterReturnSummaryProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), new InputColumn(rawOrDocIdx, rawOrDoc.valueType()), table.primaryKey(), InputColumns.create(table.partitionedByColumns(), sourceSymbols), clusteredBy, boundedCopyFrom.settings(), null, excludes, InputColumns.create(primaryKeyRefs, sourceSymbols), clusteredByInputCol, projectionOutputs, // autoCreateIndices
        table.isPartitioned(), sourceUriSymbol, sourceUriFailureSymbol, lineNumberSymbol);
    } else {
        // Same leading arguments as the summary variant, without the three source/line symbols.
        sourceIndexWriterProjection = new SourceIndexWriterProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), new InputColumn(rawOrDocIdx, rawOrDoc.valueType()), table.primaryKey(), InputColumns.create(table.partitionedByColumns(), sourceSymbols), clusteredBy, boundedCopyFrom.settings(), null, excludes, InputColumns.create(primaryKeyRefs, sourceSymbols), clusteredByInputCol, projectionOutputs, // autoCreateIndices
        table.isPartitioned());
    }
    // the partitionedBy-inputColumns created for the projection are still valid because the positions are not changed
    // NOTE(review): partitionValues starts as an empty list and is only reassigned from
    // PartitionName.decodeIdent, so this check matters only if decodeIdent can return null -- confirm.
    if (partitionValues != null) {
        rewriteToCollectToUsePartitionValues(table.partitionedByColumns(), partitionValues, toCollect);
    }
    // num_readers defaults to the number of nodes in allNodes when the setting is absent.
    FileUriCollectPhase collectPhase = new FileUriCollectPhase(context.jobId(), context.nextExecutionPhaseId(), "copyFrom", getExecutionNodes(allNodes, boundedCopyFrom.settings().getAsInt("num_readers", allNodes.getSize()), boundedCopyFrom.nodePredicate()), boundedCopyFrom.uri(), toCollect, Collections.emptyList(), boundedCopyFrom.settings().get("compression", null), boundedCopyFrom.settings().getAsBoolean("shared", null), CopyFromParserProperties.of(boundedCopyFrom.settings()), boundedCopyFrom.inputFormat(), boundedCopyFrom.settings());
    Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, -1, null);
    // add the projection to the plan to ensure that the outputs are correctly set to the projection outputs
    collect.addProjection(sourceIndexWriterProjection);
    // With RETURN SUMMARY no handler projection is applied; otherwise MergeCountProjection is used on the handler.
    List<Projection> handlerProjections;
    if (returnSummary) {
        handlerProjections = Collections.emptyList();
    } else {
        handlerProjections = List.of(MergeCountProjection.INSTANCE);
    }
    return Merge.ensureOnHandler(collect, context, handlerProjections);
}

Also used : DocTableInfo(io.crate.metadata.doc.DocTableInfo) InputColumns(io.crate.execution.dsl.projection.builder.InputColumns) SourceIndexWriterReturnSummaryProjection(io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection) Collect(io.crate.planner.node.dql.Collect) GeneratedReference(io.crate.metadata.GeneratedReference) Reference(io.crate.metadata.Reference) Symbol(io.crate.expression.symbol.Symbol) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) Projection(io.crate.execution.dsl.projection.Projection) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) MergeCountProjection(io.crate.execution.dsl.projection.MergeCountProjection) AbstractIndexWriterProjection(io.crate.execution.dsl.projection.AbstractIndexWriterProjection) SourceIndexWriterReturnSummaryProjection(io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase) ColumnIdent(io.crate.metadata.ColumnIdent) InputColumn(io.crate.expression.symbol.InputColumn) AnalyzedCopyFromReturnSummary(io.crate.analyze.AnalyzedCopyFromReturnSummary)

Example 47 with Collect

Use of io.crate.planner.node.dql.Collect in the crate project (by crate).

In class WindowDefinitionTest, method testUnboundedPrecedingUnboundedFollowingFrameIsAllowed:

/**
 * Plans a window aggregation over the full frame
 * (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) and checks that the
 * resulting window function carries exactly those frame bounds.
 */
@Test
public void testUnboundedPrecedingUnboundedFollowingFrameIsAllowed() {
    Collect plan = e.plan("select sum(col1) over(RANGE BETWEEN UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING) FROM " + "unnest([1, 2, 1, 1, 1, 4])");
    List<Projection> phaseProjections = plan.collectPhase().projections();
    assertThat(phaseProjections.size(), is(2));

    // The first window-agg projection on the collect phase is the one under test.
    WindowAggProjection windowAgg = phaseProjections.stream()
        .filter(WindowAggProjection.class::isInstance)
        .map(WindowAggProjection.class::cast)
        .findFirst()
        .orElse(null);
    assertThat(windowAgg, is(notNullValue()));

    List<? extends Symbol> windowOutputs = windowAgg.outputs();
    // IC and window function
    assertThat(windowOutputs.size(), is(2));

    // Keep the last window function among the outputs (there is no early exit here).
    WindowFunction frameCarrier = windowOutputs.stream()
        .filter(WindowFunction.class::isInstance)
        .map(WindowFunction.class::cast)
        .reduce((earlier, later) -> later)
        .orElse(null);
    assertThat(frameCarrier, is(notNullValue()));

    assertThat(frameCarrier.windowDefinition().windowFrameDefinition().start().type(), is(UNBOUNDED_PRECEDING));
    assertThat(frameCarrier.windowDefinition().windowFrameDefinition().end().type(), is(UNBOUNDED_FOLLOWING));
}

Also used : WindowFunction(io.crate.expression.symbol.WindowFunction) Collect(io.crate.planner.node.dql.Collect) Symbol(io.crate.expression.symbol.Symbol) Projection(io.crate.execution.dsl.projection.Projection) WindowAggProjection(io.crate.execution.dsl.projection.WindowAggProjection) WindowAggProjection(io.crate.execution.dsl.projection.WindowAggProjection) Test(org.junit.Test) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) RandomizedTest(com.carrotsearch.randomizedtesting.RandomizedTest)

Example 48 with Collect

Use of io.crate.planner.node.dql.Collect in the crate project (by crate).

In class CopyFromPlannerTest, method testNodeFiltersNoMatch:

/**
 * A node filter that matches no node name must leave the collect phase without any node ids.
 */
@Test
public void testNodeFiltersNoMatch() {
    Collect copyFromPlan = plan("copy users from '/path' with (node_filters={name='foobar'})");
    int selectedNodeCount = copyFromPlan.collectPhase().nodeIds().size();
    assertThat(selectedNodeCount, is(0));
}

Also used : Collect(io.crate.planner.node.dql.Collect) Test(org.junit.Test) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest)

Example 49 with Collect

Use of io.crate.planner.node.dql.Collect in the crate project (by crate).

In class CopyFromPlannerTest, method testCopyFromPlan:

/**
 * A plain COPY FROM must be planned as a file-URI collect phase whose target URI
 * is the literal path given in the statement.
 */
@Test
public void testCopyFromPlan() {
    Collect copyFromPlan = plan("copy users from '/path/to/file.extension'");
    assertThat(copyFromPlan.collectPhase(), instanceOf(FileUriCollectPhase.class));

    FileUriCollectPhase fileUriPhase = (FileUriCollectPhase) copyFromPlan.collectPhase();
    Object targetUriValue = ((Literal) fileUriPhase.targetUri()).value();
    assertThat(targetUriValue, is("/path/to/file.extension"));
}

Also used : Collect(io.crate.planner.node.dql.Collect) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase) Test(org.junit.Test) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest)

Example 50 with Collect

Use of io.crate.planner.node.dql.Collect in the crate project (by crate).

In class CopyToPlannerTest, method testCopyToWithPartitionInWhereClauseRoutesToPartitionIndexOnly:

/**
 * COPY TO with a WHERE clause on the partition column must route the collect phase
 * to the index of that single partition only.
 */
@Test
public void testCopyToWithPartitionInWhereClauseRoutesToPartitionIndexOnly() {
    Merge copyToPlan = plan("copy parted where date = 1395874800000 to directory '/tmp/foo'");
    Collect collectNode = (Collect) copyToPlan.subPlan();

    RelationName partedTable = new RelationName("doc", "parted");
    PartitionName targetPartition = new PartitionName(partedTable, singletonList("1395874800000"));
    String expectedIndex = targetPartition.asIndexName();

    RoutedCollectPhase routedPhase = (RoutedCollectPhase) collectNode.collectPhase();
    // Gather every index name that appears in the routing, across all nodes.
    assertThat(
        routedPhase.routing().locations().values().stream()
            .flatMap(shardsByIndex -> shardsByIndex.keySet().stream())
            .collect(Collectors.toSet()),
        contains(expectedIndex));
}

Also used : PartitionName(io.crate.metadata.PartitionName) Merge(io.crate.planner.Merge) Collect(io.crate.planner.node.dql.Collect) RelationName(io.crate.metadata.RelationName) RoutedCollectPhase(io.crate.execution.dsl.phases.RoutedCollectPhase) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test) RandomizedTest(com.carrotsearch.randomizedtesting.RandomizedTest)

Aggregations

Collect (io.crate.planner.node.dql.Collect)66 Test (org.junit.Test)57 CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest)55 RoutedCollectPhase (io.crate.execution.dsl.phases.RoutedCollectPhase)31 RandomizedTest (com.carrotsearch.randomizedtesting.RandomizedTest)27 GroupProjection (io.crate.execution.dsl.projection.GroupProjection)18 EvalProjection (io.crate.execution.dsl.projection.EvalProjection)16 Merge (io.crate.planner.Merge)16 MergePhase (io.crate.execution.dsl.phases.MergePhase)14 Projection (io.crate.execution.dsl.projection.Projection)12 Reference (io.crate.metadata.Reference)11 FilterProjection (io.crate.execution.dsl.projection.FilterProjection)10 TopNProjection (io.crate.execution.dsl.projection.TopNProjection)10 OrderedTopNProjection (io.crate.execution.dsl.projection.OrderedTopNProjection)9 CollectPhase (io.crate.execution.dsl.phases.CollectPhase)8 Symbol (io.crate.expression.symbol.Symbol)8 AggregationProjection (io.crate.execution.dsl.projection.AggregationProjection)6 QueryThenFetch (io.crate.planner.node.dql.QueryThenFetch)6 ColumnIndexWriterProjection (io.crate.execution.dsl.projection.ColumnIndexWriterProjection)5 InputColumn (io.crate.expression.symbol.InputColumn)5