Search in sources :

Example 1 with FileUriCollectPhase

use of io.crate.execution.dsl.phases.FileUriCollectPhase in project crate by crate.

the class CopyFromPlannerTest method testCopyFromNumReadersSetting.

/**
 * A COPY FROM statement with {@code num_readers=1} must produce a
 * {@link FileUriCollectPhase} that is assigned to exactly one node.
 */
@Test
public void testCopyFromNumReadersSetting() {
    Collect copyPlan = plan("copy users from '/path/to/file.extension' with (num_readers=1)");
    var phase = copyPlan.collectPhase();
    assertThat(phase, instanceOf(FileUriCollectPhase.class));
    FileUriCollectPhase fileUriPhase = (FileUriCollectPhase) phase;
    int readerNodeCount = fileUriPhase.nodeIds().size();
    assertThat(readerNodeCount, is(1));
}
Also used : Collect(io.crate.planner.node.dql.Collect) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase) Test(org.junit.Test) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest)

Example 2 with FileUriCollectPhase

use of io.crate.execution.dsl.phases.FileUriCollectPhase in project crate by crate.

the class CopyFromPlannerTest method testCopyFromPlanWithParameters.

/**
 * Checks that WITH-clause options of COPY FROM end up on the collect phase and
 * the index writer projection, and that omitting them yields the defaults.
 */
@Test
public void testCopyFromPlanWithParameters() {
    // scenario 1: every option is given explicitly
    Collect planWithOptions = plan("copy users " + "from '/path/to/file.ext' with (bulk_size=30, compression='gzip', shared=true, fail_fast = true, protocol = 'http')");
    assertThat(planWithOptions.collectPhase(), instanceOf(FileUriCollectPhase.class));
    FileUriCollectPhase phaseWithOptions = (FileUriCollectPhase) planWithOptions.collectPhase();
    SourceIndexWriterProjection writerWithOptions =
        (SourceIndexWriterProjection) phaseWithOptions.projections().get(0);
    assertThat(writerWithOptions.bulkActions(), is(30));
    assertThat(phaseWithOptions.compression(), is("gzip"));
    assertThat(phaseWithOptions.sharedStorage(), is(true));
    assertThat(writerWithOptions.failFast(), is(true));
    assertThat(phaseWithOptions.withClauseOptions().get("protocol"), is("http"));

    // scenario 2: no WITH clause, so the defaults apply
    Collect planWithDefaults = plan("copy users from '/path/to/file.ext'");
    FileUriCollectPhase phaseWithDefaults = (FileUriCollectPhase) planWithDefaults.collectPhase();
    SourceIndexWriterProjection writerWithDefaults =
        (SourceIndexWriterProjection) phaseWithDefaults.projections().get(0);
    assertThat(phaseWithDefaults.compression(), is(nullValue()));
    assertThat(phaseWithDefaults.sharedStorage(), is(nullValue()));
    assertThat(writerWithDefaults.failFast(), is(false));
    assertThat(phaseWithDefaults.withClauseOptions(), is(Settings.EMPTY));
}
Also used : Collect(io.crate.planner.node.dql.Collect) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase) Test(org.junit.Test) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest)

Example 3 with FileUriCollectPhase

use of io.crate.execution.dsl.phases.FileUriCollectPhase in project crate by crate.

the class MapSideDataCollectOperationTest method testFileUriCollect.

/**
 * Writes two JSON lines to a temporary file, collects {@code name} and
 * {@code details.age} from it through a {@link FileCollectSource} and
 * expects one row per line, in file order.
 */
@Test
public void testFileUriCollect() throws Exception {
    FileCollectSource source = new FileCollectSource(createNodeContext(), clusterService, Collections.emptyMap());

    File jsonFile = temporaryFolder.newFile("fileUriCollectOperation.json");
    try (OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(jsonFile), StandardCharsets.UTF_8)) {
        out.write("{\"name\": \"Arthur\", \"id\": 4, \"details\": {\"age\": 38}}\n");
        out.write("{\"id\": 5, \"name\": \"Trillian\", \"details\": {\"age\": 33}}\n");
    }

    UUID jobId = UUID.randomUUID();
    String fileUri = Paths.get(jsonFile.toURI()).toUri().toString();
    FileUriCollectPhase phase = new FileUriCollectPhase(
        jobId,
        0,
        "test",
        Collections.singletonList("noop_id"),
        Literal.of(fileUri),
        Arrays.asList(
            createReference("name", DataTypes.STRING),
            createReference(new ColumnIdent("details", "age"), DataTypes.INTEGER)),
        Collections.emptyList(),
        null,
        false,
        CopyFromParserProperties.DEFAULT,
        FileUriCollectPhase.InputFormat.JSON,
        Settings.EMPTY);

    CollectTask task = mock(CollectTask.class);
    TestingRowConsumer rowConsumer = new TestingRowConsumer();
    BatchIterator<Row> rows = source
        .getIterator(CoordinatorTxnCtx.systemTransactionContext(), phase, task, false)
        .get(5, TimeUnit.SECONDS);
    rowConsumer.accept(rows, null);

    CollectionBucket collected = new CollectionBucket(rowConsumer.getResult());
    assertThat(collected, contains(isRow("Arthur", 38), isRow("Trillian", 33)));
}
Also used : ColumnIdent(io.crate.metadata.ColumnIdent) FileCollectSource(io.crate.execution.engine.collect.sources.FileCollectSource) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase) TestingHelpers.isRow(io.crate.testing.TestingHelpers.isRow) Row(io.crate.data.Row) File(java.io.File) TestingRowConsumer(io.crate.testing.TestingRowConsumer) CollectionBucket(io.crate.data.CollectionBucket) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Example 4 with FileUriCollectPhase

use of io.crate.execution.dsl.phases.FileUriCollectPhase in project crate by crate.

the class FileCollectSource method getIterator.

/**
 * Creates a file-reading iterator for the given phase and returns it as an
 * already completed future.
 *
 * @param collectPhase must be a {@link FileUriCollectPhase}; it is cast without a prior check
 */
@Override
public CompletableFuture<BatchIterator<Row>> getIterator(TransactionContext txnCtx, CollectPhase collectPhase, CollectTask collectTask, boolean supportMoveToStart) {
    FileUriCollectPhase phase = (FileUriCollectPhase) collectPhase;

    // register the symbols to collect so inputs and expressions can be derived from the context
    InputFactory.Context<LineCollectorExpression<?>> inputCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);
    inputCtx.add(collectPhase.toCollect());

    List<String> uris = targetUriToStringList(txnCtx, nodeCtx, phase.targetUri());
    String localNodeId = clusterService.state().nodes().getLocalNodeId();

    BatchIterator<Row> iterator = FileReadingIterator.newInstance(
        uris,
        inputCtx.topLevelInputs(),
        inputCtx.expressions(),
        phase.compression(),
        fileInputFactoryMap,
        phase.sharedStorage(),
        phase.nodeIds().size(),
        getReaderNumber(phase.nodeIds(), localNodeId),
        phase.parserProperties(),
        phase.inputFormat(),
        phase.withClauseOptions());
    return CompletableFuture.completedFuture(iterator);
}
Also used : FileInputFactory(io.crate.execution.engine.collect.files.FileInputFactory) InputFactory(io.crate.expression.InputFactory) LineCollectorExpression(io.crate.execution.engine.collect.files.LineCollectorExpression) FileLineReferenceResolver(io.crate.expression.reference.file.FileLineReferenceResolver) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase)

Example 5 with FileUriCollectPhase

use of io.crate.execution.dsl.phases.FileUriCollectPhase in project crate by crate.

the class CopyFromPlan method planCopyFromExecution.

/**
 * Builds the execution plan for a COPY FROM statement: a {@link FileUriCollectPhase}
 * with a {@link SourceIndexWriterProjection} (or its return-summary variant) on top,
 * wrapped via {@code Merge.ensureOnHandler}.
 *
 * @param copyFrom        the analyzed statement; it is bound with {@code params} and
 *                        {@code subQueryResults} before any of its values are read
 * @param allNodes        nodes from which the execution nodes are selected; its size is
 *                        also the default for the {@code num_readers} setting
 * @param context         supplies the job id, the next execution phase id and the
 *                        transaction/node context used for binding
 * @param params          parameter row used for binding
 * @param subQueryResults sub-query results used for binding
 * @return the plan returned by {@code Merge.ensureOnHandler}
 */
public static ExecutionPlan planCopyFromExecution(AnalyzedCopyFrom copyFrom, DiscoveryNodes allNodes, PlannerContext context, Row params, SubQueryResults subQueryResults) {
    var boundedCopyFrom = bind(copyFrom, context.transactionContext(), context.nodeContext(), params, subQueryResults);
    /*
         * Create a plan that reads json-objects-lines from a file
         * and then executes upsert requests to index the data
         */
    DocTableInfo table = boundedCopyFrom.tableInfo();
    String partitionIdent = boundedCopyFrom.partitionIdent();
    List<String> partitionedByNames = Collections.emptyList();
    List<String> partitionValues = Collections.emptyList();
    // Exactly one of the two lists can become non-empty: the partitioned-by column names
    // when no concrete partition is given, or the decoded partition values when one is.
    if (partitionIdent == null) {
        if (table.isPartitioned()) {
            partitionedByNames = Lists2.map(table.partitionedBy(), ColumnIdent::fqn);
        }
    } else {
        assert table.isPartitioned() : "table must be partitioned if partitionIdent is set";
        // partitionIdent is present -> possible to index raw source into concrete es index
        partitionValues = PartitionName.decodeIdent(partitionIdent);
    }
    // need to exclude _id columns; they're auto generated and won't be available in the files being imported
    ColumnIdent clusteredBy = table.clusteredBy();
    if (DocSysColumns.ID.equals(clusteredBy)) {
        clusteredBy = null;
    }
    List<Reference> primaryKeyRefs = table.primaryKey().stream().filter(r -> !r.equals(DocSysColumns.ID)).map(table::getReference).collect(Collectors.toList());
    List<Symbol> toCollect = getSymbolsRequiredForShardIdCalc(primaryKeyRefs, table.partitionedByColumns(), clusteredBy == null ? null : table.getReference(clusteredBy));
    Reference rawOrDoc = rawOrDoc(table, partitionIdent);
    // Position of rawOrDoc within toCollect; must be captured before the add below.
    final int rawOrDocIdx = toCollect.size();
    toCollect.add(rawOrDoc);
    // The partitioned-by column names are handed to the projection as excludes; null when there are none.
    String[] excludes = partitionedByNames.size() > 0 ? partitionedByNames.toArray(new String[0]) : null;
    InputColumns.SourceSymbols sourceSymbols = new InputColumns.SourceSymbols(toCollect);
    Symbol clusteredByInputCol = null;
    if (clusteredBy != null) {
        clusteredByInputCol = InputColumns.create(table.getReference(clusteredBy), sourceSymbols);
    }
    SourceIndexWriterProjection sourceIndexWriterProjection;
    List<? extends Symbol> projectionOutputs = AbstractIndexWriterProjection.OUTPUTS;
    boolean returnSummary = copyFrom instanceof AnalyzedCopyFromReturnSummary;
    boolean failFast = boundedCopyFrom.settings().getAsBoolean("fail_fast", false);
    if (returnSummary || failFast) {
        // Three extra columns are collected: source URI, source URI failure and line number.
        // Each InputColumn index is taken from toCollect.size() right before the matching
        // reference is appended, so the order of these statements must not change.
        final InputColumn sourceUriSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriExpression.getReferenceForRelation(table.ident()));
        final InputColumn sourceUriFailureSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriFailureExpression.getReferenceForRelation(table.ident()));
        final InputColumn lineNumberSymbol = new InputColumn(toCollect.size(), DataTypes.LONG);
        toCollect.add(SourceLineNumberExpression.getReferenceForRelation(table.ident()));
        if (returnSummary) {
            // Only the return-summary variant replaces the default projection outputs;
            // with fail_fast alone the outputs stay AbstractIndexWriterProjection.OUTPUTS.
            List<? extends Symbol> fields = ((AnalyzedCopyFromReturnSummary) copyFrom).outputs();
            projectionOutputs = InputColumns.create(fields, new InputColumns.SourceSymbols(fields));
        }
        sourceIndexWriterProjection = new SourceIndexWriterReturnSummaryProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), new InputColumn(rawOrDocIdx, rawOrDoc.valueType()), table.primaryKey(), InputColumns.create(table.partitionedByColumns(), sourceSymbols), clusteredBy, boundedCopyFrom.settings(), null, excludes, InputColumns.create(primaryKeyRefs, sourceSymbols), clusteredByInputCol, projectionOutputs, // autoCreateIndices
        table.isPartitioned(), sourceUriSymbol, sourceUriFailureSymbol, lineNumberSymbol);
    } else {
        sourceIndexWriterProjection = new SourceIndexWriterProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), new InputColumn(rawOrDocIdx, rawOrDoc.valueType()), table.primaryKey(), InputColumns.create(table.partitionedByColumns(), sourceSymbols), clusteredBy, boundedCopyFrom.settings(), null, excludes, InputColumns.create(primaryKeyRefs, sourceSymbols), clusteredByInputCol, projectionOutputs, // autoCreateIndices
        table.isPartitioned());
    }
    // the partitionedBy-inputColumns created for the projection are still valid because the positions are not changed
    // NOTE(review): partitionValues starts as an empty list and is only reassigned from
    // PartitionName.decodeIdent(...), so this null check only matters if decodeIdent can
    // return null. Confirm against its contract.
    if (partitionValues != null) {
        rewriteToCollectToUsePartitionValues(table.partitionedByColumns(), partitionValues, toCollect);
    }
    // num_readers defaults to the number of nodes in allNodes; compression and shared default to null.
    FileUriCollectPhase collectPhase = new FileUriCollectPhase(context.jobId(), context.nextExecutionPhaseId(), "copyFrom", getExecutionNodes(allNodes, boundedCopyFrom.settings().getAsInt("num_readers", allNodes.getSize()), boundedCopyFrom.nodePredicate()), boundedCopyFrom.uri(), toCollect, Collections.emptyList(), boundedCopyFrom.settings().get("compression", null), boundedCopyFrom.settings().getAsBoolean("shared", null), CopyFromParserProperties.of(boundedCopyFrom.settings()), boundedCopyFrom.inputFormat(), boundedCopyFrom.settings());
    Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, -1, null);
    // add the projection to the plan to ensure that the outputs are correctly set to the projection outputs
    collect.addProjection(sourceIndexWriterProjection);
    // With a return summary no handler projection is passed on; otherwise MergeCountProjection is.
    List<Projection> handlerProjections;
    if (returnSummary) {
        handlerProjections = Collections.emptyList();
    } else {
        handlerProjections = List.of(MergeCountProjection.INSTANCE);
    }
    return Merge.ensureOnHandler(collect, context, handlerProjections);
}
Also used : DocTableInfo(io.crate.metadata.doc.DocTableInfo) InputColumns(io.crate.execution.dsl.projection.builder.InputColumns) SourceIndexWriterReturnSummaryProjection(io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection) Collect(io.crate.planner.node.dql.Collect) GeneratedReference(io.crate.metadata.GeneratedReference) Reference(io.crate.metadata.Reference) Symbol(io.crate.expression.symbol.Symbol) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) Projection(io.crate.execution.dsl.projection.Projection) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) MergeCountProjection(io.crate.execution.dsl.projection.MergeCountProjection) AbstractIndexWriterProjection(io.crate.execution.dsl.projection.AbstractIndexWriterProjection) SourceIndexWriterReturnSummaryProjection(io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase) ColumnIdent(io.crate.metadata.ColumnIdent) InputColumn(io.crate.expression.symbol.InputColumn) AnalyzedCopyFromReturnSummary(io.crate.analyze.AnalyzedCopyFromReturnSummary)

Aggregations

- FileUriCollectPhase (io.crate.execution.dsl.phases.FileUriCollectPhase): 6
- Collect (io.crate.planner.node.dql.Collect): 4
- CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest): 4
- Test (org.junit.Test): 4
- SourceIndexWriterProjection (io.crate.execution.dsl.projection.SourceIndexWriterProjection): 2
- ColumnIdent (io.crate.metadata.ColumnIdent): 2
- AnalyzedCopyFromReturnSummary (io.crate.analyze.AnalyzedCopyFromReturnSummary): 1
- CollectionBucket (io.crate.data.CollectionBucket): 1
- Row (io.crate.data.Row): 1
- AbstractIndexWriterProjection (io.crate.execution.dsl.projection.AbstractIndexWriterProjection): 1
- MergeCountProjection (io.crate.execution.dsl.projection.MergeCountProjection): 1
- Projection (io.crate.execution.dsl.projection.Projection): 1
- SourceIndexWriterReturnSummaryProjection (io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection): 1
- InputColumns (io.crate.execution.dsl.projection.builder.InputColumns): 1
- FileInputFactory (io.crate.execution.engine.collect.files.FileInputFactory): 1
- LineCollectorExpression (io.crate.execution.engine.collect.files.LineCollectorExpression): 1
- FileCollectSource (io.crate.execution.engine.collect.sources.FileCollectSource): 1
- InputFactory (io.crate.expression.InputFactory): 1
- FileLineReferenceResolver (io.crate.expression.reference.file.FileLineReferenceResolver): 1
- InputColumn (io.crate.expression.symbol.InputColumn): 1