Search in sources:

Example 46 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class CopyFromPlan method planCopyFromExecution.

/**
 * Builds the execution plan for an analyzed copy-from statement.
 *
 * <p>The statement is first bound via {@code bind(...)}. A {@link FileUriCollectPhase}
 * is then created that collects, in this order: the symbols required for the shard-id
 * calculation, the raw-or-doc reference, and (only when a summary is requested or
 * {@code fail_fast} is set) the source-uri, source-uri-failure and line-number
 * references. A {@link SourceIndexWriterProjection} (or its return-summary variant)
 * is attached to the collect node and the result is passed through
 * {@code Merge.ensureOnHandler}.
 *
 * @param copyFrom         the analyzed statement; if it is an
 *                         {@link AnalyzedCopyFromReturnSummary} its outputs become the
 *                         projection outputs and no handler projection is added
 * @param allNodes         passed to {@code getExecutionNodes}; its size is the default
 *                         for the {@code num_readers} setting
 * @param context          supplies the transaction context, node context, job id and
 *                         execution phase id
 * @param params           passed to {@code bind}
 * @param subQueryResults  passed to {@code bind}
 * @return the plan returned by {@code Merge.ensureOnHandler}
 */
public static ExecutionPlan planCopyFromExecution(AnalyzedCopyFrom copyFrom, DiscoveryNodes allNodes, PlannerContext context, Row params, SubQueryResults subQueryResults) {
    var boundedCopyFrom = bind(copyFrom, context.transactionContext(), context.nodeContext(), params, subQueryResults);
    /*
         * Create a plan that reads json-objects-lines from a file
         * and then executes upsert requests to index the data
         */
    DocTableInfo table = boundedCopyFrom.tableInfo();
    String partitionIdent = boundedCopyFrom.partitionIdent();
    List<String> partitionedByNames = Collections.emptyList();
    List<String> partitionValues = Collections.emptyList();
    if (partitionIdent == null) {
        // no concrete partition given: remember the partition column names, they are
        // handed to the projection as "excludes" further down
        if (table.isPartitioned()) {
            partitionedByNames = Lists2.map(table.partitionedBy(), ColumnIdent::fqn);
        }
    } else {
        assert table.isPartitioned() : "table must be partitioned if partitionIdent is set";
        // partitionIdent is present -> possible to index raw source into concrete es index
        partitionValues = PartitionName.decodeIdent(partitionIdent);
    }
    // need to exclude _id columns; they're auto generated and won't be available in the files being imported
    ColumnIdent clusteredBy = table.clusteredBy();
    if (DocSysColumns.ID.equals(clusteredBy)) {
        clusteredBy = null;
    }
    List<Reference> primaryKeyRefs = table.primaryKey().stream().filter(r -> !r.equals(DocSysColumns.ID)).map(table::getReference).collect(Collectors.toList());
    List<Symbol> toCollect = getSymbolsRequiredForShardIdCalc(primaryKeyRefs, table.partitionedByColumns(), clusteredBy == null ? null : table.getReference(clusteredBy));
    Reference rawOrDoc = rawOrDoc(table, partitionIdent);
    // capture the index BEFORE appending, so it is the position of rawOrDoc within toCollect
    final int rawOrDocIdx = toCollect.size();
    toCollect.add(rawOrDoc);
    String[] excludes = partitionedByNames.size() > 0 ? partitionedByNames.toArray(new String[0]) : null;
    InputColumns.SourceSymbols sourceSymbols = new InputColumns.SourceSymbols(toCollect);
    Symbol clusteredByInputCol = null;
    if (clusteredBy != null) {
        clusteredByInputCol = InputColumns.create(table.getReference(clusteredBy), sourceSymbols);
    }
    SourceIndexWriterProjection sourceIndexWriterProjection;
    List<? extends Symbol> projectionOutputs = AbstractIndexWriterProjection.OUTPUTS;
    boolean returnSummary = copyFrom instanceof AnalyzedCopyFromReturnSummary;
    boolean failFast = boundedCopyFrom.settings().getAsBoolean("fail_fast", false);
    if (returnSummary || failFast) {
        // each InputColumn is created with the current size of toCollect and the matching
        // reference is appended right after, so the column index points at that reference;
        // the order of these statements must not be changed
        final InputColumn sourceUriSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriExpression.getReferenceForRelation(table.ident()));
        final InputColumn sourceUriFailureSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriFailureExpression.getReferenceForRelation(table.ident()));
        final InputColumn lineNumberSymbol = new InputColumn(toCollect.size(), DataTypes.LONG);
        toCollect.add(SourceLineNumberExpression.getReferenceForRelation(table.ident()));
        if (returnSummary) {
            // only the return-summary statement overrides the default projection outputs
            List<? extends Symbol> fields = ((AnalyzedCopyFromReturnSummary) copyFrom).outputs();
            projectionOutputs = InputColumns.create(fields, new InputColumns.SourceSymbols(fields));
        }
        sourceIndexWriterProjection = new SourceIndexWriterReturnSummaryProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), new InputColumn(rawOrDocIdx, rawOrDoc.valueType()), table.primaryKey(), InputColumns.create(table.partitionedByColumns(), sourceSymbols), clusteredBy, boundedCopyFrom.settings(), null, excludes, InputColumns.create(primaryKeyRefs, sourceSymbols), clusteredByInputCol, projectionOutputs, // autoCreateIndices (presumably labels the argument on the next line)
        table.isPartitioned(), sourceUriSymbol, sourceUriFailureSymbol, lineNumberSymbol);
    } else {
        sourceIndexWriterProjection = new SourceIndexWriterProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), new InputColumn(rawOrDocIdx, rawOrDoc.valueType()), table.primaryKey(), InputColumns.create(table.partitionedByColumns(), sourceSymbols), clusteredBy, boundedCopyFrom.settings(), null, excludes, InputColumns.create(primaryKeyRefs, sourceSymbols), clusteredByInputCol, projectionOutputs, // autoCreateIndices (presumably labels the argument on the next line)
        table.isPartitioned());
    }
    // the partitionedBy-inputColumns created for the projection are still valid because the positions are not changed
    // NOTE(review): partitionValues is initialised to an empty list above, so this check is
    // only meaningful if PartitionName.decodeIdent can return null — confirm.
    if (partitionValues != null) {
        rewriteToCollectToUsePartitionValues(table.partitionedByColumns(), partitionValues, toCollect);
    }
    FileUriCollectPhase collectPhase = new FileUriCollectPhase(context.jobId(), context.nextExecutionPhaseId(), "copyFrom", getExecutionNodes(allNodes, boundedCopyFrom.settings().getAsInt("num_readers", allNodes.getSize()), boundedCopyFrom.nodePredicate()), boundedCopyFrom.uri(), toCollect, Collections.emptyList(), boundedCopyFrom.settings().get("compression", null), boundedCopyFrom.settings().getAsBoolean("shared", null), CopyFromParserProperties.of(boundedCopyFrom.settings()), boundedCopyFrom.inputFormat(), boundedCopyFrom.settings());
    Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, -1, null);
    // add the projection to the plan to ensure that the outputs are correctly set to the projection outputs
    collect.addProjection(sourceIndexWriterProjection);
    // with a summary the rows are returned as-is; otherwise a MergeCountProjection is applied on the handler
    List<Projection> handlerProjections;
    if (returnSummary) {
        handlerProjections = Collections.emptyList();
    } else {
        handlerProjections = List.of(MergeCountProjection.INSTANCE);
    }
    return Merge.ensureOnHandler(collect, context, handlerProjections);
}
Also used : DocTableInfo(io.crate.metadata.doc.DocTableInfo) InputColumns(io.crate.execution.dsl.projection.builder.InputColumns) SourceIndexWriterReturnSummaryProjection(io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection) Collect(io.crate.planner.node.dql.Collect) GeneratedReference(io.crate.metadata.GeneratedReference) Reference(io.crate.metadata.Reference) Symbol(io.crate.expression.symbol.Symbol) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) Projection(io.crate.execution.dsl.projection.Projection) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) MergeCountProjection(io.crate.execution.dsl.projection.MergeCountProjection) AbstractIndexWriterProjection(io.crate.execution.dsl.projection.AbstractIndexWriterProjection) SourceIndexWriterReturnSummaryProjection(io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase) ColumnIdent(io.crate.metadata.ColumnIdent) InputColumn(io.crate.expression.symbol.InputColumn) AnalyzedCopyFromReturnSummary(io.crate.analyze.AnalyzedCopyFromReturnSummary)

Example 47 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class ProjectionToProjectorVisitorTest method testSortingTopNProjection.

/**
 * Verifies that the visitor turns an {@link OrderedTopNProjection} into a
 * {@link SortingTopNProjector}.
 */
@Test
public void testSortingTopNProjection() throws Exception {
    List<Symbol> projectedColumns = Arrays.asList(
        Literal.of("foo"),
        new InputColumn(0),
        new InputColumn(1)
    );
    OrderedTopNProjection orderedTopN = new OrderedTopNProjection(
        10,
        0,
        projectedColumns,
        Arrays.asList(new InputColumn(0), new InputColumn(1)),
        new boolean[] { false, false },
        new boolean[] { false, false }
    );

    Projector created = visitor.create(
        orderedTopN,
        txnCtx,
        RamAccounting.NO_ACCOUNTING,
        memoryManager,
        UUID.randomUUID()
    );

    assertThat(created, instanceOf(SortingTopNProjector.class));
}
Also used : Projector(io.crate.data.Projector) SortingProjector(io.crate.execution.engine.sort.SortingProjector) SortingTopNProjector(io.crate.execution.engine.sort.SortingTopNProjector) GroupingProjector(io.crate.execution.engine.aggregation.GroupingProjector) SortingTopNProjector(io.crate.execution.engine.sort.SortingTopNProjector) Symbol(io.crate.expression.symbol.Symbol) InputColumn(io.crate.expression.symbol.InputColumn) OrderedTopNProjection(io.crate.execution.dsl.projection.OrderedTopNProjection) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Example 48 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class ProjectionToProjectorVisitorTest method testGroupProjector.

/**
 * Verifies that a {@link GroupProjection} is turned into a {@link GroupingProjector}
 * and that, chained with an ordered top-N projector, it produces one row per
 * (race, gender) group with the expected average age and count.
 */
@Test
public void testGroupProjector() throws Exception {
    // input columns used by the query:
    //   race -> in(0), avg(age) -> in(1), count(race) -> in(0), gender -> in(2)
    // select race, avg(age), count(race), gender ... group by race, gender
    List<Symbol> groupKeys = Arrays.asList(
        new InputColumn(0, DataTypes.STRING),
        new InputColumn(2, DataTypes.STRING)
    );
    Aggregation avgOfAge = new Aggregation(
        avgSignature,
        avgSignature.getReturnType().createType(),
        Collections.singletonList(new InputColumn(1))
    );
    Aggregation countOfRace = new Aggregation(
        CountAggregation.SIGNATURE,
        CountAggregation.SIGNATURE.getReturnType().createType(),
        Collections.singletonList(new InputColumn(0))
    );
    List<Aggregation> groupAggregations = Arrays.asList(avgOfAge, countOfRace);
    GroupProjection groupProjection = new GroupProjection(
        groupKeys, groupAggregations, AggregateMode.ITER_FINAL, RowGranularity.CLUSTER);

    Projector groupingProjector = visitor.create(
        groupProjection, txnCtx, RamAccounting.NO_ACCOUNTING, memoryManager, UUID.randomUUID());
    assertThat(groupingProjector, instanceOf(GroupingProjector.class));

    // use a topN projection in order to get sorted outputs
    List<Symbol> sortedOutputs = Arrays.asList(
        new InputColumn(0, DataTypes.STRING),
        new InputColumn(1, DataTypes.STRING),
        new InputColumn(2, DataTypes.DOUBLE),
        new InputColumn(3, DataTypes.LONG)
    );
    OrderedTopNProjection orderingProjection = new OrderedTopNProjection(
        10,
        0,
        sortedOutputs,
        List.of(new InputColumn(2, DataTypes.DOUBLE)),
        new boolean[] { false },
        new boolean[] { false }
    );
    Projector orderingProjector = visitor.create(
        orderingProjection, txnCtx, RamAccounting.NO_ACCOUNTING, memoryManager, UUID.randomUUID());

    String raceHuman = "human";
    String raceVogon = "vogon";
    String genderMale = "male";
    String genderFemale = "female";

    // rows are (race, age, gender)
    List<Object[]> inputRows = new ArrayList<>();
    inputRows.add($(raceHuman, 34, genderMale));
    inputRows.add($(raceHuman, 22, genderFemale));
    inputRows.add($(raceVogon, 40, genderMale));
    inputRows.add($(raceVogon, 48, genderMale));
    inputRows.add($(raceHuman, 34, genderMale));

    // source -> grouping -> ordering
    BatchIterator<Row> source = InMemoryBatchIterator.of(new CollectionBucket(inputRows), SENTINEL, true);
    BatchIterator<Row> grouped = groupingProjector.apply(source);
    BatchIterator<Row> ordered = orderingProjector.apply(grouped);

    TestingRowConsumer rowConsumer = new TestingRowConsumer();
    rowConsumer.accept(ordered, null);
    Bucket resultBucket = rowConsumer.getBucket();

    assertThat(resultBucket, contains(
        isRow(raceHuman, genderFemale, 22.0, 1L),
        isRow(raceHuman, genderMale, 34.0, 2L),
        isRow(raceVogon, genderMale, 44.0, 2L)
    ));
}
Also used : Projector(io.crate.data.Projector) SortingProjector(io.crate.execution.engine.sort.SortingProjector) SortingTopNProjector(io.crate.execution.engine.sort.SortingTopNProjector) GroupingProjector(io.crate.execution.engine.aggregation.GroupingProjector) Symbol(io.crate.expression.symbol.Symbol) ArrayList(java.util.ArrayList) Aggregation(io.crate.expression.symbol.Aggregation) CountAggregation(io.crate.execution.engine.aggregation.impl.CountAggregation) Bucket(io.crate.data.Bucket) CollectionBucket(io.crate.data.CollectionBucket) InputColumn(io.crate.expression.symbol.InputColumn) GroupingProjector(io.crate.execution.engine.aggregation.GroupingProjector) Row(io.crate.data.Row) TestingHelpers.isRow(io.crate.testing.TestingHelpers.isRow) GroupProjection(io.crate.execution.dsl.projection.GroupProjection) OrderedTopNProjection(io.crate.execution.dsl.projection.OrderedTopNProjection) CollectionBucket(io.crate.data.CollectionBucket) TestingRowConsumer(io.crate.testing.TestingRowConsumer) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Example 49 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class IndexWriterProjectorTest method testIndexWriter.

/**
 * Feeds 100 rows of the form {@code (id, json source)} through an
 * {@link IndexWriterProjector} targeting the {@code bulk_import} table and verifies
 * that the projector reports 100 rows and that, after a refresh,
 * {@code select count(*)} on the table returns 100 as well.
 */
@Test
public void testIndexWriter() throws Throwable {
    execute("create table bulk_import (id int primary key, name string) with (number_of_replicas=0)");
    ensureGreen();
    // presumably reads the JSON source from input column 1 of each row — confirm against InputCollectExpression
    InputCollectExpression sourceInput = new InputCollectExpression(1);
    List<CollectExpression<Row, ?>> collectExpressions = Collections.<CollectExpression<Row, ?>>singletonList(sourceInput);
    RelationName bulkImportIdent = new RelationName(sqlExecutor.getCurrentSchema(), "bulk_import");
    ClusterState state = clusterService().state();
    Settings tableSettings = TableSettingsResolver.get(state.getMetadata(), bulkImportIdent, false);
    ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class);
    // one argument per line: the constructor is purely positional, so the order must be kept as-is
    IndexWriterProjector writerProjector = new IndexWriterProjector(
        clusterService(),
        new NodeLimits(new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
        new NoopCircuitBreaker("dummy"),
        RamAccounting.NO_ACCOUNTING,
        threadPool.scheduler(),
        threadPool.executor(ThreadPool.Names.SEARCH),
        CoordinatorTxnCtx.systemTransactionContext(),
        new NodeContext(internalCluster().getInstance(Functions.class)),
        Settings.EMPTY,
        IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.get(tableSettings),
        NumberOfReplicas.fromSettings(tableSettings, state.getNodes().getSize()),
        internalCluster().getInstance(TransportCreatePartitionsAction.class),
        internalCluster().getInstance(TransportShardUpsertAction.class)::execute,
        IndexNameResolver.forTable(bulkImportIdent),
        new Reference(new ReferenceIdent(bulkImportIdent, DocSysColumns.RAW), RowGranularity.DOC, DataTypes.STRING, 0, null),
        Collections.singletonList(ID_IDENT),
        Collections.<Symbol>singletonList(new InputColumn(0)),
        null,
        null,
        sourceInput,
        collectExpressions,
        20,
        null,
        null,
        false,
        false,
        UUID.randomUUID(),
        UpsertResultContext.forRowCount(),
        false
    );
    // typed as BatchIterator<Row> (was a raw type) so that apply(...) below is a checked call
    List<Row> rows = IntStream.range(0, 100)
        .<Row>mapToObj(i -> new RowN(new Object[] { i, "{\"id\": " + i + ", \"name\": \"Arthur\"}" }))
        .collect(Collectors.toList());
    BatchIterator<Row> rowsIterator = InMemoryBatchIterator.of(rows, SENTINEL, true);
    TestingRowConsumer consumer = new TestingRowConsumer();
    consumer.accept(writerProjector.apply(rowsIterator), null);
    Bucket objects = consumer.getBucket();
    // the projector emits a single row holding the number of written rows
    assertThat(objects, contains(isRow(100L)));
    execute("refresh table bulk_import");
    execute("select count(*) from bulk_import");
    assertThat(response.rowCount(), is(1L));
    assertThat(response.rows()[0][0], is(100L));
}
Also used : TransportCreatePartitionsAction(org.elasticsearch.action.admin.indices.create.TransportCreatePartitionsAction) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) NodeContext(io.crate.metadata.NodeContext) Reference(io.crate.metadata.Reference) ThreadPool(org.elasticsearch.threadpool.ThreadPool) BatchIterator(io.crate.data.BatchIterator) InMemoryBatchIterator(io.crate.data.InMemoryBatchIterator) CollectExpression(io.crate.execution.engine.collect.CollectExpression) InputCollectExpression(io.crate.execution.engine.collect.InputCollectExpression) ReferenceIdent(io.crate.metadata.ReferenceIdent) RowN(io.crate.data.RowN) InputCollectExpression(io.crate.execution.engine.collect.InputCollectExpression) Bucket(io.crate.data.Bucket) InputColumn(io.crate.expression.symbol.InputColumn) NodeLimits(io.crate.execution.jobs.NodeLimits) RelationName(io.crate.metadata.RelationName) NoopCircuitBreaker(org.elasticsearch.common.breaker.NoopCircuitBreaker) Settings(org.elasticsearch.common.settings.Settings) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) TestingRowConsumer(io.crate.testing.TestingRowConsumer) Test(org.junit.Test)

Example 50 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class IndexWriterProjectorUnitTest method testNullPKValue.

/**
 * Sends a single row whose primary-key column is {@code null} through an
 * {@link IndexWriterProjector} and expects a result of zero affected rows
 * instead of a propagated failure.
 */
@Test
public void testNullPKValue() throws Throwable {
    InputCollectExpression sourceInput = new InputCollectExpression(0);
    List<CollectExpression<Row, ?>> collectExpressions = Collections.singletonList(sourceInput);
    TransportCreatePartitionsAction createPartitionsAction = mock(TransportCreatePartitionsAction.class);

    // the constructor is purely positional; arguments are listed one per line in the original order
    IndexWriterProjector indexWriter = new IndexWriterProjector(
        clusterService,
        new NodeLimits(new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
        new NoopCircuitBreaker("dummy"),
        RamAccounting.NO_ACCOUNTING,
        scheduler,
        executor,
        CoordinatorTxnCtx.systemTransactionContext(),
        createNodeContext(),
        Settings.EMPTY,
        5,
        1,
        createPartitionsAction,
        (request, listener) -> {
            // intentionally a no-op
        },
        IndexNameResolver.forTable(BULK_IMPORT_IDENT),
        RAW_SOURCE_REFERENCE,
        Collections.singletonList(ID_IDENT),
        Collections.<Symbol>singletonList(new InputColumn(1)),
        null,
        null,
        sourceInput,
        collectExpressions,
        20,
        null,
        null,
        false,
        false,
        UUID.randomUUID(),
        UpsertResultContext.forRowCount(),
        false
    );

    // column 0 carries the source document, column 1 (the primary-key input) is null
    RowN rowWithNullPk = new RowN(new Object[] { new BytesRef("{\"y\": \"x\"}"), null });
    BatchIterator<Row> input = InMemoryBatchIterator.of(Collections.singletonList(rowWithNullPk), SENTINEL, true);
    BatchIterator<Row> written = indexWriter.apply(input);

    TestingRowConsumer rowConsumer = new TestingRowConsumer();
    rowConsumer.accept(written, null);
    List<Object[]> resultRows = rowConsumer.getResult();

    // Zero affected rows as a NULL as a PK value will result in an exception.
    // It must never bubble up as other rows might already have been written.
    assertThat(resultRows.get(0)[0], is(0L));
}
Also used : TransportCreatePartitionsAction(org.elasticsearch.action.admin.indices.create.TransportCreatePartitionsAction) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) CollectExpression(io.crate.execution.engine.collect.CollectExpression) InputCollectExpression(io.crate.execution.engine.collect.InputCollectExpression) RowN(io.crate.data.RowN) InputCollectExpression(io.crate.execution.engine.collect.InputCollectExpression) InputColumn(io.crate.expression.symbol.InputColumn) NodeLimits(io.crate.execution.jobs.NodeLimits) Row(io.crate.data.Row) NoopCircuitBreaker(org.elasticsearch.common.breaker.NoopCircuitBreaker) BytesRef(org.apache.lucene.util.BytesRef) TestingRowConsumer(io.crate.testing.TestingRowConsumer) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Aggregations

InputColumn (io.crate.expression.symbol.InputColumn)61 Test (org.junit.Test)47 Symbol (io.crate.expression.symbol.Symbol)38 CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest)25 Reference (io.crate.metadata.Reference)15 BytesStreamOutput (org.elasticsearch.common.io.stream.BytesStreamOutput)11 StreamInput (org.elasticsearch.common.io.stream.StreamInput)11 MergePhase (io.crate.execution.dsl.phases.MergePhase)10 GroupProjection (io.crate.execution.dsl.projection.GroupProjection)10 FilterProjection (io.crate.execution.dsl.projection.FilterProjection)9 OrderedTopNProjection (io.crate.execution.dsl.projection.OrderedTopNProjection)9 Aggregation (io.crate.expression.symbol.Aggregation)9 Function (io.crate.expression.symbol.Function)9 ArrayList (java.util.ArrayList)9 TopNProjection (io.crate.execution.dsl.projection.TopNProjection)8 RandomizedTest (com.carrotsearch.randomizedtesting.RandomizedTest)7 Row (io.crate.data.Row)7 EvalProjection (io.crate.execution.dsl.projection.EvalProjection)7 Projection (io.crate.execution.dsl.projection.Projection)7 CountAggregation (io.crate.execution.engine.aggregation.impl.CountAggregation)7