Examples with InputColumn - io.crate.expression.symbol.InputColumn

Example 46 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class CopyFromPlan method planCopyFromExecution.

public static ExecutionPlan planCopyFromExecution(AnalyzedCopyFrom copyFrom, DiscoveryNodes allNodes, PlannerContext context, Row params, SubQueryResults subQueryResults) {
    var boundedCopyFrom = bind(copyFrom, context.transactionContext(), context.nodeContext(), params, subQueryResults);
    /*
         * Create a plan that reads json-objects-lines from a file
         * and then executes upsert requests to index the data
         */
    DocTableInfo table = boundedCopyFrom.tableInfo();
    String partitionIdent = boundedCopyFrom.partitionIdent();
    List<String> partitionedByNames = Collections.emptyList();
    List<String> partitionValues = Collections.emptyList();
    if (partitionIdent == null) {
        if (table.isPartitioned()) {
            partitionedByNames = Lists2.map(table.partitionedBy(), ColumnIdent::fqn);
        }
    } else {
        assert table.isPartitioned() : "table must be partitioned if partitionIdent is set";
        // partitionIdent is present -> possible to index raw source into concrete es index
        partitionValues = PartitionName.decodeIdent(partitionIdent);
    }
    // need to exclude _id columns; they're auto generated and won't be available in the files being imported
    ColumnIdent clusteredBy = table.clusteredBy();
    if (DocSysColumns.ID.equals(clusteredBy)) {
        clusteredBy = null;
    }
    List<Reference> primaryKeyRefs = table.primaryKey().stream().filter(r -> !r.equals(DocSysColumns.ID)).map(table::getReference).collect(Collectors.toList());
    List<Symbol> toCollect = getSymbolsRequiredForShardIdCalc(primaryKeyRefs, table.partitionedByColumns(), clusteredBy == null ? null : table.getReference(clusteredBy));
    Reference rawOrDoc = rawOrDoc(table, partitionIdent);
    final int rawOrDocIdx = toCollect.size();
    toCollect.add(rawOrDoc);
    String[] excludes = partitionedByNames.size() > 0 ? partitionedByNames.toArray(new String[0]) : null;
    InputColumns.SourceSymbols sourceSymbols = new InputColumns.SourceSymbols(toCollect);
    Symbol clusteredByInputCol = null;
    if (clusteredBy != null) {
        clusteredByInputCol = InputColumns.create(table.getReference(clusteredBy), sourceSymbols);
    }
    SourceIndexWriterProjection sourceIndexWriterProjection;
    List<? extends Symbol> projectionOutputs = AbstractIndexWriterProjection.OUTPUTS;
    boolean returnSummary = copyFrom instanceof AnalyzedCopyFromReturnSummary;
    boolean failFast = boundedCopyFrom.settings().getAsBoolean("fail_fast", false);
    if (returnSummary || failFast) {
        final InputColumn sourceUriSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriExpression.getReferenceForRelation(table.ident()));
        final InputColumn sourceUriFailureSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriFailureExpression.getReferenceForRelation(table.ident()));
        final InputColumn lineNumberSymbol = new InputColumn(toCollect.size(), DataTypes.LONG);
        toCollect.add(SourceLineNumberExpression.getReferenceForRelation(table.ident()));
        if (returnSummary) {
            List<? extends Symbol> fields = ((AnalyzedCopyFromReturnSummary) copyFrom).outputs();
            projectionOutputs = InputColumns.create(fields, new InputColumns.SourceSymbols(fields));
        }
        sourceIndexWriterProjection = new SourceIndexWriterReturnSummaryProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), new InputColumn(rawOrDocIdx, rawOrDoc.valueType()), table.primaryKey(), InputColumns.create(table.partitionedByColumns(), sourceSymbols), clusteredBy, boundedCopyFrom.settings(), null, excludes, InputColumns.create(primaryKeyRefs, sourceSymbols), clusteredByInputCol, projectionOutputs, // autoCreateIndices
        table.isPartitioned(), sourceUriSymbol, sourceUriFailureSymbol, lineNumberSymbol);
    } else {
        sourceIndexWriterProjection = new SourceIndexWriterProjection(table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), new InputColumn(rawOrDocIdx, rawOrDoc.valueType()), table.primaryKey(), InputColumns.create(table.partitionedByColumns(), sourceSymbols), clusteredBy, boundedCopyFrom.settings(), null, excludes, InputColumns.create(primaryKeyRefs, sourceSymbols), clusteredByInputCol, projectionOutputs, // autoCreateIndices
        table.isPartitioned());
    }
    // the partitionedBy-inputColumns created for the projection are still valid because the positions are not changed
    if (partitionValues != null) {
        rewriteToCollectToUsePartitionValues(table.partitionedByColumns(), partitionValues, toCollect);
    }
    FileUriCollectPhase collectPhase = new FileUriCollectPhase(context.jobId(), context.nextExecutionPhaseId(), "copyFrom", getExecutionNodes(allNodes, boundedCopyFrom.settings().getAsInt("num_readers", allNodes.getSize()), boundedCopyFrom.nodePredicate()), boundedCopyFrom.uri(), toCollect, Collections.emptyList(), boundedCopyFrom.settings().get("compression", null), boundedCopyFrom.settings().getAsBoolean("shared", null), CopyFromParserProperties.of(boundedCopyFrom.settings()), boundedCopyFrom.inputFormat(), boundedCopyFrom.settings());
    Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, -1, null);
    // add the projection to the plan to ensure that the outputs are correctly set to the projection outputs
    collect.addProjection(sourceIndexWriterProjection);
    List<Projection> handlerProjections;
    if (returnSummary) {
        handlerProjections = Collections.emptyList();
    } else {
        handlerProjections = List.of(MergeCountProjection.INSTANCE);
    }
    return Merge.ensureOnHandler(collect, context, handlerProjections);
}

Also used : DocTableInfo(io.crate.metadata.doc.DocTableInfo) InputColumns(io.crate.execution.dsl.projection.builder.InputColumns) SourceIndexWriterReturnSummaryProjection(io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection) Collect(io.crate.planner.node.dql.Collect) GeneratedReference(io.crate.metadata.GeneratedReference) Reference(io.crate.metadata.Reference) Symbol(io.crate.expression.symbol.Symbol) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) Projection(io.crate.execution.dsl.projection.Projection) SourceIndexWriterProjection(io.crate.execution.dsl.projection.SourceIndexWriterProjection) MergeCountProjection(io.crate.execution.dsl.projection.MergeCountProjection) AbstractIndexWriterProjection(io.crate.execution.dsl.projection.AbstractIndexWriterProjection) SourceIndexWriterReturnSummaryProjection(io.crate.execution.dsl.projection.SourceIndexWriterReturnSummaryProjection) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase) ColumnIdent(io.crate.metadata.ColumnIdent) InputColumn(io.crate.expression.symbol.InputColumn) AnalyzedCopyFromReturnSummary(io.crate.analyze.AnalyzedCopyFromReturnSummary)

Example 47 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class ProjectionToProjectorVisitorTest method testSortingTopNProjection.

@Test
public void testSortingTopNProjection() throws Exception {
    List<Symbol> outputs = Arrays.asList(Literal.of("foo"), new InputColumn(0), new InputColumn(1));
    OrderedTopNProjection projection = new OrderedTopNProjection(10, 0, outputs, Arrays.asList(new InputColumn(0), new InputColumn(1)), new boolean[] { false, false }, new boolean[] { false, false });
    Projector projector = visitor.create(projection, txnCtx, RamAccounting.NO_ACCOUNTING, memoryManager, UUID.randomUUID());
    assertThat(projector, instanceOf(SortingTopNProjector.class));
}

Example 48 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class ProjectionToProjectorVisitorTest method testGroupProjector.

@Test
public void testGroupProjector() throws Exception {
    // in(0)  in(1)      in(0),      in(2)
    // select  race, avg(age), count(race), gender  ... group by race, gender
    List<Symbol> keys = Arrays.asList(new InputColumn(0, DataTypes.STRING), new InputColumn(2, DataTypes.STRING));
    List<Aggregation> aggregations = Arrays.asList(new Aggregation(avgSignature, avgSignature.getReturnType().createType(), Collections.singletonList(new InputColumn(1))), new Aggregation(CountAggregation.SIGNATURE, CountAggregation.SIGNATURE.getReturnType().createType(), Collections.singletonList(new InputColumn(0))));
    GroupProjection projection = new GroupProjection(keys, aggregations, AggregateMode.ITER_FINAL, RowGranularity.CLUSTER);
    Projector projector = visitor.create(projection, txnCtx, RamAccounting.NO_ACCOUNTING, memoryManager, UUID.randomUUID());
    assertThat(projector, instanceOf(GroupingProjector.class));
    // use a topN projection in order to get sorted outputs
    List<Symbol> outputs = Arrays.asList(new InputColumn(0, DataTypes.STRING), new InputColumn(1, DataTypes.STRING), new InputColumn(2, DataTypes.DOUBLE), new InputColumn(3, DataTypes.LONG));
    OrderedTopNProjection topNProjection = new OrderedTopNProjection(10, 0, outputs, List.of(new InputColumn(2, DataTypes.DOUBLE)), new boolean[] { false }, new boolean[] { false });
    Projector topNProjector = visitor.create(topNProjection, txnCtx, RamAccounting.NO_ACCOUNTING, memoryManager, UUID.randomUUID());
    String human = "human";
    String vogon = "vogon";
    String male = "male";
    String female = "female";
    List<Object[]> rows = new ArrayList<>();
    rows.add($(human, 34, male));
    rows.add($(human, 22, female));
    rows.add($(vogon, 40, male));
    rows.add($(vogon, 48, male));
    rows.add($(human, 34, male));
    BatchIterator<Row> batchIterator = topNProjector.apply(projector.apply(InMemoryBatchIterator.of(new CollectionBucket(rows), SENTINEL, true)));
    TestingRowConsumer consumer = new TestingRowConsumer();
    consumer.accept(batchIterator, null);
    Bucket bucket = consumer.getBucket();
    assertThat(bucket, contains(isRow(human, female, 22.0, 1L), isRow(human, male, 34.0, 2L), isRow(vogon, male, 44.0, 2L)));
}

Also used : Projector(io.crate.data.Projector) SortingProjector(io.crate.execution.engine.sort.SortingProjector) SortingTopNProjector(io.crate.execution.engine.sort.SortingTopNProjector) GroupingProjector(io.crate.execution.engine.aggregation.GroupingProjector) Symbol(io.crate.expression.symbol.Symbol) ArrayList(java.util.ArrayList) Aggregation(io.crate.expression.symbol.Aggregation) CountAggregation(io.crate.execution.engine.aggregation.impl.CountAggregation) Bucket(io.crate.data.Bucket) CollectionBucket(io.crate.data.CollectionBucket) InputColumn(io.crate.expression.symbol.InputColumn) GroupingProjector(io.crate.execution.engine.aggregation.GroupingProjector) Row(io.crate.data.Row) TestingHelpers.isRow(io.crate.testing.TestingHelpers.isRow) GroupProjection(io.crate.execution.dsl.projection.GroupProjection) OrderedTopNProjection(io.crate.execution.dsl.projection.OrderedTopNProjection) CollectionBucket(io.crate.data.CollectionBucket) TestingRowConsumer(io.crate.testing.TestingRowConsumer) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Example 49 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class IndexWriterProjectorTest method testIndexWriter.

@Test
public void testIndexWriter() throws Throwable {
    execute("create table bulk_import (id int primary key, name string) with (number_of_replicas=0)");
    ensureGreen();
    InputCollectExpression sourceInput = new InputCollectExpression(1);
    List<CollectExpression<Row, ?>> collectExpressions = Collections.<CollectExpression<Row, ?>>singletonList(sourceInput);
    RelationName bulkImportIdent = new RelationName(sqlExecutor.getCurrentSchema(), "bulk_import");
    ClusterState state = clusterService().state();
    Settings tableSettings = TableSettingsResolver.get(state.getMetadata(), bulkImportIdent, false);
    ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class);
    IndexWriterProjector writerProjector = new IndexWriterProjector(clusterService(), new NodeLimits(new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), new NoopCircuitBreaker("dummy"), RamAccounting.NO_ACCOUNTING, threadPool.scheduler(), threadPool.executor(ThreadPool.Names.SEARCH), CoordinatorTxnCtx.systemTransactionContext(), new NodeContext(internalCluster().getInstance(Functions.class)), Settings.EMPTY, IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.get(tableSettings), NumberOfReplicas.fromSettings(tableSettings, state.getNodes().getSize()), internalCluster().getInstance(TransportCreatePartitionsAction.class), internalCluster().getInstance(TransportShardUpsertAction.class)::execute, IndexNameResolver.forTable(bulkImportIdent), new Reference(new ReferenceIdent(bulkImportIdent, DocSysColumns.RAW), RowGranularity.DOC, DataTypes.STRING, 0, null), Collections.singletonList(ID_IDENT), Collections.<Symbol>singletonList(new InputColumn(0)), null, null, sourceInput, collectExpressions, 20, null, null, false, false, UUID.randomUUID(), UpsertResultContext.forRowCount(), false);
    BatchIterator rowsIterator = InMemoryBatchIterator.of(IntStream.range(0, 100).mapToObj(i -> new RowN(new Object[] { i, "{\"id\": " + i + ", \"name\": \"Arthur\"}" })).collect(Collectors.toList()), SENTINEL, true);
    TestingRowConsumer consumer = new TestingRowConsumer();
    consumer.accept(writerProjector.apply(rowsIterator), null);
    Bucket objects = consumer.getBucket();
    assertThat(objects, contains(isRow(100L)));
    execute("refresh table bulk_import");
    execute("select count(*) from bulk_import");
    assertThat(response.rowCount(), is(1L));
    assertThat(response.rows()[0][0], is(100L));
}

Also used : TransportCreatePartitionsAction(org.elasticsearch.action.admin.indices.create.TransportCreatePartitionsAction) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) NodeContext(io.crate.metadata.NodeContext) Reference(io.crate.metadata.Reference) ThreadPool(org.elasticsearch.threadpool.ThreadPool) BatchIterator(io.crate.data.BatchIterator) InMemoryBatchIterator(io.crate.data.InMemoryBatchIterator) CollectExpression(io.crate.execution.engine.collect.CollectExpression) InputCollectExpression(io.crate.execution.engine.collect.InputCollectExpression) ReferenceIdent(io.crate.metadata.ReferenceIdent) RowN(io.crate.data.RowN) InputCollectExpression(io.crate.execution.engine.collect.InputCollectExpression) Bucket(io.crate.data.Bucket) InputColumn(io.crate.expression.symbol.InputColumn) NodeLimits(io.crate.execution.jobs.NodeLimits) RelationName(io.crate.metadata.RelationName) NoopCircuitBreaker(org.elasticsearch.common.breaker.NoopCircuitBreaker) Settings(org.elasticsearch.common.settings.Settings) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) TestingRowConsumer(io.crate.testing.TestingRowConsumer) Test(org.junit.Test)

Example 50 with InputColumn

use of io.crate.expression.symbol.InputColumn in project crate by crate.

the class IndexWriterProjectorUnitTest method testNullPKValue.

@Test
public void testNullPKValue() throws Throwable {
    InputCollectExpression sourceInput = new InputCollectExpression(0);
    List<CollectExpression<Row, ?>> collectExpressions = Collections.<CollectExpression<Row, ?>>singletonList(sourceInput);
    TransportCreatePartitionsAction transportCreatePartitionsAction = mock(TransportCreatePartitionsAction.class);
    IndexWriterProjector indexWriter = new IndexWriterProjector(clusterService, new NodeLimits(new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), new NoopCircuitBreaker("dummy"), RamAccounting.NO_ACCOUNTING, scheduler, executor, CoordinatorTxnCtx.systemTransactionContext(), createNodeContext(), Settings.EMPTY, 5, 1, transportCreatePartitionsAction, (request, listener) -> {
    }, IndexNameResolver.forTable(BULK_IMPORT_IDENT), RAW_SOURCE_REFERENCE, Collections.singletonList(ID_IDENT), Collections.<Symbol>singletonList(new InputColumn(1)), null, null, sourceInput, collectExpressions, 20, null, null, false, false, UUID.randomUUID(), UpsertResultContext.forRowCount(), false);
    RowN rowN = new RowN(new Object[] { new BytesRef("{\"y\": \"x\"}"), null });
    BatchIterator<Row> batchIterator = InMemoryBatchIterator.of(Collections.singletonList(rowN), SENTINEL, true);
    batchIterator = indexWriter.apply(batchIterator);
    TestingRowConsumer testingBatchConsumer = new TestingRowConsumer();
    testingBatchConsumer.accept(batchIterator, null);
    List<Object[]> result = testingBatchConsumer.getResult();
    // Zero affected rows as a NULL as a PK value will result in an exception.
    // It must never bubble up as other rows might already have been written.
    assertThat(result.get(0)[0], is(0L));
}

Also used : TransportCreatePartitionsAction(org.elasticsearch.action.admin.indices.create.TransportCreatePartitionsAction) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) CollectExpression(io.crate.execution.engine.collect.CollectExpression) InputCollectExpression(io.crate.execution.engine.collect.InputCollectExpression) RowN(io.crate.data.RowN) InputCollectExpression(io.crate.execution.engine.collect.InputCollectExpression) InputColumn(io.crate.expression.symbol.InputColumn) NodeLimits(io.crate.execution.jobs.NodeLimits) Row(io.crate.data.Row) NoopCircuitBreaker(org.elasticsearch.common.breaker.NoopCircuitBreaker) BytesRef(org.apache.lucene.util.BytesRef) TestingRowConsumer(io.crate.testing.TestingRowConsumer) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Aggregations

InputColumn (io.crate.expression.symbol.InputColumn)61 Test (org.junit.Test)47 Symbol (io.crate.expression.symbol.Symbol)38 CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest)25 Reference (io.crate.metadata.Reference)15 BytesStreamOutput (org.elasticsearch.common.io.stream.BytesStreamOutput)11 StreamInput (org.elasticsearch.common.io.stream.StreamInput)11 MergePhase (io.crate.execution.dsl.phases.MergePhase)10 GroupProjection (io.crate.execution.dsl.projection.GroupProjection)10 FilterProjection (io.crate.execution.dsl.projection.FilterProjection)9 OrderedTopNProjection (io.crate.execution.dsl.projection.OrderedTopNProjection)9 Aggregation (io.crate.expression.symbol.Aggregation)9 Function (io.crate.expression.symbol.Function)9 ArrayList (java.util.ArrayList)9 TopNProjection (io.crate.execution.dsl.projection.TopNProjection)8 RandomizedTest (com.carrotsearch.randomizedtesting.RandomizedTest)7 Row (io.crate.data.Row)7 EvalProjection (io.crate.execution.dsl.projection.EvalProjection)7 Projection (io.crate.execution.dsl.projection.Projection)7 CountAggregation (io.crate.execution.engine.aggregation.impl.CountAggregation)7