Use of io.crate.expression.symbol.InputColumn in project crate by crate.
In class CopyFromPlan, the method planCopyFromExecution:
public static ExecutionPlan planCopyFromExecution(AnalyzedCopyFrom copyFrom,
                                                  DiscoveryNodes allNodes,
                                                  PlannerContext context,
                                                  Row params,
                                                  SubQueryResults subQueryResults) {
    var boundedCopyFrom = bind(
        copyFrom,
        context.transactionContext(),
        context.nodeContext(),
        params,
        subQueryResults);

    /*
     * Create a plan that reads json-objects-lines from a file
     * and then executes upsert requests to index the data
     */
    DocTableInfo table = boundedCopyFrom.tableInfo();
    String partitionIdent = boundedCopyFrom.partitionIdent();
    List<String> partitionedByNames = Collections.emptyList();
    List<String> partitionValues = Collections.emptyList();
    if (partitionIdent == null) {
        if (table.isPartitioned()) {
            partitionedByNames = Lists2.map(table.partitionedBy(), ColumnIdent::fqn);
        }
    } else {
        assert table.isPartitioned() : "table must be partitioned if partitionIdent is set";
        // partitionIdent is present -> possible to index raw source into concrete es index
        partitionValues = PartitionName.decodeIdent(partitionIdent);
    }

    // need to exclude _id columns; they're auto generated and won't be available in the files being imported
    ColumnIdent clusteredBy = table.clusteredBy();
    if (DocSysColumns.ID.equals(clusteredBy)) {
        clusteredBy = null;
    }
    List<Reference> primaryKeyRefs = table.primaryKey().stream()
        .filter(r -> !r.equals(DocSysColumns.ID))
        .map(table::getReference)
        .collect(Collectors.toList());

    List<Symbol> toCollect = getSymbolsRequiredForShardIdCalc(
        primaryKeyRefs,
        table.partitionedByColumns(),
        clusteredBy == null ? null : table.getReference(clusteredBy));
    Reference rawOrDoc = rawOrDoc(table, partitionIdent);
    final int rawOrDocIdx = toCollect.size();
    toCollect.add(rawOrDoc);
    String[] excludes = partitionedByNames.size() > 0
        ? partitionedByNames.toArray(new String[0])
        : null;

    InputColumns.SourceSymbols sourceSymbols = new InputColumns.SourceSymbols(toCollect);
    Symbol clusteredByInputCol = null;
    if (clusteredBy != null) {
        clusteredByInputCol = InputColumns.create(table.getReference(clusteredBy), sourceSymbols);
    }

    SourceIndexWriterProjection sourceIndexWriterProjection;
    List<? extends Symbol> projectionOutputs = AbstractIndexWriterProjection.OUTPUTS;
    boolean returnSummary = copyFrom instanceof AnalyzedCopyFromReturnSummary;
    boolean failFast = boundedCopyFrom.settings().getAsBoolean("fail_fast", false);
    if (returnSummary || failFast) {
        final InputColumn sourceUriSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriExpression.getReferenceForRelation(table.ident()));

        final InputColumn sourceUriFailureSymbol = new InputColumn(toCollect.size(), DataTypes.STRING);
        toCollect.add(SourceUriFailureExpression.getReferenceForRelation(table.ident()));

        final InputColumn lineNumberSymbol = new InputColumn(toCollect.size(), DataTypes.LONG);
        toCollect.add(SourceLineNumberExpression.getReferenceForRelation(table.ident()));

        if (returnSummary) {
            List<? extends Symbol> fields = ((AnalyzedCopyFromReturnSummary) copyFrom).outputs();
            projectionOutputs = InputColumns.create(fields, new InputColumns.SourceSymbols(fields));
        }
        sourceIndexWriterProjection = new SourceIndexWriterReturnSummaryProjection(
            table.ident(),
            partitionIdent,
            table.getReference(DocSysColumns.RAW),
            new InputColumn(rawOrDocIdx, rawOrDoc.valueType()),
            table.primaryKey(),
            InputColumns.create(table.partitionedByColumns(), sourceSymbols),
            clusteredBy,
            boundedCopyFrom.settings(),
            null,
            excludes,
            InputColumns.create(primaryKeyRefs, sourceSymbols),
            clusteredByInputCol,
            projectionOutputs,
            table.isPartitioned(), // autoCreateIndices
            sourceUriSymbol,
            sourceUriFailureSymbol,
            lineNumberSymbol);
    } else {
        sourceIndexWriterProjection = new SourceIndexWriterProjection(
            table.ident(),
            partitionIdent,
            table.getReference(DocSysColumns.RAW),
            new InputColumn(rawOrDocIdx, rawOrDoc.valueType()),
            table.primaryKey(),
            InputColumns.create(table.partitionedByColumns(), sourceSymbols),
            clusteredBy,
            boundedCopyFrom.settings(),
            null,
            excludes,
            InputColumns.create(primaryKeyRefs, sourceSymbols),
            clusteredByInputCol,
            projectionOutputs,
            table.isPartitioned()); // autoCreateIndices
    }

    // the partitionedBy-inputColumns created for the projection are still valid because the positions are not changed
    if (partitionValues != null) {
        rewriteToCollectToUsePartitionValues(table.partitionedByColumns(), partitionValues, toCollect);
    }

    FileUriCollectPhase collectPhase = new FileUriCollectPhase(
        context.jobId(),
        context.nextExecutionPhaseId(),
        "copyFrom",
        getExecutionNodes(
            allNodes,
            boundedCopyFrom.settings().getAsInt("num_readers", allNodes.getSize()),
            boundedCopyFrom.nodePredicate()),
        boundedCopyFrom.uri(),
        toCollect,
        Collections.emptyList(),
        boundedCopyFrom.settings().get("compression", null),
        boundedCopyFrom.settings().getAsBoolean("shared", null),
        CopyFromParserProperties.of(boundedCopyFrom.settings()),
        boundedCopyFrom.inputFormat(),
        boundedCopyFrom.settings());

    Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, -1, null);
    // add the projection to the plan to ensure that the outputs are correctly set to the projection outputs
    collect.addProjection(sourceIndexWriterProjection);

    List<Projection> handlerProjections;
    if (returnSummary) {
        handlerProjections = Collections.emptyList();
    } else {
        handlerProjections = List.of(MergeCountProjection.INSTANCE);
    }
    return Merge.ensureOnHandler(collect, context, handlerProjections);
}
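The InputColumn usage above is purely positional: each InputColumn(i, type) refers to position i of the rows the collect phase produces, which is why rawOrDocIdx is captured before the raw/_doc reference is appended to toCollect, and why the source-uri, failure and line-number columns can simply be appended afterwards. A minimal sketch of that positional indexing follows; the class name InputColumnPositionSketch and the literal stand-ins are made up for illustration, and the import paths are assumed from the usual crate package layout:

import java.util.Arrays;
import java.util.List;

import io.crate.expression.symbol.InputColumn;
import io.crate.expression.symbol.Literal;
import io.crate.expression.symbol.Symbol;
import io.crate.types.DataTypes;

public class InputColumnPositionSketch {

    public static void main(String[] args) {
        // Stand-ins for the symbols a collect phase would emit (toCollect above);
        // in the real plan these are Reference symbols, not literals.
        List<Symbol> collected = Arrays.asList(Literal.of("id"), Literal.of("{\"id\": 1}"));

        // rawOrDocIdx is recorded before appending further columns, so the projection
        // reads the raw source from this fixed position of every incoming row.
        int rawOrDocIdx = collected.size() - 1;
        Symbol rawInput = new InputColumn(rawOrDocIdx, DataTypes.STRING);

        System.out.println(rawInput);
    }
}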
Use of io.crate.expression.symbol.InputColumn in project crate by crate.
In class ProjectionToProjectorVisitorTest, the method testSortingTopNProjection:
@Test
public void testSortingTopNProjection() throws Exception {
    List<Symbol> outputs = Arrays.asList(Literal.of("foo"), new InputColumn(0), new InputColumn(1));
    OrderedTopNProjection projection = new OrderedTopNProjection(
        10,
        0,
        outputs,
        Arrays.asList(new InputColumn(0), new InputColumn(1)),
        new boolean[] { false, false },
        new boolean[] { false, false });
    Projector projector = visitor.create(
        projection, txnCtx, RamAccounting.NO_ACCOUNTING, memoryManager, UUID.randomUUID());
    assertThat(projector, instanceOf(SortingTopNProjector.class));
}
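Note that the same InputColumn(0) and InputColumn(1) symbols appear twice here: as the second and third output columns (after the constant literal) and as the ordering keys. The two boolean arrays hold one flag per ordering key and, judging by the parameter order used in these tests, configure the sort direction and null ordering; the exact parameter names are not shown in this snippet.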
Use of io.crate.expression.symbol.InputColumn in project crate by crate.
In class ProjectionToProjectorVisitorTest, the method testGroupProjector:
@Test
public void testGroupProjector() throws Exception {
    //         in(0)  in(1)      in(0)       in(2)
    // select  race, avg(age), count(race), gender ... group by race, gender
    List<Symbol> keys = Arrays.asList(
        new InputColumn(0, DataTypes.STRING),
        new InputColumn(2, DataTypes.STRING));
    List<Aggregation> aggregations = Arrays.asList(
        new Aggregation(
            avgSignature,
            avgSignature.getReturnType().createType(),
            Collections.singletonList(new InputColumn(1))),
        new Aggregation(
            CountAggregation.SIGNATURE,
            CountAggregation.SIGNATURE.getReturnType().createType(),
            Collections.singletonList(new InputColumn(0))));
    GroupProjection projection = new GroupProjection(
        keys, aggregations, AggregateMode.ITER_FINAL, RowGranularity.CLUSTER);
    Projector projector = visitor.create(
        projection, txnCtx, RamAccounting.NO_ACCOUNTING, memoryManager, UUID.randomUUID());
    assertThat(projector, instanceOf(GroupingProjector.class));

    // use a topN projection in order to get sorted outputs
    List<Symbol> outputs = Arrays.asList(
        new InputColumn(0, DataTypes.STRING),
        new InputColumn(1, DataTypes.STRING),
        new InputColumn(2, DataTypes.DOUBLE),
        new InputColumn(3, DataTypes.LONG));
    OrderedTopNProjection topNProjection = new OrderedTopNProjection(
        10,
        0,
        outputs,
        List.of(new InputColumn(2, DataTypes.DOUBLE)),
        new boolean[] { false },
        new boolean[] { false });
    Projector topNProjector = visitor.create(
        topNProjection, txnCtx, RamAccounting.NO_ACCOUNTING, memoryManager, UUID.randomUUID());

    String human = "human";
    String vogon = "vogon";
    String male = "male";
    String female = "female";
    List<Object[]> rows = new ArrayList<>();
    rows.add($(human, 34, male));
    rows.add($(human, 22, female));
    rows.add($(vogon, 40, male));
    rows.add($(vogon, 48, male));
    rows.add($(human, 34, male));

    BatchIterator<Row> batchIterator = topNProjector.apply(
        projector.apply(InMemoryBatchIterator.of(new CollectionBucket(rows), SENTINEL, true)));
    TestingRowConsumer consumer = new TestingRowConsumer();
    consumer.accept(batchIterator, null);
    Bucket bucket = consumer.getBucket();
    assertThat(bucket, contains(
        isRow(human, female, 22.0, 1L),
        isRow(human, male, 34.0, 2L),
        isRow(vogon, male, 44.0, 2L)));
}
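Grouping the five input rows by (race, gender) yields three groups: (human, female) with a single age of 22, (human, male) with ages 34 and 34, and (vogon, male) with ages 40 and 48, hence the expected averages 22.0, 34.0 and 44.0 with counts 1, 2 and 2. The OrderedTopNProjection then sorts on InputColumn(2), the average column, which produces exactly the row order asserted at the end of the test.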
Use of io.crate.expression.symbol.InputColumn in project crate by crate.
In class IndexWriterProjectorTest, the method testIndexWriter:
@Test
public void testIndexWriter() throws Throwable {
    execute("create table bulk_import (id int primary key, name string) with (number_of_replicas=0)");
    ensureGreen();
    InputCollectExpression sourceInput = new InputCollectExpression(1);
    List<CollectExpression<Row, ?>> collectExpressions = Collections.<CollectExpression<Row, ?>>singletonList(sourceInput);
    RelationName bulkImportIdent = new RelationName(sqlExecutor.getCurrentSchema(), "bulk_import");
    ClusterState state = clusterService().state();
    Settings tableSettings = TableSettingsResolver.get(state.getMetadata(), bulkImportIdent, false);
    ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class);
    IndexWriterProjector writerProjector = new IndexWriterProjector(
        clusterService(),
        new NodeLimits(new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
        new NoopCircuitBreaker("dummy"),
        RamAccounting.NO_ACCOUNTING,
        threadPool.scheduler(),
        threadPool.executor(ThreadPool.Names.SEARCH),
        CoordinatorTxnCtx.systemTransactionContext(),
        new NodeContext(internalCluster().getInstance(Functions.class)),
        Settings.EMPTY,
        IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.get(tableSettings),
        NumberOfReplicas.fromSettings(tableSettings, state.getNodes().getSize()),
        internalCluster().getInstance(TransportCreatePartitionsAction.class),
        internalCluster().getInstance(TransportShardUpsertAction.class)::execute,
        IndexNameResolver.forTable(bulkImportIdent),
        new Reference(new ReferenceIdent(bulkImportIdent, DocSysColumns.RAW), RowGranularity.DOC, DataTypes.STRING, 0, null),
        Collections.singletonList(ID_IDENT),
        Collections.<Symbol>singletonList(new InputColumn(0)),
        null,
        null,
        sourceInput,
        collectExpressions,
        20,
        null,
        null,
        false,
        false,
        UUID.randomUUID(),
        UpsertResultContext.forRowCount(),
        false);
    BatchIterator rowsIterator = InMemoryBatchIterator.of(
        IntStream.range(0, 100)
            .mapToObj(i -> new RowN(new Object[] { i, "{\"id\": " + i + ", \"name\": \"Arthur\"}" }))
            .collect(Collectors.toList()),
        SENTINEL,
        true);
    TestingRowConsumer consumer = new TestingRowConsumer();
    consumer.accept(writerProjector.apply(rowsIterator), null);
    Bucket objects = consumer.getBucket();
    assertThat(objects, contains(isRow(100L)));

    execute("refresh table bulk_import");
    execute("select count(*) from bulk_import");
    assertThat(response.rowCount(), is(1L));
    assertThat(response.rows()[0][0], is(100L));
}
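Here the InputColumn(0) passed as the primary-key symbol reads the generated id from the first column of each row, while InputCollectExpression(1) feeds the JSON string from the second column as the raw source. All 100 generated rows are indexed, so the projector emits a single summary row with the affected-row count of 100, which the subsequent SELECT confirms.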
Use of io.crate.expression.symbol.InputColumn in project crate by crate.
In class IndexWriterProjectorUnitTest, the method testNullPKValue:
@Test
public void testNullPKValue() throws Throwable {
    InputCollectExpression sourceInput = new InputCollectExpression(0);
    List<CollectExpression<Row, ?>> collectExpressions = Collections.<CollectExpression<Row, ?>>singletonList(sourceInput);
    TransportCreatePartitionsAction transportCreatePartitionsAction = mock(TransportCreatePartitionsAction.class);
    IndexWriterProjector indexWriter = new IndexWriterProjector(
        clusterService,
        new NodeLimits(new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
        new NoopCircuitBreaker("dummy"),
        RamAccounting.NO_ACCOUNTING,
        scheduler,
        executor,
        CoordinatorTxnCtx.systemTransactionContext(),
        createNodeContext(),
        Settings.EMPTY,
        5,
        1,
        transportCreatePartitionsAction,
        (request, listener) -> {},
        IndexNameResolver.forTable(BULK_IMPORT_IDENT),
        RAW_SOURCE_REFERENCE,
        Collections.singletonList(ID_IDENT),
        Collections.<Symbol>singletonList(new InputColumn(1)),
        null,
        null,
        sourceInput,
        collectExpressions,
        20,
        null,
        null,
        false,
        false,
        UUID.randomUUID(),
        UpsertResultContext.forRowCount(),
        false);
    RowN rowN = new RowN(new Object[] { new BytesRef("{\"y\": \"x\"}"), null });
    BatchIterator<Row> batchIterator = InMemoryBatchIterator.of(Collections.singletonList(rowN), SENTINEL, true);
    batchIterator = indexWriter.apply(batchIterator);
    TestingRowConsumer testingBatchConsumer = new TestingRowConsumer();
    testingBatchConsumer.accept(batchIterator, null);
    List<Object[]> result = testingBatchConsumer.getResult();
    // Zero affected rows: a NULL PK value results in an exception.
    // It must never bubble up as other rows might already have been written.
    assertThat(result.get(0)[0], is(0L));
}
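In this test the primary-key symbol is InputColumn(1), pointing at the second value of the test row, which is deliberately null. The upsert for that row therefore fails, and the projector reports 0 affected rows instead of propagating the exception, as the comments above explain.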