use of io.crate.execution.engine.indexing.GroupRowsByShard in project crate by crate.
the class InsertFromValues method executeBulk.
@Override
public List<CompletableFuture<Long>> executeBulk(DependencyCarrier dependencies, PlannerContext plannerContext, List<Row> bulkParams, SubQueryResults subQueryResults) {
DocTableInfo tableInfo = dependencies.schemas().getTableInfo(writerProjection.tableIdent(), Operation.INSERT);
String[] updateColumnNames;
Assignments assignments;
if (writerProjection.onDuplicateKeyAssignments() == null) {
assignments = null;
updateColumnNames = null;
} else {
assignments = Assignments.convert(writerProjection.onDuplicateKeyAssignments(), dependencies.nodeContext());
updateColumnNames = assignments.targetNames();
}
InputFactory inputFactory = new InputFactory(dependencies.nodeContext());
InputFactory.Context<CollectExpression<Row, ?>> context = inputFactory.ctxForInputColumns(plannerContext.transactionContext());
var allColumnSymbols = InputColumns.create(writerProjection.allTargetColumns(), new InputColumns.SourceSymbols(writerProjection.allTargetColumns()));
ArrayList<Input<?>> insertInputs = new ArrayList<>(allColumnSymbols.size());
for (Symbol symbol : allColumnSymbols) {
insertInputs.add(context.add(symbol));
}
ArrayList<Input<?>> partitionedByInputs = new ArrayList<>(writerProjection.partitionedBySymbols().size());
for (Symbol partitionedBySymbol : writerProjection.partitionedBySymbols()) {
partitionedByInputs.add(context.add(partitionedBySymbol));
}
ArrayList<Input<?>> primaryKeyInputs = new ArrayList<>(writerProjection.ids().size());
for (Symbol symbol : writerProjection.ids()) {
primaryKeyInputs.add(context.add(symbol));
}
Input<?> clusterByInput;
if (writerProjection.clusteredBy() != null) {
clusterByInput = context.add(writerProjection.clusteredBy());
} else {
clusterByInput = null;
}
var indexNameResolver = IndexNameResolver.create(writerProjection.tableIdent(), writerProjection.partitionIdent(), partitionedByInputs);
ShardUpsertRequest.Builder builder = new ShardUpsertRequest.Builder(plannerContext.transactionContext().sessionSettings(), BULK_REQUEST_TIMEOUT_SETTING.get(dependencies.settings()), writerProjection.isIgnoreDuplicateKeys() ? ShardUpsertRequest.DuplicateKeyAction.IGNORE : ShardUpsertRequest.DuplicateKeyAction.UPDATE_OR_FAIL, // continueOnErrors
true, updateColumnNames, writerProjection.allTargetColumns().toArray(new Reference[0]), null, plannerContext.jobId(), true);
var shardedRequests = new ShardedRequests<>(builder::newRequest, RamAccounting.NO_ACCOUNTING);
HashMap<String, InsertSourceFromCells> validatorsCache = new HashMap<>();
IntArrayList bulkIndices = new IntArrayList();
List<CompletableFuture<Long>> results = createUnsetFutures(bulkParams.size());
for (int bulkIdx = 0; bulkIdx < bulkParams.size(); bulkIdx++) {
Row param = bulkParams.get(bulkIdx);
final Symbol[] assignmentSources;
if (assignments != null) {
assignmentSources = assignments.bindSources(tableInfo, param, subQueryResults);
} else {
assignmentSources = null;
}
GroupRowsByShard<ShardUpsertRequest, ShardUpsertRequest.Item> grouper = createRowsByShardGrouper(assignmentSources, insertInputs, indexNameResolver, context, plannerContext, dependencies.clusterService());
try {
Iterator<Row> rows = evaluateValueTableFunction(tableFunctionRelation.functionImplementation(), tableFunctionRelation.function().arguments(), writerProjection.allTargetColumns(), tableInfo, param, plannerContext, subQueryResults);
while (rows.hasNext()) {
Row row = rows.next();
grouper.accept(shardedRequests, row);
checkPrimaryKeyValuesNotNull(primaryKeyInputs);
checkClusterByValueNotNull(clusterByInput);
checkConstraintsOnGeneratedSource(row.materialize(), indexNameResolver.get(), tableInfo, plannerContext, validatorsCache);
bulkIndices.add(bulkIdx);
}
} catch (Throwable t) {
for (CompletableFuture<Long> result : results) {
result.completeExceptionally(t);
}
return results;
}
}
validatorsCache.clear();
var actionProvider = dependencies.transportActionProvider();
createIndices(actionProvider.transportBulkCreateIndicesAction(), shardedRequests.itemsByMissingIndex().keySet(), dependencies.clusterService(), plannerContext.jobId()).thenCompose(acknowledgedResponse -> {
var shardUpsertRequests = resolveAndGroupShardRequests(shardedRequests, dependencies.clusterService()).values();
return execute(dependencies.nodeLimits(), dependencies.clusterService().state(), shardUpsertRequests, actionProvider.transportShardUpsertAction(), dependencies.scheduler());
}).whenComplete((response, t) -> {
if (t == null) {
long[] resultRowCount = createBulkResponse(response, bulkParams.size(), bulkIndices);
for (int i = 0; i < bulkParams.size(); i++) {
results.get(i).complete(resultRowCount[i]);
}
} else {
for (CompletableFuture<Long> result : results) {
result.completeExceptionally(t);
}
}
});
return results;
}
use of io.crate.execution.engine.indexing.GroupRowsByShard in project crate by crate.
the class InsertFromValues method execute.
@Override
public void execute(DependencyCarrier dependencies, PlannerContext plannerContext, RowConsumer consumer, Row params, SubQueryResults subQueryResults) {
DocTableInfo tableInfo = dependencies.schemas().getTableInfo(writerProjection.tableIdent(), Operation.INSERT);
// For instance, the target table of the insert from values
// statement is the table with the following schema:
//
// CREATE TABLE users (
// dep_id TEXT,
// name TEXT,
// id INT,
// country_id INT,
// PRIMARY KEY (dep_id, id, country_id))
// CLUSTERED BY (dep_id)
// PARTITIONED BY (country_id)
//
// The insert from values statement below would have the column
// index writer projection of its plan that contains the column
// idents and symbols required to create corresponding inputs.
// The diagram below shows the projection's column symbols used
// in the plan and relation between symbols sub-/sets.
//
// +------------------------+
// | +-------------+ PK symbols
// cluster by +------+ | | +------+
// symbol | | | |
// + + + +
// INSERT INTO users (dep_id, name, id, country_id) VALUES (?, ?, ?, ?)
// + + + + +
// +-------+ | | | |
// all target +--------------+ | | +---+ partitioned by
// column +-------------------+ | symbols
// symbols +-------------------------+
InputFactory inputFactory = new InputFactory(dependencies.nodeContext());
InputFactory.Context<CollectExpression<Row, ?>> context = inputFactory.ctxForInputColumns(plannerContext.transactionContext());
var allColumnSymbols = InputColumns.create(writerProjection.allTargetColumns(), new InputColumns.SourceSymbols(writerProjection.allTargetColumns()));
ArrayList<Input<?>> insertInputs = new ArrayList<>(allColumnSymbols.size());
for (Symbol symbol : allColumnSymbols) {
insertInputs.add(context.add(symbol));
}
ArrayList<Input<?>> partitionedByInputs = new ArrayList<>(writerProjection.partitionedBySymbols().size());
for (Symbol partitionedBySymbol : writerProjection.partitionedBySymbols()) {
partitionedByInputs.add(context.add(partitionedBySymbol));
}
ArrayList<Input<?>> primaryKeyInputs = new ArrayList<>(writerProjection.ids().size());
for (Symbol symbol : writerProjection.ids()) {
primaryKeyInputs.add(context.add(symbol));
}
Input<?> clusterByInput;
if (writerProjection.clusteredBy() != null) {
clusterByInput = context.add(writerProjection.clusteredBy());
} else {
clusterByInput = null;
}
String[] updateColumnNames;
Symbol[] assignmentSources;
if (writerProjection.onDuplicateKeyAssignments() == null) {
updateColumnNames = null;
assignmentSources = null;
} else {
Assignments assignments = Assignments.convert(writerProjection.onDuplicateKeyAssignments(), dependencies.nodeContext());
assignmentSources = assignments.bindSources(tableInfo, params, subQueryResults);
updateColumnNames = assignments.targetNames();
}
var indexNameResolver = IndexNameResolver.create(writerProjection.tableIdent(), writerProjection.partitionIdent(), partitionedByInputs);
GroupRowsByShard<ShardUpsertRequest, ShardUpsertRequest.Item> grouper = createRowsByShardGrouper(assignmentSources, insertInputs, indexNameResolver, context, plannerContext, dependencies.clusterService());
ArrayList<Row> rows = new ArrayList<>();
evaluateValueTableFunction(tableFunctionRelation.functionImplementation(), tableFunctionRelation.function().arguments(), writerProjection.allTargetColumns(), tableInfo, params, plannerContext, subQueryResults).forEachRemaining(rows::add);
List<Symbol> returnValues = this.writerProjection.returnValues();
ShardUpsertRequest.Builder builder = new ShardUpsertRequest.Builder(plannerContext.transactionContext().sessionSettings(), BULK_REQUEST_TIMEOUT_SETTING.get(dependencies.settings()), writerProjection.isIgnoreDuplicateKeys() ? ShardUpsertRequest.DuplicateKeyAction.IGNORE : ShardUpsertRequest.DuplicateKeyAction.UPDATE_OR_FAIL, // continueOnErrors
rows.size() > 1, updateColumnNames, writerProjection.allTargetColumns().toArray(new Reference[0]), returnValues.isEmpty() ? null : returnValues.toArray(new Symbol[0]), plannerContext.jobId(), false);
var shardedRequests = new ShardedRequests<>(builder::newRequest, RamAccounting.NO_ACCOUNTING);
HashMap<String, InsertSourceFromCells> validatorsCache = new HashMap<>();
for (Row row : rows) {
grouper.accept(shardedRequests, row);
try {
checkPrimaryKeyValuesNotNull(primaryKeyInputs);
checkClusterByValueNotNull(clusterByInput);
checkConstraintsOnGeneratedSource(row.materialize(), indexNameResolver.get(), tableInfo, plannerContext, validatorsCache);
} catch (Throwable t) {
consumer.accept(null, t);
return;
}
}
validatorsCache.clear();
var actionProvider = dependencies.transportActionProvider();
createIndices(actionProvider.transportBulkCreateIndicesAction(), shardedRequests.itemsByMissingIndex().keySet(), dependencies.clusterService(), plannerContext.jobId()).thenCompose(acknowledgedResponse -> {
var shardUpsertRequests = resolveAndGroupShardRequests(shardedRequests, dependencies.clusterService()).values();
return execute(dependencies.nodeLimits(), dependencies.clusterService().state(), shardUpsertRequests, actionProvider.transportShardUpsertAction(), dependencies.scheduler());
}).whenComplete((response, t) -> {
if (t == null) {
if (returnValues.isEmpty()) {
consumer.accept(InMemoryBatchIterator.of(new Row1((long) response.numSuccessfulWrites()), SENTINEL), null);
} else {
consumer.accept(InMemoryBatchIterator.of(new CollectionBucket(response.resultRows()), SENTINEL, false), null);
}
} else {
consumer.accept(null, t);
}
});
}
use of io.crate.execution.engine.indexing.GroupRowsByShard in project crate by crate.
the class InsertFromValues method createRowsByShardGrouper.
private GroupRowsByShard<ShardUpsertRequest, ShardUpsertRequest.Item> createRowsByShardGrouper(Symbol[] assignmentSources, ArrayList<Input<?>> insertInputs, Supplier<String> indexNameResolver, InputFactory.Context<CollectExpression<Row, ?>> collectContext, PlannerContext plannerContext, ClusterService clusterService) {
InputRow insertValues = new InputRow(insertInputs);
Function<String, ShardUpsertRequest.Item> itemFactory = id -> new ShardUpsertRequest.Item(id, assignmentSources, insertValues.materialize(), null, null, null);
var rowShardResolver = new RowShardResolver(plannerContext.transactionContext(), plannerContext.nodeContext(), writerProjection.primaryKeys(), writerProjection.ids(), writerProjection.clusteredByIdent(), writerProjection.clusteredBy());
return new GroupRowsByShard<>(clusterService, rowShardResolver, new TypeGuessEstimateRowSize(), indexNameResolver, collectContext.expressions(), itemFactory, true);
}
Aggregations