Use of io.crate.execution.dml.upsert.ShardUpsertRequest in project crate by crate.
The class InsertFromValues, method executeBulk:
@Override
public List<CompletableFuture<Long>> executeBulk(DependencyCarrier dependencies,
                                                 PlannerContext plannerContext,
                                                 List<Row> bulkParams,
                                                 SubQueryResults subQueryResults) {
    DocTableInfo tableInfo = dependencies.schemas()
        .getTableInfo(writerProjection.tableIdent(), Operation.INSERT);
    String[] updateColumnNames;
    Assignments assignments;
    if (writerProjection.onDuplicateKeyAssignments() == null) {
        assignments = null;
        updateColumnNames = null;
    } else {
        assignments = Assignments.convert(writerProjection.onDuplicateKeyAssignments(), dependencies.nodeContext());
        updateColumnNames = assignments.targetNames();
    }
    InputFactory inputFactory = new InputFactory(dependencies.nodeContext());
    InputFactory.Context<CollectExpression<Row, ?>> context =
        inputFactory.ctxForInputColumns(plannerContext.transactionContext());
    var allColumnSymbols = InputColumns.create(
        writerProjection.allTargetColumns(),
        new InputColumns.SourceSymbols(writerProjection.allTargetColumns()));
    ArrayList<Input<?>> insertInputs = new ArrayList<>(allColumnSymbols.size());
    for (Symbol symbol : allColumnSymbols) {
        insertInputs.add(context.add(symbol));
    }
    ArrayList<Input<?>> partitionedByInputs = new ArrayList<>(writerProjection.partitionedBySymbols().size());
    for (Symbol partitionedBySymbol : writerProjection.partitionedBySymbols()) {
        partitionedByInputs.add(context.add(partitionedBySymbol));
    }
    ArrayList<Input<?>> primaryKeyInputs = new ArrayList<>(writerProjection.ids().size());
    for (Symbol symbol : writerProjection.ids()) {
        primaryKeyInputs.add(context.add(symbol));
    }
    Input<?> clusterByInput;
    if (writerProjection.clusteredBy() != null) {
        clusterByInput = context.add(writerProjection.clusteredBy());
    } else {
        clusterByInput = null;
    }
    var indexNameResolver = IndexNameResolver.create(
        writerProjection.tableIdent(),
        writerProjection.partitionIdent(),
        partitionedByInputs);
    ShardUpsertRequest.Builder builder = new ShardUpsertRequest.Builder(
        plannerContext.transactionContext().sessionSettings(),
        BULK_REQUEST_TIMEOUT_SETTING.get(dependencies.settings()),
        writerProjection.isIgnoreDuplicateKeys()
            ? ShardUpsertRequest.DuplicateKeyAction.IGNORE
            : ShardUpsertRequest.DuplicateKeyAction.UPDATE_OR_FAIL,
        true, // continueOnErrors
        updateColumnNames,
        writerProjection.allTargetColumns().toArray(new Reference[0]),
        null,
        plannerContext.jobId(),
        true);
    var shardedRequests = new ShardedRequests<>(builder::newRequest, RamAccounting.NO_ACCOUNTING);
    HashMap<String, InsertSourceFromCells> validatorsCache = new HashMap<>();
    IntArrayList bulkIndices = new IntArrayList();
    List<CompletableFuture<Long>> results = createUnsetFutures(bulkParams.size());
    for (int bulkIdx = 0; bulkIdx < bulkParams.size(); bulkIdx++) {
        Row param = bulkParams.get(bulkIdx);
        final Symbol[] assignmentSources;
        if (assignments != null) {
            assignmentSources = assignments.bindSources(tableInfo, param, subQueryResults);
        } else {
            assignmentSources = null;
        }
        GroupRowsByShard<ShardUpsertRequest, ShardUpsertRequest.Item> grouper =
            createRowsByShardGrouper(assignmentSources, insertInputs, indexNameResolver, context, plannerContext, dependencies.clusterService());
        try {
            Iterator<Row> rows = evaluateValueTableFunction(
                tableFunctionRelation.functionImplementation(),
                tableFunctionRelation.function().arguments(),
                writerProjection.allTargetColumns(),
                tableInfo,
                param,
                plannerContext,
                subQueryResults);
            while (rows.hasNext()) {
                Row row = rows.next();
                grouper.accept(shardedRequests, row);
                checkPrimaryKeyValuesNotNull(primaryKeyInputs);
                checkClusterByValueNotNull(clusterByInput);
                checkConstraintsOnGeneratedSource(row.materialize(), indexNameResolver.get(), tableInfo, plannerContext, validatorsCache);
                bulkIndices.add(bulkIdx);
            }
        } catch (Throwable t) {
            for (CompletableFuture<Long> result : results) {
                result.completeExceptionally(t);
            }
            return results;
        }
    }
    validatorsCache.clear();
    var actionProvider = dependencies.transportActionProvider();
    createIndices(
        actionProvider.transportBulkCreateIndicesAction(),
        shardedRequests.itemsByMissingIndex().keySet(),
        dependencies.clusterService(),
        plannerContext.jobId()
    ).thenCompose(acknowledgedResponse -> {
        var shardUpsertRequests = resolveAndGroupShardRequests(shardedRequests, dependencies.clusterService()).values();
        return execute(
            dependencies.nodeLimits(),
            dependencies.clusterService().state(),
            shardUpsertRequests,
            actionProvider.transportShardUpsertAction(),
            dependencies.scheduler());
    }).whenComplete((response, t) -> {
        if (t == null) {
            long[] resultRowCount = createBulkResponse(response, bulkParams.size(), bulkIndices);
            for (int i = 0; i < bulkParams.size(); i++) {
                results.get(i).complete(resultRowCount[i]);
            }
        } else {
            for (CompletableFuture<Long> result : results) {
                result.completeExceptionally(t);
            }
        }
    });
    return results;
}
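The createUnsetFutures helper referenced above is not included in this listing. A minimal sketch of what it plausibly does, assuming the usual java.util and java.util.concurrent imports (the actual implementation in crate may differ): one not-yet-completed future per bulk parameter row, each later completed with that operation's row count or completed exceptionally on failure.

private static List<CompletableFuture<Long>> createUnsetFutures(int num) {
    // Hypothetical reconstruction: one incomplete future per bulk operation.
    ArrayList<CompletableFuture<Long>> results = new ArrayList<>(num);
    for (int i = 0; i < num; i++) {
        results.add(new CompletableFuture<>());
    }
    return results;
}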
Use of io.crate.execution.dml.upsert.ShardUpsertRequest in project crate by crate.
The class InsertFromValues, method execute:
@Override
public void execute(DependencyCarrier dependencies,
                    PlannerContext plannerContext,
                    RowConsumer consumer,
                    Row params,
                    SubQueryResults subQueryResults) {
    DocTableInfo tableInfo = dependencies.schemas()
        .getTableInfo(writerProjection.tableIdent(), Operation.INSERT);
    // For instance, the target table of the insert from values
    // statement is the table with the following schema:
    //
    //   CREATE TABLE users (
    //       dep_id TEXT,
    //       name TEXT,
    //       id INT,
    //       country_id INT,
    //       PRIMARY KEY (dep_id, id, country_id))
    //   CLUSTERED BY (dep_id)
    //   PARTITIONED BY (country_id)
    //
    // The insert from values statement below would have the column
    // index writer projection of its plan that contains the column
    // idents and symbols required to create corresponding inputs.
    // The diagram below shows the projection's column symbols used
    // in the plan and the relation between symbol sub-/sets.
    //
    //                      +-----------+---+  PK symbols
    // cluster by +-------+ |           |   |
    // symbol             | |           |   |
    //                    + +           +   +
    // INSERT INTO users (dep_id, name, id, country_id) VALUES (?, ?, ?, ?)
    //                    +       +     +   +   +
    //            +-------+       |     |   |   |
    // all target +---------------+     |   |   +---+ partitioned by
    // column     +---------------------+   |         symbols
    // symbols    +-------------------------+
    InputFactory inputFactory = new InputFactory(dependencies.nodeContext());
    InputFactory.Context<CollectExpression<Row, ?>> context =
        inputFactory.ctxForInputColumns(plannerContext.transactionContext());
    var allColumnSymbols = InputColumns.create(
        writerProjection.allTargetColumns(),
        new InputColumns.SourceSymbols(writerProjection.allTargetColumns()));
    ArrayList<Input<?>> insertInputs = new ArrayList<>(allColumnSymbols.size());
    for (Symbol symbol : allColumnSymbols) {
        insertInputs.add(context.add(symbol));
    }
    ArrayList<Input<?>> partitionedByInputs = new ArrayList<>(writerProjection.partitionedBySymbols().size());
    for (Symbol partitionedBySymbol : writerProjection.partitionedBySymbols()) {
        partitionedByInputs.add(context.add(partitionedBySymbol));
    }
    ArrayList<Input<?>> primaryKeyInputs = new ArrayList<>(writerProjection.ids().size());
    for (Symbol symbol : writerProjection.ids()) {
        primaryKeyInputs.add(context.add(symbol));
    }
    Input<?> clusterByInput;
    if (writerProjection.clusteredBy() != null) {
        clusterByInput = context.add(writerProjection.clusteredBy());
    } else {
        clusterByInput = null;
    }
    String[] updateColumnNames;
    Symbol[] assignmentSources;
    if (writerProjection.onDuplicateKeyAssignments() == null) {
        updateColumnNames = null;
        assignmentSources = null;
    } else {
        Assignments assignments = Assignments.convert(writerProjection.onDuplicateKeyAssignments(), dependencies.nodeContext());
        assignmentSources = assignments.bindSources(tableInfo, params, subQueryResults);
        updateColumnNames = assignments.targetNames();
    }
    var indexNameResolver = IndexNameResolver.create(
        writerProjection.tableIdent(),
        writerProjection.partitionIdent(),
        partitionedByInputs);
    GroupRowsByShard<ShardUpsertRequest, ShardUpsertRequest.Item> grouper =
        createRowsByShardGrouper(assignmentSources, insertInputs, indexNameResolver, context, plannerContext, dependencies.clusterService());
    ArrayList<Row> rows = new ArrayList<>();
    evaluateValueTableFunction(
        tableFunctionRelation.functionImplementation(),
        tableFunctionRelation.function().arguments(),
        writerProjection.allTargetColumns(),
        tableInfo,
        params,
        plannerContext,
        subQueryResults
    ).forEachRemaining(rows::add);
    List<Symbol> returnValues = this.writerProjection.returnValues();
    ShardUpsertRequest.Builder builder = new ShardUpsertRequest.Builder(
        plannerContext.transactionContext().sessionSettings(),
        BULK_REQUEST_TIMEOUT_SETTING.get(dependencies.settings()),
        writerProjection.isIgnoreDuplicateKeys()
            ? ShardUpsertRequest.DuplicateKeyAction.IGNORE
            : ShardUpsertRequest.DuplicateKeyAction.UPDATE_OR_FAIL,
        rows.size() > 1, // continueOnErrors
        updateColumnNames,
        writerProjection.allTargetColumns().toArray(new Reference[0]),
        returnValues.isEmpty() ? null : returnValues.toArray(new Symbol[0]),
        plannerContext.jobId(),
        false);
    var shardedRequests = new ShardedRequests<>(builder::newRequest, RamAccounting.NO_ACCOUNTING);
    HashMap<String, InsertSourceFromCells> validatorsCache = new HashMap<>();
    for (Row row : rows) {
        grouper.accept(shardedRequests, row);
        try {
            checkPrimaryKeyValuesNotNull(primaryKeyInputs);
            checkClusterByValueNotNull(clusterByInput);
            checkConstraintsOnGeneratedSource(row.materialize(), indexNameResolver.get(), tableInfo, plannerContext, validatorsCache);
        } catch (Throwable t) {
            consumer.accept(null, t);
            return;
        }
    }
    validatorsCache.clear();
    var actionProvider = dependencies.transportActionProvider();
    createIndices(
        actionProvider.transportBulkCreateIndicesAction(),
        shardedRequests.itemsByMissingIndex().keySet(),
        dependencies.clusterService(),
        plannerContext.jobId()
    ).thenCompose(acknowledgedResponse -> {
        var shardUpsertRequests = resolveAndGroupShardRequests(shardedRequests, dependencies.clusterService()).values();
        return execute(
            dependencies.nodeLimits(),
            dependencies.clusterService().state(),
            shardUpsertRequests,
            actionProvider.transportShardUpsertAction(),
            dependencies.scheduler());
    }).whenComplete((response, t) -> {
        if (t == null) {
            if (returnValues.isEmpty()) {
                consumer.accept(InMemoryBatchIterator.of(new Row1((long) response.numSuccessfulWrites()), SENTINEL), null);
            } else {
                consumer.accept(InMemoryBatchIterator.of(new CollectionBucket(response.resultRows()), SENTINEL, false), null);
            }
        } else {
            consumer.accept(null, t);
        }
    });
}
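For comparison with executeBulk, the dense Builder call above can be unpacked into named locals. This is purely an illustrative restatement of the arguments already shown; the local variable names are descriptive, not taken from crate's sources.

// Illustrative restatement only; same values as the Builder call in execute() above.
var duplicateKeyAction = writerProjection.isIgnoreDuplicateKeys()
    ? ShardUpsertRequest.DuplicateKeyAction.IGNORE
    : ShardUpsertRequest.DuplicateKeyAction.UPDATE_OR_FAIL;
boolean continueOnErrors = rows.size() > 1; // a single-row insert fails fast instead
Symbol[] returnValueSymbols = returnValues.isEmpty() ? null : returnValues.toArray(new Symbol[0]);
ShardUpsertRequest.Builder builder = new ShardUpsertRequest.Builder(
    plannerContext.transactionContext().sessionSettings(),
    BULK_REQUEST_TIMEOUT_SETTING.get(dependencies.settings()),
    duplicateKeyAction,
    continueOnErrors,
    updateColumnNames,
    writerProjection.allTargetColumns().toArray(new Reference[0]),
    returnValueSymbols,
    plannerContext.jobId(),
    false);

Note the two differences from executeBulk: there, continueOnErrors is unconditionally true and the return-values argument is always null, since the bulk path completes its futures with row counts only.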
Use of io.crate.execution.dml.upsert.ShardUpsertRequest in project crate by crate.
The class UpdateById, method createExecutor:
private ShardRequestExecutor<ShardUpsertRequest> createExecutor(DependencyCarrier dependencies,
                                                                PlannerContext plannerContext) {
    ClusterService clusterService = dependencies.clusterService();
    CoordinatorTxnCtx txnCtx = plannerContext.transactionContext();
    ShardUpsertRequest.Builder requestBuilder = new ShardUpsertRequest.Builder(
        txnCtx.sessionSettings(),
        ShardingUpsertExecutor.BULK_REQUEST_TIMEOUT_SETTING.get(clusterService.state().metadata().settings()),
        ShardUpsertRequest.DuplicateKeyAction.UPDATE_OR_FAIL,
        true,
        assignments.targetNames(),
        null, // missing assignments are for INSERT .. ON DUPLICATE KEY UPDATE
        returnValues,
        plannerContext.jobId(),
        false);
    UpdateRequests updateRequests = new UpdateRequests(requestBuilder, table, assignments);
    return new ShardRequestExecutor<>(
        clusterService,
        txnCtx,
        dependencies.nodeContext(),
        table,
        updateRequests,
        dependencies.transportActionProvider().transportShardUpsertAction()::execute,
        docKeys);
}
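A hypothetical usage sketch of the returned executor follows; the execute call and its parameters are assumptions for illustration, not confirmed ShardRequestExecutor API.

// Hypothetical sketch: UpdateById builds the executor once per plan execution
// and delegates the per-docKey fan-out of ShardUpsertRequests to it.
// The method name and argument list below are assumed, not confirmed API.
ShardRequestExecutor<ShardUpsertRequest> executor = createExecutor(dependencies, plannerContext);
executor.execute(consumer, params, subQueryResults);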
Use of io.crate.execution.dml.upsert.ShardUpsertRequest in project crate by crate.
The class ShardingUpsertExecutor, method execRequests:
private CompletableFuture<UpsertResults> execRequests(ShardedRequests<ShardUpsertRequest, ShardUpsertRequest.Item> requests,
                                                      final UpsertResults upsertResults) {
    if (requests.itemsByShard.isEmpty()) {
        requests.close();
        // could be that processing the source uri only results in errors, so no items per shard exist
        return CompletableFuture.completedFuture(upsertResults);
    }
    final AtomicInteger numRequests = new AtomicInteger(requests.itemsByShard.size());
    final AtomicReference<Exception> interrupt = new AtomicReference<>(null);
    final CompletableFuture<UpsertResults> resultFuture = new CompletableFuture<>();
    Iterator<Map.Entry<ShardLocation, ShardUpsertRequest>> it = requests.itemsByShard.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry<ShardLocation, ShardUpsertRequest> entry = it.next();
        ShardUpsertRequest request = entry.getValue();
        it.remove();
        String nodeId = entry.getKey().nodeId;
        ConcurrencyLimit nodeLimit = nodeLimits.get(nodeId);
        ActionListener<ShardResponse> listener = new ShardResponseActionListener(
            numRequests,
            interrupt,
            upsertResults,
            resultCollector.accumulator(),
            requests.rowSourceInfos,
            nodeLimit,
            resultFuture);
        listener = new RetryListener<>(
            scheduler,
            l -> requestExecutor.execute(request, l),
            listener,
            BackoffPolicy.unlimitedDynamic(nodeLimit));
        requestExecutor.execute(request, listener);
    }
    return resultFuture.whenComplete((r, err) -> requests.close());
}
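The ShardResponseActionListener used above is not part of this listing. The coordination it must implement follows from the surrounding code: every shard response decrements numRequests, the first failure is recorded in interrupt, and resultFuture completes once the counter reaches zero. A minimal sketch of that countdown pattern, with illustrative names and without the result accumulation, node-limit bookkeeping, and row-source tracking the real listener performs:

// Illustrative countdown pattern, not the actual crate listener.
final class CountdownListener implements ActionListener<ShardResponse> {
    private final AtomicInteger remaining;              // shared across all shard requests
    private final AtomicReference<Exception> interrupt; // first failure wins
    private final CompletableFuture<UpsertResults> resultFuture;
    private final UpsertResults results;

    CountdownListener(AtomicInteger remaining,
                      AtomicReference<Exception> interrupt,
                      CompletableFuture<UpsertResults> resultFuture,
                      UpsertResults results) {
        this.remaining = remaining;
        this.interrupt = interrupt;
        this.resultFuture = resultFuture;
        this.results = results;
    }

    @Override
    public void onResponse(ShardResponse response) {
        countdown();
    }

    @Override
    public void onFailure(Exception e) {
        interrupt.compareAndSet(null, e); // keep only the first failure
        countdown();
    }

    private void countdown() {
        if (remaining.decrementAndGet() == 0) {
            Exception failure = interrupt.get();
            if (failure == null) {
                resultFuture.complete(results);
            } else {
                resultFuture.completeExceptionally(failure);
            }
        }
    }
}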
Use of io.crate.execution.dml.upsert.ShardUpsertRequest in project crate by crate.
The class ShardingUpsertExecutor, method apply:
@Override
public CompletableFuture<? extends Iterable<Row>> apply(BatchIterator<Row> batchIterator) {
    final ConcurrencyLimit nodeLimit = nodeLimits.get(localNode);
    long startTime = nodeLimit.startSample();
    var isUsedBytesOverThreshold = new IsUsedBytesOverThreshold(queryCircuitBreaker, nodeLimit);
    var reqBatchIterator = BatchIterators.partition(
        batchIterator,
        bulkSize,
        () -> new ShardedRequests<>(requestFactory, ramAccounting),
        grouper,
        bulkShardCreationLimiter.or(isUsedBytesOverThreshold));
    // If IO is involved, the source iterator should pause when the target node reaches a
    // concurrent job counter limit. Without IO, we assume that the source iterates over
    // in-memory structures which should be processed as fast as possible to free resources.
    Predicate<ShardedRequests<ShardUpsertRequest, ShardUpsertRequest.Item>> shouldPause =
        this::shouldPauseOnPartitionCreation;
    if (batchIterator.hasLazyResultSet()) {
        shouldPause = shouldPause
            .or(this::shouldPauseOnTargetNodeJobsCounter)
            .or(isUsedBytesOverThreshold);
    }
    BatchIteratorBackpressureExecutor<ShardedRequests<ShardUpsertRequest, ShardUpsertRequest.Item>, UpsertResults> executor =
        new BatchIteratorBackpressureExecutor<>(
            jobId,
            scheduler,
            this.executor,
            reqBatchIterator,
            this::execute,
            resultCollector.combiner(),
            resultCollector.supplier().get(),
            shouldPause,
            earlyTerminationCondition,
            earlyTerminationExceptionGenerator,
            this::getMaxLastRttInMs);
    return executor.consumeIteratorAndExecute()
        .thenApply(upsertResults -> resultCollector.finisher().apply(upsertResults))
        .whenComplete((res, err) -> nodeLimit.onSample(startTime, err != null));
}
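The shouldPause predicate above is composed with java.util.function.Predicate.or. A small self-contained illustration of the same composition technique, using toy predicates in place of the executor's real checks:

import java.util.function.Predicate;

// Standalone illustration of the Predicate.or composition used above.
class PausePredicateDemo {
    public static void main(String[] args) {
        Predicate<Integer> overPartitionLimit = n -> n > 100;
        Predicate<Integer> overMemoryThreshold = n -> n > 80;
        boolean sourceIsLazy = true; // stands in for batchIterator.hasLazyResultSet()

        // Mandatory check first; extra backpressure checks are attached only
        // when the source can be paused cheaply (lazy result set).
        Predicate<Integer> shouldPause = overPartitionLimit;
        if (sourceIsLazy) {
            shouldPause = shouldPause.or(overMemoryThreshold);
        }
        System.out.println(shouldPause.test(90)); // true: the memory check fired
    }
}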