use of io.trino.spi.exchange.ExchangeManager in project trino by trinodb.
the class LazyOutputBuffer method setOutputBuffers.
/**
 * Applies {@code newOutputBuffers} to the underlying buffer, creating that buffer first when no
 * delegate has been assigned yet.
 *
 * <p>When the delegate is still {@code null}, the concrete buffer implementation is chosen from
 * {@code newOutputBuffers.getType()} while holding this object's monitor. The entries accumulated
 * in {@code destroyedBuffers} and {@code pendingReads} are snapshotted and cleared under the lock,
 * then replayed against the new delegate after the lock has been released. If the state machine
 * is already in a terminal state at that point, the call returns without creating a buffer.
 *
 * @param newOutputBuffers the output buffer configuration to apply
 * @throws IllegalArgumentException if the buffer type is not one of the handled types, or if the
 *         type is {@code SPOOL} and no exchange sink instance handle is present
 */
@Override
public void setOutputBuffers(OutputBuffers newOutputBuffers) {
    // Snapshots of the queued work; they remain empty unless this call is the one that creates the delegate.
    Set<OutputBufferId> buffersToDestroy = ImmutableSet.of();
    List<PendingRead> readsToProcess = ImmutableList.of();

    OutputBuffer outputBuffer = delegate;
    if (outputBuffer == null) {
        synchronized (this) {
            // Re-read under the lock: the delegate may have been assigned since the unsynchronized read above.
            outputBuffer = delegate;
            if (outputBuffer == null) {
                // ignore set output if buffer was already destroyed or failed
                if (stateMachine.getState().isTerminal()) {
                    return;
                }
                switch (newOutputBuffers.getType()) {
                    case PARTITIONED:
                        outputBuffer = new PartitionedOutputBuffer(taskInstanceId, stateMachine, newOutputBuffers, maxBufferSize, memoryContextSupplier, executor);
                        break;
                    case BROADCAST:
                        outputBuffer = new BroadcastOutputBuffer(taskInstanceId, stateMachine, maxBroadcastBufferSize, memoryContextSupplier, executor, notifyStatusChanged);
                        break;
                    case ARBITRARY:
                        outputBuffer = new ArbitraryOutputBuffer(taskInstanceId, stateMachine, maxBufferSize, memoryContextSupplier, executor);
                        break;
                    case SPOOL:
                        // The message names the buffer type actually being handled (SPOOL).
                        ExchangeSinkInstanceHandle exchangeSinkInstanceHandle = newOutputBuffers.getExchangeSinkInstanceHandle()
                                .orElseThrow(() -> new IllegalArgumentException("exchange sink handle is expected to be present for buffer type SPOOL"));
                        ExchangeManager exchangeManager = exchangeManagerRegistry.getExchangeManager();
                        ExchangeSink exchangeSink = exchangeManager.createSink(exchangeSinkInstanceHandle, false);
                        outputBuffer = new SpoolingExchangeOutputBuffer(stateMachine, newOutputBuffers, exchangeSink, memoryContextSupplier);
                        break;
                    default:
                        throw new IllegalArgumentException("Unexpected output buffer type: " + newOutputBuffers.getType());
                }
                // Capture and clear the queued aborts and reads here; they are processed after the lock is released
                buffersToDestroy = ImmutableSet.copyOf(this.destroyedBuffers);
                this.destroyedBuffers.clear();
                readsToProcess = ImmutableList.copyOf(this.pendingReads);
                this.pendingReads.clear();
                // Must be assigned last to avoid a race condition with unsynchronized readers
                delegate = outputBuffer;
            }
        }
    }

    outputBuffer.setOutputBuffers(newOutputBuffers);

    // process pending aborts and reads outside of synchronized lock
    buffersToDestroy.forEach(outputBuffer::destroy);
    for (PendingRead pendingRead : readsToProcess) {
        pendingRead.process(outputBuffer);
    }
}
use of io.trino.spi.exchange.ExchangeManager in project trino by trinodb.
the class SqlQueryScheduler method createDistributedStagesScheduler.
/**
 * Creates the distributed stages scheduler for the given attempt, publishes it through
 * {@code this.distributedStagesScheduler}, and registers a listener that maps scheduler state
 * changes onto the query state machine.
 *
 * <p>A {@code TASK} retry policy yields a {@code FaultTolerantDistributedStagesScheduler};
 * {@code QUERY} and {@code NONE} yield a {@code PipelinedDistributedStagesScheduler}.
 *
 * @param attempt the attempt number; must be 0 unless the retry policy is {@code QUERY}
 * @return the new scheduler, or an empty optional when the query is already done
 * @throws IllegalArgumentException if the retry policy is not one of the handled values
 */
private synchronized Optional<DistributedStagesScheduler> createDistributedStagesScheduler(int attempt) {
    verify(attempt == 0 || retryPolicy == RetryPolicy.QUERY, "unexpected attempt %s for retry policy %s", attempt, retryPolicy);
    if (queryStateMachine.isDone()) {
        return Optional.empty();
    }

    boolean isQueryLevelRetry = attempt > 0 && retryPolicy == RetryPolicy.QUERY;
    if (isQueryLevelRetry) {
        dynamicFilterService.registerQueryRetry(queryStateMachine.getQueryId(), attempt);
    }

    DistributedStagesScheduler scheduler;
    switch (retryPolicy) {
        case TASK:
            ExchangeManager exchangeManager = exchangeManagerRegistry.getExchangeManager();
            scheduler = FaultTolerantDistributedStagesScheduler.create(
                    queryStateMachine,
                    stageManager,
                    failureDetector,
                    taskSourceFactory,
                    taskDescriptorStorage,
                    exchangeManager,
                    nodePartitioningManager,
                    coordinatorStagesScheduler.getTaskLifecycleListener(),
                    maxTaskRetryAttemptsOverall,
                    maxTaskRetryAttemptsPerTask,
                    schedulerExecutor,
                    schedulerStats,
                    nodeAllocatorService,
                    partitionMemoryEstimator);
            break;
        case QUERY:
        case NONE:
            scheduler = PipelinedDistributedStagesScheduler.create(
                    queryStateMachine,
                    schedulerStats,
                    nodeScheduler,
                    nodePartitioningManager,
                    stageManager,
                    coordinatorStagesScheduler,
                    executionPolicy,
                    failureDetector,
                    schedulerExecutor,
                    splitSourceFactory,
                    splitBatchSize,
                    dynamicFilterService,
                    tableExecuteContextManager,
                    retryPolicy,
                    attempt);
            break;
        default:
            throw new IllegalArgumentException("Unexpected retry policy: " + retryPolicy);
    }

    this.distributedStagesScheduler.set(scheduler);
    scheduler.addStateChangeListener(newState -> {
        // Leave STARTING as soon as the scheduler is running or has already completed.
        boolean queryStillStarting = queryStateMachine.getQueryState() == QueryState.STARTING;
        if (queryStillStarting && (newState == DistributedStagesSchedulerState.RUNNING || newState.isDone())) {
            queryStateMachine.transitionToRunning();
        }

        // Done without failure: mark every distributed stage as finished.
        if (newState.isDone() && !newState.isFailure()) {
            stageManager.getDistributedStagesInTopologicalOrder()
                    .forEach(distributedStage -> stageManager.get(distributedStage.getStageId()).finish());
        }

        if (stageManager.getCoordinatorStagesInTopologicalOrder().isEmpty()) {
            // otherwise defer query transitioning to the coordinator stages
            if (newState == DistributedStagesSchedulerState.FINISHED) {
                queryStateMachine.transitionToFinishing();
            }
            else if (newState == DistributedStagesSchedulerState.CANCELED) {
                // output stage was canceled
                queryStateMachine.transitionToCanceled();
            }
        }

        if (newState == DistributedStagesSchedulerState.FAILED) {
            handleDistributedStagesFailure(scheduler);
        }
    });
    return Optional.of(scheduler);
}

// Reacts to a FAILED scheduler: schedules a delayed retry when the error code is retryable, otherwise fails the query.
private void handleDistributedStagesFailure(DistributedStagesScheduler failedScheduler) {
    StageFailureInfo stageFailureInfo = failedScheduler.getFailureCause()
            .orElseGet(() -> new StageFailureInfo(
                    toFailure(new VerifyException("distributedStagesScheduler failed but failure cause is not present")),
                    Optional.empty()));
    ErrorCode errorCode = stageFailureInfo.getFailureInfo().getErrorCode();

    if (!shouldRetry(errorCode)) {
        // Fail the stage named in the failure info (if any) and abort all the others.
        stageManager.getDistributedStagesInTopologicalOrder().forEach(distributedStage -> {
            boolean isFailedStage = stageFailureInfo.getFailedStageId()
                    .filter(failedStageId -> failedStageId.equals(distributedStage.getStageId()))
                    .isPresent();
            if (isFailedStage) {
                distributedStage.fail(stageFailureInfo.getFailureInfo().toException());
            }
            else {
                distributedStage.abort();
            }
        });
        queryStateMachine.transitionToFailed(stageFailureInfo.getFailureInfo().toException());
        return;
    }

    // Initial delay doubled for each attempt so far, capped at retryMaxDelay.
    long initialDelayMillis = retryInitialDelay.toMillis();
    long multiplier = (long) pow(2, currentAttempt.get());
    long delayInMillis = min(initialDelayMillis * multiplier, retryMaxDelay.toMillis());
    currentAttempt.incrementAndGet();
    scheduleRetryWithDelay(delayInMillis);
}
use of io.trino.spi.exchange.ExchangeManager in project trino by trinodb.
the class ExchangeManagerRegistry method loadExchangeManager.
/**
 * Creates the exchange manager registered under {@code name} and stores it as this registry's
 * exchange manager.
 *
 * <p>The factory's {@code create} call runs with the factory's own class loader installed as the
 * thread context class loader. The factory's handle resolver is passed to {@code handleResolver}
 * before the new manager is stored.
 *
 * @param name the name the factory was registered under
 * @param properties configuration passed to the factory's {@code create} method
 * @throws IllegalStateException if an exchange manager has already been loaded
 * @throws IllegalArgumentException if no factory is registered under {@code name}
 */
public synchronized void loadExchangeManager(String name, Map<String, String> properties) {
    log.info("-- Loading exchange manager %s --", name);
    checkState(this.exchangeManager == null, "exchangeManager is already loaded");

    ExchangeManagerFactory factory = exchangeManagerFactories.get(name);
    checkArgument(
            factory != null,
            "Exchange manager factory '%s' is not registered. Available factories: %s",
            name,
            exchangeManagerFactories.keySet());

    ClassLoader factoryClassLoader = factory.getClass().getClassLoader();
    ExchangeManager createdManager;
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(factoryClassLoader)) {
        createdManager = factory.create(properties);
    }

    handleResolver.setExchangeManagerHandleResolver(factory.getHandleResolver());
    log.info("-- Loaded exchange manager %s --", name);
    // Store the manager only after creation and resolver registration have both completed.
    this.exchangeManager = createdManager;
}
use of io.trino.spi.exchange.ExchangeManager in project trino by trinodb.
the class TestStageTaskSourceFactory method testArbitraryDistributionTaskSource.
/**
 * Exercises {@code ArbitraryDistributionTaskSource} over a series of inputs: no sources, a single
 * source handle, handles spread over two plan nodes, and a mix of non-replicated and replicated
 * sources, using exchanges created by both testing exchange managers.
 */
@Test
public void testArbitraryDistributionTaskSource() {
    ExchangeManager splitting = new TestingExchangeManager(true);
    ExchangeManager nonSplitting = new TestingExchangeManager(false);

    // Values shared by every scenario below.
    DataSize threeBytes = DataSize.of(3, BYTE);
    DataSize fourGigabytes = DataSize.of(4, GIGABYTE);
    NodeRequirements requirements = new NodeRequirements(Optional.empty(), ImmutableSet.of(), fourGigabytes);

    // no sources at all
    TaskSource source = new ArbitraryDistributionTaskSource(
            new IdentityHashMap<>(),
            ImmutableListMultimap.of(),
            ImmutableListMultimap.of(),
            threeBytes,
            fourGigabytes);
    assertFalse(source.isFinished());
    List<TaskDescriptor> produced = source.getMoreTasks();
    assertThat(produced).isEmpty();
    assertTrue(source.isFinished());

    TestingExchangeSourceHandle handle1 = new TestingExchangeSourceHandle(0, 1);
    TestingExchangeSourceHandle handle2 = new TestingExchangeSourceHandle(0, 2);
    TestingExchangeSourceHandle handle3 = new TestingExchangeSourceHandle(0, 3);
    TestingExchangeSourceHandle handle4 = new TestingExchangeSourceHandle(0, 4);
    TestingExchangeSourceHandle handle123 = new TestingExchangeSourceHandle(0, 123);
    TestingExchangeSourceHandle handle321 = new TestingExchangeSourceHandle(0, 321);

    // single handle (0, 3), exchange from the manager constructed with true
    Multimap<PlanNodeId, ExchangeSourceHandle> nonReplicated = ImmutableListMultimap.of(PLAN_NODE_1, handle3);
    Exchange exchange = splitting.createExchange(
            new ExchangeContext(new QueryId("query"), createRandomExchangeId()),
            3);
    source = new ArbitraryDistributionTaskSource(
            new IdentityHashMap<>(ImmutableMap.of(handle3, exchange)),
            nonReplicated,
            ImmutableListMultimap.of(),
            threeBytes,
            fourGigabytes);
    produced = source.getMoreTasks();
    assertTrue(source.isFinished());
    assertThat(produced).hasSize(1);
    assertEquals(produced, ImmutableList.of(
            new TaskDescriptor(
                    0,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 3)),
                    requirements)));

    // single handle (0, 123), exchange from the manager constructed with false
    nonReplicated = ImmutableListMultimap.of(PLAN_NODE_1, handle123);
    exchange = nonSplitting.createExchange(
            new ExchangeContext(new QueryId("query"), createRandomExchangeId()),
            3);
    source = new ArbitraryDistributionTaskSource(
            new IdentityHashMap<>(ImmutableMap.of(handle123, exchange)),
            nonReplicated,
            ImmutableListMultimap.of(),
            threeBytes,
            fourGigabytes);
    produced = source.getMoreTasks();
    assertEquals(produced, ImmutableList.of(
            new TaskDescriptor(
                    0,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 123)),
                    requirements)));

    // two plan nodes, one handle each, exchange from the manager constructed with false
    nonReplicated = ImmutableListMultimap.of(
            PLAN_NODE_1, handle123,
            PLAN_NODE_2, handle321);
    exchange = nonSplitting.createExchange(
            new ExchangeContext(new QueryId("query"), createRandomExchangeId()),
            3);
    source = new ArbitraryDistributionTaskSource(
            new IdentityHashMap<>(ImmutableMap.of(handle123, exchange, handle321, exchange)),
            nonReplicated,
            ImmutableListMultimap.of(),
            threeBytes,
            fourGigabytes);
    produced = source.getMoreTasks();
    assertEquals(produced, ImmutableList.of(
            new TaskDescriptor(
                    0,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 123)),
                    requirements),
            new TaskDescriptor(
                    1,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 321)),
                    requirements)));

    // handles (0, 1) and (0, 2) on node 1, handle (0, 4) on node 2, exchange from the manager constructed with true
    nonReplicated = ImmutableListMultimap.of(
            PLAN_NODE_1, handle1,
            PLAN_NODE_1, handle2,
            PLAN_NODE_2, handle4);
    exchange = splitting.createExchange(
            new ExchangeContext(new QueryId("query"), createRandomExchangeId()),
            3);
    source = new ArbitraryDistributionTaskSource(
            new IdentityHashMap<>(ImmutableMap.of(handle1, exchange, handle2, exchange, handle4, exchange)),
            nonReplicated,
            ImmutableListMultimap.of(),
            threeBytes,
            fourGigabytes);
    produced = source.getMoreTasks();
    assertEquals(produced, ImmutableList.of(
            new TaskDescriptor(
                    0,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(
                            PLAN_NODE_1, new TestingExchangeSourceHandle(0, 1),
                            PLAN_NODE_1, new TestingExchangeSourceHandle(0, 2)),
                    requirements),
            new TaskDescriptor(
                    1,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 3)),
                    requirements),
            new TaskDescriptor(
                    2,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 1)),
                    requirements)));

    // handles (0, 1) and (0, 3) on node 1, handle (0, 4) on node 2, exchange from the manager constructed with true
    nonReplicated = ImmutableListMultimap.of(
            PLAN_NODE_1, handle1,
            PLAN_NODE_1, handle3,
            PLAN_NODE_2, handle4);
    exchange = splitting.createExchange(
            new ExchangeContext(new QueryId("query"), createRandomExchangeId()),
            3);
    source = new ArbitraryDistributionTaskSource(
            new IdentityHashMap<>(ImmutableMap.of(handle1, exchange, handle3, exchange, handle4, exchange)),
            nonReplicated,
            ImmutableListMultimap.of(),
            threeBytes,
            fourGigabytes);
    produced = source.getMoreTasks();
    assertEquals(produced, ImmutableList.of(
            new TaskDescriptor(
                    0,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 1)),
                    requirements),
            new TaskDescriptor(
                    1,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 3)),
                    requirements),
            new TaskDescriptor(
                    2,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 3)),
                    requirements),
            new TaskDescriptor(
                    3,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 1)),
                    requirements)));

    // with replicated sources
    nonReplicated = ImmutableListMultimap.of(
            PLAN_NODE_1, handle1,
            PLAN_NODE_1, handle2,
            PLAN_NODE_1, handle4);
    Multimap<PlanNodeId, ExchangeSourceHandle> replicated = ImmutableListMultimap.of(PLAN_NODE_2, handle321);
    exchange = splitting.createExchange(
            new ExchangeContext(new QueryId("query"), createRandomExchangeId()),
            3);
    source = new ArbitraryDistributionTaskSource(
            new IdentityHashMap<>(ImmutableMap.of(handle1, exchange, handle2, exchange, handle4, exchange, handle321, exchange)),
            nonReplicated,
            replicated,
            threeBytes,
            fourGigabytes);
    produced = source.getMoreTasks();
    assertEquals(produced, ImmutableList.of(
            new TaskDescriptor(
                    0,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(
                            PLAN_NODE_1, new TestingExchangeSourceHandle(0, 1),
                            PLAN_NODE_1, new TestingExchangeSourceHandle(0, 2),
                            PLAN_NODE_2, new TestingExchangeSourceHandle(0, 321)),
                    requirements),
            new TaskDescriptor(
                    1,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(
                            PLAN_NODE_1, new TestingExchangeSourceHandle(0, 3),
                            PLAN_NODE_2, handle321),
                    requirements),
            new TaskDescriptor(
                    2,
                    ImmutableListMultimap.of(),
                    ImmutableListMultimap.of(
                            PLAN_NODE_1, new TestingExchangeSourceHandle(0, 1),
                            PLAN_NODE_2, handle321),
                    requirements)));
}
Aggregations