use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.
the class SectionExecutionFactory method createStageScheduler.
private StageScheduler createStageScheduler(SplitSourceFactory splitSourceFactory, Session session, StreamingSubPlan plan, Function<PartitioningHandle, NodePartitionMap> partitioningCache, Optional<SqlStageExecution> parentStageExecution, StageId stageId, SqlStageExecution stageExecution, PartitioningHandle partitioningHandle, TableWriteInfo tableWriteInfo, Set<SqlStageExecution> childStageExecutions) {
Map<PlanNodeId, SplitSource> splitSources = splitSourceFactory.createSplitSources(plan.getFragment(), session, tableWriteInfo);
int maxTasksPerStage = getMaxTasksPerStage(session);
if (partitioningHandle.equals(SOURCE_DISTRIBUTION)) {
// nodes are selected dynamically based on the constraints of the splits and the system load
Map.Entry<PlanNodeId, SplitSource> entry = getOnlyElement(splitSources.entrySet());
PlanNodeId planNodeId = entry.getKey();
SplitSource splitSource = entry.getValue();
ConnectorId connectorId = splitSource.getConnectorId();
if (isInternalSystemConnector(connectorId)) {
connectorId = null;
}
NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, connectorId, maxTasksPerStage);
SplitPlacementPolicy placementPolicy = new DynamicSplitPlacementPolicy(nodeSelector, stageExecution::getAllTasks);
checkArgument(!plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution());
return newSourcePartitionedSchedulerAsStageScheduler(stageExecution, planNodeId, splitSource, placementPolicy, splitBatchSize);
} else if (partitioningHandle.equals(SCALED_WRITER_DISTRIBUTION)) {
Supplier<Collection<TaskStatus>> sourceTasksProvider = () -> childStageExecutions.stream().map(SqlStageExecution::getAllTasks).flatMap(Collection::stream).map(RemoteTask::getTaskStatus).collect(toList());
Supplier<Collection<TaskStatus>> writerTasksProvider = () -> stageExecution.getAllTasks().stream().map(RemoteTask::getTaskStatus).collect(toList());
ScaledWriterScheduler scheduler = new ScaledWriterScheduler(stageExecution, sourceTasksProvider, writerTasksProvider, nodeScheduler.createNodeSelector(session, null), scheduledExecutor, getWriterMinSize(session), isOptimizedScaleWriterProducerBuffer(session));
whenAllStages(childStageExecutions, StageExecutionState::isDone).addListener(scheduler::finish, directExecutor());
return scheduler;
} else {
if (!splitSources.isEmpty()) {
// contains local source
List<PlanNodeId> schedulingOrder = plan.getFragment().getTableScanSchedulingOrder();
ConnectorId connectorId = partitioningHandle.getConnectorId().orElseThrow(IllegalStateException::new);
List<ConnectorPartitionHandle> connectorPartitionHandles;
boolean groupedExecutionForStage = plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution();
if (groupedExecutionForStage) {
connectorPartitionHandles = nodePartitioningManager.listPartitionHandles(session, partitioningHandle);
checkState(!ImmutableList.of(NOT_PARTITIONED).equals(connectorPartitionHandles));
} else {
connectorPartitionHandles = ImmutableList.of(NOT_PARTITIONED);
}
BucketNodeMap bucketNodeMap;
List<InternalNode> stageNodeList;
if (plan.getFragment().getRemoteSourceNodes().stream().allMatch(node -> node.getExchangeType() == REPLICATE)) {
// no non-replicated remote source
boolean dynamicLifespanSchedule = plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule();
bucketNodeMap = nodePartitioningManager.getBucketNodeMap(session, partitioningHandle, dynamicLifespanSchedule);
// verify execution is consistent with planner's decision on dynamic lifespan schedule
verify(bucketNodeMap.isDynamic() == dynamicLifespanSchedule);
if (bucketNodeMap.hasInitialMap()) {
stageNodeList = bucketNodeMap.getBucketToNode().get().stream().distinct().collect(toImmutableList());
} else {
stageNodeList = new ArrayList<>(nodeScheduler.createNodeSelector(session, connectorId).selectRandomNodes(maxTasksPerStage));
}
} else {
// cannot use dynamic lifespan schedule
verify(!plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule());
// remote source requires nodePartitionMap
NodePartitionMap nodePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning());
if (groupedExecutionForStage) {
checkState(connectorPartitionHandles.size() == nodePartitionMap.getBucketToPartition().length);
}
stageNodeList = nodePartitionMap.getPartitionToNode();
bucketNodeMap = nodePartitionMap.asBucketNodeMap();
}
FixedSourcePartitionedScheduler fixedSourcePartitionedScheduler = new FixedSourcePartitionedScheduler(stageExecution, splitSources, plan.getFragment().getStageExecutionDescriptor(), schedulingOrder, stageNodeList, bucketNodeMap, splitBatchSize, getConcurrentLifespansPerNode(session), nodeScheduler.createNodeSelector(session, connectorId), connectorPartitionHandles);
if (plan.getFragment().getStageExecutionDescriptor().isRecoverableGroupedExecution()) {
stageExecution.registerStageTaskRecoveryCallback(taskId -> {
checkArgument(taskId.getStageExecutionId().getStageId().equals(stageId), "The task did not execute this stage");
checkArgument(parentStageExecution.isPresent(), "Parent stage execution must exist");
checkArgument(parentStageExecution.get().getAllTasks().size() == 1, "Parent stage should only have one task for recoverable grouped execution");
parentStageExecution.get().removeRemoteSourceIfSingleTaskStage(taskId);
fixedSourcePartitionedScheduler.recover(taskId);
});
}
return fixedSourcePartitionedScheduler;
} else {
// all sources are remote
NodePartitionMap nodePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning());
List<InternalNode> partitionToNode = nodePartitionMap.getPartitionToNode();
// todo this should asynchronously wait a standard timeout period before failing
checkCondition(!partitionToNode.isEmpty(), NO_NODES_AVAILABLE, "No worker nodes available");
return new FixedCountScheduler(stageExecution, partitionToNode);
}
}
}
use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.
the class TaskInfoFetcher method updateTaskInfo.
synchronized void updateTaskInfo(TaskInfo newValue) {
boolean updated = taskInfo.setIf(newValue, oldValue -> {
TaskStatus oldTaskStatus = oldValue.getTaskStatus();
TaskStatus newTaskStatus = newValue.getTaskStatus();
if (oldTaskStatus.getState().isDone()) {
// never update if the task has reached a terminal state
return false;
}
// don't update to an older version (same version is ok)
return newTaskStatus.getVersion() >= oldTaskStatus.getVersion();
});
if (updated && newValue.getTaskStatus().getState().isDone()) {
finalTaskInfo.compareAndSet(Optional.empty(), Optional.of(newValue));
stop();
}
}
use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.
the class TaskInfoFetcher method sendMetadataUpdates.
private synchronized void sendMetadataUpdates(MetadataUpdates results) {
TaskStatus taskStatus = getTaskInfo().getTaskStatus();
// we already have the final task info
if (isDone(getTaskInfo())) {
stop();
return;
}
// outstanding request?
if (metadataUpdateFuture != null && !metadataUpdateFuture.isDone()) {
// this should never happen
return;
}
byte[] metadataUpdatesJson = metadataUpdatesCodec.toBytes(results);
Request request = setContentTypeHeaders(isBinaryTransportEnabled, preparePost()).setUri(uriBuilderFrom(taskStatus.getSelf()).appendPath("metadataresults").build()).setBodyGenerator(createStaticBodyGenerator(metadataUpdatesJson)).build();
errorTracker.startRequest();
metadataUpdateFuture = httpClient.executeAsync(request, new ResponseHandler<Response, RuntimeException>() {
@Override
public Response handleException(Request request, Exception exception) {
throw propagate(request, exception);
}
@Override
public Response handle(Request request, Response response) {
return response;
}
});
currentRequestStartNanos.set(System.nanoTime());
}
use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.
the class HttpRemoteTask method getQueuedPartitionedSplitsInfo.
@Override
public PartitionedSplitsInfo getQueuedPartitionedSplitsInfo() {
TaskStatus taskStatus = getTaskStatus();
if (taskStatus.getState().isDone()) {
return PartitionedSplitsInfo.forZeroSplits();
}
PartitionedSplitsInfo unacknowledgedSplitsInfo = getUnacknowledgedPartitionedSplitsInfo();
int count = unacknowledgedSplitsInfo.getCount() + taskStatus.getQueuedPartitionedDrivers();
long weight = unacknowledgedSplitsInfo.getWeightSum() + taskStatus.getQueuedPartitionedSplitsWeight();
return PartitionedSplitsInfo.forSplitCountAndWeightSum(count, weight);
}
use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.
the class HttpRemoteTask method sendUpdate.
private synchronized void sendUpdate() {
TaskStatus taskStatus = getTaskStatus();
// don't update if the task hasn't been started yet or if it is already finished
if (!started.get() || !needsUpdate.get() || taskStatus.getState().isDone()) {
return;
}
// if there is a request already running, wait for it to complete
if (this.currentRequest != null && !this.currentRequest.isDone()) {
return;
}
// if throttled due to error, asynchronously wait for timeout and try again
ListenableFuture<?> errorRateLimit = updateErrorTracker.acquireRequestPermit();
if (!errorRateLimit.isDone()) {
errorRateLimit.addListener(this::sendUpdate, executor);
return;
}
List<TaskSource> sources = getSources();
Optional<byte[]> fragment = sendPlan.get() ? Optional.of(planFragment.toBytes(planFragmentCodec)) : Optional.empty();
Optional<TableWriteInfo> writeInfo = sendPlan.get() ? Optional.of(tableWriteInfo) : Optional.empty();
TaskUpdateRequest updateRequest = new TaskUpdateRequest(session.toSessionRepresentation(), session.getIdentity().getExtraCredentials(), fragment, sources, outputBuffers.get(), writeInfo);
byte[] taskUpdateRequestJson = taskUpdateRequestCodec.toBytes(updateRequest);
taskUpdateRequestSize.add(taskUpdateRequestJson.length);
if (taskUpdateRequestJson.length > maxTaskUpdateSizeInBytes) {
failTask(new PrestoException(EXCEEDED_TASK_UPDATE_SIZE_LIMIT, format("TaskUpdate size of %d Bytes has exceeded the limit of %d Bytes", taskUpdateRequestJson.length, maxTaskUpdateSizeInBytes)));
}
if (fragment.isPresent()) {
stats.updateWithPlanSize(taskUpdateRequestJson.length);
} else {
if (ThreadLocalRandom.current().nextDouble() < UPDATE_WITHOUT_PLAN_STATS_SAMPLE_RATE) {
// This is to keep track of the task update size even when the plan fragment is NOT present
stats.updateWithoutPlanSize(taskUpdateRequestJson.length);
}
}
HttpUriBuilder uriBuilder = getHttpUriBuilder(taskStatus);
Request request = setContentTypeHeaders(binaryTransportEnabled, preparePost()).setUri(uriBuilder.build()).setBodyGenerator(createStaticBodyGenerator(taskUpdateRequestJson)).build();
ResponseHandler responseHandler;
if (binaryTransportEnabled) {
responseHandler = createFullSmileResponseHandler((SmileCodec<TaskInfo>) taskInfoCodec);
} else {
responseHandler = createAdaptingJsonResponseHandler((JsonCodec<TaskInfo>) taskInfoCodec);
}
updateErrorTracker.startRequest();
ListenableFuture<BaseResponse<TaskInfo>> future = httpClient.executeAsync(request, responseHandler);
currentRequest = future;
currentRequestStartNanos = System.nanoTime();
// The needsUpdate flag needs to be set to false BEFORE adding the Future callback since callback might change the flag value
// and does so without grabbing the instance lock.
needsUpdate.set(false);
Futures.addCallback(future, new SimpleHttpResponseHandler<>(new UpdateResponseHandler(sources), request.getUri(), stats.getHttpResponseStats(), REMOTE_TASK_ERROR), executor);
}
Aggregations