Search in sources :

Example 1 with TaskStatus

use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.

the class SectionExecutionFactory method createStageScheduler.

/**
 * Builds the {@link StageScheduler} for a stage, choosing the implementation from the
 * stage's partitioning handle and from whether the fragment has any split sources.
 *
 * <ul>
 * <li>{@code SOURCE_DISTRIBUTION}: a source-partitioned scheduler over the single split source,
 * using a dynamic split placement policy.</li>
 * <li>{@code SCALED_WRITER_DISTRIBUTION}: a {@code ScaledWriterScheduler} that is finished once
 * all child stages are done.</li>
 * <li>any other handle with no split sources: a {@code FixedCountScheduler} over the partition nodes.</li>
 * <li>any other handle with split sources: a {@code FixedSourcePartitionedScheduler}, with a task
 * recovery callback registered when the stage uses recoverable grouped execution.</li>
 * </ul>
 *
 * @param splitSourceFactory creates the split sources for the fragment
 * @param session session used for session properties and node selection
 * @param plan sub plan whose fragment is being scheduled
 * @param partitioningCache lookup from partitioning handle to node partition map
 * @param parentStageExecution parent stage, consulted only by the recovery callback
 * @param stageId id the recovery callback expects recovered tasks to belong to
 * @param stageExecution the stage the returned scheduler drives
 * @param partitioningHandle partitioning of the fragment
 * @param tableWriteInfo passed through to split source creation
 * @param childStageExecutions child stages, used only for scaled-writer scheduling
 * @return the scheduler for the stage
 */
private StageScheduler createStageScheduler(SplitSourceFactory splitSourceFactory, Session session, StreamingSubPlan plan, Function<PartitioningHandle, NodePartitionMap> partitioningCache, Optional<SqlStageExecution> parentStageExecution, StageId stageId, SqlStageExecution stageExecution, PartitioningHandle partitioningHandle, TableWriteInfo tableWriteInfo, Set<SqlStageExecution> childStageExecutions) {
    Map<PlanNodeId, SplitSource> splitSources = splitSourceFactory.createSplitSources(plan.getFragment(), session, tableWriteInfo);
    int maxTasksPerStage = getMaxTasksPerStage(session);

    if (partitioningHandle.equals(SOURCE_DISTRIBUTION)) {
        // nodes are selected dynamically based on the constraints of the splits and the system load
        Map.Entry<PlanNodeId, SplitSource> onlySource = getOnlyElement(splitSources.entrySet());
        PlanNodeId sourceNodeId = onlySource.getKey();
        SplitSource source = onlySource.getValue();
        ConnectorId sourceConnectorId = source.getConnectorId();
        // internal system connectors are passed to the node selector as "no connector"
        ConnectorId selectorConnectorId = isInternalSystemConnector(sourceConnectorId) ? null : sourceConnectorId;
        NodeSelector selector = nodeScheduler.createNodeSelector(session, selectorConnectorId, maxTasksPerStage);
        SplitPlacementPolicy dynamicPlacement = new DynamicSplitPlacementPolicy(selector, stageExecution::getAllTasks);
        checkArgument(!plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution());
        return newSourcePartitionedSchedulerAsStageScheduler(stageExecution, sourceNodeId, source, dynamicPlacement, splitBatchSize);
    }

    if (partitioningHandle.equals(SCALED_WRITER_DISTRIBUTION)) {
        // task statuses are looked up lazily each time the scheduler asks for them
        Supplier<Collection<TaskStatus>> sourceTasksProvider = () -> childStageExecutions.stream()
                .map(SqlStageExecution::getAllTasks)
                .flatMap(Collection::stream)
                .map(RemoteTask::getTaskStatus)
                .collect(toList());
        Supplier<Collection<TaskStatus>> writerTasksProvider = () -> stageExecution.getAllTasks().stream()
                .map(RemoteTask::getTaskStatus)
                .collect(toList());
        ScaledWriterScheduler scaledWriterScheduler = new ScaledWriterScheduler(
                stageExecution,
                sourceTasksProvider,
                writerTasksProvider,
                nodeScheduler.createNodeSelector(session, null),
                scheduledExecutor,
                getWriterMinSize(session),
                isOptimizedScaleWriterProducerBuffer(session));
        whenAllStages(childStageExecutions, StageExecutionState::isDone).addListener(scaledWriterScheduler::finish, directExecutor());
        return scaledWriterScheduler;
    }

    if (splitSources.isEmpty()) {
        // all sources are remote
        NodePartitionMap remotePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning());
        List<InternalNode> partitionToNode = remotePartitionMap.getPartitionToNode();
        // todo this should asynchronously wait a standard timeout period before failing
        checkCondition(!partitionToNode.isEmpty(), NO_NODES_AVAILABLE, "No worker nodes available");
        return new FixedCountScheduler(stageExecution, partitionToNode);
    }

    // contains local source
    List<PlanNodeId> schedulingOrder = plan.getFragment().getTableScanSchedulingOrder();
    ConnectorId connectorId = partitioningHandle.getConnectorId().orElseThrow(IllegalStateException::new);
    boolean groupedExecutionForStage = plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution();
    List<ConnectorPartitionHandle> connectorPartitionHandles;
    if (groupedExecutionForStage) {
        connectorPartitionHandles = nodePartitioningManager.listPartitionHandles(session, partitioningHandle);
        checkState(!ImmutableList.of(NOT_PARTITIONED).equals(connectorPartitionHandles));
    } else {
        connectorPartitionHandles = ImmutableList.of(NOT_PARTITIONED);
    }

    BucketNodeMap bucketNodeMap;
    List<InternalNode> stageNodeList;
    boolean onlyReplicatedRemoteSources = plan.getFragment().getRemoteSourceNodes().stream()
            .noneMatch(remoteSource -> remoteSource.getExchangeType() != REPLICATE);
    if (onlyReplicatedRemoteSources) {
        // no non-replicated remote source
        boolean dynamicLifespanSchedule = plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule();
        bucketNodeMap = nodePartitioningManager.getBucketNodeMap(session, partitioningHandle, dynamicLifespanSchedule);
        // verify execution is consistent with planner's decision on dynamic lifespan schedule
        verify(bucketNodeMap.isDynamic() == dynamicLifespanSchedule);
        if (bucketNodeMap.hasInitialMap()) {
            stageNodeList = bucketNodeMap.getBucketToNode().get().stream()
                    .distinct()
                    .collect(toImmutableList());
        } else {
            stageNodeList = new ArrayList<>(nodeScheduler.createNodeSelector(session, connectorId).selectRandomNodes(maxTasksPerStage));
        }
    } else {
        // cannot use dynamic lifespan schedule
        verify(!plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule());
        // remote source requires nodePartitionMap
        NodePartitionMap localPartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning());
        if (groupedExecutionForStage) {
            checkState(connectorPartitionHandles.size() == localPartitionMap.getBucketToPartition().length);
        }
        stageNodeList = localPartitionMap.getPartitionToNode();
        bucketNodeMap = localPartitionMap.asBucketNodeMap();
    }

    FixedSourcePartitionedScheduler fixedScheduler = new FixedSourcePartitionedScheduler(
            stageExecution,
            splitSources,
            plan.getFragment().getStageExecutionDescriptor(),
            schedulingOrder,
            stageNodeList,
            bucketNodeMap,
            splitBatchSize,
            getConcurrentLifespansPerNode(session),
            nodeScheduler.createNodeSelector(session, connectorId),
            connectorPartitionHandles);
    if (plan.getFragment().getStageExecutionDescriptor().isRecoverableGroupedExecution()) {
        // on task recovery: validate the task and parent stage, detach the task from the parent, then recover it
        stageExecution.registerStageTaskRecoveryCallback(taskId -> {
            checkArgument(taskId.getStageExecutionId().getStageId().equals(stageId), "The task did not execute this stage");
            checkArgument(parentStageExecution.isPresent(), "Parent stage execution must exist");
            checkArgument(parentStageExecution.get().getAllTasks().size() == 1, "Parent stage should only have one task for recoverable grouped execution");
            parentStageExecution.get().removeRemoteSourceIfSingleTaskStage(taskId);
            fixedScheduler.recover(taskId);
        });
    }
    return fixedScheduler;
}
Also used : NodeTaskMap(com.facebook.presto.execution.NodeTaskMap) TaskStatus(com.facebook.presto.execution.TaskStatus) ForScheduler(com.facebook.presto.operator.ForScheduler) RemoteSourceNode(com.facebook.presto.sql.planner.plan.RemoteSourceNode) REPLICATE(com.facebook.presto.sql.planner.plan.ExchangeNode.Type.REPLICATE) SplitSourceFactory(com.facebook.presto.sql.planner.SplitSourceFactory) SettableFuture(com.google.common.util.concurrent.SettableFuture) SqlStageExecution(com.facebook.presto.execution.SqlStageExecution) NOT_PARTITIONED(com.facebook.presto.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) SqlStageExecution.createSqlStageExecution(com.facebook.presto.execution.SqlStageExecution.createSqlStageExecution) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Map(java.util.Map) SystemSessionProperties.getConcurrentLifespansPerNode(com.facebook.presto.SystemSessionProperties.getConcurrentLifespansPerNode) SystemSessionProperties.isOptimizedScaleWriterProducerBuffer(com.facebook.presto.SystemSessionProperties.isOptimizedScaleWriterProducerBuffer) QueryManagerConfig(com.facebook.presto.execution.QueryManagerConfig) Collectors.toSet(java.util.stream.Collectors.toSet) SplitSource(com.facebook.presto.split.SplitSource) RemoteTaskFactory(com.facebook.presto.execution.RemoteTaskFactory) ImmutableSet(com.google.common.collect.ImmutableSet) Predicate(java.util.function.Predicate) SystemSessionProperties.getWriterMinSize(com.facebook.presto.SystemSessionProperties.getWriterMinSize) Collection(java.util.Collection) TableWriteInfo.createTableWriteInfo(com.facebook.presto.execution.scheduler.TableWriteInfo.createTableWriteInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) NO_NODES_AVAILABLE(com.facebook.presto.spi.StandardErrorCode.NO_NODES_AVAILABLE) Iterables.getLast(com.google.common.collect.Iterables.getLast) 
NodeSelector(com.facebook.presto.execution.scheduler.nodeSelection.NodeSelector) SOURCE_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.SOURCE_DISTRIBUTION) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(com.facebook.presto.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) Preconditions.checkState(com.google.common.base.Preconditions.checkState) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) List(java.util.List) Optional(java.util.Optional) StageExecutionId(com.facebook.presto.execution.StageExecutionId) ConnectorId(com.facebook.presto.spi.ConnectorId) ConnectorId.isInternalSystemConnector(com.facebook.presto.spi.ConnectorId.isInternalSystemConnector) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) StageId(com.facebook.presto.execution.StageId) OutputBuffers(com.facebook.presto.execution.buffer.OutputBuffers) ConnectorPartitionHandle(com.facebook.presto.spi.connector.ConnectorPartitionHandle) NodePartitionMap(com.facebook.presto.sql.planner.NodePartitionMap) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) HashMap(java.util.HashMap) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) PlanFragmentId(com.facebook.presto.sql.planner.plan.PlanFragmentId) SystemSessionProperties.getMaxTasksPerStage(com.facebook.presto.SystemSessionProperties.getMaxTasksPerStage) StageExecutionState(com.facebook.presto.execution.StageExecutionState) ExecutorService(java.util.concurrent.ExecutorService) Failures.checkCondition(com.facebook.presto.util.Failures.checkCondition) 
NodePartitioningManager(com.facebook.presto.sql.planner.NodePartitioningManager) Session(com.facebook.presto.Session) Sets.newConcurrentHashSet(com.google.common.collect.Sets.newConcurrentHashSet) SCALED_WRITER_DISTRIBUTION(com.facebook.presto.sql.planner.SystemPartitioningHandle.SCALED_WRITER_DISTRIBUTION) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) PlanNodeSearcher(com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher) InternalNode(com.facebook.presto.metadata.InternalNode) PlanNode(com.facebook.presto.spi.plan.PlanNode) Collectors.toList(java.util.stream.Collectors.toList) RemoteTask(com.facebook.presto.execution.RemoteTask) FailureDetector(com.facebook.presto.failureDetector.FailureDetector) TableScanNode(com.facebook.presto.spi.plan.TableScanNode) PartitioningHandle(com.facebook.presto.sql.planner.PartitioningHandle) ForQueryExecution(com.facebook.presto.execution.ForQueryExecution) Metadata(com.facebook.presto.metadata.Metadata) NodePartitionMap(com.facebook.presto.sql.planner.NodePartitionMap) ArrayList(java.util.ArrayList) RemoteTask(com.facebook.presto.execution.RemoteTask) TaskStatus(com.facebook.presto.execution.TaskStatus) SqlStageExecution(com.facebook.presto.execution.SqlStageExecution) SqlStageExecution.createSqlStageExecution(com.facebook.presto.execution.SqlStageExecution.createSqlStageExecution) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) StageExecutionState(com.facebook.presto.execution.StageExecutionState) Supplier(java.util.function.Supplier) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) NodeSelector(com.facebook.presto.execution.scheduler.nodeSelection.NodeSelector) SplitSource(com.facebook.presto.split.SplitSource) NodeTaskMap(com.facebook.presto.execution.NodeTaskMap) Map(java.util.Map) 
NodePartitionMap(com.facebook.presto.sql.planner.NodePartitionMap) HashMap(java.util.HashMap) ConnectorId(com.facebook.presto.spi.ConnectorId)

Example 2 with TaskStatus

use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.

the class TaskInfoFetcher method updateTaskInfo.

/**
 * Offers a new task info value to the stored {@code taskInfo}.
 *
 * <p>The value is accepted only when the currently stored task is not in a done state and the
 * new status version is not older than the stored one. When an accepted value is itself in a
 * done state, it is recorded as the final task info (if none has been recorded yet) and
 * {@code stop()} is called.
 *
 * @param newValue the candidate task info
 */
synchronized void updateTaskInfo(TaskInfo newValue) {
    boolean accepted = taskInfo.setIf(newValue, current -> {
        TaskStatus currentStatus = current.getTaskStatus();
        TaskStatus candidateStatus = newValue.getTaskStatus();
        // a terminal state is never overwritten; otherwise only the same or a newer version is taken
        return !currentStatus.getState().isDone()
                && candidateStatus.getVersion() >= currentStatus.getVersion();
    });
    if (!accepted || !newValue.getTaskStatus().getState().isDone()) {
        return;
    }
    finalTaskInfo.compareAndSet(Optional.empty(), Optional.of(newValue));
    stop();
}
Also used : TaskStatus(com.facebook.presto.execution.TaskStatus)

Example 3 with TaskStatus

use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.

the class TaskInfoFetcher method sendMetadataUpdates.

/**
 * Posts the given metadata update results to the task's {@code metadataresults} endpoint.
 *
 * <p>Nothing is sent when the current task info is already done (in which case {@code stop()}
 * is called) or when a previous metadata update request has not completed yet.
 *
 * @param results the metadata updates to serialize and send
 */
private synchronized void sendMetadataUpdates(MetadataUpdates results) {
    TaskStatus currentStatus = getTaskInfo().getTaskStatus();

    // we already have the final task info
    if (isDone(getTaskInfo())) {
        stop();
        return;
    }

    // outstanding request? (this should never happen)
    boolean requestOutstanding = metadataUpdateFuture != null && !metadataUpdateFuture.isDone();
    if (requestOutstanding) {
        return;
    }

    byte[] payload = metadataUpdatesCodec.toBytes(results);
    Request metadataRequest = setContentTypeHeaders(isBinaryTransportEnabled, preparePost())
            .setUri(uriBuilderFrom(currentStatus.getSelf()).appendPath("metadataresults").build())
            .setBodyGenerator(createStaticBodyGenerator(payload))
            .build();

    errorTracker.startRequest();
    metadataUpdateFuture = httpClient.executeAsync(metadataRequest, new ResponseHandler<Response, RuntimeException>() {

        @Override
        public Response handleException(Request failedRequest, Exception cause) {
            throw propagate(failedRequest, cause);
        }

        @Override
        public Response handle(Request handledRequest, Response received) {
            // the raw response is handed back unchanged
            return received;
        }
    });
    currentRequestStartNanos.set(System.nanoTime());
}
Also used : Response(com.facebook.airlift.http.client.Response) BaseResponse(com.facebook.presto.server.smile.BaseResponse) SimpleHttpResponseHandler(com.facebook.presto.server.SimpleHttpResponseHandler) FullSmileResponseHandler.createFullSmileResponseHandler(com.facebook.presto.server.smile.FullSmileResponseHandler.createFullSmileResponseHandler) ResponseHandler(com.facebook.airlift.http.client.ResponseHandler) AdaptingJsonResponseHandler.createAdaptingJsonResponseHandler(com.facebook.presto.server.smile.AdaptingJsonResponseHandler.createAdaptingJsonResponseHandler) Request(com.facebook.airlift.http.client.Request) TaskStatus(com.facebook.presto.execution.TaskStatus)

Example 4 with TaskStatus

use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.

the class HttpRemoteTask method getQueuedPartitionedSplitsInfo.

/**
 * Returns the queued partitioned splits for this task: the unacknowledged splits plus the
 * queued partitioned drivers and split weight reported in the current task status.
 * A task whose state is done reports zero splits.
 */
@Override
public PartitionedSplitsInfo getQueuedPartitionedSplitsInfo() {
    TaskStatus status = getTaskStatus();
    if (status.getState().isDone()) {
        return PartitionedSplitsInfo.forZeroSplits();
    }
    PartitionedSplitsInfo unacknowledged = getUnacknowledgedPartitionedSplitsInfo();
    return PartitionedSplitsInfo.forSplitCountAndWeightSum(
            unacknowledged.getCount() + status.getQueuedPartitionedDrivers(),
            unacknowledged.getWeightSum() + status.getQueuedPartitionedSplitsWeight());
}
Also used : PartitionedSplitsInfo(com.facebook.presto.execution.PartitionedSplitsInfo) TaskStatus(com.facebook.presto.execution.TaskStatus)

Example 5 with TaskStatus

use of com.facebook.presto.execution.TaskStatus in project presto by prestodb.

the class HttpRemoteTask method sendUpdate.

/**
 * Serializes the pending task update and posts it asynchronously to the remote task.
 *
 * <p>The call is a no-op when the task has not been started, no update is needed, the task is
 * already done, or a previous update request is still running. When the error tracker is
 * throttling requests, the method re-schedules itself to run once the permit becomes available.
 *
 * <p>If the serialized update exceeds {@code maxTaskUpdateSizeInBytes}, the task is failed with
 * {@code EXCEEDED_TASK_UPDATE_SIZE_LIMIT} and the oversized request is not sent.
 */
private synchronized void sendUpdate() {
    TaskStatus taskStatus = getTaskStatus();
    // don't update if the task hasn't been started yet or if it is already finished
    if (!started.get() || !needsUpdate.get() || taskStatus.getState().isDone()) {
        return;
    }
    // if there is a request already running, wait for it to complete
    if (this.currentRequest != null && !this.currentRequest.isDone()) {
        return;
    }
    // if throttled due to error, asynchronously wait for timeout and try again
    ListenableFuture<?> errorRateLimit = updateErrorTracker.acquireRequestPermit();
    if (!errorRateLimit.isDone()) {
        errorRateLimit.addListener(this::sendUpdate, executor);
        return;
    }
    List<TaskSource> sources = getSources();
    // read the flag once so the plan fragment and the table write info are always sent (or omitted) together
    boolean includePlan = sendPlan.get();
    Optional<byte[]> fragment = includePlan ? Optional.of(planFragment.toBytes(planFragmentCodec)) : Optional.empty();
    Optional<TableWriteInfo> writeInfo = includePlan ? Optional.of(tableWriteInfo) : Optional.empty();
    TaskUpdateRequest updateRequest = new TaskUpdateRequest(session.toSessionRepresentation(), session.getIdentity().getExtraCredentials(), fragment, sources, outputBuffers.get(), writeInfo);
    byte[] taskUpdateRequestJson = taskUpdateRequestCodec.toBytes(updateRequest);
    taskUpdateRequestSize.add(taskUpdateRequestJson.length);
    if (taskUpdateRequestJson.length > maxTaskUpdateSizeInBytes) {
        failTask(new PrestoException(EXCEEDED_TASK_UPDATE_SIZE_LIMIT, format("TaskUpdate size of %d Bytes has exceeded the limit of %d Bytes", taskUpdateRequestJson.length, maxTaskUpdateSizeInBytes)));
        // the task has just been failed for exceeding the limit: do not post the oversized request
        return;
    }
    if (fragment.isPresent()) {
        stats.updateWithPlanSize(taskUpdateRequestJson.length);
    } else {
        if (ThreadLocalRandom.current().nextDouble() < UPDATE_WITHOUT_PLAN_STATS_SAMPLE_RATE) {
            // This is to keep track of the task update size even when the plan fragment is NOT present
            stats.updateWithoutPlanSize(taskUpdateRequestJson.length);
        }
    }
    HttpUriBuilder uriBuilder = getHttpUriBuilder(taskStatus);
    Request request = setContentTypeHeaders(binaryTransportEnabled, preparePost()).setUri(uriBuilder.build()).setBodyGenerator(createStaticBodyGenerator(taskUpdateRequestJson)).build();
    // the response codec depends on the transport: SMILE when binary transport is enabled, JSON otherwise
    ResponseHandler responseHandler;
    if (binaryTransportEnabled) {
        responseHandler = createFullSmileResponseHandler((SmileCodec<TaskInfo>) taskInfoCodec);
    } else {
        responseHandler = createAdaptingJsonResponseHandler((JsonCodec<TaskInfo>) taskInfoCodec);
    }
    updateErrorTracker.startRequest();
    ListenableFuture<BaseResponse<TaskInfo>> future = httpClient.executeAsync(request, responseHandler);
    currentRequest = future;
    currentRequestStartNanos = System.nanoTime();
    // The needsUpdate flag needs to be set to false BEFORE adding the Future callback since callback might change the flag value
    // and does so without grabbing the instance lock.
    needsUpdate.set(false);
    Futures.addCallback(future, new SimpleHttpResponseHandler<>(new UpdateResponseHandler(sources), request.getUri(), stats.getHttpResponseStats(), REMOTE_TASK_ERROR), executor);
}
Also used : SmileCodec(com.facebook.airlift.json.smile.SmileCodec) TableWriteInfo(com.facebook.presto.execution.scheduler.TableWriteInfo) SimpleHttpResponseHandler(com.facebook.presto.server.SimpleHttpResponseHandler) StatusResponseHandler.createStatusResponseHandler(com.facebook.airlift.http.client.StatusResponseHandler.createStatusResponseHandler) FullSmileResponseHandler.createFullSmileResponseHandler(com.facebook.presto.server.smile.FullSmileResponseHandler.createFullSmileResponseHandler) ResponseHandler(com.facebook.airlift.http.client.ResponseHandler) AdaptingJsonResponseHandler.createAdaptingJsonResponseHandler(com.facebook.presto.server.smile.AdaptingJsonResponseHandler.createAdaptingJsonResponseHandler) TaskUpdateRequest(com.facebook.presto.server.TaskUpdateRequest) TaskUpdateRequest(com.facebook.presto.server.TaskUpdateRequest) Request(com.facebook.airlift.http.client.Request) PrestoException(com.facebook.presto.spi.PrestoException) TaskStatus(com.facebook.presto.execution.TaskStatus) BaseResponse(com.facebook.presto.server.smile.BaseResponse) JsonCodec(com.facebook.airlift.json.JsonCodec) HttpUriBuilder(com.facebook.airlift.http.client.HttpUriBuilder) TaskSource(com.facebook.presto.execution.TaskSource)

Aggregations

TaskStatus (com.facebook.presto.execution.TaskStatus)17 TaskInfo (com.facebook.presto.execution.TaskInfo)6 Request (com.facebook.airlift.http.client.Request)5 ResponseHandler (com.facebook.airlift.http.client.ResponseHandler)4 SimpleHttpResponseHandler (com.facebook.presto.server.SimpleHttpResponseHandler)4 TaskUpdateRequest (com.facebook.presto.server.TaskUpdateRequest)4 AdaptingJsonResponseHandler.createAdaptingJsonResponseHandler (com.facebook.presto.server.smile.AdaptingJsonResponseHandler.createAdaptingJsonResponseHandler)4 FullSmileResponseHandler.createFullSmileResponseHandler (com.facebook.presto.server.smile.FullSmileResponseHandler.createFullSmileResponseHandler)4 HttpUriBuilder (com.facebook.airlift.http.client.HttpUriBuilder)3 QueryManagerConfig (com.facebook.presto.execution.QueryManagerConfig)3 JsonCodec (com.facebook.airlift.json.JsonCodec)2 SmileCodec (com.facebook.airlift.json.smile.SmileCodec)2 NodeTaskMap (com.facebook.presto.execution.NodeTaskMap)2 MetadataUpdates (com.facebook.presto.metadata.MetadataUpdates)2 TaskStats (com.facebook.presto.operator.TaskStats)2 BaseResponse (com.facebook.presto.server.smile.BaseResponse)2 URI (java.net.URI)2 Bootstrap (com.facebook.airlift.bootstrap.Bootstrap)1 SetThreadName (com.facebook.airlift.concurrent.SetThreadName)1 ConfigBinder.configBinder (com.facebook.airlift.configuration.ConfigBinder.configBinder)1