Search in sources :

Example 1 with JobStatus

use of com.hazelcast.jet.core.JobStatus in project hazelcast-jet by hazelcast.

the class MasterContext method setFinalResult.

void setFinalResult(Throwable failure) {
    JobStatus status = isSuccess(failure) ? COMPLETED : FAILED;
    jobStatus.set(status);
    if (failure == null) {
        completionFuture.internalComplete();
    } else {
        completionFuture.internalCompleteExceptionally(failure);
    }
}
Also used : JobStatus(com.hazelcast.jet.core.JobStatus)

Example 2 with JobStatus

use of com.hazelcast.jet.core.JobStatus in project hazelcast-jet by hazelcast.

the class MasterContext method invokeCompleteExecution.

private void invokeCompleteExecution(Throwable error) {
    JobStatus status = jobStatus();
    Throwable finalError;
    if (status == STARTING || status == RESTARTING || status == RUNNING) {
        logger.fine("Completing " + jobIdString());
        finalError = error;
    } else {
        if (error != null) {
            logger.severe("Cannot properly complete failed " + jobIdString() + ": status is " + status, error);
        } else {
            logger.severe("Cannot properly complete " + jobIdString() + ": status is " + status);
        }
        finalError = new IllegalStateException("Job coordination failed.");
    }
    Function<ExecutionPlan, Operation> operationCtor = plan -> new CompleteExecutionOperation(executionId, finalError);
    invoke(operationCtor, responses -> finalizeJob(error), null);
}
Also used : JobStatus(com.hazelcast.jet.core.JobStatus) NO_SNAPSHOT(com.hazelcast.jet.impl.execution.SnapshotContext.NO_SNAPSHOT) SnapshotRepository.snapshotDataMapName(com.hazelcast.jet.impl.SnapshotRepository.snapshotDataMapName) NonCompletableFuture(com.hazelcast.jet.impl.util.NonCompletableFuture) Address(com.hazelcast.nio.Address) Util.jobAndExecutionId(com.hazelcast.jet.impl.util.Util.jobAndExecutionId) Processors.mapP(com.hazelcast.jet.core.processor.Processors.mapP) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) CompletionToken(com.hazelcast.jet.impl.util.CompletionToken) Util.idToString(com.hazelcast.jet.impl.util.Util.idToString) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) DAG(com.hazelcast.jet.core.DAG) JobStatus(com.hazelcast.jet.core.JobStatus) ExceptionUtil(com.hazelcast.jet.impl.util.ExceptionUtil) ExecutionService(com.hazelcast.spi.ExecutionService) Operation(com.hazelcast.spi.Operation) CancellationException(java.util.concurrent.CancellationException) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Util.getJetInstance(com.hazelcast.jet.impl.util.Util.getJetInstance) JobConfig(com.hazelcast.jet.config.JobConfig) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) Collectors(java.util.stream.Collectors) BroadcastKey(com.hazelcast.jet.core.BroadcastKey) List(java.util.List) ExecutionCallback(com.hazelcast.core.ExecutionCallback) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) Entry(java.util.Map.Entry) CancelExecutionOperation(com.hazelcast.jet.impl.operation.CancelExecutionOperation) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) InternalCompletableFuture(com.hazelcast.spi.InternalCompletableFuture) ExecutionPlanBuilder.createExecutionPlans(com.hazelcast.jet.impl.execution.init.ExecutionPlanBuilder.createExecutionPlans) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) ILogger(com.hazelcast.logging.ILogger) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) NOT_STARTED(com.hazelcast.jet.core.JobStatus.NOT_STARTED) MembersView(com.hazelcast.internal.cluster.impl.MembersView) DistributedFunction(com.hazelcast.jet.function.DistributedFunction) Edge(com.hazelcast.jet.core.Edge) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) Nullable(javax.annotation.Nullable) NodeEngineImpl(com.hazelcast.spi.impl.NodeEngineImpl) RESTARTING(com.hazelcast.jet.core.JobStatus.RESTARTING) BroadcastEntry(com.hazelcast.jet.impl.execution.BroadcastEntry) ExceptionUtil.isTopologicalFailure(com.hazelcast.jet.impl.util.ExceptionUtil.isTopologicalFailure) DistributedFunctions.entryKey(com.hazelcast.jet.function.DistributedFunctions.entryKey) Consumer(java.util.function.Consumer) Vertex(com.hazelcast.jet.core.Vertex) Collectors.toList(java.util.stream.Collectors.toList) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader) CompleteExecutionOperation(com.hazelcast.jet.impl.operation.CompleteExecutionOperation) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) ProcessingGuarantee(com.hazelcast.jet.config.ProcessingGuarantee) JobRestartRequestedException(com.hazelcast.jet.impl.exception.JobRestartRequestedException) SnapshotOperation(com.hazelcast.jet.impl.operation.SnapshotOperation) Edge.between(com.hazelcast.jet.core.Edge.between) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) Operation(com.hazelcast.spi.Operation) CancelExecutionOperation(com.hazelcast.jet.impl.operation.CancelExecutionOperation) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) CompleteExecutionOperation(com.hazelcast.jet.impl.operation.CompleteExecutionOperation) SnapshotOperation(com.hazelcast.jet.impl.operation.SnapshotOperation) CompleteExecutionOperation(com.hazelcast.jet.impl.operation.CompleteExecutionOperation)

Example 3 with JobStatus

use of com.hazelcast.jet.core.JobStatus in project hazelcast-jet by hazelcast.

the class MasterContext method tryStartJob.

/**
 * Starts execution of the job if it is not already completed, cancelled or failed.
 * If the job is already cancelled, the job completion procedure is triggered.
 * If the job quorum is not satisfied, job restart is rescheduled.
 * If there was a membership change and the partition table is not completely
 * fixed yet, job restart is rescheduled.
 */
void tryStartJob(Function<Long, Long> executionIdSupplier) {
    if (!setJobStatusToStarting()) {
        return;
    }
    if (scheduleRestartIfQuorumAbsent() || scheduleRestartIfClusterIsNotSafe()) {
        return;
    }
    DAG dag;
    try {
        dag = deserializeDAG();
    } catch (Exception e) {
        logger.warning("DAG deserialization failed", e);
        finalizeJob(e);
        return;
    }
    // save a copy of the vertex list, because it is going to change
    vertices = new HashSet<>();
    dag.iterator().forEachRemaining(vertices::add);
    executionId = executionIdSupplier.apply(jobId);
    // last started snapshot complete or not complete. The next started snapshot must be greater than this number
    long lastSnapshotId = NO_SNAPSHOT;
    if (isSnapshottingEnabled()) {
        Long snapshotIdToRestore = snapshotRepository.latestCompleteSnapshot(jobId);
        snapshotRepository.deleteAllSnapshotsExceptOne(jobId, snapshotIdToRestore);
        Long lastStartedSnapshot = snapshotRepository.latestStartedSnapshot(jobId);
        if (snapshotIdToRestore != null) {
            logger.info("State of " + jobIdString() + " will be restored from snapshot " + snapshotIdToRestore);
            rewriteDagWithSnapshotRestore(dag, snapshotIdToRestore);
        } else {
            logger.info("No previous snapshot for " + jobIdString() + " found.");
        }
        if (lastStartedSnapshot != null) {
            lastSnapshotId = lastStartedSnapshot;
        }
    }
    MembersView membersView = getMembersView();
    ClassLoader previousCL = swapContextClassLoader(coordinationService.getClassLoader(jobId));
    try {
        int defaultLocalParallelism = getJetInstance(nodeEngine).getConfig().getInstanceConfig().getCooperativeThreadCount();
        logger.info("Start executing " + jobIdString() + ", status " + jobStatus() + "\n" + dag.toString(defaultLocalParallelism));
        logger.fine("Building execution plan for " + jobIdString());
        executionPlanMap = createExecutionPlans(nodeEngine, membersView, dag, getJobConfig(), lastSnapshotId);
    } catch (Exception e) {
        logger.severe("Exception creating execution plan for " + jobIdString(), e);
        finalizeJob(e);
        return;
    } finally {
        Thread.currentThread().setContextClassLoader(previousCL);
    }
    logger.fine("Built execution plans for " + jobIdString());
    Set<MemberInfo> participants = executionPlanMap.keySet();
    Function<ExecutionPlan, Operation> operationCtor = plan -> new InitExecutionOperation(jobId, executionId, membersView.getVersion(), participants, nodeEngine.getSerializationService().toData(plan));
    invoke(operationCtor, this::onInitStepCompleted, null);
}
Also used : NO_SNAPSHOT(com.hazelcast.jet.impl.execution.SnapshotContext.NO_SNAPSHOT) SnapshotRepository.snapshotDataMapName(com.hazelcast.jet.impl.SnapshotRepository.snapshotDataMapName) NonCompletableFuture(com.hazelcast.jet.impl.util.NonCompletableFuture) Address(com.hazelcast.nio.Address) Util.jobAndExecutionId(com.hazelcast.jet.impl.util.Util.jobAndExecutionId) Processors.mapP(com.hazelcast.jet.core.processor.Processors.mapP) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) CompletionToken(com.hazelcast.jet.impl.util.CompletionToken) Util.idToString(com.hazelcast.jet.impl.util.Util.idToString) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) DAG(com.hazelcast.jet.core.DAG) JobStatus(com.hazelcast.jet.core.JobStatus) ExceptionUtil(com.hazelcast.jet.impl.util.ExceptionUtil) ExecutionService(com.hazelcast.spi.ExecutionService) Operation(com.hazelcast.spi.Operation) CancellationException(java.util.concurrent.CancellationException) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Util.getJetInstance(com.hazelcast.jet.impl.util.Util.getJetInstance) JobConfig(com.hazelcast.jet.config.JobConfig) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) Collectors(java.util.stream.Collectors) BroadcastKey(com.hazelcast.jet.core.BroadcastKey) List(java.util.List) ExecutionCallback(com.hazelcast.core.ExecutionCallback) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) Entry(java.util.Map.Entry) CancelExecutionOperation(com.hazelcast.jet.impl.operation.CancelExecutionOperation) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) InternalCompletableFuture(com.hazelcast.spi.InternalCompletableFuture) ExecutionPlanBuilder.createExecutionPlans(com.hazelcast.jet.impl.execution.init.ExecutionPlanBuilder.createExecutionPlans) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) ILogger(com.hazelcast.logging.ILogger) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) NOT_STARTED(com.hazelcast.jet.core.JobStatus.NOT_STARTED) MembersView(com.hazelcast.internal.cluster.impl.MembersView) DistributedFunction(com.hazelcast.jet.function.DistributedFunction) Edge(com.hazelcast.jet.core.Edge) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) Nullable(javax.annotation.Nullable) NodeEngineImpl(com.hazelcast.spi.impl.NodeEngineImpl) RESTARTING(com.hazelcast.jet.core.JobStatus.RESTARTING) BroadcastEntry(com.hazelcast.jet.impl.execution.BroadcastEntry) ExceptionUtil.isTopologicalFailure(com.hazelcast.jet.impl.util.ExceptionUtil.isTopologicalFailure) DistributedFunctions.entryKey(com.hazelcast.jet.function.DistributedFunctions.entryKey) Consumer(java.util.function.Consumer) Vertex(com.hazelcast.jet.core.Vertex) Collectors.toList(java.util.stream.Collectors.toList) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader) CompleteExecutionOperation(com.hazelcast.jet.impl.operation.CompleteExecutionOperation) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) ProcessingGuarantee(com.hazelcast.jet.config.ProcessingGuarantee) JobRestartRequestedException(com.hazelcast.jet.impl.exception.JobRestartRequestedException) SnapshotOperation(com.hazelcast.jet.impl.operation.SnapshotOperation) Edge.between(com.hazelcast.jet.core.Edge.between) MembersView(com.hazelcast.internal.cluster.impl.MembersView) DAG(com.hazelcast.jet.core.DAG) Operation(com.hazelcast.spi.Operation) CancelExecutionOperation(com.hazelcast.jet.impl.operation.CancelExecutionOperation) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) CompleteExecutionOperation(com.hazelcast.jet.impl.operation.CompleteExecutionOperation) SnapshotOperation(com.hazelcast.jet.impl.operation.SnapshotOperation) CancellationException(java.util.concurrent.CancellationException) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) JobRestartRequestedException(com.hazelcast.jet.impl.exception.JobRestartRequestedException) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader)

Example 4 with JobStatus

use of com.hazelcast.jet.core.JobStatus in project hazelcast-jet by hazelcast.

the class JobCoordinationService method getJobStatus.

/**
 * Returns the job status or fails with {@link JobNotFoundException}
 * if the requested job is not found
 */
public JobStatus getJobStatus(long jobId) {
    if (!isMaster()) {
        throw new JetException("Cannot query status of Job " + idToString(jobId) + ". Master address: " + nodeEngine.getClusterService().getMasterAddress());
    }
    // first check if there is a job result present.
    // this map is updated first during completion.
    JobResult jobResult = jobRepository.getJobResult(jobId);
    if (jobResult != null) {
        return jobResult.getJobStatus();
    }
    // check if there a master context for running job
    MasterContext currentMasterContext = masterContexts.get(jobId);
    if (currentMasterContext != null) {
        JobStatus jobStatus = currentMasterContext.jobStatus();
        if (jobStatus == JobStatus.RUNNING) {
            return currentMasterContext.isCancelled() ? JobStatus.COMPLETING : JobStatus.RUNNING;
        }
        return jobStatus;
    }
    // no master context found, job might be just submitted
    JobRecord jobRecord = jobRepository.getJobRecord(jobId);
    if (jobRecord == null) {
        // no job record found, but check job results again
        // since job might have been completed meanwhile.
        jobResult = jobRepository.getJobResult(jobId);
        if (jobResult != null) {
            return jobResult.getJobStatus();
        }
        throw new JobNotFoundException(jobId);
    } else {
        return NOT_STARTED;
    }
}
Also used : JobStatus(com.hazelcast.jet.core.JobStatus) JobNotFoundException(com.hazelcast.jet.core.JobNotFoundException) JetException(com.hazelcast.jet.JetException)

Example 5 with JobStatus

use of com.hazelcast.jet.core.JobStatus in project hazelcast by hazelcast.

the class JobCoordinationService method getJobStatus.

/**
 * Returns the job status or fails with {@link JobNotFoundException}
 * if the requested job is not found.
 */
public CompletableFuture<JobStatus> getJobStatus(long jobId) {
    return callWithJob(jobId, mc -> {
        // When the job finishes running, we write NOT_RUNNING to jobStatus first and then
        // write null to requestedTerminationMode (see MasterJobContext.finalizeJob()). We
        // have to read them in the opposite order.
        TerminationMode terminationMode = mc.jobContext().requestedTerminationMode();
        JobStatus jobStatus = mc.jobStatus();
        return jobStatus == RUNNING && terminationMode != null ? COMPLETING : jobStatus;
    }, JobResult::getJobStatus, jobRecord -> NOT_RUNNING, jobExecutionRecord -> jobExecutionRecord.isSuspended() ? SUSPENDED : NOT_RUNNING);
}
Also used : JobStatus(com.hazelcast.jet.core.JobStatus)

Aggregations

JobStatus (com.hazelcast.jet.core.JobStatus)16 CancellationException (java.util.concurrent.CancellationException)7 COMPLETED (com.hazelcast.jet.core.JobStatus.COMPLETED)5 List (java.util.List)5 CompletableFuture (java.util.concurrent.CompletableFuture)5 JobConfig (com.hazelcast.jet.config.JobConfig)4 DAG (com.hazelcast.jet.core.DAG)4 Edge.between (com.hazelcast.jet.core.Edge.between)4 FAILED (com.hazelcast.jet.core.JobStatus.FAILED)4 TopologyChangedException (com.hazelcast.jet.core.TopologyChangedException)4 MemberInfo (com.hazelcast.internal.cluster.MemberInfo)3 MembersView (com.hazelcast.internal.cluster.impl.MembersView)3 Util.idToString (com.hazelcast.jet.Util.idToString)3 ProcessingGuarantee (com.hazelcast.jet.config.ProcessingGuarantee)3 Edge (com.hazelcast.jet.core.Edge)3 RUNNING (com.hazelcast.jet.core.JobStatus.RUNNING)3 Vertex (com.hazelcast.jet.core.Vertex)3 Function (java.util.function.Function)3 ExecutionCallback (com.hazelcast.core.ExecutionCallback)2 LocalMemberResetException (com.hazelcast.core.LocalMemberResetException)2