Search in sources :

Example 1 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.

In the class MasterContext, method getExecuteResult:

/**
 * Derives the overall outcome of an execution from the responses collected from the members.
 * <ul>
 * <li>Returns {@code null} if there is no failure.
 * <li>Returns {@code CancellationException} if the job is cancelled.
 * <li>Returns {@code JobRestartRequestedException} if the job is not cancelled
 *   but {@code isRestartRequested} is set.
 * <li>If there is at least one non-restartable failure, such as an exception in user code,
 *   then returns that failure (after passing it through {@code ExceptionUtil.peel}).
 * <li>Otherwise, the failure is because a job participant has left the cluster.
 *   In that case, {@code TopologyChangedException} is returned so that the job will be restarted.
 * </ul>
 *
 * @param responses          the response received for each member; the entries grouped as
 *                           failures are expected to carry a {@code Throwable} as their value
 * @param isRestartRequested whether a restart of the current execution has been requested
 * @return {@code null} on success, otherwise the {@code Throwable} describing the outcome
 */
private Throwable getExecuteResult(Map<MemberInfo, Object> responses, boolean isRestartRequested) {
    if (cancellationToken.isCompleted()) {
        logger.fine(jobIdString() + " to be cancelled after execute");
        return new CancellationException();
    } else if (isRestartRequested) {
        return new JobRestartRequestedException();
    }
    Map<Boolean, List<Entry<MemberInfo, Object>>> grouped = groupResponses(responses);
    // Only the number of successful responses matters here, so there is no need to
    // materialize the list of successful members.
    int successfulCount = grouped.get(false).size();
    if (successfulCount == executionPlanMap.size()) {
        logger.fine("Execute of " + jobIdString() + " is successful.");
        return null;
    }
    List<Entry<MemberInfo, Object>> failures = grouped.get(true);
    logger.fine("Execute of " + jobIdString() + " has failures: " + failures);
    // Report the first failure that is neither a cancellation nor a topological failure.
    // If there is no such failure, a job participant has left the cluster; in that case
    // all remaining participants return a CancellationException, and the result is a
    // TopologyChangedException. It is created lazily, only when it is actually returned.
    return failures.stream()
                   .map(e -> (Throwable) e.getValue())
                   .filter(t -> !(t instanceof CancellationException || isTopologicalFailure(t)))
                   .findFirst()
                   .map(ExceptionUtil::peel)
                   .orElseGet(TopologyChangedException::new);
}
Also used : NO_SNAPSHOT(com.hazelcast.jet.impl.execution.SnapshotContext.NO_SNAPSHOT) SnapshotRepository.snapshotDataMapName(com.hazelcast.jet.impl.SnapshotRepository.snapshotDataMapName) NonCompletableFuture(com.hazelcast.jet.impl.util.NonCompletableFuture) Address(com.hazelcast.nio.Address) Util.jobAndExecutionId(com.hazelcast.jet.impl.util.Util.jobAndExecutionId) Processors.mapP(com.hazelcast.jet.core.processor.Processors.mapP) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) CompletionToken(com.hazelcast.jet.impl.util.CompletionToken) Util.idToString(com.hazelcast.jet.impl.util.Util.idToString) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) DAG(com.hazelcast.jet.core.DAG) JobStatus(com.hazelcast.jet.core.JobStatus) ExceptionUtil(com.hazelcast.jet.impl.util.ExceptionUtil) ExecutionService(com.hazelcast.spi.ExecutionService) Operation(com.hazelcast.spi.Operation) CancellationException(java.util.concurrent.CancellationException) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Util.getJetInstance(com.hazelcast.jet.impl.util.Util.getJetInstance) JobConfig(com.hazelcast.jet.config.JobConfig) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) Collectors(java.util.stream.Collectors) BroadcastKey(com.hazelcast.jet.core.BroadcastKey) List(java.util.List) ExecutionCallback(com.hazelcast.core.ExecutionCallback) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) Entry(java.util.Map.Entry) CancelExecutionOperation(com.hazelcast.jet.impl.operation.CancelExecutionOperation) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) InternalCompletableFuture(com.hazelcast.spi.InternalCompletableFuture) 
ExecutionPlanBuilder.createExecutionPlans(com.hazelcast.jet.impl.execution.init.ExecutionPlanBuilder.createExecutionPlans) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) ILogger(com.hazelcast.logging.ILogger) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) NOT_STARTED(com.hazelcast.jet.core.JobStatus.NOT_STARTED) MembersView(com.hazelcast.internal.cluster.impl.MembersView) DistributedFunction(com.hazelcast.jet.function.DistributedFunction) Edge(com.hazelcast.jet.core.Edge) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) Nullable(javax.annotation.Nullable) NodeEngineImpl(com.hazelcast.spi.impl.NodeEngineImpl) RESTARTING(com.hazelcast.jet.core.JobStatus.RESTARTING) BroadcastEntry(com.hazelcast.jet.impl.execution.BroadcastEntry) ExceptionUtil.isTopologicalFailure(com.hazelcast.jet.impl.util.ExceptionUtil.isTopologicalFailure) DistributedFunctions.entryKey(com.hazelcast.jet.function.DistributedFunctions.entryKey) Consumer(java.util.function.Consumer) Vertex(com.hazelcast.jet.core.Vertex) Collectors.toList(java.util.stream.Collectors.toList) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader) CompleteExecutionOperation(com.hazelcast.jet.impl.operation.CompleteExecutionOperation) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) 
ProcessingGuarantee(com.hazelcast.jet.config.ProcessingGuarantee) JobRestartRequestedException(com.hazelcast.jet.impl.exception.JobRestartRequestedException) SnapshotOperation(com.hazelcast.jet.impl.operation.SnapshotOperation) Edge.between(com.hazelcast.jet.core.Edge.between) Entry(java.util.Map.Entry) BroadcastEntry(com.hazelcast.jet.impl.execution.BroadcastEntry) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) CancellationException(java.util.concurrent.CancellationException) JobRestartRequestedException(com.hazelcast.jet.impl.exception.JobRestartRequestedException) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Example 2 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.

In the class JobCoordinationService, method completeMasterContextIfJobAlreadyCompleted:

// Completes the given master context without running the job when its outcome is already
// decided: either a job result is already stored for the job, or auto-restart on member
// failure is disabled and at least one execution id has been recorded for the job.
// In the second case the job is completed with a TopologyChangedException.
// Returns the outcome of removing the context from masterContexts, or false if the
// context was left untouched.
private boolean completeMasterContextIfJobAlreadyCompleted(MasterContext masterContext) {
    final long jobId = masterContext.getJobId();

    final JobResult storedResult = jobRepository.getJobResult(jobId);
    if (storedResult != null) {
        logger.fine("Completing master context " + idToString(jobId)
                + " since already completed with result: " + storedResult);
        masterContext.setFinalResult(storedResult.getFailure());
        return masterContexts.remove(jobId, masterContext);
    }

    // Nothing to do when the job may still be (re)started: auto-restart is enabled,
    // or no execution id has been recorded for the job.
    if (masterContext.getJobConfig().isAutoRestartOnMemberFailureEnabled()
            || jobRepository.getExecutionIdCount(jobId) <= 0) {
        return false;
    }

    final String coordinatorUuid = nodeEngine.getNode().getThisUuid();
    final Throwable failure = new TopologyChangedException();
    logger.info("Completing Job " + idToString(jobId) + " with " + failure
            + " since auto-restart is disabled and the job has been executed before");
    jobRepository.completeJob(jobId, coordinatorUuid, System.currentTimeMillis(), failure);
    masterContext.setFinalResult(failure);
    return masterContexts.remove(jobId, masterContext);
}
Also used : Util.idToString(com.hazelcast.jet.impl.util.Util.idToString) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Example 3 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.

In the class JobExecutionService, method verifyClusterInformation:

/**
 * Checks that the local member agrees with the coordinator about the cluster before an
 * execution is initialized. The checks run in this order:
 * <ol>
 * <li>the coordinator must be the master address known to this node;
 * <li>the coordinator's member list version must not be larger than the local one
 *   (otherwise a {@code TriggerMemberListPublishOp} is sent to the master and a
 *   retryable exception is thrown);
 * <li>every participant must be present in the local member list;
 * <li>the local member must be one of the participants.
 * </ol>
 *
 * @throws IllegalStateException       if the coordinator is not the master
 * @throws RetryableHazelcastException if the local member list version is behind the coordinator's
 * @throws TopologyChangedException    if a participant is not found in the local member list
 * @throws IllegalArgumentException    if the local member is not among the participants
 */
private void verifyClusterInformation(long jobId, long executionId, Address coordinator, int coordinatorMemberListVersion, Set<MemberInfo> participants) {
    final Address master = nodeEngine.getMasterAddress();
    if (!coordinator.equals(master)) {
        failIfNotRunning();
        throw new IllegalStateException(String.format(
                "Coordinator %s cannot initialize %s. Reason: it is not the master, the master is %s",
                coordinator, jobAndExecutionId(jobId, executionId), master));
    }

    final ClusterServiceImpl cluster = (ClusterServiceImpl) nodeEngine.getClusterService();
    final MembershipManager memberList = cluster.getMembershipManager();
    final int localVersion = memberList.getMemberListVersion();
    final Address self = nodeEngine.getThisAddress();

    if (coordinatorMemberListVersion > localVersion) {
        assert !master.equals(self) : String.format(
                "Local node: %s is master but InitOperation has coordinator member list version: %s larger than " + " local member list version: %s",
                self, coordinatorMemberListVersion, localVersion);
        // Ask the master for a fresh member list, then let the caller retry.
        nodeEngine.getOperationService().send(new TriggerMemberListPublishOp(), master);
        throw new RetryableHazelcastException(String.format(
                "Cannot initialize %s for coordinator %s, local member list version %s," + " coordinator member list version %s",
                jobAndExecutionId(jobId, executionId), coordinator, localVersion, coordinatorMemberListVersion));
    }

    boolean selfIsParticipant = false;
    for (MemberInfo candidate : participants) {
        // Non-short-circuit OR: the address comparison runs for every participant.
        selfIsParticipant |= candidate.getAddress().equals(self);
        if (memberList.getMember(candidate.getAddress(), candidate.getUuid()) != null) {
            continue;
        }
        throw new TopologyChangedException(String.format(
                "Cannot initialize %s for coordinator %s: participant %s not found in local member list." + " Local member list version: %s, coordinator member list version: %s",
                jobAndExecutionId(jobId, executionId), coordinator, candidate, localVersion, coordinatorMemberListVersion));
    }

    if (selfIsParticipant) {
        return;
    }
    throw new IllegalArgumentException(String.format(
            "Cannot initialize %s since member %s is not in participants: %s",
            jobAndExecutionId(jobId, executionId), self, participants));
}
Also used : Address(com.hazelcast.nio.Address) RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) MembershipManager(com.hazelcast.internal.cluster.impl.MembershipManager) TriggerMemberListPublishOp(com.hazelcast.internal.cluster.impl.operations.TriggerMemberListPublishOp) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Example 4 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.

In the class JobExecutionService, method assertExecutionContext:

/**
 * Looks up the execution context registered under {@code executionId} and verifies that
 * the caller is entitled to operate on it.
 *
 * @param coordinator the address of the member issuing the operation; must equal the
 *                    master address known to this node
 * @param jobId       the job the caller believes the execution belongs to
 * @param executionId the key used to look up the execution context
 * @param callerOp    the calling operation; only its simple class name is used, in messages
 * @return the matching execution context, never {@code null}
 * @throws IllegalStateException    if the coordinator is not the master, or if the found
 *                                  context has a different coordinator or job id
 * @throws TopologyChangedException if no execution context exists for {@code executionId}
 */
public ExecutionContext assertExecutionContext(Address coordinator, long jobId, long executionId, Operation callerOp) {
    final Address master = nodeEngine.getMasterAddress();
    if (!coordinator.equals(master)) {
        failIfNotRunning();
        throw new IllegalStateException(String.format(
                "Coordinator %s cannot do '%s' for %s: it is not the master, the master is %s",
                coordinator, callerOp.getClass().getSimpleName(), jobAndExecutionId(jobId, executionId), master));
    }
    failIfNotRunning();

    final ExecutionContext found = executionContexts.get(executionId);
    if (found == null) {
        throw new TopologyChangedException(String.format(
                "%s not found for coordinator %s for '%s'",
                jobAndExecutionId(jobId, executionId), coordinator, callerOp.getClass().getSimpleName()));
    }

    // The context exists; make sure it belongs to this coordinator and this job.
    if (!found.coordinator().equals(coordinator) || found.jobId() != jobId) {
        throw new IllegalStateException(String.format(
                "%s, originally from coordinator %s, cannot do '%s' by coordinator %s and execution %s",
                jobAndExecutionId(jobId, found.executionId()), found.coordinator(),
                callerOp.getClass().getSimpleName(), coordinator, idToString(executionId)));
    }
    return found;
}
Also used : ExecutionContext(com.hazelcast.jet.impl.execution.ExecutionContext) Address(com.hazelcast.nio.Address) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Example 5 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast by hazelcast.

In the class JobCoordinationService, method completeMasterContextIfJobAlreadyCompleted:

// Handles a master context whose job does not need to be (re)started:
//  - if a job result is already stored, the job context's final result is set from it and
//    the context is removed from masterContexts (the removal outcome is returned);
//  - otherwise, if auto-scaling is off and the job execution record reports that the job
//    has been executed, the job is finalized with a TopologyChangedException and true is
//    returned.
// Returns false when neither applies.
private boolean completeMasterContextIfJobAlreadyCompleted(MasterContext masterContext) {
    final long jobId = masterContext.jobId();

    final JobResult storedResult = jobRepository.getJobResult(jobId);
    if (storedResult != null) {
        logger.fine("Completing master context for " + masterContext.jobIdString()
                + " since already completed with result: " + storedResult);
        masterContext.jobContext().setFinalResult(storedResult.getFailureAsThrowable());
        return masterContexts.remove(jobId, masterContext);
    }

    // Leave the context alone if auto-scaling is on or the job has not been executed yet.
    if (masterContext.jobConfig().isAutoScaling() || !masterContext.jobExecutionRecord().executed()) {
        return false;
    }

    logger.info("Suspending or failing " + masterContext.jobIdString()
            + " since auto-restart is disabled and the job has been executed before");
    masterContext.jobContext().finalizeJob(new TopologyChangedException());
    return true;
}
Also used : TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Aggregations

TopologyChangedException (com.hazelcast.jet.core.TopologyChangedException)12 MemberInfo (com.hazelcast.internal.cluster.MemberInfo)7 Address (com.hazelcast.nio.Address)6 ClusterServiceImpl (com.hazelcast.internal.cluster.impl.ClusterServiceImpl)5 ILogger (com.hazelcast.logging.ILogger)5 Map (java.util.Map)5 Set (java.util.Set)5 Address (com.hazelcast.cluster.Address)4 ExecutionPlan (com.hazelcast.jet.impl.execution.init.ExecutionPlan)4 ExceptionUtil.withTryCatch (com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch)4 Util.idToString (com.hazelcast.jet.impl.util.Util.idToString)4 NodeEngineImpl (com.hazelcast.spi.impl.NodeEngineImpl)4 MembersView (com.hazelcast.internal.cluster.impl.MembersView)3 JobConfig (com.hazelcast.jet.config.JobConfig)3 ProcessingGuarantee (com.hazelcast.jet.config.ProcessingGuarantee)3 DAG (com.hazelcast.jet.core.DAG)3 Edge (com.hazelcast.jet.core.Edge)3 Edge.between (com.hazelcast.jet.core.Edge.between)3 JobStatus (com.hazelcast.jet.core.JobStatus)3 COMPLETED (com.hazelcast.jet.core.JobStatus.COMPLETED)3