Use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.
The class MasterContext, method getExecuteResult.
/**
 * Derives the overall outcome of an execution from the per-member responses.
 * <ul>
 * <li>Returns {@code null} if there is no failure.
 * <li>Returns {@code CancellationException} if the job is cancelled.
 * <li>Returns {@code JobRestartRequestedException} if the current execution is cancelled
 *     because a restart was requested.
 * <li>If there is at least one non-restartable failure, such as an exception in user code,
 *     then returns that failure (peeled via {@code ExceptionUtil.peel}).
 * <li>Otherwise, the failure is because a job participant has left the cluster.
 *     In that case, {@code TopologyChangedException} is returned so that the job will be restarted.
 * </ul>
 *
 * @param responses          the response received from each participating member
 * @param isRestartRequested whether a restart of the current execution was requested
 * @return {@code null} on success, otherwise the {@code Throwable} describing the outcome
 */
private Throwable getExecuteResult(Map<MemberInfo, Object> responses, boolean isRestartRequested) {
    if (cancellationToken.isCompleted()) {
        logger.fine(jobIdString() + " to be cancelled after execute");
        return new CancellationException();
    } else if (isRestartRequested) {
        return new JobRestartRequestedException();
    }

    // groupResponses() partitions the responses: key false -> successes, key true -> failures.
    Map<Boolean, List<Entry<MemberInfo, Object>>> grouped = groupResponses(responses);
    if (grouped.get(false).size() == executionPlanMap.size()) {
        logger.fine("Execute of " + jobIdString() + " is successful.");
        return null;
    }

    List<Entry<MemberInfo, Object>> failures = grouped.get(true);
    logger.fine("Execute of " + jobIdString() + " has failures: " + failures);

    // Pick the first failure that is neither a cancellation nor a topological failure.
    // If there is none, every failure is restartable, so report a topology change instead.
    // The fallback exception is created lazily, only when it is actually returned.
    return failures.stream()
            .map(e -> (Throwable) e.getValue())
            .filter(t -> !(t instanceof CancellationException || isTopologicalFailure(t)))
            .findFirst()
            .map(ExceptionUtil::peel)
            .orElseGet(TopologyChangedException::new);
}
Use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.
The class JobCoordinationService, method completeMasterContextIfJobAlreadyCompleted.
/**
 * Completes the given master context when the job does not need to run (again).
 * <p>
 * If a job result is present in the job repository, the master context is completed
 * with the failure stored in that result. Otherwise, if auto-restart on member failure
 * is disabled and the repository reports at least one execution id for the job, the job
 * is completed with a {@code TopologyChangedException}.
 *
 * @param masterContext the master context to inspect
 * @return the outcome of removing the context from {@code masterContexts} when it was
 *         completed here, {@code false} when nothing was done
 */
private boolean completeMasterContextIfJobAlreadyCompleted(MasterContext masterContext) {
    final long jobId = masterContext.getJobId();

    final JobResult storedResult = jobRepository.getJobResult(jobId);
    if (storedResult != null) {
        logger.fine("Completing master context " + idToString(jobId) + " since already completed with result: " + storedResult);
        masterContext.setFinalResult(storedResult.getFailure());
        return masterContexts.remove(jobId, masterContext);
    }

    // Nothing to do when the job may still be (re)started.
    if (masterContext.getJobConfig().isAutoRestartOnMemberFailureEnabled()
            || jobRepository.getExecutionIdCount(jobId) <= 0) {
        return false;
    }

    final String localUuid = nodeEngine.getNode().getThisUuid();
    final Throwable failure = new TopologyChangedException();
    logger.info("Completing Job " + idToString(jobId) + " with " + failure + " since auto-restart is disabled and the job has been executed before");
    jobRepository.completeJob(jobId, localUuid, System.currentTimeMillis(), failure);
    masterContext.setFinalResult(failure);
    return masterContexts.remove(jobId, masterContext);
}
Use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.
The class JobExecutionService, method verifyClusterInformation.
/**
 * Checks that the local view of the cluster is compatible with the one the coordinator
 * used when it asked this member to initialize an execution.
 *
 * @param jobId                        id of the job being initialized
 * @param executionId                  id of the execution being initialized
 * @param coordinator                  address of the member that sent the request
 * @param coordinatorMemberListVersion member list version known to the coordinator
 * @param participants                 members expected to take part in the execution
 * @throws IllegalStateException       if the coordinator is not the master known locally
 * @throws RetryableHazelcastException if the coordinator's member list version is newer than
 *                                     the local one; a {@code TriggerMemberListPublishOp} is
 *                                     sent to the master first
 * @throws TopologyChangedException    if a participant is not found in the local member list
 * @throws IllegalArgumentException    if this member is not among the participants
 */
private void verifyClusterInformation(long jobId, long executionId, Address coordinator, int coordinatorMemberListVersion, Set<MemberInfo> participants) {
    final Address master = nodeEngine.getMasterAddress();
    if (!coordinator.equals(master)) {
        failIfNotRunning();
        throw new IllegalStateException(String.format("Coordinator %s cannot initialize %s. Reason: it is not the master, the master is %s", coordinator, jobAndExecutionId(jobId, executionId), master));
    }

    final MembershipManager memberships = ((ClusterServiceImpl) nodeEngine.getClusterService()).getMembershipManager();
    final int localVersion = memberships.getMemberListVersion();
    final Address self = nodeEngine.getThisAddress();

    // The coordinator knows a newer member list than we do: ask the master to publish it and retry.
    if (coordinatorMemberListVersion > localVersion) {
        assert !master.equals(self) : String.format("Local node: %s is master but InitOperation has coordinator member list version: %s larger than " + " local member list version: %s", self, coordinatorMemberListVersion, localVersion);
        nodeEngine.getOperationService().send(new TriggerMemberListPublishOp(), master);
        throw new RetryableHazelcastException(String.format("Cannot initialize %s for coordinator %s, local member list version %s," + " coordinator member list version %s", jobAndExecutionId(jobId, executionId), coordinator, localVersion, coordinatorMemberListVersion));
    }

    boolean selfListed = false;
    for (MemberInfo candidate : participants) {
        final Address candidateAddress = candidate.getAddress();
        selfListed |= candidateAddress.equals(self);
        if (memberships.getMember(candidateAddress, candidate.getUuid()) == null) {
            throw new TopologyChangedException(String.format("Cannot initialize %s for coordinator %s: participant %s not found in local member list." + " Local member list version: %s, coordinator member list version: %s", jobAndExecutionId(jobId, executionId), coordinator, candidate, localVersion, coordinatorMemberListVersion));
        }
    }

    if (selfListed) {
        return;
    }
    throw new IllegalArgumentException(String.format("Cannot initialize %s since member %s is not in participants: %s", jobAndExecutionId(jobId, executionId), self, participants));
}
Use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.
The class JobExecutionService, method assertExecutionContext.
/**
 * Looks up the execution context for the given execution and verifies that the caller
 * is allowed to operate on it.
 *
 * @param coordinator address of the member that sent the operation
 * @param jobId       id of the job the operation refers to
 * @param executionId id of the execution the operation refers to
 * @param callerOp    the operation on whose behalf the lookup is made; only its simple
 *                    class name is used, in error messages
 * @return the execution context registered under {@code executionId}
 * @throws IllegalStateException    if the coordinator is not the master known locally, or if
 *                                  the found context belongs to a different coordinator or job
 * @throws TopologyChangedException if no execution context is registered under {@code executionId}
 */
public ExecutionContext assertExecutionContext(Address coordinator, long jobId, long executionId, Operation callerOp) {
    final Address master = nodeEngine.getMasterAddress();
    final boolean sentByMaster = coordinator.equals(master);

    // failIfNotRunning() is invoked on every path, before any of the checks below can throw.
    failIfNotRunning();
    if (!sentByMaster) {
        throw new IllegalStateException(String.format("Coordinator %s cannot do '%s' for %s: it is not the master, the master is %s", coordinator, callerOp.getClass().getSimpleName(), jobAndExecutionId(jobId, executionId), master));
    }

    final ExecutionContext found = executionContexts.get(executionId);
    if (found == null) {
        throw new TopologyChangedException(String.format("%s not found for coordinator %s for '%s'", jobAndExecutionId(jobId, executionId), coordinator, callerOp.getClass().getSimpleName()));
    }
    if (!found.coordinator().equals(coordinator) || found.jobId() != jobId) {
        throw new IllegalStateException(String.format("%s, originally from coordinator %s, cannot do '%s' by coordinator %s and execution %s", jobAndExecutionId(jobId, found.executionId()), found.coordinator(), callerOp.getClass().getSimpleName(), coordinator, idToString(executionId)));
    }
    return found;
}
Use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast by hazelcast.
The class JobCoordinationService, method completeMasterContextIfJobAlreadyCompleted.
/**
 * Completes the given master context when the job does not need to run (again).
 * <p>
 * If a job result is present in the job repository, the master context is completed with
 * the failure stored in that result and removed from {@code masterContexts}. Otherwise, if
 * auto-scaling is disabled and the job's execution record reports that it was executed
 * before, the job is finalized with a {@code TopologyChangedException}.
 *
 * @param masterContext the master context to inspect
 * @return the outcome of removing the context from {@code masterContexts} when a job result
 *         was present, {@code true} when the job was finalized here, {@code false} otherwise
 */
private boolean completeMasterContextIfJobAlreadyCompleted(MasterContext masterContext) {
    final long jobId = masterContext.jobId();

    final JobResult storedResult = jobRepository.getJobResult(jobId);
    if (storedResult != null) {
        logger.fine("Completing master context for " + masterContext.jobIdString() + " since already completed with result: " + storedResult);
        masterContext.jobContext().setFinalResult(storedResult.getFailureAsThrowable());
        return masterContexts.remove(jobId, masterContext);
    }

    // Nothing to do when the job may still be (re)started.
    if (masterContext.jobConfig().isAutoScaling()
            || !masterContext.jobExecutionRecord().executed()) {
        return false;
    }

    logger.info("Suspending or failing " + masterContext.jobIdString() + " since auto-restart is disabled and the job has been executed before");
    masterContext.jobContext().finalizeJob(new TopologyChangedException());
    return true;
}
Aggregations