Use of com.hazelcast.jet.core.JobStatus.RESTARTING in the hazelcast-jet project by hazelcast.
From the class MasterContext, method invokeCompleteExecution.
/**
 * Builds a {@code CompleteExecutionOperation} for the current {@code executionId} and hands
 * it to {@code invoke}, with a response callback that calls {@code finalizeJob}.
 * <p>
 * If the job status is {@code STARTING}, {@code RESTARTING} or {@code RUNNING}, the operation
 * carries {@code error} unchanged. For any other status the situation is logged at severe
 * level and the operation carries a fresh {@link IllegalStateException} instead.
 * <p>
 * In both cases the callback passes the original {@code error} (not the substituted
 * exception) to {@code finalizeJob}.
 *
 * @param error the error to propagate with the completion; may be {@code null}
 */
private void invokeCompleteExecution(Throwable error) {
    final JobStatus currentStatus = jobStatus();
    final boolean completable = currentStatus == STARTING
            || currentStatus == RESTARTING
            || currentStatus == RUNNING;

    // Must be assigned exactly once on every path: it is captured by the lambda below.
    final Throwable finalError;
    if (completable) {
        logger.fine("Completing " + jobIdString());
        finalError = error;
    } else {
        // Unexpected status: log it (with the cause when there is one) and replace the
        // error sent with the operation by a generic coordination failure.
        if (error == null) {
            logger.severe("Cannot properly complete " + jobIdString() + ": status is " + currentStatus);
        } else {
            logger.severe("Cannot properly complete failed " + jobIdString() + ": status is " + currentStatus, error);
        }
        finalError = new IllegalStateException("Job coordination failed.");
    }

    Function<ExecutionPlan, Operation> completeOperationFactory =
            plan -> new CompleteExecutionOperation(executionId, finalError);
    // NOTE(review): finalizeJob receives the original 'error', not 'finalError' - confirm this is intended.
    invoke(completeOperationFactory, responses -> finalizeJob(error), null);
}
Use of com.hazelcast.jet.core.JobStatus.RESTARTING in the hazelcast-jet project by hazelcast.
From the class SplitBrainTest, method when_quorumIsLostOnBothSides_then_jobRestartsUntilMerge.
/**
 * Splits a four-member cluster into two halves of two members each while a job with
 * split-brain protection enabled is running.
 * <p>
 * During the split, the job is expected to report {@code RESTARTING} on the first
 * sub-cluster and {@code STARTING} on the second, and to stay that way. After the merge,
 * the processor supplier's init and close counters are each expected to reach twice the
 * cluster size, with every recorded close error being a {@code TopologyChangedException}.
 */
@Test
public void when_quorumIsLostOnBothSides_then_jobRestartsUntilMerge() {
    int firstHalfSize = 2;
    int secondHalfSize = 2;
    int clusterSize = firstHalfSize + secondHalfSize;
    // Expected value of both the init and the close counter after the merge.
    int expectedLifecycleCount = clusterSize * 2;

    StuckProcessor.executionStarted = new CountDownLatch(clusterSize * PARALLELISM);
    // Single-element array so the lambdas below can share the submitted job.
    Job[] submittedJob = new Job[1];

    Consumer<JetInstance[]> beforeSplit = members -> {
        MockPS supplier = new MockPS(StuckProcessor::new, clusterSize);
        DAG dag = new DAG().vertex(new Vertex("test", supplier));
        JobConfig config = new JobConfig().setSplitBrainProtection(true);
        submittedJob[0] = members[0].newJob(dag, config);
        assertOpenEventually(StuckProcessor.executionStarted);
    };

    BiConsumer<JetInstance[], JetInstance[]> onSplit = (firstHalf, secondHalf) -> {
        StuckProcessor.proceedLatch.countDown();
        long jobId = submittedJob[0].getId();

        // First wait until both sides report the expected status...
        assertTrueEventually(() -> {
            JetService firstService = getJetService(firstHalf[0]);
            JetService secondService = getJetService(secondHalf[0]);
            assertEquals(RESTARTING, firstService.getJobCoordinationService().getJobStatus(jobId));
            assertEquals(STARTING, secondService.getJobCoordinationService().getJobStatus(jobId));
        });

        // ...then check that the same statuses keep being reported.
        assertTrueAllTheTime(() -> {
            JetService firstService = getJetService(firstHalf[0]);
            JetService secondService = getJetService(secondHalf[0]);
            assertEquals(RESTARTING, firstService.getJobCoordinationService().getJobStatus(jobId));
            assertEquals(STARTING, secondService.getJobCoordinationService().getJobStatus(jobId));
        }, 20);
    };

    Consumer<JetInstance[]> afterMerge = members -> {
        assertTrueEventually(() -> {
            assertEquals(expectedLifecycleCount, MockPS.initCount.get());
            assertEquals(expectedLifecycleCount, MockPS.closeCount.get());
        });

        assertEquals(clusterSize, MockPS.receivedCloseErrors.size());
        MockPS.receivedCloseErrors.forEach(
                closeError -> assertTrue(closeError instanceof TopologyChangedException));
    };

    testSplitBrain(firstHalfSize, secondHalfSize, beforeSplit, onSplit, afterMerge);
}
Aggregations