Use of org.apache.flink.api.common.JobID in the Apache Flink project.
Class CheckpointCoordinatorTest, method testTriggerAndConfirmSimpleSavepoint.
/**
 * Triggers two savepoints one after the other on a coordinator with two tasks and
 * validates the pending state, the handling of acknowledge messages (including a
 * duplicate acknowledgement), and the resulting completed checkpoints.
 *
 * <p>The completed checkpoint store is created with a capacity argument of 1, and the
 * test asserts that exactly one successful checkpoint is retained after each savepoint.
 */
@Test
public void testTriggerAndConfirmSimpleSavepoint() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();

    // create some mock Execution vertices that receive the checkpoint trigger messages
    final ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
    ExecutionVertex vertex1 = mockExecutionVertex(attemptID1);
    ExecutionVertex vertex2 = mockExecutionVertex(attemptID2);

    // set up the coordinator and validate the initial state;
    // both vertices are passed in all three vertex-array positions
    CheckpointCoordinator coord = new CheckpointCoordinator(
            jid,
            600000,
            600000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            new ExecutionVertex[] { vertex1, vertex2 },
            new ExecutionVertex[] { vertex1, vertex2 },
            new ExecutionVertex[] { vertex1, vertex2 },
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(1),
            null,
            Executors.directExecutor());

    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());

    // trigger the first savepoint. this should succeed
    String savepointDir = tmpFolder.newFolder().getAbsolutePath();
    Future<CompletedCheckpoint> savepointFuture = coord.triggerSavepoint(timestamp, savepointDir);
    assertFalse(savepointFuture.isDone());

    // validate that we have a pending savepoint
    assertEquals(1, coord.getNumberOfPendingCheckpoints());
    long checkpointId = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
    PendingCheckpoint pending = coord.getPendingCheckpoints().get(checkpointId);

    assertNotNull(pending);
    assertEquals(checkpointId, pending.getCheckpointId());
    assertEquals(timestamp, pending.getCheckpointTimestamp());
    assertEquals(jid, pending.getJobId());
    assertEquals(2, pending.getNumberOfNonAcknowledgedTasks());
    assertEquals(0, pending.getNumberOfAcknowledgedTasks());
    assertEquals(0, pending.getTaskStates().size());
    assertFalse(pending.isDiscarded());
    assertFalse(pending.isFullyAcknowledged());
    assertFalse(pending.canBeSubsumed());

    // acknowledge from one of the tasks
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId));
    assertEquals(1, pending.getNumberOfAcknowledgedTasks());
    assertEquals(1, pending.getNumberOfNonAcknowledgedTasks());
    assertFalse(pending.isDiscarded());
    assertFalse(pending.isFullyAcknowledged());
    assertFalse(savepointFuture.isDone());

    // acknowledge the same task again (should not matter)
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId));
    assertFalse(pending.isDiscarded());
    assertFalse(pending.isFullyAcknowledged());
    assertFalse(savepointFuture.isDone());

    // acknowledge the other task.
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointId));

    // the checkpoint is internally converted to a successful checkpoint and the
    // pending checkpoint object is disposed
    assertTrue(pending.isDiscarded());
    assertTrue(savepointFuture.isDone());

    // now we should have a completed checkpoint
    assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
    assertEquals(0, coord.getNumberOfPendingCheckpoints());

    // validate that the relevant tasks got a confirmation message
    verify(vertex1.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointId), eq(timestamp));
    verify(vertex2.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointId), eq(timestamp));

    CompletedCheckpoint success = coord.getSuccessfulCheckpoints().get(0);
    assertEquals(jid, success.getJobId());
    assertEquals(timestamp, success.getTimestamp());
    assertEquals(pending.getCheckpointId(), success.getCheckpointID());
    assertTrue(success.getTaskStates().isEmpty());

    // ---------------
    // trigger another savepoint and see that this one replaces the other checkpoint
    // ---------------
    final long timestampNew = timestamp + 7;
    savepointFuture = coord.triggerSavepoint(timestampNew, savepointDir);
    assertFalse(savepointFuture.isDone());

    long checkpointIdNew = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointIdNew));
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointIdNew));

    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());

    CompletedCheckpoint successNew = coord.getSuccessfulCheckpoints().get(0);
    assertEquals(jid, successNew.getJobId());
    assertEquals(timestampNew, successNew.getTimestamp());
    assertEquals(checkpointIdNew, successNew.getCheckpointID());
    assertTrue(successNew.getTaskStates().isEmpty());
    assertTrue(savepointFuture.isDone());

    // validate that the relevant tasks got both the trigger and the confirmation message
    verify(vertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointIdNew), eq(timestampNew), any(CheckpointOptions.class));
    verify(vertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointIdNew), eq(timestampNew), any(CheckpointOptions.class));
    verify(vertex1.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointIdNew), eq(timestampNew));
    verify(vertex2.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointIdNew), eq(timestampNew));

    coord.shutdown(JobStatus.FINISHED);
}
Use of org.apache.flink.api.common.JobID in the Apache Flink project.
Class CheckpointCoordinatorTest, method testStopPeriodicScheduler.
/**
 * Verifies how a coordinator on which {@code startCheckpointScheduler()} was never
 * called reacts to trigger requests: a trigger flagged as periodic must be declined with
 * {@link CheckpointDeclineReason#PERIODIC_SCHEDULER_SHUTDOWN}, while a non-periodic
 * trigger must not fail.
 */
@Test
public void testStopPeriodicScheduler() throws Exception {
    // a single mocked vertex takes the trigger, acknowledge and commit roles
    final ExecutionAttemptID attemptId = new ExecutionAttemptID();
    final ExecutionVertex vertex = mockExecutionVertex(attemptId);

    // build the coordinator; the periodic scheduler is deliberately not started
    final JobID jobId = new JobID();
    final CheckpointCoordinator coordinator = new CheckpointCoordinator(
            jobId,
            600000,
            600000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            new ExecutionVertex[] { vertex },
            new ExecutionVertex[] { vertex },
            new ExecutionVertex[] { vertex },
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(1),
            null,
            Executors.directExecutor());

    // periodic trigger (last argument true): expected to be declined
    final CheckpointTriggerResult periodicResult = coordinator.triggerCheckpoint(
            System.currentTimeMillis(),
            CheckpointProperties.forStandardCheckpoint(),
            null,
            true);
    assertTrue(periodicResult.isFailure());
    assertEquals(CheckpointDeclineReason.PERIODIC_SCHEDULER_SHUTDOWN, periodicResult.getFailureReason());

    // non-periodic trigger (last argument false): expected to go through
    final CheckpointTriggerResult manualResult = coordinator.triggerCheckpoint(
            System.currentTimeMillis(),
            CheckpointProperties.forStandardCheckpoint(),
            null,
            false);
    assertFalse(manualResult.isFailure());
}
Use of org.apache.flink.api.common.JobID in the Apache Flink project.
Class CheckpointCoordinatorTest, method testPeriodicTriggering.
/**
 * Starts the periodic checkpoint scheduler with a 10 ms interval and checks that
 * checkpoints are triggered with increasing IDs and non-decreasing timestamps, that
 * stopping the scheduler halts triggering (allowing for one in-flight trigger), and that
 * the scheduler can be started and stopped a second time.
 *
 * <p>Exceptions propagate to the test runner so that the full stack trace is reported,
 * and the coordinator is always shut down, even when an assertion fails.
 */
@Test
public void testPeriodicTriggering() throws Exception {
    final JobID jid = new JobID();
    final long start = System.currentTimeMillis();

    // create some mock execution vertices and trigger some checkpoint
    final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID();
    final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
    ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID);
    ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID);
    ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);

    // counts trigger calls and checks that IDs strictly increase and timestamps never go back
    final AtomicInteger numCalls = new AtomicInteger();
    final Execution execution = triggerVertex.getCurrentExecutionAttempt();
    doAnswer(new Answer<Void>() {
        private long lastId = -1;
        private long lastTs = -1;

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            long id = (Long) invocation.getArguments()[0];
            long ts = (Long) invocation.getArguments()[1];
            assertTrue(id > lastId);
            assertTrue(ts >= lastTs);
            assertTrue(ts >= start);
            lastId = id;
            lastTs = ts;
            numCalls.incrementAndGet();
            return null;
        }
    }).when(execution).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class));

    CheckpointCoordinator coord = new CheckpointCoordinator(
            jid,
            10, // periodic interval is 10 ms
            200000, // timeout is very long (200 s)
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            new ExecutionVertex[] { triggerVertex },
            new ExecutionVertex[] { ackVertex },
            new ExecutionVertex[] { commitVertex },
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(2),
            null,
            Executors.directExecutor());

    try {
        coord.startCheckpointScheduler();

        // wait (at most 60 s) until at least five triggers were observed
        long timeout = System.currentTimeMillis() + 60000;
        do {
            Thread.sleep(20);
        } while (timeout > System.currentTimeMillis() && numCalls.get() < 5);
        assertTrue(numCalls.get() >= 5);

        coord.stopCheckpointScheduler();

        // for 400 ms, no further calls may come.
        // there may be the case that one trigger was fired and about to
        // acquire the lock, such that after cancelling it will still do
        // the remainder of its work
        int numCallsSoFar = numCalls.get();
        Thread.sleep(400);
        assertTrue(numCallsSoFar == numCalls.get() || numCallsSoFar + 1 == numCalls.get());

        // start another sequence of periodic scheduling
        numCalls.set(0);
        coord.startCheckpointScheduler();

        timeout = System.currentTimeMillis() + 60000;
        do {
            Thread.sleep(20);
        } while (timeout > System.currentTimeMillis() && numCalls.get() < 5);
        assertTrue(numCalls.get() >= 5);

        coord.stopCheckpointScheduler();

        // for 400 ms, no further calls may come
        // there may be the case that one trigger was fired and about to
        // acquire the lock, such that after cancelling it will still do
        // the remainder of its work
        numCallsSoFar = numCalls.get();
        Thread.sleep(400);
        assertTrue(numCallsSoFar == numCalls.get() || numCallsSoFar + 1 == numCalls.get());
    } finally {
        // always shut down so a failed assertion does not leave the scheduler running
        coord.shutdown(JobStatus.FINISHED);
    }
}
Use of org.apache.flink.api.common.JobID in the Apache Flink project.
Class CheckpointCoordinatorTest, method testMaxConcurrentAttempts.
/**
 * Runs the periodic scheduler with the given limit on concurrent checkpoint attempts and
 * checks that exactly that many checkpoints are triggered, that acknowledging one
 * checkpoint lets exactly one more be triggered, and that no further triggers follow.
 *
 * <p>Any exception is reported as a test failure via {@code fail(...)}.
 *
 * @param maxConcurrentAttempts the concurrency limit handed to the coordinator and the
 *     number of initial trigger calls the test expects
 */
private void testMaxConcurrentAttempts(int maxConcurrentAttempts) {
    try {
        final JobID jid = new JobID();

        // create some mock execution vertices and trigger some checkpoint
        final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID();
        final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID();
        final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
        ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID);
        ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID);
        ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);

        // numCalls counts both trigger and notify-complete invocations on the trigger
        // vertex's execution attempt
        final AtomicInteger numCalls = new AtomicInteger();
        final Execution execution = triggerVertex.getCurrentExecutionAttempt();
        doAnswer(new Answer<Void>() {
            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                numCalls.incrementAndGet();
                return null;
            }
        }).when(execution).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class));
        doAnswer(new Answer<Void>() {
            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                numCalls.incrementAndGet();
                return null;
            }
        }).when(execution).notifyCheckpointComplete(anyLong(), anyLong());

        CheckpointCoordinator coord = new CheckpointCoordinator(
                jid,
                10, // periodic interval is 10 ms
                200000, // timeout is very long (200 s)
                0L, // no extra delay
                maxConcurrentAttempts,
                ExternalizedCheckpointSettings.none(),
                new ExecutionVertex[] { triggerVertex },
                new ExecutionVertex[] { ackVertex },
                new ExecutionVertex[] { commitVertex },
                new StandaloneCheckpointIDCounter(),
                new StandaloneCompletedCheckpointStore(2),
                null,
                Executors.directExecutor());

        coord.startCheckpointScheduler();

        // after a while, there should be exactly as many checkpoints
        // as concurrently permitted
        long now = System.currentTimeMillis();
        long timeout = now + 60000;
        long minDuration = now + 100;
        do {
            Thread.sleep(20);
        } while ((now = System.currentTimeMillis()) < minDuration || (numCalls.get() < maxConcurrentAttempts && now < timeout));

        assertEquals(maxConcurrentAttempts, numCalls.get());
        verify(triggerVertex.getCurrentExecutionAttempt(), times(maxConcurrentAttempts)).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class));

        // now, once we acknowledge one checkpoint, it should trigger the next one
        // NOTE(review): assumes the first checkpoint ID issued by the counter is 1 - confirm
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID, 1L));

        // this should have immediately triggered a new checkpoint;
        // the clock is re-read on every iteration so the 60 s timeout can actually expire
        timeout = System.currentTimeMillis() + 60000;
        do {
            Thread.sleep(20);
        } while (numCalls.get() < maxConcurrentAttempts + 1 && System.currentTimeMillis() < timeout);

        assertEquals(maxConcurrentAttempts + 1, numCalls.get());

        // no further checkpoints should happen
        Thread.sleep(200);
        assertEquals(maxConcurrentAttempts + 1, numCalls.get());

        coord.shutdown(JobStatus.FINISHED);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Use of org.apache.flink.api.common.JobID in the Apache Flink project.
Class BlobClientSslTest, method testRegularStream.
/**
 * Tests the PUT/GET operations for regular (non-content-addressable) streams.
 *
 * <p>A prepared temporary file is uploaded under a generated job ID and a fixed key
 * through a client created with {@code sslClientConfig}; the stream returned by the
 * subsequent GET is passed to {@code validateGet} together with the original file.
 * Any exception is reported as a test failure.
 */
@Test
public void testRegularStream() {
    final JobID jobID = JobID.generate();
    final String key = "testkey3";

    try {
        final File testFile = File.createTempFile("testfile", ".dat");
        testFile.deleteOnExit();
        prepareTestFile(testFile);

        BlobClient client = null;
        InputStream is = null;
        try {
            final InetSocketAddress serverAddress = new InetSocketAddress("localhost", BLOB_SSL_SERVER.getPort());
            client = new BlobClient(serverAddress, sslClientConfig);

            // Store the data
            is = new FileInputStream(testFile);
            client.put(jobID, key, is);
            is.close();
            // reset so the finally block does not close the upload stream a second time
            is = null;

            // Retrieve the data
            is = client.get(jobID, key);
            validateGet(is, testFile);
        } finally {
            // close the client even if closing the stream throws
            try {
                if (is != null) {
                    is.close();
                }
            } finally {
                if (client != null) {
                    client.close();
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Aggregations