Search in sources :

Example 31 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class CheckpointCoordinatorTest method testTriggerAndConfirmSimpleSavepoint.

/**
 * Triggers a savepoint on a coordinator with two tasks, acknowledges it from both tasks and
 * checks that it becomes the single retained completed checkpoint. Afterwards a second
 * savepoint is triggered and acknowledged, and the test checks that it takes the place of the
 * first one in the (size 1) completed checkpoint store.
 */
@Test
public void testTriggerAndConfirmSimpleSavepoint() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    // create some mock Execution vertices that receive the checkpoint trigger messages
    final ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
    ExecutionVertex vertex1 = mockExecutionVertex(attemptID1);
    ExecutionVertex vertex2 = mockExecutionVertex(attemptID2);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(
        jid,
        600000,
        600000,
        0,
        Integer.MAX_VALUE,
        ExternalizedCheckpointSettings.none(),
        new ExecutionVertex[] { vertex1, vertex2 },
        new ExecutionVertex[] { vertex1, vertex2 },
        new ExecutionVertex[] { vertex1, vertex2 },
        new StandaloneCheckpointIDCounter(),
        new StandaloneCompletedCheckpointStore(1),
        null,
        Executors.directExecutor());
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
    // trigger the first savepoint. this should succeed
    String savepointDir = tmpFolder.newFolder().getAbsolutePath();
    Future<CompletedCheckpoint> savepointFuture = coord.triggerSavepoint(timestamp, savepointDir);
    assertFalse(savepointFuture.isDone());
    // validate that we have a pending savepoint
    assertEquals(1, coord.getNumberOfPendingCheckpoints());
    long checkpointId = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
    PendingCheckpoint pending = coord.getPendingCheckpoints().get(checkpointId);
    assertNotNull(pending);
    assertEquals(checkpointId, pending.getCheckpointId());
    assertEquals(timestamp, pending.getCheckpointTimestamp());
    assertEquals(jid, pending.getJobId());
    assertEquals(2, pending.getNumberOfNonAcknowledgedTasks());
    assertEquals(0, pending.getNumberOfAcknowledgedTasks());
    assertEquals(0, pending.getTaskStates().size());
    assertFalse(pending.isDiscarded());
    assertFalse(pending.isFullyAcknowledged());
    assertFalse(pending.canBeSubsumed());
    // acknowledge from one of the tasks
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId));
    assertEquals(1, pending.getNumberOfAcknowledgedTasks());
    assertEquals(1, pending.getNumberOfNonAcknowledgedTasks());
    assertFalse(pending.isDiscarded());
    assertFalse(pending.isFullyAcknowledged());
    assertFalse(savepointFuture.isDone());
    // acknowledge the same task again (should not matter)
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId));
    assertFalse(pending.isDiscarded());
    assertFalse(pending.isFullyAcknowledged());
    assertFalse(savepointFuture.isDone());
    // acknowledge the other task.
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointId));
    // the checkpoint is internally converted to a successful checkpoint and the
    // pending checkpoint object is disposed
    assertTrue(pending.isDiscarded());
    assertTrue(savepointFuture.isDone());
    // now we should have a completed checkpoint
    assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    // validate that the relevant tasks got a confirmation message
    verify(vertex1.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointId), eq(timestamp));
    verify(vertex2.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointId), eq(timestamp));
    CompletedCheckpoint success = coord.getSuccessfulCheckpoints().get(0);
    assertEquals(jid, success.getJobId());
    assertEquals(timestamp, success.getTimestamp());
    assertEquals(pending.getCheckpointId(), success.getCheckpointID());
    assertTrue(success.getTaskStates().isEmpty());
    // ---------------
    // trigger another savepoint and see that this one replaces the other checkpoint
    // ---------------
    final long timestampNew = timestamp + 7;
    savepointFuture = coord.triggerSavepoint(timestampNew, savepointDir);
    assertFalse(savepointFuture.isDone());
    long checkpointIdNew = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointIdNew));
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointIdNew));
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    // the store retains a single checkpoint, so the count must still be one
    assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
    CompletedCheckpoint successNew = coord.getSuccessfulCheckpoints().get(0);
    assertEquals(jid, successNew.getJobId());
    assertEquals(timestampNew, successNew.getTimestamp());
    assertEquals(checkpointIdNew, successNew.getCheckpointID());
    assertTrue(successNew.getTaskStates().isEmpty());
    assertTrue(savepointFuture.isDone());
    // validate that the relevant tasks got both a trigger and a confirmation message
    verify(vertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointIdNew), eq(timestampNew), any(CheckpointOptions.class));
    verify(vertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointIdNew), eq(timestampNew), any(CheckpointOptions.class));
    verify(vertex1.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointIdNew), eq(timestampNew));
    verify(vertex2.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointIdNew), eq(timestampNew));
    coord.shutdown(JobStatus.FINISHED);
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 32 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class CheckpointCoordinatorTest method testStopPeriodicScheduler.

/**
 * Checks that a coordinator whose periodic scheduler has not been started declines a periodic
 * trigger with {@code PERIODIC_SCHEDULER_SHUTDOWN}, while a non-periodic trigger is still
 * accepted.
 */
@Test
public void testStopPeriodicScheduler() throws Exception {
    // create some mock Execution vertices that receive the checkpoint trigger messages
    final ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    ExecutionVertex vertex1 = mockExecutionVertex(attemptID1);
    // set up the coordinator; the checkpoint scheduler is deliberately never started
    CheckpointCoordinator coord = new CheckpointCoordinator(
        new JobID(),
        600000,
        600000,
        0,
        Integer.MAX_VALUE,
        ExternalizedCheckpointSettings.none(),
        new ExecutionVertex[] { vertex1 },
        new ExecutionVertex[] { vertex1 },
        new ExecutionVertex[] { vertex1 },
        new StandaloneCheckpointIDCounter(),
        new StandaloneCompletedCheckpointStore(1),
        null,
        Executors.directExecutor());
    // Periodic
    CheckpointTriggerResult triggerResult = coord.triggerCheckpoint(System.currentTimeMillis(), CheckpointProperties.forStandardCheckpoint(), null, true);
    assertTrue(triggerResult.isFailure());
    assertEquals(CheckpointDeclineReason.PERIODIC_SCHEDULER_SHUTDOWN, triggerResult.getFailureReason());
    // Not periodic
    triggerResult = coord.triggerCheckpoint(System.currentTimeMillis(), CheckpointProperties.forStandardCheckpoint(), null, false);
    assertFalse(triggerResult.isFailure());
    // the non-periodic trigger succeeded, so release the coordinator like the
    // other tests of this class do instead of leaving it running
    coord.shutdown(JobStatus.FINISHED);
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 33 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class CheckpointCoordinatorTest method testPeriodicTriggering.

/**
 * Runs the periodic checkpoint scheduler twice. Each run waits (up to 60 s) for at least five
 * trigger calls, whose checkpoint IDs must strictly increase and whose timestamps must not
 * decrease, and then checks that at most one more trigger arrives within 400 ms of stopping
 * the scheduler.
 */
@Test
public void testPeriodicTriggering() {
    try {
        final JobID jobId = new JobID();
        final long startTime = System.currentTimeMillis();

        // mock one vertex each for triggering, acknowledging and committing
        final ExecutionVertex vertexToTrigger = mockExecutionVertex(new ExecutionAttemptID());
        final ExecutionVertex vertexToAck = mockExecutionVertex(new ExecutionAttemptID());
        final ExecutionVertex vertexToCommit = mockExecutionVertex(new ExecutionAttemptID());

        final AtomicInteger triggerCount = new AtomicInteger();
        final Execution triggerExecution = vertexToTrigger.getCurrentExecutionAttempt();

        // count every trigger call and check that IDs and timestamps only move forward
        final Answer<Void> orderCheckingCounter = new Answer<Void>() {

            private long previousId = -1;

            private long previousTimestamp = -1;

            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                final long currentId = (Long) invocation.getArguments()[0];
                final long currentTimestamp = (Long) invocation.getArguments()[1];
                assertTrue(currentId > previousId);
                assertTrue(currentTimestamp >= previousTimestamp);
                assertTrue(currentTimestamp >= startTime);
                previousId = currentId;
                previousTimestamp = currentTimestamp;
                triggerCount.incrementAndGet();
                return null;
            }
        };
        doAnswer(orderCheckingCounter)
            .when(triggerExecution)
            .triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class));

        final CheckpointCoordinator coordinator = new CheckpointCoordinator(
            jobId,
            // periodic interval is 10 ms
            10,
            // timeout is very long (200 s)
            200000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            new ExecutionVertex[] { vertexToTrigger },
            new ExecutionVertex[] { vertexToAck },
            new ExecutionVertex[] { vertexToCommit },
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(2),
            null,
            Executors.directExecutor());

        // ---- first round of periodic scheduling ----
        coordinator.startCheckpointScheduler();
        long deadline = System.currentTimeMillis() + 60000;
        while (true) {
            Thread.sleep(20);
            if (deadline <= System.currentTimeMillis() || triggerCount.get() >= 5) {
                break;
            }
        }
        assertTrue(triggerCount.get() >= 5);
        coordinator.stopCheckpointScheduler();

        // For 400 ms no further calls may come. One trigger may already have
        // fired and be waiting for the lock when the scheduler is cancelled,
        // in which case it still completes, so a single extra call is tolerated.
        int countAtStop = triggerCount.get();
        Thread.sleep(400);
        assertTrue(countAtStop == triggerCount.get() || countAtStop + 1 == triggerCount.get());

        // ---- second round of periodic scheduling ----
        triggerCount.set(0);
        coordinator.startCheckpointScheduler();
        deadline = System.currentTimeMillis() + 60000;
        while (true) {
            Thread.sleep(20);
            if (deadline <= System.currentTimeMillis() || triggerCount.get() >= 5) {
                break;
            }
        }
        assertTrue(triggerCount.get() >= 5);
        coordinator.stopCheckpointScheduler();

        // same tolerance as above: at most one straggling trigger within 400 ms
        countAtStop = triggerCount.get();
        Thread.sleep(400);
        assertTrue(countAtStop == triggerCount.get() || countAtStop + 1 == triggerCount.get());

        coordinator.shutdown(JobStatus.FINISHED);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) IOException(java.io.IOException) Execution(org.apache.flink.runtime.executiongraph.Execution) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 34 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class CheckpointCoordinatorTest method testMaxConcurrentAttempts.

/**
 * Starts periodic checkpointing with a limit on concurrent checkpoints and checks that exactly
 * {@code maxConcurrentAttempts} checkpoints are triggered while none is acknowledged, that
 * acknowledging checkpoint 1 leads to exactly one more trigger, and that no further trigger
 * follows within 200 ms.
 *
 * @param maxConcurrentAttempts the concurrent-checkpoint limit handed to the coordinator
 */
private void testMaxConcurrentAttempts(int maxConcurrentAttempts) {
    try {
        final JobID jid = new JobID();
        // create some mock execution vertices and trigger some checkpoint
        final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID();
        final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID();
        final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
        ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID);
        ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID);
        ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);
        final AtomicInteger numCalls = new AtomicInteger();
        final Execution execution = triggerVertex.getCurrentExecutionAttempt();
        // one counting answer shared by both stubbed methods of the trigger execution
        final Answer<Void> countCall = new Answer<Void>() {

            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                numCalls.incrementAndGet();
                return null;
            }
        };
        doAnswer(countCall).when(execution).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class));
        doAnswer(countCall).when(execution).notifyCheckpointComplete(anyLong(), anyLong());
        CheckpointCoordinator coord = new CheckpointCoordinator(
            jid,
            // periodic interval is 10 ms
            10,
            // timeout is very long (200 s)
            200000,
            // no extra delay
            0L,
            maxConcurrentAttempts,
            ExternalizedCheckpointSettings.none(),
            new ExecutionVertex[] { triggerVertex },
            new ExecutionVertex[] { ackVertex },
            new ExecutionVertex[] { commitVertex },
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(2),
            null,
            Executors.directExecutor());
        coord.startCheckpointScheduler();
        // after a while, there should be exactly as many checkpoints
        // as concurrently permitted
        long now = System.currentTimeMillis();
        long timeout = now + 60000;
        long minDuration = now + 100;
        do {
            Thread.sleep(20);
        } while ((now = System.currentTimeMillis()) < minDuration || (numCalls.get() < maxConcurrentAttempts && now < timeout));
        assertEquals(maxConcurrentAttempts, numCalls.get());
        verify(triggerVertex.getCurrentExecutionAttempt(), times(maxConcurrentAttempts)).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class));
        // now, once we acknowledge one checkpoint, it should trigger the next one
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID, 1L));
        // this should have immediately triggered a new checkpoint
        timeout = System.currentTimeMillis() + 60000;
        do {
            Thread.sleep(20);
            // re-read the clock on every iteration so the timeout can actually
            // expire if the expected trigger never arrives
        } while (numCalls.get() < maxConcurrentAttempts + 1 && System.currentTimeMillis() < timeout);
        assertEquals(maxConcurrentAttempts + 1, numCalls.get());
        // no further checkpoints should happen
        Thread.sleep(200);
        assertEquals(maxConcurrentAttempts + 1, numCalls.get());
        coord.shutdown(JobStatus.FINISHED);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IOException(java.io.IOException) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Execution(org.apache.flink.runtime.executiongraph.Execution) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) JobID(org.apache.flink.api.common.JobID)

Example 35 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class BlobClientSslTest method testRegularStream.

/**
 * Tests the PUT/GET operations for regular (non-content-addressable) streams.
 *
 * <p>Uploads a prepared temporary file under a job ID and key through a client configured
 * with {@code sslClientConfig}, reads it back and validates the content against the file.
 */
@Test
public void testRegularStream() {
    final JobID jobID = JobID.generate();
    final String key = "testkey3";
    try {
        final File testFile = File.createTempFile("testfile", ".dat");
        testFile.deleteOnExit();
        prepareTestFile(testFile);
        final InetSocketAddress serverAddress = new InetSocketAddress("localhost", BLOB_SSL_SERVER.getPort());
        final BlobClient client = new BlobClient(serverAddress, sslClientConfig);
        try {
            // Store the data
            try (InputStream upload = new FileInputStream(testFile)) {
                client.put(jobID, key, upload);
            }
            // Retrieve the data
            try (InputStream download = client.get(jobID, key)) {
                validateGet(download, testFile);
            }
        } finally {
            // always reached, even when closing one of the streams fails,
            // so the client connection is never leaked
            client.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) InetSocketAddress(java.net.InetSocketAddress) File(java.io.File) JobID(org.apache.flink.api.common.JobID) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

JobID (org.apache.flink.api.common.JobID)335 Test (org.junit.Test)274 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)88 IOException (java.io.IOException)74 Configuration (org.apache.flink.configuration.Configuration)72 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)61 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)48 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)47 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)44 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)42 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)38 ArrayList (java.util.ArrayList)37 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)32 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)31 HashMap (java.util.HashMap)29 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)29 FiniteDuration (scala.concurrent.duration.FiniteDuration)28 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)24 File (java.io.File)23 UUID (java.util.UUID)23