
Example 56 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

The class JobMasterTest, method testCloseUnestablishedResourceManagerConnection.

/**
 * Tests that we can close an unestablished ResourceManager connection.
 */
@Test
public void testCloseUnestablishedResourceManagerConnection() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withConfiguration(configuration).withHighAvailabilityServices(haServices).createJobMaster();
    try {
        jobMaster.start();
        final TestingResourceManagerGateway firstResourceManagerGateway = createAndRegisterTestingResourceManagerGateway();
        final TestingResourceManagerGateway secondResourceManagerGateway = createAndRegisterTestingResourceManagerGateway();
        final OneShotLatch firstJobManagerRegistration = new OneShotLatch();
        final OneShotLatch secondJobManagerRegistration = new OneShotLatch();
        firstResourceManagerGateway.setRegisterJobManagerFunction((jobMasterId, resourceID, s, jobID) -> {
            firstJobManagerRegistration.trigger();
            return CompletableFuture.completedFuture(firstResourceManagerGateway.getJobMasterRegistrationSuccess());
        });
        secondResourceManagerGateway.setRegisterJobManagerFunction((jobMasterId, resourceID, s, jobID) -> {
            secondJobManagerRegistration.trigger();
            return CompletableFuture.completedFuture(secondResourceManagerGateway.getJobMasterRegistrationSuccess());
        });
        notifyResourceManagerLeaderListeners(firstResourceManagerGateway);
        // wait until we have seen the first registration attempt
        firstJobManagerRegistration.await();
        // this should stop the connection attempts towards the first RM
        notifyResourceManagerLeaderListeners(secondResourceManagerGateway);
        // check that we start registering at the second RM
        secondJobManagerRegistration.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used: TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), JobMasterBuilder (org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder), Test (org.junit.Test)
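For readers new to OneShotLatch, the pattern above is a one-shot handshake: the callback installed on the resource manager gateway calls trigger(), and the test thread blocks in await() until that has happened. The sketch below is not part of the Flink test; it only illustrates the trigger/await coordination, with a plain thread standing in for the registration callback.

import org.apache.flink.core.testutils.OneShotLatch;

public class OneShotLatchHandshakeSketch {
    public static void main(String[] args) throws Exception {
        final OneShotLatch registrationSeen = new OneShotLatch();
        // Stands in for the RegisterJobManagerFunction callback running on another thread.
        Thread asyncRegistration = new Thread(() -> {
            // ... registration work would happen here ...
            registrationSeen.trigger(); // wakes up every current and future await() call
        });
        asyncRegistration.start();
        // The test thread blocks here until the callback has fired, then continues its checks.
        registrationSeen.await();
        System.out.println("registration attempt observed");
        asyncRegistration.join();
    }
}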

Example 57 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

The class OperatorStateBackendTest, method testSnapshotRestoreAsync.

@Test
public void testSnapshotRestoreAsync() throws Exception {
    OperatorStateBackend operatorStateBackend = new DefaultOperatorStateBackendBuilder(OperatorStateBackendTest.class.getClassLoader(), new ExecutionConfig(), true, emptyStateHandles, new CloseableRegistry()).build();
    ListStateDescriptor<MutableType> stateDescriptor1 = new ListStateDescriptor<>("test1", new JavaSerializer<MutableType>());
    ListStateDescriptor<MutableType> stateDescriptor2 = new ListStateDescriptor<>("test2", new JavaSerializer<MutableType>());
    ListStateDescriptor<MutableType> stateDescriptor3 = new ListStateDescriptor<>("test3", new JavaSerializer<MutableType>());
    MapStateDescriptor<MutableType, MutableType> broadcastStateDescriptor1 = new MapStateDescriptor<>("test4", new JavaSerializer<MutableType>(), new JavaSerializer<MutableType>());
    MapStateDescriptor<MutableType, MutableType> broadcastStateDescriptor2 = new MapStateDescriptor<>("test5", new JavaSerializer<MutableType>(), new JavaSerializer<MutableType>());
    MapStateDescriptor<MutableType, MutableType> broadcastStateDescriptor3 = new MapStateDescriptor<>("test6", new JavaSerializer<MutableType>(), new JavaSerializer<MutableType>());
    ListState<MutableType> listState1 = operatorStateBackend.getListState(stateDescriptor1);
    ListState<MutableType> listState2 = operatorStateBackend.getListState(stateDescriptor2);
    ListState<MutableType> listState3 = operatorStateBackend.getUnionListState(stateDescriptor3);
    BroadcastState<MutableType, MutableType> broadcastState1 = operatorStateBackend.getBroadcastState(broadcastStateDescriptor1);
    BroadcastState<MutableType, MutableType> broadcastState2 = operatorStateBackend.getBroadcastState(broadcastStateDescriptor2);
    BroadcastState<MutableType, MutableType> broadcastState3 = operatorStateBackend.getBroadcastState(broadcastStateDescriptor3);
    listState1.add(MutableType.of(42));
    listState1.add(MutableType.of(4711));
    listState2.add(MutableType.of(7));
    listState2.add(MutableType.of(13));
    listState2.add(MutableType.of(23));
    listState3.add(MutableType.of(17));
    listState3.add(MutableType.of(18));
    listState3.add(MutableType.of(19));
    listState3.add(MutableType.of(20));
    broadcastState1.put(MutableType.of(1), MutableType.of(2));
    broadcastState1.put(MutableType.of(2), MutableType.of(5));
    broadcastState2.put(MutableType.of(2), MutableType.of(5));
    BlockerCheckpointStreamFactory streamFactory = new BlockerCheckpointStreamFactory(1024 * 1024);
    OneShotLatch waiterLatch = new OneShotLatch();
    OneShotLatch blockerLatch = new OneShotLatch();
    streamFactory.setWaiterLatch(waiterLatch);
    streamFactory.setBlockerLatch(blockerLatch);
    RunnableFuture<SnapshotResult<OperatorStateHandle>> runnableFuture = operatorStateBackend.snapshot(1, 1, streamFactory, CheckpointOptions.forCheckpointWithDefaultLocation());
    ExecutorService executorService = Executors.newFixedThreadPool(1);
    executorService.submit(runnableFuture);
    // wait until the async checkpoint is in the write code, then continue
    waiterLatch.await();
    // do some mutations to the state, to test if our snapshot will NOT reflect them
    listState1.add(MutableType.of(77));
    broadcastState1.put(MutableType.of(32), MutableType.of(97));
    int n = 0;
    for (MutableType mutableType : listState2.get()) {
        if (++n == 2) {
            // allow the write code to continue, so that we can make changes while the state
            // is being written in parallel.
            blockerLatch.trigger();
        }
        mutableType.setValue(mutableType.getValue() + 10);
    }
    listState3.clear();
    broadcastState2.clear();
    operatorStateBackend.getListState(new ListStateDescriptor<>("test4", new JavaSerializer<MutableType>()));
    // wait for the asynchronous snapshot to finish and obtain its result
    SnapshotResult<OperatorStateHandle> snapshotResult = runnableFuture.get();
    OperatorStateHandle stateHandle = snapshotResult.getJobManagerOwnedSnapshot();
    try {
        operatorStateBackend.close();
        operatorStateBackend.dispose();
        AbstractStateBackend abstractStateBackend = new MemoryStateBackend(4096);
        CloseableRegistry cancelStreamRegistry = new CloseableRegistry();
        operatorStateBackend = abstractStateBackend.createOperatorStateBackend(createMockEnvironment(), "testOperator", StateObjectCollection.singleton(stateHandle), cancelStreamRegistry);
        assertEquals(3, operatorStateBackend.getRegisteredStateNames().size());
        assertEquals(3, operatorStateBackend.getRegisteredBroadcastStateNames().size());
        listState1 = operatorStateBackend.getListState(stateDescriptor1);
        listState2 = operatorStateBackend.getListState(stateDescriptor2);
        listState3 = operatorStateBackend.getUnionListState(stateDescriptor3);
        broadcastState1 = operatorStateBackend.getBroadcastState(broadcastStateDescriptor1);
        broadcastState2 = operatorStateBackend.getBroadcastState(broadcastStateDescriptor2);
        broadcastState3 = operatorStateBackend.getBroadcastState(broadcastStateDescriptor3);
        assertEquals(3, operatorStateBackend.getRegisteredStateNames().size());
        assertEquals(3, operatorStateBackend.getRegisteredBroadcastStateNames().size());
        Iterator<MutableType> it = listState1.get().iterator();
        assertEquals(42, it.next().value);
        assertEquals(4711, it.next().value);
        assertFalse(it.hasNext());
        it = listState2.get().iterator();
        assertEquals(7, it.next().value);
        assertEquals(13, it.next().value);
        assertEquals(23, it.next().value);
        assertFalse(it.hasNext());
        it = listState3.get().iterator();
        assertEquals(17, it.next().value);
        assertEquals(18, it.next().value);
        assertEquals(19, it.next().value);
        assertEquals(20, it.next().value);
        assertFalse(it.hasNext());
        Iterator<Map.Entry<MutableType, MutableType>> bIt = broadcastState1.iterator();
        assertTrue(bIt.hasNext());
        Map.Entry<MutableType, MutableType> entry = bIt.next();
        assertEquals(1, entry.getKey().value);
        assertEquals(2, entry.getValue().value);
        assertTrue(bIt.hasNext());
        entry = bIt.next();
        assertEquals(2, entry.getKey().value);
        assertEquals(5, entry.getValue().value);
        assertFalse(bIt.hasNext());
        bIt = broadcastState2.iterator();
        assertTrue(bIt.hasNext());
        entry = bIt.next();
        assertEquals(2, entry.getKey().value);
        assertEquals(5, entry.getValue().value);
        assertFalse(bIt.hasNext());
        bIt = broadcastState3.iterator();
        assertFalse(bIt.hasNext());
        operatorStateBackend.close();
        operatorStateBackend.dispose();
    } finally {
        stateHandle.discardState();
    }
    executorService.shutdown();
}
Also used: MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor), MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend), ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry), BlockerCheckpointStreamFactory (org.apache.flink.runtime.util.BlockerCheckpointStreamFactory), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), ExecutorService (java.util.concurrent.ExecutorService), Map (java.util.Map), HashMap (java.util.HashMap), Test (org.junit.Test)
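The waiter/blocker pair used in this test is a common two-latch handshake: the asynchronous writer triggers the waiter latch once it is inside the write path and then blocks on the blocker latch until the test releases it, which gives the test a window in which to mutate live state and check that the snapshot does not reflect those changes. The sketch below reproduces only that handshake with plain threads; it does not use BlockerCheckpointStreamFactory, and the class and variable names are illustrative.

import org.apache.flink.core.testutils.OneShotLatch;

public class TwoLatchHandshakeSketch {
    public static void main(String[] args) throws Exception {
        final OneShotLatch waiterLatch = new OneShotLatch();
        final OneShotLatch blockerLatch = new OneShotLatch();

        // Stands in for the asynchronous snapshot writer.
        Thread writer = new Thread(() -> {
            try {
                waiterLatch.trigger();   // tell the test: "I am inside the write path now"
                blockerLatch.await();    // wait until the test allows the write to proceed
                System.out.println("writing snapshot data");
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });
        writer.start();

        waiterLatch.await();             // test thread: wait until the writer is parked
        System.out.println("mutating live state while the snapshot is held back");
        blockerLatch.trigger();          // release the writer
        writer.join();
    }
}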

Example 58 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

The class OperatorStateBackendTest, method testSnapshotAsyncCancel.

@Test
public void testSnapshotAsyncCancel() throws Exception {
    DefaultOperatorStateBackend operatorStateBackend = new DefaultOperatorStateBackendBuilder(OperatorStateBackendTest.class.getClassLoader(), new ExecutionConfig(), true, emptyStateHandles, new CloseableRegistry()).build();
    ListStateDescriptor<MutableType> stateDescriptor1 = new ListStateDescriptor<>("test1", new JavaSerializer<MutableType>());
    ListState<MutableType> listState1 = operatorStateBackend.getListState(stateDescriptor1);
    listState1.add(MutableType.of(42));
    listState1.add(MutableType.of(4711));
    BlockerCheckpointStreamFactory streamFactory = new BlockerCheckpointStreamFactory(1024 * 1024);
    OneShotLatch waiterLatch = new OneShotLatch();
    OneShotLatch blockerLatch = new OneShotLatch();
    streamFactory.setWaiterLatch(waiterLatch);
    streamFactory.setBlockerLatch(blockerLatch);
    RunnableFuture<SnapshotResult<OperatorStateHandle>> runnableFuture = operatorStateBackend.snapshot(1, 1, streamFactory, CheckpointOptions.forCheckpointWithDefaultLocation());
    ExecutorService executorService = Executors.newFixedThreadPool(1);
    executorService.submit(runnableFuture);
    // wait until the async checkpoint is in the stream's write code, then continue
    waiterLatch.await();
    // cancel the future, which should close the underlying stream
    runnableFuture.cancel(true);
    for (BlockingCheckpointOutputStream stream : streamFactory.getAllCreatedStreams()) {
        Assert.assertTrue(stream.isClosed());
    }
    // we allow the stream under test to proceed
    blockerLatch.trigger();
    try {
        runnableFuture.get(60, TimeUnit.SECONDS);
        Assert.fail();
    } catch (CancellationException ignore) {
    }
}
Also used: ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor), BlockingCheckpointOutputStream (org.apache.flink.runtime.util.BlockingCheckpointOutputStream), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry), CancellationException (java.util.concurrent.CancellationException), ExecutorService (java.util.concurrent.ExecutorService), BlockerCheckpointStreamFactory (org.apache.flink.runtime.util.BlockerCheckpointStreamFactory), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), Test (org.junit.Test)

Example 59 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

The class OperatorStateBackendTest, method testSnapshotAsyncClose.

@Test
public void testSnapshotAsyncClose() throws Exception {
    DefaultOperatorStateBackend operatorStateBackend = new DefaultOperatorStateBackendBuilder(OperatorStateBackendTest.class.getClassLoader(), new ExecutionConfig(), true, emptyStateHandles, new CloseableRegistry()).build();
    ListStateDescriptor<MutableType> stateDescriptor1 = new ListStateDescriptor<>("test1", new JavaSerializer<MutableType>());
    ListState<MutableType> listState1 = operatorStateBackend.getListState(stateDescriptor1);
    listState1.add(MutableType.of(42));
    listState1.add(MutableType.of(4711));
    MapStateDescriptor<MutableType, MutableType> broadcastStateDescriptor1 = new MapStateDescriptor<>("test4", new JavaSerializer<MutableType>(), new JavaSerializer<MutableType>());
    BroadcastState<MutableType, MutableType> broadcastState1 = operatorStateBackend.getBroadcastState(broadcastStateDescriptor1);
    broadcastState1.put(MutableType.of(1), MutableType.of(2));
    broadcastState1.put(MutableType.of(2), MutableType.of(5));
    BlockerCheckpointStreamFactory streamFactory = new BlockerCheckpointStreamFactory(1024 * 1024);
    OneShotLatch waiterLatch = new OneShotLatch();
    OneShotLatch blockerLatch = new OneShotLatch();
    streamFactory.setWaiterLatch(waiterLatch);
    streamFactory.setBlockerLatch(blockerLatch);
    RunnableFuture<SnapshotResult<OperatorStateHandle>> runnableFuture = operatorStateBackend.snapshot(1, 1, streamFactory, CheckpointOptions.forCheckpointWithDefaultLocation());
    ExecutorService executorService = Executors.newFixedThreadPool(1);
    executorService.submit(runnableFuture);
    // wait until the async checkpoint is in the write code, then continue
    waiterLatch.await();
    operatorStateBackend.close();
    blockerLatch.trigger();
    try {
        runnableFuture.get(60, TimeUnit.SECONDS);
        Assert.fail();
    } catch (CancellationException expected) {
    }
}
Also used: MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor), ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry), CancellationException (java.util.concurrent.CancellationException), ExecutorService (java.util.concurrent.ExecutorService), BlockerCheckpointStreamFactory (org.apache.flink.runtime.util.BlockerCheckpointStreamFactory), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), Test (org.junit.Test)
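Examples 58 and 59 differ only in how the in-flight snapshot is aborted: cancelling the RunnableFuture directly versus closing the backend. In both cases the test releases the blocker latch afterwards and expects get() to fail with a CancellationException. The following sketch, which is not Flink code, shows just the bare future-cancellation half of that pattern using standard java.util.concurrent types.

import java.util.concurrent.*;

public class CancelledSnapshotSketch {
    public static void main(String[] args) throws Exception {
        ExecutorService executorService = Executors.newFixedThreadPool(1);
        // Stands in for the snapshot RunnableFuture; it just sleeps so we have time to cancel it.
        Future<?> runnableFuture = executorService.submit(() -> {
            try {
                Thread.sleep(10_000);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });
        runnableFuture.cancel(true); // interrupt the running task, mirroring the test
        try {
            runnableFuture.get(60, TimeUnit.SECONDS);
            throw new AssertionError("expected the cancelled future to fail");
        } catch (CancellationException expected) {
            System.out.println("got the expected CancellationException");
        } finally {
            executorService.shutdown();
        }
    }
}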

Example 60 with OneShotLatch

Use of org.apache.flink.core.testutils.OneShotLatch in project flink by apache.

The class RocksDBAsyncSnapshotTest, method testFullyAsyncSnapshot.

/**
 * This ensures that asynchronous state handles are actually materialized asynchronously.
 *
 * <p>We use latches to block at various stages and see if the code still continues through the
 * parts that are not asynchronous. If the checkpoint is not done asynchronously the test will
 * simply lock forever.
 */
@Test
public void testFullyAsyncSnapshot() throws Exception {
    final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<>(OneInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    testHarness.setupOutputForSingletonOperatorChain();
    testHarness.configureForKeyedStream(new KeySelector<String, String>() {

        @Override
        public String getKey(String value) throws Exception {
            return value;
        }
    }, BasicTypeInfo.STRING_TYPE_INFO);
    StreamConfig streamConfig = testHarness.getStreamConfig();
    File dbDir = temporaryFolder.newFolder();
    RocksDBStateBackend backend = new RocksDBStateBackend(new MemoryStateBackend());
    backend.setDbStoragePath(dbDir.getAbsolutePath());
    streamConfig.setStateBackend(backend);
    streamConfig.setStreamOperator(new AsyncCheckpointOperator());
    streamConfig.setOperatorID(new OperatorID());
    final OneShotLatch delayCheckpointLatch = new OneShotLatch();
    final OneShotLatch ensureCheckpointLatch = new OneShotLatch();
    CheckpointResponder checkpointResponderMock = new CheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            // hold back the acknowledgement until the test releases the latch; the task must
            // keep making progress even though the async checkpoint has not finished yet
            try {
                delayCheckpointLatch.await();
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
            boolean hasManagedKeyedState = false;
            for (Map.Entry<OperatorID, OperatorSubtaskState> entry : subtaskState.getSubtaskStateMappings()) {
                OperatorSubtaskState state = entry.getValue();
                if (state != null) {
                    hasManagedKeyedState |= state.getManagedKeyedState() != null;
                }
            }
            // should be one k/v state
            assertTrue(hasManagedKeyedState);
            // we now know that the checkpoint went through
            ensureCheckpointLatch.trigger();
        }

        @Override
        public void reportCheckpointMetrics(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics) {
        }

        @Override
        public void declineCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointException checkpointException) {
        }
    };
    JobID jobID = new JobID();
    ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
    TestTaskStateManager taskStateManagerTestMock = new TestTaskStateManager(jobID, executionAttemptID, checkpointResponderMock, TestLocalRecoveryConfig.disabled(), new InMemoryStateChangelogStorage(), new HashMap<>(), -1L, new OneShotLatch());
    StreamMockEnvironment mockEnv = new StreamMockEnvironment(testHarness.jobConfig, testHarness.taskConfig, testHarness.memorySize, new MockInputSplitProvider(), testHarness.bufferSize, taskStateManagerTestMock);
    AtomicReference<Throwable> errorRef = new AtomicReference<>();
    mockEnv.setExternalExceptionHandler(errorRef::set);
    testHarness.invoke(mockEnv);
    testHarness.waitForTaskRunning();
    final OneInputStreamTask<String, String> task = testHarness.getTask();
    task.triggerCheckpointAsync(new CheckpointMetaData(42, 17), CheckpointOptions.forCheckpointWithDefaultLocation()).get();
    testHarness.processElement(new StreamRecord<>("Wohoo", 0));
    // now we allow the checkpoint
    delayCheckpointLatch.trigger();
    // wait for the checkpoint to go through
    ensureCheckpointLatch.await();
    testHarness.endInput();
    ExecutorService threadPool = task.getAsyncOperationsThreadPool();
    threadPool.shutdown();
    Assert.assertTrue(threadPool.awaitTermination(60_000, TimeUnit.MILLISECONDS));
    testHarness.waitForTaskCompletion();
    if (errorRef.get() != null) {
        fail("Unexpected exception during execution.");
    }
}
Also used: OneInputStreamTask (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask), CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException), MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend), CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics), OperatorID (org.apache.flink.runtime.jobgraph.OperatorID), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot), InMemoryStateChangelogStorage (org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), StreamMockEnvironment (org.apache.flink.streaming.runtime.tasks.StreamMockEnvironment), MockInputSplitProvider (org.apache.flink.runtime.operators.testutils.MockInputSplitProvider), ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID), CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder), StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig), AtomicReference (java.util.concurrent.atomic.AtomicReference), CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData), CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException), TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager), OneInputStreamTaskTestHarness (org.apache.flink.streaming.runtime.tasks.OneInputStreamTaskTestHarness), ExecutorService (java.util.concurrent.ExecutorService), File (java.io.File), Map (java.util.Map), HashMap (java.util.HashMap), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
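The delay/ensure latch pair is what makes the asynchrony check in Example 60 work: the acknowledgement path is held back on delayCheckpointLatch, so the only way the test can still process an element and then trigger that latch is if the snapshot work runs on a different thread. Below is a rough, non-Flink sketch of that idea, with an ExecutorService standing in for the async checkpoint thread; the names mirror the test but the code is only illustrative.

import java.util.concurrent.*;
import org.apache.flink.core.testutils.OneShotLatch;

public class AsyncProofSketch {
    public static void main(String[] args) throws Exception {
        final OneShotLatch delayCheckpointLatch = new OneShotLatch();
        final OneShotLatch ensureCheckpointLatch = new OneShotLatch();

        ExecutorService asyncOperations = Executors.newSingleThreadExecutor();
        // Stands in for the asynchronous part of the checkpoint.
        asyncOperations.submit(() -> {
            try {
                delayCheckpointLatch.await();    // held back until the "task" has made progress
                ensureCheckpointLatch.trigger(); // report that the checkpoint went through
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });

        // If the checkpoint were synchronous, this line could never be reached.
        System.out.println("processing an element while the checkpoint is still pending");
        delayCheckpointLatch.trigger();
        ensureCheckpointLatch.await();

        asyncOperations.shutdown();
        asyncOperations.awaitTermination(60, TimeUnit.SECONDS);
    }
}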

Aggregations

OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 138 usages
Test (org.junit.Test): 118 usages
JobID (org.apache.flink.api.common.JobID): 41 usages
CompletableFuture (java.util.concurrent.CompletableFuture): 38 usages
ExecutionException (java.util.concurrent.ExecutionException): 27 usages
Configuration (org.apache.flink.configuration.Configuration): 26 usages
IOException (java.io.IOException): 24 usages
Before (org.junit.Before): 24 usages
FlinkException (org.apache.flink.util.FlinkException): 23 usages
TestLogger (org.apache.flink.util.TestLogger): 21 usages
File (java.io.File): 20 usages
UUID (java.util.UUID): 18 usages
TimeoutException (java.util.concurrent.TimeoutException): 18 usages
TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway): 18 usages
Time (org.apache.flink.api.common.time.Time): 17 usages
TestingJobMasterGateway (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway): 17 usages
Rule (org.junit.Rule): 17 usages
Collections (java.util.Collections): 16 usages
ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue): 16 usages
RpcUtils (org.apache.flink.runtime.rpc.RpcUtils): 16 usages