use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
the class AggregatingAlignedProcessingTimeWindowOperatorTest method checkpointRestoreWithPendingWindowTumbling.
@Test
public void checkpointRestoreWithPendingWindowTumbling() {
try {
final int windowSize = 200;
// tumbling window that triggers every 50 milliseconds
AggregatingProcessingTimeWindowOperator<Integer, Tuple2<Integer, Integer>> op = new AggregatingProcessingTimeWindowOperator<>(sumFunction, fieldOneSelector, IntSerializer.INSTANCE, tupleSerializer, windowSize, windowSize);
OneInputStreamOperatorTestHarness<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(op);
testHarness.setProcessingTime(0);
testHarness.setup();
testHarness.open();
// inject some elements
final int numElementsFirst = 700;
final int numElements = 1000;
for (int i = 0; i < numElementsFirst; i++) {
StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
testHarness.processElement(next);
}
// draw a snapshot
List<Tuple2<Integer, Integer>> resultAtSnapshot = extractFromStreamRecords(testHarness.getOutput());
int beforeSnapShot = resultAtSnapshot.size();
StreamStateHandle state = testHarness.snapshotLegacy(1L, System.currentTimeMillis());
int afterSnapShot = testHarness.getOutput().size();
assertEquals("operator performed computation during snapshot", beforeSnapShot, afterSnapShot);
assertTrue(resultAtSnapshot.size() <= numElementsFirst);
// inject some random elements, which should not show up in the state
for (int i = numElementsFirst; i < numElements; i++) {
StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
testHarness.processElement(next);
}
testHarness.close();
op.dispose();
// re-create the operator and restore the state
op = new AggregatingProcessingTimeWindowOperator<>(sumFunction, fieldOneSelector, IntSerializer.INSTANCE, tupleSerializer, windowSize, windowSize);
testHarness = new OneInputStreamOperatorTestHarness<>(op);
testHarness.setup();
testHarness.restore(state);
testHarness.open();
// inject the remaining elements
for (int i = numElementsFirst; i < numElements; i++) {
StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
testHarness.processElement(next);
}
testHarness.setProcessingTime(200);
// get and verify the result
List<Tuple2<Integer, Integer>> finalResult = new ArrayList<>(resultAtSnapshot);
List<Tuple2<Integer, Integer>> partialFinalResult = extractFromStreamRecords(testHarness.getOutput());
finalResult.addAll(partialFinalResult);
assertEquals(numElements, finalResult.size());
Collections.sort(finalResult, tupleComparator);
for (int i = 0; i < numElements; i++) {
assertEquals(i, finalResult.get(i).f0.intValue());
assertEquals(i, finalResult.get(i).f1.intValue());
}
testHarness.close();
op.dispose();
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
the class AggregatingAlignedProcessingTimeWindowOperatorTest method checkpointRestoreWithPendingWindowSliding.
@Test
public void checkpointRestoreWithPendingWindowSliding() {
try {
final int factor = 4;
final int windowSlide = 50;
final int windowSize = factor * windowSlide;
// sliding window (200 msecs) every 50 msecs
AggregatingProcessingTimeWindowOperator<Integer, Tuple2<Integer, Integer>> op = new AggregatingProcessingTimeWindowOperator<>(sumFunction, fieldOneSelector, IntSerializer.INSTANCE, tupleSerializer, windowSize, windowSlide);
OneInputStreamOperatorTestHarness<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(op);
testHarness.setProcessingTime(0);
testHarness.setup();
testHarness.open();
// inject some elements
final int numElements = 1000;
final int numElementsFirst = 700;
for (int i = 0; i < numElementsFirst; i++) {
StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
testHarness.processElement(next);
}
// draw a snapshot
List<Tuple2<Integer, Integer>> resultAtSnapshot = extractFromStreamRecords(testHarness.getOutput());
int beforeSnapShot = resultAtSnapshot.size();
StreamStateHandle state = testHarness.snapshotLegacy(1L, System.currentTimeMillis());
int afterSnapShot = testHarness.getOutput().size();
assertEquals("operator performed computation during snapshot", beforeSnapShot, afterSnapShot);
assertTrue(resultAtSnapshot.size() <= factor * numElementsFirst);
// inject the remaining elements - these should not influence the snapshot
for (int i = numElementsFirst; i < numElements; i++) {
StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
testHarness.processElement(next);
}
testHarness.close();
op.dispose();
// re-create the operator and restore the state
op = new AggregatingProcessingTimeWindowOperator<>(sumFunction, fieldOneSelector, IntSerializer.INSTANCE, tupleSerializer, windowSize, windowSlide);
testHarness = new OneInputStreamOperatorTestHarness<>(op);
testHarness.setup();
testHarness.restore(state);
testHarness.open();
// inject again the remaining elements
for (int i = numElementsFirst; i < numElements; i++) {
StreamRecord<Tuple2<Integer, Integer>> next = new StreamRecord<>(new Tuple2<>(i, i));
testHarness.processElement(next);
}
testHarness.setProcessingTime(50);
testHarness.setProcessingTime(100);
testHarness.setProcessingTime(150);
testHarness.setProcessingTime(200);
testHarness.setProcessingTime(250);
testHarness.setProcessingTime(300);
testHarness.setProcessingTime(350);
testHarness.setProcessingTime(400);
// get and verify the result
List<Tuple2<Integer, Integer>> finalResult = new ArrayList<>(resultAtSnapshot);
List<Tuple2<Integer, Integer>> partialFinalResult = extractFromStreamRecords(testHarness.getOutput());
finalResult.addAll(partialFinalResult);
assertEquals(numElements * factor, finalResult.size());
Collections.sort(finalResult, tupleComparator);
for (int i = 0; i < factor * numElements; i++) {
assertEquals(i / factor, finalResult.get(i).f0.intValue());
assertEquals(i / factor, finalResult.get(i).f1.intValue());
}
testHarness.close();
op.dispose();
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
the class SavepointITCase method testTriggerSavepointAndResumeWithFileBasedCheckpoints.
/**
* Triggers a savepoint for a job that uses the FsStateBackend. We expect
* that all checkpoint files are written to a new savepoint directory.
*
* <ol>
* <li>Submit job, wait for some progress</li>
* <li>Trigger savepoint and verify that savepoint has been created</li>
* <li>Shut down the cluster, re-submit the job from the savepoint,
* verify that the initial state has been reset, and
* all tasks are running again</li>
* <li>Cancel job, dispose the savepoint, and verify that everything
* has been cleaned up</li>
* </ol>
*/
@Test
public void testTriggerSavepointAndResumeWithFileBasedCheckpoints() throws Exception {
// Config
final int numTaskManagers = 2;
final int numSlotsPerTaskManager = 2;
final int parallelism = numTaskManagers * numSlotsPerTaskManager;
final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
final File testRoot = folder.newFolder();
TestingCluster flink = null;
try {
// Create a test actor system
ActorSystem testActorSystem = AkkaUtils.createDefaultActorSystem();
// Flink configuration
final Configuration config = new Configuration();
config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
final File checkpointDir = new File(testRoot, "checkpoints");
final File savepointRootDir = new File(testRoot, "savepoints");
if (!checkpointDir.mkdir() || !savepointRootDir.mkdirs()) {
fail("Test setup failed: failed to create temporary directories.");
}
// Use file based checkpoints
config.setString(CoreOptions.STATE_BACKEND, "filesystem");
config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointDir.toURI().toString());
config.setString(FsStateBackendFactory.MEMORY_THRESHOLD_CONF_KEY, "0");
config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointRootDir.toURI().toString());
// Start Flink
flink = new TestingCluster(config);
flink.start(true);
// Submit the job
final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000);
final JobID jobId = jobGraph.getJobID();
// Reset the static test job helpers
StatefulCounter.resetForTest(parallelism);
// Retrieve the job manager
ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
LOG.info("Submitting job " + jobGraph.getJobID() + " in detached mode.");
flink.submitJobDetached(jobGraph);
LOG.info("Waiting for some progress.");
// wait for the JobManager to be ready
Future<Object> allRunning = jobManager.ask(new WaitForAllVerticesToBeRunning(jobId), deadline.timeLeft());
Await.ready(allRunning, deadline.timeLeft());
// wait for the Tasks to be ready
StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
LOG.info("Triggering a savepoint.");
Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobId, Option.<String>empty()), deadline.timeLeft());
final String savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
LOG.info("Retrieved savepoint path: " + savepointPath + ".");
// Retrieve the savepoint from the testing job manager
LOG.info("Requesting the savepoint.");
Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
SavepointV1 savepoint = (SavepointV1) ((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
LOG.info("Retrieved savepoint: " + savepointPath + ".");
// Shut down the Flink cluster (thereby canceling the job)
LOG.info("Shutting down Flink cluster.");
flink.shutdown();
flink.awaitTermination();
// - Verification START -------------------------------------------
// Only one savepoint should exist
File[] files = savepointRootDir.listFiles();
if (files != null) {
assertEquals("Savepoint not created in expected directory", 1, files.length);
assertTrue("Savepoint did not create self-contained directory", files[0].isDirectory());
File savepointDir = files[0];
File[] savepointFiles = savepointDir.listFiles();
assertNotNull(savepointFiles);
// Expect one metadata file and one checkpoint file per stateful
// parallel subtask
String errMsg = "Did not write expected number of savepoint/checkpoint files to directory: " + Arrays.toString(savepointFiles);
assertEquals(errMsg, 1 + parallelism, savepointFiles.length);
} else {
fail("Savepoint not created in expected directory");
}
// We currently have the following directory layout: checkpointDir/jobId/chk-ID
File jobCheckpoints = new File(checkpointDir, jobId.toString());
if (jobCheckpoints.exists()) {
files = jobCheckpoints.listFiles();
assertNotNull("Checkpoint directory empty", files);
assertEquals("Checkpoints directory not clean: " + Arrays.toString(files), 0, files.length);
}
// - Verification END ---------------------------------------------
// Restart the cluster
LOG.info("Restarting Flink cluster.");
flink.start();
// Retrieve the job manager
LOG.info("Retrieving JobManager.");
jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
LOG.info("JobManager: " + jobManager + ".");
// Reset static test helpers
StatefulCounter.resetForTest(parallelism);
// Gather all task deployment descriptors
final Throwable[] error = new Throwable[1];
final TestingCluster finalFlink = flink;
final Multimap<JobVertexID, TaskDeploymentDescriptor> tdds = HashMultimap.create();
new JavaTestKit(testActorSystem) {
{
new Within(deadline.timeLeft()) {
@Override
protected void run() {
try {
// Register to all submit task messages for job
for (ActorRef taskManager : finalFlink.getTaskManagersAsJava()) {
taskManager.tell(new TestingTaskManagerMessages.RegisterSubmitTaskListener(jobId), getTestActor());
}
// Set the savepoint path
jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
LOG.info("Resubmitting job " + jobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
// Submit the job
finalFlink.submitJobDetached(jobGraph);
int numTasks = 0;
for (JobVertex jobVertex : jobGraph.getVertices()) {
numTasks += jobVertex.getParallelism();
}
// Gather the task deployment descriptors
LOG.info("Gathering " + numTasks + " submitted " + "TaskDeploymentDescriptor instances.");
for (int i = 0; i < numTasks; i++) {
ResponseSubmitTaskListener resp = (ResponseSubmitTaskListener) expectMsgAnyClassOf(getRemainingTime(), ResponseSubmitTaskListener.class);
TaskDeploymentDescriptor tdd = resp.tdd();
LOG.info("Received: " + tdd.toString() + ".");
TaskInformation taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
tdds.put(taskInformation.getJobVertexId(), tdd);
}
} catch (Throwable t) {
error[0] = t;
}
}
};
}
};
// - Verification START -------------------------------------------
String errMsg = "Error during gathering of TaskDeploymentDescriptors";
assertNull(errMsg, error[0]);
// have a matching task deployment descriptor.
for (TaskState taskState : savepoint.getTaskStates()) {
Collection<TaskDeploymentDescriptor> taskTdds = tdds.get(taskState.getJobVertexID());
errMsg = "Missing task for savepoint state for operator " + taskState.getJobVertexID() + ".";
assertTrue(errMsg, taskTdds.size() > 0);
assertEquals(taskState.getNumberCollectedStates(), taskTdds.size());
for (TaskDeploymentDescriptor tdd : taskTdds) {
SubtaskState subtaskState = taskState.getState(tdd.getSubtaskIndex());
assertNotNull(subtaskState);
errMsg = "Initial operator state mismatch.";
assertEquals(errMsg, subtaskState.getLegacyOperatorState(), tdd.getTaskStateHandles().getLegacyOperatorState());
}
}
// Await state is restored
StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
// Await some progress after restore
StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
// - Verification END ---------------------------------------------
LOG.info("Cancelling job " + jobId + ".");
jobManager.tell(new CancelJob(jobId));
LOG.info("Disposing savepoint " + savepointPath + ".");
Future<Object> disposeFuture = jobManager.ask(new DisposeSavepoint(savepointPath), deadline.timeLeft());
errMsg = "Failed to dispose savepoint " + savepointPath + ".";
Object resp = Await.result(disposeFuture, deadline.timeLeft());
assertTrue(errMsg, resp.getClass() == getDisposeSavepointSuccess().getClass());
// - Verification START -------------------------------------------
// The checkpoint files
List<File> checkpointFiles = new ArrayList<>();
for (TaskState stateForTaskGroup : savepoint.getTaskStates()) {
for (SubtaskState subtaskState : stateForTaskGroup.getStates()) {
ChainedStateHandle<StreamStateHandle> streamTaskState = subtaskState.getLegacyOperatorState();
for (int i = 0; i < streamTaskState.getLength(); i++) {
if (streamTaskState.get(i) != null) {
FileStateHandle fileStateHandle = (FileStateHandle) streamTaskState.get(i);
checkpointFiles.add(new File(fileStateHandle.getFilePath().toUri()));
}
}
}
}
// The checkpoint files of the savepoint should have been discarded
for (File f : checkpointFiles) {
errMsg = "Checkpoint file " + f + " not cleaned up properly.";
assertFalse(errMsg, f.exists());
}
if (checkpointFiles.size() > 0) {
File parent = checkpointFiles.get(0).getParentFile();
errMsg = "Checkpoint parent directory " + parent + " not cleaned up properly.";
assertFalse(errMsg, parent.exists());
}
// All savepoints should have been cleaned up
errMsg = "Savepoints directory not cleaned up properly: " + Arrays.toString(savepointRootDir.listFiles()) + ".";
assertEquals(errMsg, 0, savepointRootDir.listFiles().length);
// - Verification END ---------------------------------------------
} finally {
if (flink != null) {
flink.shutdown();
}
}
}
use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
the class FsCheckpointStateOutputStreamTest method runTest.
private void runTest(int numBytes, int bufferSize, int threshold, boolean expectFile) throws Exception {
FsCheckpointStreamFactory.CheckpointStateOutputStream stream = new FsCheckpointStreamFactory.FsCheckpointStateOutputStream(TEMP_DIR_PATH, FileSystem.getLocalFileSystem(), bufferSize, threshold);
Random rnd = new Random();
byte[] original = new byte[numBytes];
byte[] bytes = new byte[original.length];
rnd.nextBytes(original);
System.arraycopy(original, 0, bytes, 0, original.length);
// the test writes a mixture of writing individual bytes and byte arrays
int pos = 0;
while (pos < bytes.length) {
boolean single = rnd.nextBoolean();
if (single) {
stream.write(bytes[pos++]);
} else {
int num = rnd.nextInt(Math.min(10, bytes.length - pos));
stream.write(bytes, pos, num);
pos += num;
}
}
StreamStateHandle handle = stream.closeAndGetHandle();
if (expectFile) {
assertTrue(handle instanceof FileStateHandle);
} else {
assertTrue(handle instanceof ByteStreamStateHandle);
}
// make sure the writing process did not alter the original byte array
assertArrayEquals(original, bytes);
try (InputStream inStream = handle.openInputStream()) {
byte[] validation = new byte[bytes.length];
DataInputStream dataInputStream = new DataInputStream(inStream);
dataInputStream.readFully(validation);
assertArrayEquals(bytes, validation);
}
handle.discardState();
}
use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
the class HeapKeyedStateBackend method snapshot.
@Override
@SuppressWarnings("unchecked")
public RunnableFuture<KeyGroupsStateHandle> snapshot(final long checkpointId, final long timestamp, final CheckpointStreamFactory streamFactory, CheckpointOptions checkpointOptions) throws Exception {
if (!hasRegisteredState()) {
return DoneFuture.nullValue();
}
long syncStartTime = System.currentTimeMillis();
Preconditions.checkState(stateTables.size() <= Short.MAX_VALUE, "Too many KV-States: " + stateTables.size() + ". Currently at most " + Short.MAX_VALUE + " states are supported");
List<KeyedBackendSerializationProxy.StateMetaInfo<?, ?>> metaInfoProxyList = new ArrayList<>(stateTables.size());
final Map<String, Integer> kVStateToId = new HashMap<>(stateTables.size());
final Map<StateTable<K, ?, ?>, StateTableSnapshot> cowStateStableSnapshots = new HashedMap(stateTables.size());
for (Map.Entry<String, StateTable<K, ?, ?>> kvState : stateTables.entrySet()) {
RegisteredBackendStateMetaInfo<?, ?> metaInfo = kvState.getValue().getMetaInfo();
KeyedBackendSerializationProxy.StateMetaInfo<?, ?> metaInfoProxy = new KeyedBackendSerializationProxy.StateMetaInfo(metaInfo.getStateType(), metaInfo.getName(), metaInfo.getNamespaceSerializer(), metaInfo.getStateSerializer());
metaInfoProxyList.add(metaInfoProxy);
kVStateToId.put(kvState.getKey(), kVStateToId.size());
StateTable<K, ?, ?> stateTable = kvState.getValue();
if (null != stateTable) {
cowStateStableSnapshots.put(stateTable, stateTable.createSnapshot());
}
}
final KeyedBackendSerializationProxy serializationProxy = new KeyedBackendSerializationProxy(keySerializer, metaInfoProxyList);
//--------------------------------------------------- this becomes the end of sync part
// implementation of the async IO operation, based on FutureTask
final AbstractAsyncIOCallable<KeyGroupsStateHandle, CheckpointStreamFactory.CheckpointStateOutputStream> ioCallable = new AbstractAsyncIOCallable<KeyGroupsStateHandle, CheckpointStreamFactory.CheckpointStateOutputStream>() {
AtomicBoolean open = new AtomicBoolean(false);
@Override
public CheckpointStreamFactory.CheckpointStateOutputStream openIOHandle() throws Exception {
if (open.compareAndSet(false, true)) {
CheckpointStreamFactory.CheckpointStateOutputStream stream = streamFactory.createCheckpointStateOutputStream(checkpointId, timestamp);
try {
cancelStreamRegistry.registerClosable(stream);
return stream;
} catch (Exception ex) {
open.set(false);
throw ex;
}
} else {
throw new IOException("Operation already opened.");
}
}
@Override
public KeyGroupsStateHandle performOperation() throws Exception {
long asyncStartTime = System.currentTimeMillis();
CheckpointStreamFactory.CheckpointStateOutputStream stream = getIoHandle();
DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(stream);
serializationProxy.write(outView);
long[] keyGroupRangeOffsets = new long[keyGroupRange.getNumberOfKeyGroups()];
for (int keyGroupPos = 0; keyGroupPos < keyGroupRange.getNumberOfKeyGroups(); ++keyGroupPos) {
int keyGroupId = keyGroupRange.getKeyGroupId(keyGroupPos);
keyGroupRangeOffsets[keyGroupPos] = stream.getPos();
outView.writeInt(keyGroupId);
for (Map.Entry<String, StateTable<K, ?, ?>> kvState : stateTables.entrySet()) {
outView.writeShort(kVStateToId.get(kvState.getKey()));
cowStateStableSnapshots.get(kvState.getValue()).writeMappingsInKeyGroup(outView, keyGroupId);
}
}
if (open.compareAndSet(true, false)) {
StreamStateHandle streamStateHandle = stream.closeAndGetHandle();
KeyGroupRangeOffsets offsets = new KeyGroupRangeOffsets(keyGroupRange, keyGroupRangeOffsets);
final KeyGroupsStateHandle keyGroupsStateHandle = new KeyGroupsStateHandle(offsets, streamStateHandle);
if (asynchronousSnapshots) {
LOG.info("Heap backend snapshot ({}, asynchronous part) in thread {} took {} ms.", streamFactory, Thread.currentThread(), (System.currentTimeMillis() - asyncStartTime));
}
return keyGroupsStateHandle;
} else {
throw new IOException("Checkpoint stream already closed.");
}
}
@Override
public void done(boolean canceled) {
if (open.compareAndSet(true, false)) {
CheckpointStreamFactory.CheckpointStateOutputStream stream = getIoHandle();
if (null != stream) {
cancelStreamRegistry.unregisterClosable(stream);
IOUtils.closeQuietly(stream);
}
}
for (StateTableSnapshot snapshot : cowStateStableSnapshots.values()) {
snapshot.release();
}
}
};
AsyncStoppableTaskWithCallback<KeyGroupsStateHandle> task = AsyncStoppableTaskWithCallback.from(ioCallable);
if (!asynchronousSnapshots) {
task.run();
}
LOG.info("Heap backend snapshot (" + streamFactory + ", synchronous part) in thread " + Thread.currentThread() + " took " + (System.currentTimeMillis() - syncStartTime) + " ms.");
return task;
}
Aggregations