Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class WritableSavepointITCase, method validateModification.
private void validateModification(StateBackend backend, String savepointPath) throws Exception {
StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
sEnv.setStateBackend(backend);
DataStream<Account> stream =
        sEnv.fromCollection(accounts)
                .keyBy(acc -> acc.id)
                .flatMap(new UpdateAndGetAccount())
                .uid(ACCOUNT_UID);
CompletableFuture<Collection<Account>> results = collector.collect(stream);
stream.map(acc -> acc.id)
        .map(new StatefulOperator())
        .uid(MODIFY_UID)
        .addSink(new DiscardingSink<>());
JobGraph jobGraph = sEnv.getStreamGraph().getJobGraph();
jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, false));
ClusterClient<?> client = MINI_CLUSTER_RESOURCE.getClusterClient();
Optional<SerializedThrowable> serializedThrowable =
        client.submitJob(jobGraph)
                .thenCompose(client::requestJobResult)
                .get()
                .getSerializedThrowable();
Assert.assertFalse(serializedThrowable.isPresent());
Assert.assertEquals("Unexpected output", 3, results.get().size());
}
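The snippet relies on helpers defined elsewhere in the test class. As an illustration only, here is a minimal sketch of what a stateful UpdateAndGetAccount flatMap could look like; the Account field layout (public int id, public double amount) and the single ValueState per account are assumptions, not the actual test code.

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

// Sketch only: assumes Account has public int id and double amount fields.
public static class UpdateAndGetAccount extends RichFlatMapFunction<Account, Account> {

    private transient ValueState<Double> total;

    @Override
    public void open(Configuration parameters) {
        total = getRuntimeContext()
                .getState(new ValueStateDescriptor<>("total", Types.DOUBLE));
    }

    @Override
    public void flatMap(Account value, Collector<Account> out) throws Exception {
        // Add any balance restored from the bootstrapped savepoint to the incoming amount.
        Double restored = total.value();
        if (restored != null) {
            value.amount += restored;
        }
        total.update(value.amount);
        out.collect(value);
    }
}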
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class WritableSavepointWindowITCase, method testTumbleWindow.
@Test
public void testTumbleWindow() throws Exception {
final String savepointPath = getTempDirPath(new AbstractID().toHexString());
ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<String, Integer>> bootstrapData = bEnv.fromCollection(WORDS).map(word -> Tuple2.of(word, 1)).returns(TUPLE_TYPE_INFO);
WindowedOperatorTransformation<Tuple2<String, Integer>, String, TimeWindow> transformation =
        OperatorTransformation.bootstrapWith(bootstrapData)
                .assignTimestamps(record -> 2L)
                .keyBy(tuple -> tuple.f0, Types.STRING)
                .window(TumblingEventTimeWindows.of(Time.milliseconds(5)));
Savepoint.create(stateBackend, 128)
        .withOperator(UID, windowBootstrap.bootstrap(transformation))
        .write(savepointPath);
bEnv.execute("write state");
StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
sEnv.setStateBackend(stateBackend);
WindowedStream<Tuple2<String, Integer>, String, TimeWindow> stream =
        sEnv.addSource(new MaxWatermarkSource<Tuple2<String, Integer>>())
                .returns(TUPLE_TYPE_INFO)
                .keyBy(tuple -> tuple.f0)
                .window(TumblingEventTimeWindows.of(Time.milliseconds(5)));
DataStream<Tuple2<String, Integer>> windowed = windowStream.window(stream).uid(UID);
CompletableFuture<Collection<Tuple2<String, Integer>>> future = collector.collect(windowed);
submitJob(savepointPath, sEnv);
Collection<Tuple2<String, Integer>> results = future.get();
Assert.assertThat("Incorrect results from bootstrapped windows", results, STANDARD_MATCHER);
}
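The submitJob helper is not shown in this excerpt. A plausible sketch, mirroring the restore-and-submit pattern from validateModification above (the exact signature and the MINI_CLUSTER_RESOURCE field are assumptions):

private void submitJob(String savepointPath, StreamExecutionEnvironment sEnv) throws Exception {
    JobGraph jobGraph = sEnv.getStreamGraph().getJobGraph();
    // Restore the job from the bootstrapped savepoint; fail on non-restored state.
    jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, false));
    ClusterClient<?> client = MINI_CLUSTER_RESOURCE.getClusterClient();
    Optional<SerializedThrowable> serializedThrowable =
            client.submitJob(jobGraph)
                    .thenCompose(client::requestJobResult)
                    .get()
                    .getSerializedThrowable();
    Assert.assertFalse(serializedThrowable.isPresent());
}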
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class WritableSavepointWindowITCase, method testTumbleWindowWithEvictor.
@Test
public void testTumbleWindowWithEvictor() throws Exception {
final String savepointPath = getTempDirPath(new AbstractID().toHexString());
ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<String, Integer>> bootstrapData = bEnv.fromCollection(WORDS).map(word -> Tuple2.of(word, 1)).returns(TUPLE_TYPE_INFO);
WindowedOperatorTransformation<Tuple2<String, Integer>, String, TimeWindow> transformation =
        OperatorTransformation.bootstrapWith(bootstrapData)
                .assignTimestamps(record -> 2L)
                .keyBy(tuple -> tuple.f0, Types.STRING)
                .window(TumblingEventTimeWindows.of(Time.milliseconds(5)))
                .evictor(CountEvictor.of(1));
Savepoint.create(new MemoryStateBackend(), 128)
        .withOperator(UID, windowBootstrap.bootstrap(transformation))
        .write(savepointPath);
bEnv.execute("write state");
StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
WindowedStream<Tuple2<String, Integer>, String, TimeWindow> stream =
        sEnv.addSource(new MaxWatermarkSource<>(), TUPLE_TYPE_INFO)
                .keyBy(tuple -> tuple.f0)
                .window(TumblingEventTimeWindows.of(Time.milliseconds(5)))
                .evictor(CountEvictor.of(1));
DataStream<Tuple2<String, Integer>> windowed = windowStream.window(stream).uid(UID);
CompletableFuture<Collection<Tuple2<String, Integer>>> future = collector.collect(windowed);
submitJob(savepointPath, sEnv);
Collection<Tuple2<String, Integer>> results = future.get();
Assert.assertThat("Incorrect results from bootstrapped windows", results, EVICTOR_MATCHER);
}
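Both window tests delegate the actual window function to the parameterized windowBootstrap and windowStream hooks. As a sketch only, a reduce-based parameterization could expand roughly as follows; the sum reducer is an assumption, and transformation, stream, UID and savepointPath refer to the variables of the test above.

// Hypothetical reduce-based pairing: the bootstrap side and the streaming side must apply
// the same function over the same window definition.
ReduceFunction<Tuple2<String, Integer>> sum = (a, b) -> Tuple2.of(a.f0, a.f1 + b.f1);

// Bootstrap side, i.e. what windowBootstrap.bootstrap(transformation) could stand for:
Savepoint.create(new MemoryStateBackend(), 128)
        .withOperator(UID, transformation.reduce(sum))
        .write(savepointPath);

// Streaming side, i.e. what windowStream.window(stream) could stand for:
DataStream<Tuple2<String, Integer>> reduced = stream.reduce(sum).uid(UID);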
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class SavepointDeepCopyTest, method testSavepointDeepCopy.
/**
* Test savepoint deep copy. The test verifies the deep-copy behavior as follows:
*
* <ul>
* <li>create {@code savepoint1} with operator {@code Operator1} and make sure it has state
* files in addition to {@code _metadata}
* <li>create {@code savepoint2} from {@code savepoint1} by adding a new operator {@code
* Operator2}
* <li>check that all state files in {@code savepoint1}'s directory are copied over to {@code
* savepoint2}'s directory
* <li>read the state of {@code Operator1} from {@code savepoint2} and make sure the number of
* keys remains the same
* </ul>
*
* @throws Exception if anything goes wrong
*/
@Test
public void testSavepointDeepCopy() throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> words = env.fromElements(TEXT.split(" "));
StateBootstrapTransformation<String> transformation =
        OperatorTransformation.bootstrapWith(words)
                .keyBy(e -> e)
                .transform(new WordMapBootstrapper());
File savepointUrl1 = createAndRegisterTempFile(new AbstractID().toHexString());
String savepointPath1 = savepointUrl1.getPath();
SavepointWriter.newSavepoint(backend, 128)
        .withConfiguration(FS_SMALL_FILE_THRESHOLD, FILE_STATE_SIZE_THRESHOLD)
        .withOperator("Operator1", transformation)
        .write(savepointPath1);
env.execute("bootstrap savepoint1");
Assert.assertTrue("Failed to bootstrap savepoint1 with additional state files", Files.list(Paths.get(savepointPath1)).count() > 1);
Set<String> stateFiles1 =
        Files.list(Paths.get(savepointPath1))
                .map(path -> path.getFileName().toString())
                .collect(Collectors.toSet());
// create savepoint2 from savepoint1 created above
File savepointUrl2 = createAndRegisterTempFile(new AbstractID().toHexString());
String savepointPath2 = savepointUrl2.getPath();
SavepointWriter savepoint2 =
        SavepointWriter.fromExistingSavepoint(savepointPath1, backend)
                .withConfiguration(FS_SMALL_FILE_THRESHOLD, FILE_STATE_SIZE_THRESHOLD);
savepoint2.withOperator("Operator2", transformation).write(savepointPath2);
env.execute("create savepoint2");
Assert.assertTrue("Failed to create savepoint2 from savepoint1 with additional state files", Files.list(Paths.get(savepointPath2)).count() > 1);
Set<String> stateFiles2 =
        Files.list(Paths.get(savepointPath2))
                .map(path -> path.getFileName().toString())
                .collect(Collectors.toSet());
assertThat("At least one state file in savepoint1 is not in savepoint2", stateFiles1, everyItem(isIn(stateFiles2)));
// Load savepoint2 and read the state of "Operator1" (which was not touched when savepoint2
// was created) and make sure the number of keys remains the same.
long actualKeyNum =
        JobResultRetriever.collect(
                        SavepointReader.read(env, savepointPath2, backend)
                                .readKeyedState("Operator1", new ReadFunction()))
                .size();
long expectedKeyNum = Arrays.stream(TEXT.split(" ")).distinct().count();
Assert.assertEquals("Unexpected number of keys in the state of Operator1", expectedKeyNum, actuallyKeyNum);
}
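WordMapBootstrapper and ReadFunction are defined elsewhere in the test class. A minimal sketch of what such a bootstrap/read pair could look like; the single ValueState<String> per key is an assumption, the test only requires that ReadFunction emits one record per key so the keys can be counted.

import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.state.api.functions.KeyedStateBootstrapFunction;
import org.apache.flink.state.api.functions.KeyedStateReaderFunction;
import org.apache.flink.util.Collector;

// Sketch only: writes one ValueState entry per key into the savepoint.
public static class WordMapBootstrapper extends KeyedStateBootstrapFunction<String, String> {

    private transient ValueState<String> state;

    @Override
    public void open(Configuration parameters) {
        state = getRuntimeContext().getState(new ValueStateDescriptor<>("word", Types.STRING));
    }

    @Override
    public void processElement(String value, Context ctx) throws Exception {
        state.update(value);
    }
}

// Sketch only: emits one record per key so the caller can count the distinct keys.
public static class ReadFunction extends KeyedStateReaderFunction<String, String> {

    private transient ValueState<String> state;

    @Override
    public void open(Configuration parameters) {
        state = getRuntimeContext().getState(new ValueStateDescriptor<>("word", Types.STRING));
    }

    @Override
    public void readKey(String key, Context ctx, Collector<String> out) throws Exception {
        out.collect(state.value());
    }
}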
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class SavepointWriterITCase, method validateModification.
private void validateModification(StateBackend backend, String savepointPath) throws Exception {
StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
if (backend != null) {
sEnv.setStateBackend(backend);
}
DataStream<Account> stream =
        sEnv.fromCollection(accounts)
                .keyBy(acc -> acc.id)
                .flatMap(new UpdateAndGetAccount())
                .uid(ACCOUNT_UID);
CompletableFuture<Collection<Account>> results = collector.collect(stream);
stream.map(acc -> acc.id)
        .map(new StatefulOperator())
        .uid(MODIFY_UID)
        .addSink(new DiscardingSink<>());
JobGraph jobGraph = sEnv.getStreamGraph().getJobGraph();
jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, false));
ClusterClient<?> client = MINI_CLUSTER_RESOURCE.getClusterClient();
Optional<SerializedThrowable> serializedThrowable =
        client.submitJob(jobGraph)
                .thenCompose(client::requestJobResult)
                .get()
                .getSerializedThrowable();
Assert.assertFalse(serializedThrowable.isPresent());
Assert.assertEquals("Unexpected output", 3, results.get().size());
}
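The savepoint restored here is produced earlier in the same test by a bootstrap step. A plausible sketch of that step for the ACCOUNT_UID operator, reusing the SavepointWriter API shown in SavepointDeepCopyTest above; AccountBootstrapper stands in for a KeyedStateBootstrapFunction and is an assumption, and the MODIFY_UID operator is omitted.

StreamExecutionEnvironment bEnv = StreamExecutionEnvironment.getExecutionEnvironment();

// Seed keyed state for the ACCOUNT_UID operator from the same account collection.
StateBootstrapTransformation<Account> accountTransformation =
        OperatorTransformation.bootstrapWith(bEnv.fromCollection(accounts))
                .keyBy(acc -> acc.id)
                .transform(new AccountBootstrapper());

SavepointWriter.newSavepoint(backend, 128)
        .withOperator(ACCOUNT_UID, accountTransformation)
        .write(savepointPath);

bEnv.execute("bootstrap savepoint");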