use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class RescalingITCase method createJobGraphWithKeyedAndNonPartitionedOperatorState.
private static JobGraph createJobGraphWithKeyedAndNonPartitionedOperatorState(int parallelism, int maxParallelism, int fixedParallelism, int numberKeys, int numberElements, boolean terminateAfterEmission, int checkpointingInterval) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
env.getConfig().setMaxParallelism(maxParallelism);
env.enableCheckpointing(checkpointingInterval);
env.setRestartStrategy(RestartStrategies.noRestart());
DataStream<Integer> input = env.addSource(new SubtaskIndexNonPartitionedStateSource(numberKeys, numberElements, terminateAfterEmission)).setParallelism(fixedParallelism).keyBy(new KeySelector<Integer, Integer>() {
private static final long serialVersionUID = -7952298871120320940L;
@Override
public Integer getKey(Integer value) throws Exception {
return value;
}
});
SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);
DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements));
result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());
return env.getStreamGraph().getJobGraph();
}
use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class SavepointITCase method testCanRestoreWithModifiedStatelessOperators.
/**
* FLINK-5985
*
* This test ensures we can restore from a savepoint under modifications to the job graph that only concern
* stateless operators.
*/
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
// Config
int numTaskManagers = 2;
int numSlotsPerTaskManager = 2;
int parallelism = 2;
// Test deadline
final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
final File tmpDir = CommonTestUtils.createTempDirectory();
final File savepointDir = new File(tmpDir, "savepoints");
TestingCluster flink = null;
String savepointPath;
try {
// Flink configuration
final Configuration config = new Configuration();
config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
LOG.info("Flink configuration: " + config + ".");
// Start Flink
flink = new TestingCluster(config);
LOG.info("Starting Flink cluster.");
flink.start(true);
// Retrieve the job manager
LOG.info("Retrieving JobManager.");
ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
LOG.info("JobManager: " + jobManager + ".");
final StatefulCounter statefulCounter = new StatefulCounter();
StatefulCounter.resetForTest(parallelism);
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
env.addSource(new InfiniteTestSource()).shuffle().map(new MapFunction<Integer, Integer>() {
@Override
public Integer map(Integer value) throws Exception {
return 4 * value;
}
}).shuffle().map(statefulCounter).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {
@Override
public Integer map(Integer value) throws Exception {
return 2 * value;
}
}).addSink(new DiscardingSink<Integer>());
JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();
JobSubmissionResult submissionResult = flink.submitJobDetached(originalJobGraph);
JobID jobID = submissionResult.getJobID();
// wait for the Tasks to be ready
StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
LOG.info("Retrieved savepoint: " + savepointPath + ".");
// Shut down the Flink cluster (thereby canceling the job)
LOG.info("Shutting down Flink cluster.");
flink.shutdown();
flink.awaitTermination();
} finally {
flink.shutdown();
flink.awaitTermination();
}
try {
LOG.info("Restarting Flink cluster.");
flink.start(true);
// Retrieve the job manager
LOG.info("Retrieving JobManager.");
ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
LOG.info("JobManager: " + jobManager + ".");
// Reset static test helpers
StatefulCounter.resetForTest(parallelism);
// Gather all task deployment descriptors
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
// generate a modified job graph that adds a stateless op
env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter()).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {
@Override
public Integer map(Integer value) throws Exception {
return value;
}
}).addSink(new DiscardingSink<Integer>());
JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();
// Set the savepoint path
modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
LOG.info("Resubmitting job " + modifiedJobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
// Submit the job
flink.submitJobDetached(modifiedJobGraph);
// Await state is restored
StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
// Await some progress after restore
StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
} finally {
flink.shutdown();
flink.awaitTermination();
}
}
use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class StreamCheckpointNotifierITCase method testProgram.
/**
* Runs the following program:
*
* <pre>
* [ (source)->(filter) ] -> [ (co-map) ] -> [ (map) ] -> [ (groupBy/reduce)->(sink) ]
* </pre>
*/
@Test
public void testProgram() {
try {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
assertEquals("test setup broken", PARALLELISM, env.getParallelism());
env.enableCheckpointing(500);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 0L));
final int numElements = 10000;
final int numTaskTotal = PARALLELISM * 5;
DataStream<Long> stream = env.addSource(new GeneratingSourceFunction(numElements, numTaskTotal));
stream.filter(new LongRichFilterFunction()).connect(stream).flatMap(new LeftIdentityCoRichFlatMapFunction()).map(new IdentityMapFunction()).startNewChain().keyBy(0).reduce(new OnceFailingReducer(numElements)).addSink(new DiscardingSink<Tuple1<Long>>());
env.execute();
final long failureCheckpointID = OnceFailingReducer.failureCheckpointID;
assertNotEquals(0L, failureCheckpointID);
List<List<Long>[]> allLists = Arrays.asList(GeneratingSourceFunction.completedCheckpoints, LongRichFilterFunction.completedCheckpoints, LeftIdentityCoRichFlatMapFunction.completedCheckpoints, IdentityMapFunction.completedCheckpoints, OnceFailingReducer.completedCheckpoints);
for (List<Long>[] parallelNotifications : allLists) {
for (List<Long> notifications : parallelNotifications) {
assertTrue("No checkpoint notification was received.", notifications.size() > 0);
assertFalse("Failure checkpoint was marked as completed.", notifications.contains(failureCheckpointID));
assertFalse("No checkpoint received after failure.", notifications.get(notifications.size() - 1) == failureCheckpointID);
assertTrue("Checkpoint notification was received multiple times", notifications.size() == new HashSet<Long>(notifications).size());
}
}
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class AbstractEventTimeWindowCheckpointingITCase method testPreAggregatedTumblingTimeWindow.
@Test
public void testPreAggregatedTumblingTimeWindow() {
final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
final int WINDOW_SIZE = windowSize();
final int NUM_KEYS = numKeys();
FailingSource.reset();
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
env.setParallelism(PARALLELISM);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.enableCheckpointing(100);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
env.getConfig().disableSysoutLogging();
env.setStateBackend(this.stateBackend);
env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3)).rebalance().keyBy(0).timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS)).reduce(new ReduceFunction<Tuple2<Long, IntType>>() {
@Override
public Tuple2<Long, IntType> reduce(Tuple2<Long, IntType> a, Tuple2<Long, IntType> b) {
return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
}
}, new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {
private boolean open = false;
@Override
public void open(Configuration parameters) {
assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
open = true;
}
@Override
public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> input, Collector<Tuple4<Long, Long, Long, IntType>> out) {
// validate that the function has been opened properly
assertTrue(open);
for (Tuple2<Long, IntType> in : input) {
out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
}
}
}).addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE)).setParallelism(1);
tryExecute(env, "Tumbling Window Test");
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class AccumulatorLiveITCase method testStreaming.
@Test
public void testStreaming() throws Exception {
StreamExecutionEnvironment env = new DummyStreamExecutionEnvironment();
env.setParallelism(1);
DataStream<String> input = env.fromCollection(inputData);
input.flatMap(new NotifyingMapper()).writeUsingOutputFormat(new NotifyingOutputFormat()).disableChaining();
jobGraph = env.getStreamGraph().getJobGraph();
jobID = jobGraph.getJobID();
verifyResults();
}
Aggregations