Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.
In class StreamingJobGraphGeneratorNodeHashTest, method testManualHashAssignment.
// ------------------------------------------------------------------------
// Manual hash assignment
// ------------------------------------------------------------------------
/**
 * Tests that manual hash assignments are mapped to the same operator ID.
 *
 * <pre>
 *                    /-> [ (map) ] -> [ (sink)@sink0 ]
 * [ (src)@source ] -+
 *                    \-> [ (map) ] -> [ (sink)@sink1 ]
 * </pre>
 *
 * <pre>
 *                    /-> [ (map) ] -> [ (reduce) ] -> [ (sink)@sink0 ]
 * [ (src)@source ] -+
 *                    \-> [ (map) ] -> [ (reduce) ] -> [ (sink)@sink1 ]
 * </pre>
 */
@Test
public void testManualHashAssignment() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    DataStream<String> src = env.addSource(new NoOpSourceFunction()).name("source").uid("source");
    src.map(new NoOpMapFunction()).addSink(new NoOpSinkFunction()).name("sink0").uid("sink0");
    src.map(new NoOpMapFunction()).addSink(new NoOpSinkFunction()).name("sink1").uid("sink1");

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    Set<JobVertexID> ids = new HashSet<>();
    for (JobVertex vertex : jobGraph.getVertices()) {
        assertTrue(ids.add(vertex.getID()));
    }

    // Resubmit a slightly different program
    env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    src = env.addSource(new NoOpSourceFunction())
            .map(new NoOpMapFunction())
            .name("source").uid("source");

    src.map(new NoOpMapFunction())
            .keyBy(new NoOpKeySelector())
            .reduce(new NoOpReduceFunction())
            .addSink(new NoOpSinkFunction())
            .name("sink0").uid("sink0");

    src.map(new NoOpMapFunction())
            .keyBy(new NoOpKeySelector())
            .reduce(new NoOpReduceFunction())
            .addSink(new NoOpSinkFunction())
            .name("sink1").uid("sink1");

    JobGraph newJobGraph = env.getStreamGraph().getJobGraph();
    assertNotEquals(jobGraph.getJobID(), newJobGraph.getJobID());

    for (JobVertex vertex : newJobGraph.getVertices()) {
        // Verify that the expected IDs are the same
        if (vertex.getName().endsWith("source")
                || vertex.getName().endsWith("sink0")
                || vertex.getName().endsWith("sink1")) {
            assertTrue(ids.contains(vertex.getID()));
        }
    }
}
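
The NoOp* helpers referenced in this test are small private classes defined elsewhere in StreamingJobGraphGeneratorNodeHashTest and are not part of this excerpt. A minimal sketch of what such pass-through implementations can look like; the class names match the test, but the bodies and the choice of ParallelSourceFunction are assumptions:

// Hypothetical sketches of the helpers used above (the real classes live in
// StreamingJobGraphGeneratorNodeHashTest and may differ). Interfaces used:
// ParallelSourceFunction, MapFunction, SinkFunction, KeySelector, ReduceFunction.
private static class NoOpSourceFunction implements ParallelSourceFunction<String> {

    @Override
    public void run(SourceContext<String> ctx) throws Exception {
        // emit nothing; these tests only inspect the generated JobGraph
    }

    @Override
    public void cancel() {
    }
}

private static class NoOpMapFunction implements MapFunction<String, String> {

    @Override
    public String map(String value) throws Exception {
        return value;
    }
}

private static class NoOpSinkFunction implements SinkFunction<String> {

    @Override
    public void invoke(String value) throws Exception {
    }
}

private static class NoOpKeySelector implements KeySelector<String, String> {

    @Override
    public String getKey(String value) throws Exception {
        return value;
    }
}

private static class NoOpReduceFunction implements ReduceFunction<String> {

    @Override
    public String reduce(String value1, String value2) throws Exception {
        return value1;
    }
}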
Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.
In class StreamingJobGraphGeneratorNodeHashTest, method testNodeHashIdenticalNodes.
/**
 * Tests that there are no collisions with two identical intermediate nodes connected to the
 * same predecessor.
 *
 * <pre>
 *            /-> [ (map) ] -> [ (sink) ]
 * [ (src) ] -+
 *            \-> [ (map) ] -> [ (sink) ]
 * </pre>
 */
@Test
public void testNodeHashIdenticalNodes() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    DataStream<String> src = env.addSource(new NoOpSourceFunction());
    src.map(new NoOpMapFunction()).addSink(new NoOpSinkFunction());
    src.map(new NoOpMapFunction()).addSink(new NoOpSinkFunction());

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    Set<JobVertexID> vertexIds = new HashSet<>();
    for (JobVertex vertex : jobGraph.getVertices()) {
        assertTrue(vertexIds.add(vertex.getID()));
    }
}
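
These node-hash tests rely on the generated vertex IDs being deterministic: they are derived from the structure of the stream graph rather than from random values. A small sketch of that property, not part of the original test class, reusing only the helpers and assertions already shown above:

// Build the same program twice in fresh environments; the structural hashes,
// and therefore the generated JobVertexIDs, should be identical for both graphs.
StreamExecutionEnvironment firstEnv = StreamExecutionEnvironment.createLocalEnvironment();
firstEnv.addSource(new NoOpSourceFunction()).map(new NoOpMapFunction()).addSink(new NoOpSinkFunction());
JobGraph firstGraph = firstEnv.getStreamGraph().getJobGraph();

StreamExecutionEnvironment secondEnv = StreamExecutionEnvironment.createLocalEnvironment();
secondEnv.addSource(new NoOpSourceFunction()).map(new NoOpMapFunction()).addSink(new NoOpSinkFunction());
JobGraph secondGraph = secondEnv.getStreamGraph().getJobGraph();

Set<JobVertexID> firstIds = new HashSet<>();
for (JobVertex vertex : firstGraph.getVertices()) {
    firstIds.add(vertex.getID());
}

Set<JobVertexID> secondIds = new HashSet<>();
for (JobVertex vertex : secondGraph.getVertices()) {
    secondIds.add(vertex.getID());
}

// Same program structure => same set of generated vertex IDs
assertEquals(firstIds, secondIds);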
Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.
In class RescalingITCase, method testSavepointRescalingNonPartitionedStateCausesException.
/**
 * Tests that a job cannot be restarted from a savepoint with a different parallelism if the
 * rescaled operator has non-partitioned state.
 *
 * @throws Exception
 */
@Test
public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception {
    final int parallelism = numSlots / 2;
    final int parallelism2 = numSlots;
    final int maxParallelism = 13;

    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();

    JobID jobID = null;
    ActorGateway jobManager = null;

    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());

        JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
        jobID = jobGraph.getJobID();

        cluster.submitJobDetached(jobGraph);

        Object savepointResponse = null;

        // wait until the operator is started
        StateSourceBase.workStartedLatch.await();

        Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        FiniteDuration waitingTime = new FiniteDuration(10, TimeUnit.SECONDS);
        savepointResponse = Await.result(savepointPathFuture, waitingTime);
        assertTrue(String.valueOf(savepointResponse), savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess);

        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) savepointResponse).savepointPath();

        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());

        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);

        Await.ready(jobRemovedFuture, deadline.timeLeft());

        // job successfully removed
        jobID = null;

        JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();

        cluster.submitJobAndWait(scaledJobGraph, false);

        jobID = null;
    } catch (JobExecutionException exception) {
        if (exception.getCause() instanceof IllegalStateException) {
            // we expect an IllegalStateException wrapped in a JobExecutionException,
            // because the job containing non-partitioned state is being rescaled
        } else {
            throw exception;
        }
    } finally {
        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (TimeoutException | InterruptedException ie) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
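
createJobGraphWithOperatorState and StateSourceBase are helpers defined elsewhere in RescalingITCase and are not shown here. The failure this test expects comes from non-partitioned operator state, i.e. state snapshotted as a single value per subtask that cannot be redistributed across a different number of subtasks. A rough sketch of a source with such state, under the assumption that the legacy Checkpointed interface is used (the names and body are hypothetical, not the actual helper):

// Hypothetical source with non-partitioned operator state. Because the state is
// snapshotted as one non-redistributable value per subtask (legacy Checkpointed
// interface), restoring the savepoint with a changed parallelism fails.
private static class NonPartitionedStateSource extends RichParallelSourceFunction<Integer>
        implements Checkpointed<Integer> {

    private volatile boolean running = true;
    private int counter;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        while (running) {
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(counter++);
            }
            Thread.sleep(10);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }

    @Override
    public Integer snapshotState(long checkpointId, long checkpointTimestamp) {
        return counter;
    }

    @Override
    public void restoreState(Integer state) {
        counter = state;
    }
}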
Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.
In class RescalingITCase, method testSavepointRescalingWithKeyedAndNonPartitionedState.
/**
 * Tests that a job with non-partitioned state can be restarted from a savepoint with a
 * different parallelism if the operators with non-partitioned state are not rescaled.
 *
 * @throws Exception
 */
@Test
public void testSavepointRescalingWithKeyedAndNonPartitionedState() throws Exception {
    int numberKeys = 42;
    int numberElements = 1000;
    int numberElements2 = 500;
    int parallelism = numSlots / 2;
    int parallelism2 = numSlots;
    int maxParallelism = 13;

    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();

    ActorGateway jobManager = null;
    JobID jobID = null;

    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());

        JobGraph jobGraph = createJobGraphWithKeyedAndNonPartitionedOperatorState(parallelism, maxParallelism, parallelism, numberKeys, numberElements, false, 100);
        jobID = jobGraph.getJobID();

        cluster.submitJobDetached(jobGraph);

        // wait until the sources have emitted numberElements for each key and completed a checkpoint
        SubtaskIndexFlatMapper.workCompletedLatch.await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

        // verify the current state
        Set<Tuple2<Integer, Integer>> actualResult = CollectionSink.getElementsSet();

        Set<Tuple2<Integer, Integer>> expectedResult = new HashSet<>();
        for (int key = 0; key < numberKeys; key++) {
            int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
            expectedResult.add(Tuple2.of(KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, parallelism, keyGroupIndex), numberElements * key));
        }

        assertEquals(expectedResult, actualResult);

        // clear the CollectionSink set for the restarted job
        CollectionSink.clearElementsSet();
        Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();

        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());

        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);

        Await.ready(jobRemovedFuture, deadline.timeLeft());

        jobID = null;

        JobGraph scaledJobGraph = createJobGraphWithKeyedAndNonPartitionedOperatorState(parallelism2, maxParallelism, parallelism, numberKeys, numberElements + numberElements2, true, 100);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();

        cluster.submitJobAndWait(scaledJobGraph, false);

        jobID = null;

        Set<Tuple2<Integer, Integer>> actualResult2 = CollectionSink.getElementsSet();

        Set<Tuple2<Integer, Integer>> expectedResult2 = new HashSet<>();
        for (int key = 0; key < numberKeys; key++) {
            int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
            expectedResult2.add(Tuple2.of(KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, parallelism2, keyGroupIndex), key * (numberElements + numberElements2)));
        }

        assertEquals(expectedResult2, actualResult2);
    } finally {
        // clear the CollectionSink set for the restarted job
        CollectionSink.clearElementsSet();

        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (TimeoutException | InterruptedException ie) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
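
The expected-result loops above map each key to the subtask that owns it: the key is first assigned to a key group (fixed by maxParallelism), and the key group is then assigned to an operator subtask (dependent on the current parallelism). A small standalone illustration of that mapping using the same KeyGroupRangeAssignment calls as the test; the parallelism values 2 and 4 are placeholders, not the test's numSlots-derived values:

// key -> key group (stable across rescaling) -> subtask index (changes with parallelism)
int maxParallelism = 13;
int key = 7;

int keyGroup = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);

int subtaskAtParallelism2 = KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keyGroup);
int subtaskAtParallelism4 = KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 4, keyGroup);

// The key group never changes, which is what allows keyed state to be redistributed
// when the job is rescaled; only the subtask that owns the key group changes.
System.out.println("key " + key + " -> key group " + keyGroup
        + " -> subtask " + subtaskAtParallelism2 + " at parallelism 2, subtask " + subtaskAtParallelism4 + " at parallelism 4");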
Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.
In class SavepointITCase, method testSavepointForJobWithIteration.
@Test
public void testSavepointForJobWithIteration() throws Exception {
    for (int i = 0; i < ITER_TEST_PARALLELISM; ++i) {
        ITER_TEST_SNAPSHOT_WAIT[i] = new OneShotLatch();
        ITER_TEST_RESTORE_WAIT[i] = new OneShotLatch();
        ITER_TEST_CHECKPOINT_VERIFY[i] = 0;
    }

    TemporaryFolder folder = new TemporaryFolder();
    folder.create();
    // Temporary directory for file state backend
    final File tmpDir = folder.newFolder();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final IntegerStreamSource source = new IntegerStreamSource();
    IterativeStream<Integer> iteration = env.addSource(source)
            .flatMap(new RichFlatMapFunction<Integer, Integer>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(Integer in, Collector<Integer> clctr) throws Exception {
                    clctr.collect(in);
                }
            }).setParallelism(ITER_TEST_PARALLELISM)
            .keyBy(new KeySelector<Integer, Object>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Object getKey(Integer value) throws Exception {
                    return value;
                }
            })
            .flatMap(new DuplicateFilter())
            .setParallelism(ITER_TEST_PARALLELISM)
            .iterate();

    DataStream<Integer> iterationBody = iteration.map(new MapFunction<Integer, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    }).setParallelism(ITER_TEST_PARALLELISM);

    iteration.closeWith(iterationBody);
    StreamGraph streamGraph = env.getStreamGraph();
    streamGraph.setJobName("Test");

    JobGraph jobGraph = streamGraph.getJobGraph();

    Configuration config = new Configuration();
    config.addAll(jobGraph.getJobConfiguration());
    config.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1L);
    config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 2 * jobGraph.getMaximumParallelism());

    final File checkpointDir = new File(tmpDir, "checkpoints");
    final File savepointDir = new File(tmpDir, "savepoints");

    if (!checkpointDir.mkdir() || !savepointDir.mkdirs()) {
        fail("Test setup failed: failed to create temporary directories.");
    }

    config.setString(CoreOptions.STATE_BACKEND, "filesystem");
    config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointDir.toURI().toString());
    config.setString(FsStateBackendFactory.MEMORY_THRESHOLD_CONF_KEY, "0");
    config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());

    TestingCluster cluster = new TestingCluster(config, false);
    String savepointPath = null;
    try {
        cluster.start();
        cluster.submitJobDetached(jobGraph);

        for (OneShotLatch latch : ITER_TEST_SNAPSHOT_WAIT) {
            latch.await();
        }

        savepointPath = cluster.triggerSavepoint(jobGraph.getJobID());
        source.cancel();

        jobGraph = streamGraph.getJobGraph();
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

        cluster.submitJobDetached(jobGraph);

        for (OneShotLatch latch : ITER_TEST_RESTORE_WAIT) {
            latch.await();
        }

        source.cancel();
    } finally {
        if (null != savepointPath) {
            cluster.disposeSavepoint(savepointPath);
        }
        cluster.stop();
        cluster.awaitTermination();
    }
}
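
IntegerStreamSource, DuplicateFilter and the ITER_TEST_* fields are defined elsewhere in SavepointITCase and are not shown in this excerpt. As a rough idea of the shape of DuplicateFilter, a keyed flat map that forwards each value only the first time it is seen could look like the following sketch (an assumption, not the actual class):

// Hypothetical sketch: forwards a value only on its first occurrence per key,
// using keyed ValueState so the "seen" flag becomes part of the savepoint.
private static class DuplicateFilter extends RichFlatMapFunction<Integer, Integer> {

    private static final long serialVersionUID = 1L;

    private transient ValueState<Boolean> seen;

    @Override
    public void open(Configuration parameters) {
        seen = getRuntimeContext().getState(new ValueStateDescriptor<>("seen", Boolean.class));
    }

    @Override
    public void flatMap(Integer value, Collector<Integer> out) throws Exception {
        if (seen.value() == null) {
            seen.update(true);
            out.collect(value);
        }
    }
}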