Use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache: the class ExecutionGraphConstructionTest, method testAttachViaDataSets.
@Test
public void testAttachViaDataSets() throws Exception {
    final JobID jobId = new JobID();
    final String jobName = "Test Job Sample Name";
    final Configuration cfg = new Configuration();

    // construct part one of the execution graph
    JobVertex v1 = new JobVertex("vertex1");
    JobVertex v2 = new JobVertex("vertex2");
    JobVertex v3 = new JobVertex("vertex3");

    v1.setParallelism(5);
    v2.setParallelism(7);
    v3.setParallelism(2);

    v1.setInvokableClass(AbstractInvokable.class);
    v2.setInvokableClass(AbstractInvokable.class);
    v3.setInvokableClass(AbstractInvokable.class);

    // this creates an intermediate result for v1
    v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

    // create results for v2 and v3
    IntermediateDataSet v2result = v2.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
    IntermediateDataSet v3result_1 = v3.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
    IntermediateDataSet v3result_2 = v3.createAndAddResultDataSet(ResultPartitionType.PIPELINED);

    List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3));

    ExecutionGraph eg = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        jobId,
        jobName,
        cfg,
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        new NoRestartStrategy(),
        new Scheduler(TestingUtils.defaultExecutionContext()));
    try {
        eg.attachJobGraph(ordered);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }

    // attach the second part of the graph
    JobVertex v4 = new JobVertex("vertex4");
    JobVertex v5 = new JobVertex("vertex5");
    v4.setParallelism(11);
    v5.setParallelism(4);

    v4.setInvokableClass(AbstractInvokable.class);
    v5.setInvokableClass(AbstractInvokable.class);

    v4.connectDataSetAsInput(v2result, DistributionPattern.ALL_TO_ALL);
    v4.connectDataSetAsInput(v3result_1, DistributionPattern.ALL_TO_ALL);
    v5.connectNewDataSetAsInput(v4, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    v5.connectDataSetAsInput(v3result_2, DistributionPattern.ALL_TO_ALL);

    List<JobVertex> ordered2 = new ArrayList<JobVertex>(Arrays.asList(v4, v5));

    try {
        eg.attachJobGraph(ordered2);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }

    // verify
    verifyTestGraph(eg, jobId, v1, v2, v3, v4, v5);
}
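The verifyTestGraph helper lives elsewhere in ExecutionGraphConstructionTest and is not reproduced here. As a hedged illustration of the kind of wiring it can assert, the following sketch (hypothetical, not the actual helper; it uses only accessors that appear in the tests below, namely getAllVertices, getTaskVertices, and getInputEdges) checks that ALL_TO_ALL gave every v4 subtask an edge to each of v2's seven result partitions:

// Hypothetical spot-check, not the actual verifyTestGraph body: under
// ALL_TO_ALL, every subtask of v4 reads all partitions of v2's result.
ExecutionJobVertex execV4 = eg.getAllVertices().get(v4.getID());
for (ExecutionVertex ev : execV4.getTaskVertices()) {
    // input 0 is v2result, input 1 is v3result_1 (order of the connect calls)
    ExecutionEdge[] fromV2 = ev.getInputEdges(0);
    assertEquals(v2.getParallelism(), fromV2.length);
}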
Use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache: the class ExecutionGraphConstructionTest, method testCannotConnectMissingId.
@Test
public void testCannotConnectMissingId() throws Exception {
    final JobID jobId = new JobID();
    final String jobName = "Test Job Sample Name";
    final Configuration cfg = new Configuration();

    // construct part one of the execution graph
    JobVertex v1 = new JobVertex("vertex1");
    v1.setParallelism(7);
    v1.setInvokableClass(AbstractInvokable.class);

    List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1));

    ExecutionGraph eg = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        jobId,
        jobName,
        cfg,
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        new NoRestartStrategy(),
        new Scheduler(TestingUtils.defaultExecutionContext()));
    try {
        eg.attachJobGraph(ordered);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }

    // attach the second part of the graph: v2 references a data set ID that
    // no attached vertex produces, so attaching it must fail
    JobVertex v2 = new JobVertex("vertex2");
    v2.setInvokableClass(AbstractInvokable.class);
    v2.connectIdInput(new IntermediateDataSetID(), DistributionPattern.ALL_TO_ALL);

    List<JobVertex> ordered2 = new ArrayList<JobVertex>(Arrays.asList(v2));

    try {
        eg.attachJobGraph(ordered2);
        fail("Attached wrong jobgraph");
    } catch (JobException e) {
        // expected
    }
}
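The attach fails because the freshly constructed IntermediateDataSetID matches no result the graph knows about. A minimal sketch of the working counterpart (modeled on the companion test testAttachViaIds in the same class; it assumes IntermediateDataSet#getId() and a fresh ExecutionGraph 'eg' constructed exactly as above) connects by the ID of a result that was already attached:

// Sketch: connecting by a known result ID succeeds, because the ID was
// registered with the graph when v1 was attached.
JobVertex v1 = new JobVertex("vertex1");
v1.setParallelism(7);
v1.setInvokableClass(AbstractInvokable.class);
IntermediateDataSet v1result = v1.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
eg.attachJobGraph(new ArrayList<JobVertex>(Arrays.asList(v1)));

JobVertex v2 = new JobVertex("vertex2");
v2.setParallelism(1);
v2.setInvokableClass(AbstractInvokable.class);
v2.connectIdInput(v1result.getId(), DistributionPattern.ALL_TO_ALL);
eg.attachJobGraph(new ArrayList<JobVertex>(Arrays.asList(v2)));   // resolves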
Use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache: the class PointwisePatternTest, method testNToN.
@Test
public void testNToN() throws Exception {
    final int N = 23;

    JobVertex v1 = new JobVertex("vertex1");
    JobVertex v2 = new JobVertex("vertex2");

    v1.setParallelism(N);
    v2.setParallelism(N);

    v1.setInvokableClass(AbstractInvokable.class);
    v2.setInvokableClass(AbstractInvokable.class);

    v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);

    List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2));

    // jobId, jobName, and cfg are fields of the surrounding test class
    ExecutionGraph eg = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        jobId,
        jobName,
        cfg,
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        new NoRestartStrategy(),
        new Scheduler(TestingUtils.defaultExecutionContext()));
    try {
        eg.attachJobGraph(ordered);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }

    ExecutionJobVertex target = eg.getAllVertices().get(v2.getID());

    // each consumer subtask must read exactly the producer partition with its own index
    for (ExecutionVertex ev : target.getTaskVertices()) {
        assertEquals(1, ev.getNumberOfInputs());

        ExecutionEdge[] inEdges = ev.getInputEdges(0);
        assertEquals(1, inEdges.length);
        assertEquals(ev.getParallelSubtaskIndex(), inEdges[0].getSource().getPartitionNumber());
    }
}
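With equal parallelism on both sides, POINTWISE degenerates to an identity mapping: consumer subtask i reads exactly producer partition i. A hedged extra check (not part of the original test, but using the same bookkeeping as the testHighToLow helper below) would also confirm that every producer partition is consumed exactly once:

// Optional mirror check (hypothetical): count how often each producer
// partition appears as an edge source; under N-to-N each must appear once.
int[] timesUsed = new int[N];
for (ExecutionVertex ev : target.getTaskVertices()) {
    timesUsed[ev.getInputEdges(0)[0].getSource().getPartitionNumber()]++;
}
for (int used : timesUsed) {
    assertEquals(1, used);
}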
Use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache: the class PointwisePatternTest, method testHighToLow.
private void testHighToLow(int highDop, int lowDop) throws Exception {
    if (highDop < lowDop) {
        throw new IllegalArgumentException();
    }

    // each of the lowDop consumers reads either 'factor' or 'factor + delta'
    // producer partitions; delta is 1 only when highDop is not evenly divisible
    final int factor = highDop / lowDop;
    final int delta = highDop % lowDop == 0 ? 0 : 1;

    JobVertex v1 = new JobVertex("vertex1");
    JobVertex v2 = new JobVertex("vertex2");

    v1.setParallelism(highDop);
    v2.setParallelism(lowDop);

    v1.setInvokableClass(AbstractInvokable.class);
    v2.setInvokableClass(AbstractInvokable.class);

    v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);

    List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2));

    // jobId, jobName, and cfg are fields of the surrounding test class
    ExecutionGraph eg = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        jobId,
        jobName,
        cfg,
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        new NoRestartStrategy(),
        new Scheduler(TestingUtils.defaultExecutionContext()));
    try {
        eg.attachJobGraph(ordered);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }

    ExecutionJobVertex target = eg.getAllVertices().get(v2.getID());

    int[] timesUsed = new int[highDop];

    for (ExecutionVertex ev : target.getTaskVertices()) {
        assertEquals(1, ev.getNumberOfInputs());

        ExecutionEdge[] inEdges = ev.getInputEdges(0);
        assertTrue(inEdges.length >= factor && inEdges.length <= factor + delta);

        for (ExecutionEdge ee : inEdges) {
            timesUsed[ee.getSource().getPartitionNumber()]++;
        }
    }

    // together, the consumers must cover every producer partition exactly once
    for (int used : timesUsed) {
        assertEquals(1, used);
    }
}
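testHighToLow is a parameterized helper rather than a test in its own right: the highDop producer partitions are split across lowDop consumers, each taking factor or factor + 1 of them, and together covering every partition exactly once. Hypothetical driver methods (the concrete DOP pairs below are illustrative, not taken from the source) would exercise both the even and the uneven case:

// Hypothetical drivers for the helper above; the DOP values are examples.
@Test
public void testHighToLowEven() throws Exception {
    // 6 % 2 == 0, so delta == 0 and every consumer reads exactly 3 partitions
    testHighToLow(6, 2);
}

@Test
public void testHighToLowUneven() throws Exception {
    // 7 % 3 != 0, so delta == 1 and consumers read 2 or 3 partitions each
    testHighToLow(7, 3);
}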
Use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache: the class RescalePartitionerTest, method testExecutionGraphGeneration.
@Test
public void testExecutionGraphGeneration() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // get input data
    DataStream<String> text = env.addSource(new ParallelSourceFunction<String>() {
        private static final long serialVersionUID = 7772338606389180774L;

        @Override
        public void run(SourceContext<String> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    }).setParallelism(2);

    DataStream<Tuple2<String, Integer>> counts = text.rescale().flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
        private static final long serialVersionUID = -5255930322161596829L;

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
        }
    });

    counts.rescale().print().setParallelism(2);

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    final JobID jobId = new JobID();
    final String jobName = "Semi-Rebalance Test Job";
    final Configuration cfg = new Configuration();

    List<JobVertex> jobVertices = jobGraph.getVerticesSortedTopologicallyFromSources();

    JobVertex sourceVertex = jobVertices.get(0);
    JobVertex mapVertex = jobVertices.get(1);
    JobVertex sinkVertex = jobVertices.get(2);

    assertEquals(2, sourceVertex.getParallelism());
    assertEquals(4, mapVertex.getParallelism());
    assertEquals(2, sinkVertex.getParallelism());

    ExecutionGraph eg = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        jobId,
        jobName,
        cfg,
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        new NoRestartStrategy(),
        new ArrayList<BlobKey>(),
        new ArrayList<URL>(),
        new Scheduler(TestingUtils.defaultExecutionContext()),
        ExecutionGraph.class.getClassLoader(),
        new UnregisteredMetricsGroup());
    try {
        eg.attachJobGraph(jobVertices);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Building ExecutionGraph failed: " + e.getMessage());
    }

    ExecutionJobVertex execSourceVertex = eg.getJobVertex(sourceVertex.getID());
    ExecutionJobVertex execMapVertex = eg.getJobVertex(mapVertex.getID());
    ExecutionJobVertex execSinkVertex = eg.getJobVertex(sinkVertex.getID());

    assertEquals(0, execSourceVertex.getInputs().size());
    assertEquals(1, execMapVertex.getInputs().size());
    assertEquals(4, execMapVertex.getParallelism());

    ExecutionVertex[] mapTaskVertices = execMapVertex.getTaskVertices();

    // verify that we have each parallel input partition exactly twice, i.e. that one source
    // sends to two unique mappers
    Map<Integer, Integer> mapInputPartitionCounts = new HashMap<>();
    for (ExecutionVertex mapTaskVertex : mapTaskVertices) {
        assertEquals(1, mapTaskVertex.getNumberOfInputs());
        assertEquals(1, mapTaskVertex.getInputEdges(0).length);

        ExecutionEdge inputEdge = mapTaskVertex.getInputEdges(0)[0];
        assertEquals(sourceVertex.getID(), inputEdge.getSource().getProducer().getJobvertexId());

        int inputPartition = inputEdge.getSource().getPartitionNumber();
        if (!mapInputPartitionCounts.containsKey(inputPartition)) {
            mapInputPartitionCounts.put(inputPartition, 1);
        } else {
            mapInputPartitionCounts.put(inputPartition, mapInputPartitionCounts.get(inputPartition) + 1);
        }
    }

    assertEquals(2, mapInputPartitionCounts.size());
    for (int count : mapInputPartitionCounts.values()) {
        assertEquals(2, count);
    }

    assertEquals(1, execSinkVertex.getInputs().size());
    assertEquals(2, execSinkVertex.getParallelism());

    ExecutionVertex[] sinkTaskVertices = execSinkVertex.getTaskVertices();

    // verify each sink instance has two inputs from the map and that each map subpartition
    // only occurs in one unique input edge
    Set<Integer> mapSubpartitions = new HashSet<>();
    for (ExecutionVertex sinkTaskVertex : sinkTaskVertices) {
        assertEquals(1, sinkTaskVertex.getNumberOfInputs());
        assertEquals(2, sinkTaskVertex.getInputEdges(0).length);

        ExecutionEdge inputEdge1 = sinkTaskVertex.getInputEdges(0)[0];
        ExecutionEdge inputEdge2 = sinkTaskVertex.getInputEdges(0)[1];
        assertEquals(mapVertex.getID(), inputEdge1.getSource().getProducer().getJobvertexId());
        assertEquals(mapVertex.getID(), inputEdge2.getSource().getProducer().getJobvertexId());

        int inputPartition1 = inputEdge1.getSource().getPartitionNumber();
        assertFalse(mapSubpartitions.contains(inputPartition1));
        mapSubpartitions.add(inputPartition1);

        int inputPartition2 = inputEdge2.getSource().getPartitionNumber();
        assertFalse(mapSubpartitions.contains(inputPartition2));
        mapSubpartitions.add(inputPartition2);
    }

    assertEquals(4, mapSubpartitions.size());
}
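The numbers the assertions expect follow from the rescale (pointwise) wiring: 2 sources feed 4 mappers, so each source partition serves exactly 2 mappers, and 4 mappers feed 2 sinks, so each sink reads 2 distinct map subpartitions. A small hypothetical helper makes the consumer-to-producer arithmetic explicit for the divisible case used in this test:

// Illustrative only; not a Flink API. Valid when numConsumers is a multiple
// of numProducers, as in the 2 -> 4 source-to-map edge above: consumers
// {0,1} read producer 0 and consumers {2,3} read producer 1.
static int rescaleProducerFor(int consumerIndex, int numProducers, int numConsumers) {
    return consumerIndex / (numConsumers / numProducers);
}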