use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in project flink by apache.
the class ExecutionGraphConstructionTest method testMoreThanOneConsumerForIntermediateResult.
@Test
public void testMoreThanOneConsumerForIntermediateResult() {
    try {
        final JobID jobId = new JobID();
        final String jobName = "Test Job Sample Name";
        final Configuration cfg = new Configuration();

        JobVertex v1 = new JobVertex("vertex1");
        JobVertex v2 = new JobVertex("vertex2");
        JobVertex v3 = new JobVertex("vertex3");

        v1.setParallelism(5);
        v2.setParallelism(7);
        v3.setParallelism(2);

        IntermediateDataSet result = v1.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
        v2.connectDataSetAsInput(result, DistributionPattern.ALL_TO_ALL);
        v3.connectDataSetAsInput(result, DistributionPattern.ALL_TO_ALL);

        List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3));

        ExecutionGraph eg = new ExecutionGraph(
                TestingUtils.defaultExecutor(),
                TestingUtils.defaultExecutor(),
                jobId,
                jobName,
                cfg,
                new SerializedValue<>(new ExecutionConfig()),
                AkkaUtils.getDefaultTimeout(),
                new NoRestartStrategy(),
                new Scheduler(TestingUtils.defaultExecutionContext()));

        try {
            eg.attachJobGraph(ordered);
            fail("Should not be possible");
        } catch (RuntimeException e) {
            // expected
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
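The expected failure is the point of this test: attachJobGraph rejects a graph in which a single PIPELINED IntermediateDataSet is wired to two consumers. For contrast, a minimal sketch of the wiring this version does accept, where each consumer gets its own result via connectNewDataSetAsInput (the vertex names here are illustrative, not from the test):

JobVertex producer = new JobVertex("producer");
JobVertex consumerA = new JobVertex("consumerA");
JobVertex consumerB = new JobVertex("consumerB");

// Each connectNewDataSetAsInput call creates a dedicated IntermediateDataSet
// on the producer, so no single result ends up with more than one consumer.
consumerA.connectNewDataSetAsInput(producer, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
consumerB.connectNewDataSetAsInput(producer, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);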
use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in project flink by apache.
the class ExecutionGraphConstructionTest method testAttachViaIds.
@Test
public void testAttachViaIds() throws Exception {
    final JobID jobId = new JobID();
    final String jobName = "Test Job Sample Name";
    final Configuration cfg = new Configuration();

    // construct part one of the execution graph
    JobVertex v1 = new JobVertex("vertex1");
    JobVertex v2 = new JobVertex("vertex2");
    JobVertex v3 = new JobVertex("vertex3");

    v1.setParallelism(5);
    v2.setParallelism(7);
    v3.setParallelism(2);

    v1.setInvokableClass(AbstractInvokable.class);
    v2.setInvokableClass(AbstractInvokable.class);
    v3.setInvokableClass(AbstractInvokable.class);

    // this creates an intermediate result for v1
    v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

    // create results for v2 and v3
    IntermediateDataSet v2result = v2.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
    IntermediateDataSet v3result_1 = v3.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
    IntermediateDataSet v3result_2 = v3.createAndAddResultDataSet(ResultPartitionType.PIPELINED);

    List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3));

    ExecutionGraph eg = new ExecutionGraph(
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            jobId,
            jobName,
            cfg,
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy(),
            new Scheduler(TestingUtils.defaultExecutionContext()));

    try {
        eg.attachJobGraph(ordered);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }

    // attach the second part of the graph
    JobVertex v4 = new JobVertex("vertex4");
    JobVertex v5 = new JobVertex("vertex5");

    v4.setParallelism(11);
    v5.setParallelism(4);

    v4.setInvokableClass(AbstractInvokable.class);
    v5.setInvokableClass(AbstractInvokable.class);

    v4.connectIdInput(v2result.getId(), DistributionPattern.ALL_TO_ALL);
    v4.connectIdInput(v3result_1.getId(), DistributionPattern.ALL_TO_ALL);
    v5.connectNewDataSetAsInput(v4, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    v5.connectIdInput(v3result_2.getId(), DistributionPattern.ALL_TO_ALL);

    List<JobVertex> ordered2 = new ArrayList<JobVertex>(Arrays.asList(v4, v5));

    try {
        eg.attachJobGraph(ordered2);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }

    // verify
    verifyTestGraph(eg, jobId, v1, v2, v3, v4, v5);
}
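verifyTestGraph is a helper defined elsewhere in the test class and not shown in this snippet. A hedged sketch of the kind of check it presumably performs, using ExecutionGraph accessors from this Flink version (the concrete assertions are illustrative):

// All five job vertices should be present after the two attachJobGraph calls,
// including v4 and v5, which were connected purely by IntermediateDataSetID.
assertEquals(5, eg.getAllVertices().size());
assertNotNull(eg.getJobVertex(v4.getID()));
assertNotNull(eg.getJobVertex(v5.getID()));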
use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in project flink by apache.
the class UnionClosedBranchingTest method testUnionClosedBranchingTest.
@Test
public void testUnionClosedBranchingTest() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setExecutionMode(executionMode);
    env.setParallelism(4);

    DataSet<Tuple1<Integer>> src1 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> src2 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> union = src1.union(src2);
    DataSet<Tuple2<Integer, Integer>> join = union.join(union)
            .where(0).equalTo(0)
            .projectFirst(0).projectSecond(0);
    join.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    // Verify that the compiler correctly sets the expected data exchange modes.
    for (Channel channel : joinNode.getInputs()) {
        assertEquals("Unexpected data exchange mode between union and join node.",
                unionToJoin, channel.getDataExchangeMode());
        assertEquals("Unexpected ship strategy between union and join node.",
                unionToJoinStrategy, channel.getShipStrategy());
    }

    for (SourcePlanNode src : optimizedPlan.getDataSources()) {
        for (Channel channel : src.getOutgoingChannels()) {
            assertEquals("Unexpected data exchange mode between source and union node.",
                    sourceToUnion, channel.getDataExchangeMode());
            assertEquals("Unexpected ship strategy between source and union node.",
                    sourceToUnionStrategy, channel.getShipStrategy());
        }
    }

    // -----------------------------------------------------------------------------------------
    // Verify generated JobGraph
    // -----------------------------------------------------------------------------------------
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(optimizedPlan);
    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();

    // Sanity check for the test setup
    assertEquals("Unexpected number of vertices created.", 4, vertices.size());

    // Verify all sources
    JobVertex[] sources = new JobVertex[] { vertices.get(0), vertices.get(1) };
    for (JobVertex src : sources) {
        // Sanity check
        assertTrue("Unexpected vertex type. Test setup is broken.", src.isInputVertex());

        // The union is not translated to an extra union task; instead, the join uses a
        // union input gate to read multiple inputs. Each source creates a single result
        // per consumer.
        assertEquals("Unexpected number of created results.", 2, src.getNumberOfProducedIntermediateDataSets());

        for (IntermediateDataSet dataSet : src.getProducedDataSets()) {
            ResultPartitionType dsType = dataSet.getResultType();

            // Ensure batch exchange unless PIPELINED_FORCED is enabled.
            if (!executionMode.equals(ExecutionMode.PIPELINED_FORCED)) {
                assertTrue("Expected batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
            } else {
                assertFalse("Expected non-batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
            }
        }
    }
}
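The fields executionMode, sourceToUnion, unionToJoin, sourceToUnionStrategy, and unionToJoinStrategy are not declared in the snippet; the test class is parameterized and receives them per run. A hedged sketch of that setup (the field and constructor shapes are assumed, not copied from the source):

// Sketch of the assumed parameterized harness around the test method above.
@RunWith(Parameterized.class)
public class UnionClosedBranchingTest extends CompilerTestBase {

    // Per-run expectations, injected by the Parameterized runner.
    private final ExecutionMode executionMode;
    private final DataExchangeMode sourceToUnion;
    private final DataExchangeMode unionToJoin;
    private final ShipStrategyType sourceToUnionStrategy;
    private final ShipStrategyType unionToJoinStrategy;

    public UnionClosedBranchingTest(
            ExecutionMode executionMode,
            DataExchangeMode sourceToUnion,
            DataExchangeMode unionToJoin,
            ShipStrategyType sourceToUnionStrategy,
            ShipStrategyType unionToJoinStrategy) {
        this.executionMode = executionMode;
        this.sourceToUnion = sourceToUnion;
        this.unionToJoin = unionToJoin;
        this.sourceToUnionStrategy = sourceToUnionStrategy;
        this.unionToJoinStrategy = unionToJoinStrategy;
    }
}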
use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in project flink by apache.
the class ExecutionJobVertex method initialize.
protected void initialize(
        int maxPriorAttemptsHistoryLength,
        Time timeout,
        long createTimestamp,
        SubtaskAttemptNumberStore initialAttemptCounts,
        CoordinatorStore coordinatorStore)
        throws JobException {
    checkState(parallelismInfo.getParallelism() > 0);
    checkState(!isInitialized());

    this.taskVertices = new ExecutionVertex[parallelismInfo.getParallelism()];
    this.inputs = new ArrayList<>(jobVertex.getInputs().size());

    // create the intermediate results
    this.producedDataSets = new IntermediateResult[jobVertex.getNumberOfProducedIntermediateDataSets()];
    for (int i = 0; i < jobVertex.getProducedDataSets().size(); i++) {
        final IntermediateDataSet result = jobVertex.getProducedDataSets().get(i);
        this.producedDataSets[i] = new IntermediateResult(
                result, this, this.parallelismInfo.getParallelism(), result.getResultType());
    }

    // create all task vertices
    for (int i = 0; i < this.parallelismInfo.getParallelism(); i++) {
        ExecutionVertex vertex = new ExecutionVertex(
                this, i, producedDataSets, timeout, createTimestamp,
                maxPriorAttemptsHistoryLength, initialAttemptCounts.getAttemptCount(i));
        this.taskVertices[i] = vertex;
    }

    // sanity check: each intermediate result must have one partition assigned per execution vertex
    for (IntermediateResult ir : this.producedDataSets) {
        if (ir.getNumberOfAssignedPartitions() != this.parallelismInfo.getParallelism()) {
            throw new RuntimeException("The intermediate result's partitions were not correctly assigned.");
        }
    }

    final List<SerializedValue<OperatorCoordinator.Provider>> coordinatorProviders =
            getJobVertex().getOperatorCoordinators();
    if (coordinatorProviders.isEmpty()) {
        this.operatorCoordinators = Collections.emptyList();
    } else {
        final ArrayList<OperatorCoordinatorHolder> coordinators = new ArrayList<>(coordinatorProviders.size());
        try {
            for (final SerializedValue<OperatorCoordinator.Provider> provider : coordinatorProviders) {
                coordinators.add(OperatorCoordinatorHolder.create(
                        provider, this, graph.getUserClassLoader(), coordinatorStore));
            }
        } catch (Exception | LinkageError e) {
            IOUtils.closeAllQuietly(coordinators);
            throw new JobException("Cannot instantiate the coordinator for operator " + getName(), e);
        }
        this.operatorCoordinators = Collections.unmodifiableList(coordinators);
    }

    // set up the input splits, if the vertex has any
    try {
        @SuppressWarnings("unchecked")
        InputSplitSource<InputSplit> splitSource = (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource();

        if (splitSource != null) {
            Thread currentThread = Thread.currentThread();
            ClassLoader oldContextClassLoader = currentThread.getContextClassLoader();
            currentThread.setContextClassLoader(graph.getUserClassLoader());
            try {
                inputSplits = splitSource.createInputSplits(this.parallelismInfo.getParallelism());
                if (inputSplits != null) {
                    splitAssigner = splitSource.getInputSplitAssigner(inputSplits);
                }
            } finally {
                currentThread.setContextClassLoader(oldContextClassLoader);
            }
        } else {
            inputSplits = null;
        }
    } catch (Throwable t) {
        throw new JobException("Creating the input splits caused an error: " + t.getMessage(), t);
    }
}
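In short, initialize() turns each logical IntermediateDataSet of the JobVertex into a runtime IntermediateResult, and the ExecutionVertex constructors register one partition per parallel subtask, which is what the sanity check above verifies. A hedged sketch of that invariant, assuming ejv is the ExecutionJobVertex after initialization:

// After initialize(), every produced IntermediateResult mirrors one logical
// IntermediateDataSet and carries exactly one partition per parallel subtask.
for (IntermediateResult ir : ejv.getProducedDataSets()) {
    assertEquals(ejv.getParallelism(), ir.getNumberOfAssignedPartitions());
    assertEquals(ejv.getParallelism(), ir.getPartitions().length);
}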
use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in project flink by apache.
the class DefaultLogicalResultTest method assertResultsEquals.
static void assertResultsEquals(
        final Iterable<IntermediateDataSet> results,
        final Iterable<DefaultLogicalResult> logicalResults) {
    final Map<IntermediateDataSetID, DefaultLogicalResult> logicalResultMap =
            IterableUtils.toStream(logicalResults)
                    .collect(Collectors.toMap(DefaultLogicalResult::getId, Function.identity()));

    for (IntermediateDataSet result : results) {
        final DefaultLogicalResult logicalResult = logicalResultMap.remove(result.getId());
        assertNotNull(logicalResult);
        assertResultInfoEquals(result, logicalResult);
    }

    // this ensures that the two collections match exactly
    assertEquals(0, logicalResultMap.size());
}
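A hedged usage sketch: compare every JobVertex's produced data sets against the matching vertex of a DefaultLogicalTopology built from the same JobGraph (the accessor names on the logical vertex are assumed, not shown in this snippet):

// jobGraph is assumed to be an already constructed JobGraph.
DefaultLogicalTopology topology = DefaultLogicalTopology.fromJobGraph(jobGraph);
for (DefaultLogicalVertex logicalVertex : topology.getVertices()) {
    JobVertex jobVertex = jobGraph.findVertexByID(logicalVertex.getId());
    assertResultsEquals(jobVertex.getProducedDataSets(), logicalVertex.getProducedResults());
}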