Search in sources:

Example 1 with IntermediateDataSet

Use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in the apache/flink project.

From class ExecutionGraphConstructionTest, method testMoreThanOneConsumerForIntermediateResult:

@Test
public void testMoreThanOneConsumerForIntermediateResult() {
    try {
        final JobID jobId = new JobID();
        final String jobName = "Test Job Sample Name";
        final Configuration cfg = new Configuration();
        JobVertex v1 = new JobVertex("vertex1");
        JobVertex v2 = new JobVertex("vertex2");
        JobVertex v3 = new JobVertex("vertex3");
        v1.setParallelism(5);
        v2.setParallelism(7);
        v3.setParallelism(2);
        IntermediateDataSet result = v1.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
        v2.connectDataSetAsInput(result, DistributionPattern.ALL_TO_ALL);
        v3.connectDataSetAsInput(result, DistributionPattern.ALL_TO_ALL);
        List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3));
        ExecutionGraph eg = new ExecutionGraph(
                TestingUtils.defaultExecutor(),
                TestingUtils.defaultExecutor(),
                jobId,
                jobName,
                cfg,
                new SerializedValue<>(new ExecutionConfig()),
                AkkaUtils.getDefaultTimeout(),
                new NoRestartStrategy(),
                new Scheduler(TestingUtils.defaultExecutionContext()));
        try {
            eg.attachJobGraph(ordered);
            fail("Should not be possible");
        } catch (RuntimeException e) {
            // expected: more than one consumer per intermediate result is rejected
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: IntermediateDataSet (org.apache.flink.runtime.jobgraph.IntermediateDataSet), Configuration (org.apache.flink.configuration.Configuration), Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler), ArrayList (java.util.ArrayList), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), NoRestartStrategy (org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy), JobException (org.apache.flink.runtime.JobException), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
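This test pins down a constraint of this version of the API: attachJobGraph rejects an IntermediateDataSet that has more than one consumer. A minimal sketch of the wiring that is accepted instead, giving each consumer its own result via connectNewDataSetAsInput (the same call used in Example 2 below); the vertex names are illustrative:

import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;

JobVertex producer = new JobVertex("producer");
JobVertex consumerA = new JobVertex("consumerA");
JobVertex consumerB = new JobVertex("consumerB");
// Each connectNewDataSetAsInput call creates a fresh IntermediateDataSet
// on the producer, so no single result ends up with two consumers.
consumerA.connectNewDataSetAsInput(producer, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
consumerB.connectNewDataSetAsInput(producer, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);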

Example 2 with IntermediateDataSet

Use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in the apache/flink project.

From class ExecutionGraphConstructionTest, method testAttachViaIds:

@Test
public void testAttachViaIds() throws Exception {
    final JobID jobId = new JobID();
    final String jobName = "Test Job Sample Name";
    final Configuration cfg = new Configuration();
    // construct part one of the execution graph
    JobVertex v1 = new JobVertex("vertex1");
    JobVertex v2 = new JobVertex("vertex2");
    JobVertex v3 = new JobVertex("vertex3");
    v1.setParallelism(5);
    v2.setParallelism(7);
    v3.setParallelism(2);
    v1.setInvokableClass(AbstractInvokable.class);
    v2.setInvokableClass(AbstractInvokable.class);
    v3.setInvokableClass(AbstractInvokable.class);
    // this creates an intermediate result for v1
    v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    // create results for v2 and v3
    IntermediateDataSet v2result = v2.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
    IntermediateDataSet v3result_1 = v3.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
    IntermediateDataSet v3result_2 = v3.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
    List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3));
    ExecutionGraph eg = new ExecutionGraph(
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            jobId,
            jobName,
            cfg,
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy(),
            new Scheduler(TestingUtils.defaultExecutionContext()));
    try {
        eg.attachJobGraph(ordered);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }
    // attach the second part of the graph
    JobVertex v4 = new JobVertex("vertex4");
    JobVertex v5 = new JobVertex("vertex5");
    v4.setParallelism(11);
    v5.setParallelism(4);
    v4.setInvokableClass(AbstractInvokable.class);
    v5.setInvokableClass(AbstractInvokable.class);
    v4.connectIdInput(v2result.getId(), DistributionPattern.ALL_TO_ALL);
    v4.connectIdInput(v3result_1.getId(), DistributionPattern.ALL_TO_ALL);
    v5.connectNewDataSetAsInput(v4, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    v5.connectIdInput(v3result_2.getId(), DistributionPattern.ALL_TO_ALL);
    List<JobVertex> ordered2 = new ArrayList<JobVertex>(Arrays.asList(v4, v5));
    try {
        eg.attachJobGraph(ordered2);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Job failed with exception: " + e.getMessage());
    }
    // verify
    verifyTestGraph(eg, jobId, v1, v2, v3, v4, v5);
}
Also used: IntermediateDataSet (org.apache.flink.runtime.jobgraph.IntermediateDataSet), Configuration (org.apache.flink.configuration.Configuration), Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler), ArrayList (java.util.ArrayList), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), NoRestartStrategy (org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy), JobException (org.apache.flink.runtime.JobException), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
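The noteworthy pattern is in the second half of the test: v4 and v5 attach to results created earlier by referring to the IntermediateDataSetID alone, which is what lets the graph be built and attached in two passes. A minimal sketch of that ID-based handoff, assuming the same APIs used above; the vertex names are illustrative:

import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.runtime.jobgraph.IntermediateDataSet;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;

// pass one: the producer publishes a result; only its ID needs to survive
JobVertex producer = new JobVertex("producer");
IntermediateDataSet result = producer.createAndAddResultDataSet(ResultPartitionType.PIPELINED);
IntermediateDataSetID resultId = result.getId();

// pass two, possibly much later: the consumer connects by ID alone,
// without holding a reference to the producer JobVertex
JobVertex consumer = new JobVertex("consumer");
consumer.connectIdInput(resultId, DistributionPattern.ALL_TO_ALL);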

Example 3 with IntermediateDataSet

Use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in the apache/flink project.

From class UnionClosedBranchingTest, method testUnionClosedBranchingTest:

@Test
public void testUnionClosedBranchingTest() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setExecutionMode(executionMode);
    env.setParallelism(4);
    DataSet<Tuple1<Integer>> src1 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> src2 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> union = src1.union(src2);
    DataSet<Tuple2<Integer, Integer>> join = union.join(union).where(0).equalTo(0).projectFirst(0).projectSecond(0);
    join.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    // Verify that the compiler correctly sets the expected data exchange modes.
    for (Channel channel : joinNode.getInputs()) {
        assertEquals("Unexpected data exchange mode between union and join node.", unionToJoin, channel.getDataExchangeMode());
        assertEquals("Unexpected ship strategy between union and join node.", unionToJoinStrategy, channel.getShipStrategy());
    }
    for (SourcePlanNode src : optimizedPlan.getDataSources()) {
        for (Channel channel : src.getOutgoingChannels()) {
            assertEquals("Unexpected data exchange mode between source and union node.", sourceToUnion, channel.getDataExchangeMode());
            assertEquals("Unexpected ship strategy between source and union node.", sourceToUnionStrategy, channel.getShipStrategy());
        }
    }
    // -----------------------------------------------------------------------------------------
    // Verify generated JobGraph
    // -----------------------------------------------------------------------------------------
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(optimizedPlan);
    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    // Sanity check for the test setup
    assertEquals("Unexpected number of vertices created.", 4, vertices.size());
    // Verify all sources
    JobVertex[] sources = new JobVertex[] { vertices.get(0), vertices.get(1) };
    for (JobVertex src : sources) {
        // Sanity check
        assertTrue("Unexpected vertex type. Test setup is broken.", src.isInputVertex());
        // The union is not translated into an extra union task; instead, the join
        // reads its multiple inputs through a union input gate. Each source
        // creates one result per consumer.
        assertEquals("Unexpected number of created results.", 2, src.getNumberOfProducedIntermediateDataSets());
        for (IntermediateDataSet dataSet : src.getProducedDataSets()) {
            ResultPartitionType dsType = dataSet.getResultType();
            // Ensure a batch (blocking) exchange unless PIPELINED_FORCED is enabled.
            if (!executionMode.equals(ExecutionMode.PIPELINED_FORCED)) {
                assertTrue("Expected batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
            } else {
                assertFalse("Expected non-batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
            }
        }
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), IntermediateDataSet (org.apache.flink.runtime.jobgraph.IntermediateDataSet), Channel (org.apache.flink.optimizer.plan.Channel), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), ResultPartitionType (org.apache.flink.runtime.io.network.partition.ResultPartitionType), Tuple1 (org.apache.flink.api.java.tuple.Tuple1), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), Test (org.junit.Test)
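The closing loop encodes the rule under test: at a closed branch, where both join inputs reach the same sources through the union, the exchange must be blocking in every execution mode except PIPELINED_FORCED, which keeps it pipelined. A standalone sketch of that expectation as a helper predicate (exchangeMatchesMode is a hypothetical name, not a Flink API):

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;

/**
 * Returns true when the result type matches what the optimizer should
 * emit for a closed branch under the given execution mode.
 */
static boolean exchangeMatchesMode(ExecutionMode mode, ResultPartitionType type) {
    // PIPELINED_FORCED deliberately overrides the deadlock-avoidance rule
    // and keeps the exchange pipelined; every other mode must block.
    return mode == ExecutionMode.PIPELINED_FORCED ? !type.isBlocking() : type.isBlocking();
}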

Example 4 with IntermediateDataSet

Use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in the apache/flink project.

From class ExecutionJobVertex, method initialize:

protected void initialize(
        int maxPriorAttemptsHistoryLength,
        Time timeout,
        long createTimestamp,
        SubtaskAttemptNumberStore initialAttemptCounts,
        CoordinatorStore coordinatorStore)
        throws JobException {
    checkState(parallelismInfo.getParallelism() > 0);
    checkState(!isInitialized());
    this.taskVertices = new ExecutionVertex[parallelismInfo.getParallelism()];
    this.inputs = new ArrayList<>(jobVertex.getInputs().size());
    // create the intermediate results
    this.producedDataSets = new IntermediateResult[jobVertex.getNumberOfProducedIntermediateDataSets()];
    for (int i = 0; i < jobVertex.getProducedDataSets().size(); i++) {
        final IntermediateDataSet result = jobVertex.getProducedDataSets().get(i);
        this.producedDataSets[i] = new IntermediateResult(result, this, this.parallelismInfo.getParallelism(), result.getResultType());
    }
    // create all task vertices
    for (int i = 0; i < this.parallelismInfo.getParallelism(); i++) {
        ExecutionVertex vertex = new ExecutionVertex(this, i, producedDataSets, timeout, createTimestamp, maxPriorAttemptsHistoryLength, initialAttemptCounts.getAttemptCount(i));
        this.taskVertices[i] = vertex;
    }
    // sanity check: every produced intermediate result must have exactly one
    // partition assigned per parallel subtask
    for (IntermediateResult ir : this.producedDataSets) {
        if (ir.getNumberOfAssignedPartitions() != this.parallelismInfo.getParallelism()) {
            throw new RuntimeException("The intermediate result's partitions were not correctly assigned.");
        }
    }
    final List<SerializedValue<OperatorCoordinator.Provider>> coordinatorProviders = getJobVertex().getOperatorCoordinators();
    if (coordinatorProviders.isEmpty()) {
        this.operatorCoordinators = Collections.emptyList();
    } else {
        final ArrayList<OperatorCoordinatorHolder> coordinators = new ArrayList<>(coordinatorProviders.size());
        try {
            for (final SerializedValue<OperatorCoordinator.Provider> provider : coordinatorProviders) {
                coordinators.add(OperatorCoordinatorHolder.create(provider, this, graph.getUserClassLoader(), coordinatorStore));
            }
        } catch (Exception | LinkageError e) {
            IOUtils.closeAllQuietly(coordinators);
            throw new JobException("Cannot instantiate the coordinator for operator " + getName(), e);
        }
        this.operatorCoordinators = Collections.unmodifiableList(coordinators);
    }
    // set up the input splits, if the vertex has any
    try {
        @SuppressWarnings("unchecked")
        InputSplitSource<InputSplit> splitSource =
                (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource();
        if (splitSource != null) {
            Thread currentThread = Thread.currentThread();
            ClassLoader oldContextClassLoader = currentThread.getContextClassLoader();
            currentThread.setContextClassLoader(graph.getUserClassLoader());
            try {
                inputSplits = splitSource.createInputSplits(this.parallelismInfo.getParallelism());
                if (inputSplits != null) {
                    splitAssigner = splitSource.getInputSplitAssigner(inputSplits);
                }
            } finally {
                currentThread.setContextClassLoader(oldContextClassLoader);
            }
        } else {
            inputSplits = null;
        }
    } catch (Throwable t) {
        throw new JobException("Creating the input splits caused an error: " + t.getMessage(), t);
    }
}
Also used: IntermediateDataSet (org.apache.flink.runtime.jobgraph.IntermediateDataSet), ArrayList (java.util.ArrayList), JobException (org.apache.flink.runtime.JobException), InputSplitSource (org.apache.flink.core.io.InputSplitSource), InputSplit (org.apache.flink.core.io.InputSplit), OperatorCoordinator (org.apache.flink.runtime.operators.coordination.OperatorCoordinator), SerializedValue (org.apache.flink.util.SerializedValue), IOException (java.io.IOException), OperatorCoordinatorHolder (org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder)
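One detail of initialize worth isolating is the context-classloader swap around createInputSplits: the user classloader is installed before calling into user code, and the previous loader is restored in a finally block even if the call throws. A minimal generic sketch of that pattern (callWithContextClassLoader is a hypothetical helper, not a Flink API):

import java.util.concurrent.Callable;

/**
 * Runs the action with the given classloader installed as the thread's
 * context classloader, restoring the previous loader afterwards.
 */
static <T> T callWithContextClassLoader(ClassLoader userLoader, Callable<T> action) throws Exception {
    final Thread current = Thread.currentThread();
    final ClassLoader previous = current.getContextClassLoader();
    current.setContextClassLoader(userLoader);
    try {
        return action.call();
    } finally {
        // always restore, even when the user code throws
        current.setContextClassLoader(previous);
    }
}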

Example 5 with IntermediateDataSet

Use of org.apache.flink.runtime.jobgraph.IntermediateDataSet in the apache/flink project.

From class DefaultLogicalResultTest, method assertResultsEquals:

static void assertResultsEquals(
        final Iterable<IntermediateDataSet> results,
        final Iterable<DefaultLogicalResult> logicalResults) {
    final Map<IntermediateDataSetID, DefaultLogicalResult> logicalResultMap =
            IterableUtils.toStream(logicalResults)
                    .collect(Collectors.toMap(DefaultLogicalResult::getId, Function.identity()));
    for (IntermediateDataSet result : results) {
        final DefaultLogicalResult logicalResult = logicalResultMap.remove(result.getId());
        assertNotNull(logicalResult);
        assertResultInfoEquals(result, logicalResult);
    }
    // this ensures the two collections match exactly
    assertEquals(0, logicalResultMap.size());
}
Also used: IntermediateDataSet (org.apache.flink.runtime.jobgraph.IntermediateDataSet), IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)
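The helper relies on a remove-and-check idiom: index one collection by ID, remove an entry for each element of the other, and assert that the map ends up empty, which proves a one-to-one correspondence without assuming iteration order. A generic sketch of the same idiom (assertSameKeys is a hypothetical name, shown with plain JUnit asserts):

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

/** Asserts the two iterables contain exactly the same keys, in any order. */
static <K, A, B> void assertSameKeys(
        Iterable<A> expected, Iterable<B> actual,
        Function<A, K> keyOfExpected, Function<B, K> keyOfActual) {
    final Map<K, B> remaining = new HashMap<>();
    for (B b : actual) {
        remaining.put(keyOfActual.apply(b), b);
    }
    for (A a : expected) {
        // every expected key must appear exactly once on the actual side
        assertNotNull(remaining.remove(keyOfExpected.apply(a)));
    }
    // nothing left over, so the match is one-to-one
    assertEquals(0, remaining.size());
}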

Aggregations

IntermediateDataSet (org.apache.flink.runtime.jobgraph.IntermediateDataSet): 11 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 6 usages
ArrayList (java.util.ArrayList): 5 usages
JobException (org.apache.flink.runtime.JobException): 4 usages
Test (org.junit.Test): 4 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 3 usages
JobID (org.apache.flink.api.common.JobID): 3 usages
Configuration (org.apache.flink.configuration.Configuration): 3 usages
NoRestartStrategy (org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy): 3 usages
IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID): 3 usages
JobEdge (org.apache.flink.runtime.jobgraph.JobEdge): 3 usages
Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler): 3 usages
HashMap (java.util.HashMap): 2 usages
ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex): 2 usages
IOException (java.io.IOException): 1 usage
String.format (java.lang.String.format): 1 usage
Collection (java.util.Collection): 1 usage
Collections.emptyList (java.util.Collections.emptyList): 1 usage
List (java.util.List): 1 usage
ListIterator (java.util.ListIterator): 1 usage