
Example 1 with InputSplitAssigner

Use of org.apache.flink.core.io.InputSplitAssigner in project flink by apache.

From the class ExecutionGraphConstructionTest, method testSetupInputSplits:

@Test
public void testSetupInputSplits() {
    try {
        // two mocked input split sources produce no splits but return distinct assigners,
        // so the test can check which assigner ends up on which vertex
        final InputSplit[] emptySplits = new InputSplit[0];
        InputSplitAssigner assigner1 = mock(InputSplitAssigner.class);
        InputSplitAssigner assigner2 = mock(InputSplitAssigner.class);
        @SuppressWarnings("unchecked") InputSplitSource<InputSplit> source1 = mock(InputSplitSource.class);
        @SuppressWarnings("unchecked") InputSplitSource<InputSplit> source2 = mock(InputSplitSource.class);
        when(source1.createInputSplits(Matchers.anyInt())).thenReturn(emptySplits);
        when(source2.createInputSplits(Matchers.anyInt())).thenReturn(emptySplits);
        when(source1.getInputSplitAssigner(emptySplits)).thenReturn(assigner1);
        when(source2.getInputSplitAssigner(emptySplits)).thenReturn(assigner2);
        final JobID jobId = new JobID();
        final String jobName = "Test Job Sample Name";
        final Configuration cfg = new Configuration();
        JobVertex v1 = new JobVertex("vertex1");
        JobVertex v2 = new JobVertex("vertex2");
        JobVertex v3 = new JobVertex("vertex3");
        JobVertex v4 = new JobVertex("vertex4");
        JobVertex v5 = new JobVertex("vertex5");
        v1.setParallelism(5);
        v2.setParallelism(7);
        v3.setParallelism(2);
        v4.setParallelism(11);
        v5.setParallelism(4);
        v1.setInvokableClass(AbstractInvokable.class);
        v2.setInvokableClass(AbstractInvokable.class);
        v3.setInvokableClass(AbstractInvokable.class);
        v4.setInvokableClass(AbstractInvokable.class);
        v5.setInvokableClass(AbstractInvokable.class);
        v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v4.connectNewDataSetAsInput(v3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v5.connectNewDataSetAsInput(v4, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v5.connectNewDataSetAsInput(v3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        // only v3 and v5 consume input splits
        v3.setInputSplitSource(source1);
        v5.setInputSplitSource(source2);
        List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3, v4, v5));
        ExecutionGraph eg = new ExecutionGraph(
                TestingUtils.defaultExecutor(),
                TestingUtils.defaultExecutor(),
                jobId,
                jobName,
                cfg,
                new SerializedValue<>(new ExecutionConfig()),
                AkkaUtils.getDefaultTimeout(),
                new NoRestartStrategy(),
                new Scheduler(TestingUtils.defaultExecutionContext()));
        try {
            eg.attachJobGraph(ordered);
        } catch (JobException e) {
            e.printStackTrace();
            fail("Job failed with exception: " + e.getMessage());
        }
        // the execution graph must expose exactly the assigners returned by the sources
        assertEquals(assigner1, eg.getAllVertices().get(v3.getID()).getSplitAssigner());
        assertEquals(assigner2, eg.getAllVertices().get(v5.getID()).getSplitAssigner());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Configuration(org.apache.flink.configuration.Configuration) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) JobException(org.apache.flink.runtime.JobException) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) InputSplit(org.apache.flink.core.io.InputSplit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
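
The test above only mocks InputSplitSource and InputSplitAssigner. As a point of reference, the following is a minimal sketch of what a non-mocked source could look like, built from GenericInputSplit and DefaultInputSplitAssigner in Flink's core API; the class GenericSplitSource and its wiring are illustrative assumptions, not code from the test or from Flink itself.

import org.apache.flink.api.common.io.DefaultInputSplitAssigner;
import org.apache.flink.core.io.GenericInputSplit;
import org.apache.flink.core.io.InputSplitAssigner;
import org.apache.flink.core.io.InputSplitSource;

// Illustrative source: creates 'minNumSplits' generic splits and hands them to
// DefaultInputSplitAssigner, which assigns splits without locality preferences.
public class GenericSplitSource implements InputSplitSource<GenericInputSplit> {

    private static final long serialVersionUID = 1L;

    @Override
    public GenericInputSplit[] createInputSplits(int minNumSplits) {
        GenericInputSplit[] splits = new GenericInputSplit[minNumSplits];
        for (int i = 0; i < minNumSplits; i++) {
            splits[i] = new GenericInputSplit(i, minNumSplits);
        }
        return splits;
    }

    @Override
    public InputSplitAssigner getInputSplitAssigner(GenericInputSplit[] splits) {
        return new DefaultInputSplitAssigner(splits);
    }
}

A vertex would then be wired the same way as v3 and v5 in the test, e.g. v3.setInputSplitSource(new GenericSplitSource()).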

Example 2 with InputSplitAssigner

Use of org.apache.flink.core.io.InputSplitAssigner in project flink by apache.

From the class JobMaster, method requestNextInputSplit:

@RpcMethod
public SerializedInputSplit requestNextInputSplit(final UUID leaderSessionID, final JobVertexID vertexID, final ExecutionAttemptID executionAttempt) throws Exception {
    validateLeaderSessionId(leaderSessionID);
    final Execution execution = executionGraph.getRegisteredExecutions().get(executionAttempt);
    if (execution == null) {
        // this can happen if the execution has already been unregistered after a task failure,
        // but the TaskManager has not yet become aware of it
        if (log.isDebugEnabled()) {
            log.debug("Can not find Execution for attempt {}.", executionAttempt);
        }
        // the TaskManager should still be made aware of this, so fail the request
        throw new Exception("Can not find Execution for attempt " + executionAttempt);
    }
    final ExecutionJobVertex vertex = executionGraph.getJobVertex(vertexID);
    if (vertex == null) {
        log.error("Cannot find execution vertex for vertex ID {}.", vertexID);
        throw new Exception("Cannot find execution vertex for vertex ID " + vertexID);
    }
    final InputSplitAssigner splitAssigner = vertex.getSplitAssigner();
    if (splitAssigner == null) {
        log.error("No InputSplitAssigner for vertex ID {}.", vertexID);
        throw new Exception("No InputSplitAssigner for vertex ID " + vertexID);
    }
    final Slot slot = execution.getAssignedResource();
    final int taskId = execution.getVertex().getParallelSubtaskIndex();
    final String host = slot != null ? slot.getTaskManagerLocation().getHostname() : null;
    final InputSplit nextInputSplit = splitAssigner.getNextInputSplit(host, taskId);
    if (log.isDebugEnabled()) {
        log.debug("Send next input split {}.", nextInputSplit);
    }
    try {
        final byte[] serializedInputSplit = InstantiationUtil.serializeObject(nextInputSplit);
        return new SerializedInputSplit(serializedInputSplit);
    } catch (Exception ex) {
        log.error("Could not serialize the next input split of class {}.", nextInputSplit.getClass(), ex);
        IOException reason = new IOException("Could not serialize the next input split of class " + nextInputSplit.getClass() + ".", ex);
        vertex.fail(reason);
        throw reason;
    }
}
Also used : InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Slot(org.apache.flink.runtime.instance.Slot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) IOException(java.io.IOException) InputSplit(org.apache.flink.core.io.InputSplit) TimeoutException(java.util.concurrent.TimeoutException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) LeaderIdMismatchException(org.apache.flink.runtime.highavailability.LeaderIdMismatchException) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) RpcEndpoint(org.apache.flink.runtime.rpc.RpcEndpoint) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)
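
For completeness, the consuming side (the TaskManager's input split provider) turns the SerializedInputSplit back into an InputSplit. The sketch below is an assumption-laden illustration rather than Flink code: it assumes SerializedInputSplit lives in org.apache.flink.runtime.jobmaster and exposes isEmpty() and getInputSplitData(), and that the task's user-code class loader is at hand; the helper class name is hypothetical.

import java.io.IOException;

import org.apache.flink.core.io.InputSplit;
import org.apache.flink.runtime.jobmaster.SerializedInputSplit;
import org.apache.flink.util.InstantiationUtil;

// Hypothetical helper mirroring how the payload from requestNextInputSplit would be
// consumed: an empty payload means the assigner had no further splits for this subtask.
public final class InputSplitDeserializer {

    private InputSplitDeserializer() {}

    public static InputSplit deserialize(SerializedInputSplit serialized, ClassLoader userCodeClassLoader)
            throws IOException, ClassNotFoundException {
        if (serialized == null || serialized.isEmpty()) {
            return null;
        }
        // symmetric to InstantiationUtil.serializeObject(nextInputSplit) in the method above
        return InstantiationUtil.deserializeObject(serialized.getInputSplitData(), userCodeClassLoader);
    }
}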

Aggregations

InputSplit (org.apache.flink.core.io.InputSplit) 2
InputSplitAssigner (org.apache.flink.core.io.InputSplitAssigner) 2
IOException (java.io.IOException) 1
ArrayList (java.util.ArrayList) 1
TimeoutException (java.util.concurrent.TimeoutException) 1
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 1
JobID (org.apache.flink.api.common.JobID) 1
Configuration (org.apache.flink.configuration.Configuration) 1
JobException (org.apache.flink.runtime.JobException) 1
CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException) 1
JobExecutionException (org.apache.flink.runtime.client.JobExecutionException) 1
Execution (org.apache.flink.runtime.executiongraph.Execution) 1
ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex) 1
NoRestartStrategy (org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) 1
LeaderIdMismatchException (org.apache.flink.runtime.highavailability.LeaderIdMismatchException) 1
Slot (org.apache.flink.runtime.instance.Slot) 1
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex) 1
PartitionProducerDisposedException (org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) 1
Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler) 1
AllocatedSlot (org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) 1