Search in sources :

Example 1 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class JobTaskVertexTest method testInputFormatVertex.

/**
 * Wires a {@code TestInputFormat} into an {@code InputFormatVertex} as its user-code stub,
 * runs {@code initializeOnMaster}, and checks that the vertex's input split source produces
 * exactly one split of type {@code TestSplit}.
 */
@Test
public void testInputFormatVertex() {
    try {
        final TestInputFormat format = new TestInputFormat();
        final InputFormatVertex sourceVertex = new InputFormatVertex("Name");

        // register the format as the stub in the vertex's task configuration
        final TaskConfig taskConfig = new TaskConfig(sourceVertex.getConfiguration());
        final UserCodeObjectWrapper<InputFormat<?, ?>> stub = new UserCodeObjectWrapper<InputFormat<?, ?>>(format);
        taskConfig.setStubWrapper(stub);

        // initialize with the class loader of this test class
        sourceVertex.initializeOnMaster(getClass().getClassLoader());

        // createInputSplits is called with 77; a single TestSplit is expected back
        final InputSplit[] created = sourceVertex.getInputSplitSource().createInputSplits(77);
        assertNotNull(created);
        assertEquals(1, created.length);
        assertEquals(TestSplit.class, created[0].getClass());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : GenericInputFormat(org.apache.flink.api.common.io.GenericInputFormat) InputFormat(org.apache.flink.api.common.io.InputFormat) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) GenericInputSplit(org.apache.flink.core.io.GenericInputSplit) InputSplit(org.apache.flink.core.io.InputSplit) IOException(java.io.IOException) Test(org.junit.Test)

Example 2 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class ExecutionJobVertex method computeLocalInputSplitsPerTask.

// --------------------------------------------------------------------------------------------
//  Static / pre-assigned input splits
// --------------------------------------------------------------------------------------------
/**
 * Computes a strictly local assignment of input splits to the parallel subtasks of this vertex.
 *
 * <p>The splits are grouped by their single host name (keeping the input order within each
 * host), the hosts are visited in sorted order, and each host's splits are cut into
 * contiguous sub-ranges that are written to consecutive slots of the result array.
 *
 * @param splits the input splits to distribute
 * @return an array with one slot per subtask; slots that were not assigned any splits
 *         remain {@code null}
 * @throws JobException if there are fewer splits than subtasks, if a split is not a
 *         {@link LocatableInputSplit}, if a split does not carry exactly one non-null host
 *         name, or if there are more distinct hosts than subtasks
 */
private List<LocatableInputSplit>[] computeLocalInputSplitsPerTask(InputSplit[] splits) throws JobException {
    final int parallelism = getParallelism();

    // sanity check
    if (splits.length < parallelism) {
        throw new JobException("Strictly local assignment requires at least as many splits as subtasks.");
    }

    // bucket the splits by host; the order inside each bucket follows the input order
    final Map<String, List<LocatableInputSplit>> byHost = new HashMap<String, List<LocatableInputSplit>>();
    for (int s = 0; s < splits.length; s++) {
        final InputSplit candidate = splits[s];

        // every split must be locatable and name exactly one host
        if (!(candidate instanceof LocatableInputSplit)) {
            throw new JobException("Invalid InputSplit type " + candidate.getClass().getCanonicalName() + ". " + "Strictly local assignment requires LocatableInputSplit");
        }
        final LocatableInputSplit locatable = (LocatableInputSplit) candidate;
        final String[] names = locatable.getHostnames();
        if (names == null) {
            throw new JobException("LocatableInputSplit has no host information. " + "Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
        }
        if (names.length != 1) {
            throw new JobException("Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
        }
        final String name = names[0];
        if (name == null) {
            throw new JobException("For strictly local input split assignment, no null host names are allowed.");
        }

        List<LocatableInputSplit> bucket = byHost.get(name);
        if (bucket == null) {
            bucket = new ArrayList<LocatableInputSplit>();
            byHost.put(name, bucket);
        }
        bucket.add(locatable);
    }

    final int hostCount = byHost.size();
    if (hostCount > parallelism) {
        throw new JobException("Strictly local split assignment requires at least as " + "many parallel subtasks as distinct split hosts. Please increase the parallelism " + "of DataSource " + this.getJobVertex().getName() + " to at least " + hostCount + ".");
    }

    // sort the host names so the assignment is deterministic
    final List<String> sortedHosts = new ArrayList<String>(byHost.keySet());
    Collections.sort(sortedHosts);

    @SuppressWarnings("unchecked") final List<LocatableInputSplit>[] assignment = (List<LocatableInputSplit>[]) new List<?>[parallelism];

    // the first (parallelism % hostCount) hosts are granted one subtask more than the rest
    final int baseSubtasksPerHost = parallelism / hostCount;
    final int hostsWithExtraSubtask = parallelism % hostCount;

    int nextSlot = 0;
    int hostIndex = 0;
    for (String host : sortedHosts) {
        final List<LocatableInputSplit> hostSplits = byHost.get(host);
        final int splitCount = hostSplits.size();

        // a host never uses more subtasks than it has splits,
        // so with few splits some of its granted subtasks receive nothing
        final int grantedSubtasks = hostIndex < hostsWithExtraSubtask ? baseSubtasksPerHost + 1 : baseSubtasksPerHost;
        final int usedSubtasks = Math.min(splitCount, grantedSubtasks);

        final int baseSplitsPerSubtask = splitCount / usedSubtasks;
        final int subtasksWithExtraSplit = splitCount % usedSubtasks;

        // hand each used subtask a contiguous sub-range of this host's splits
        int cursor = 0;
        for (int t = 0; t < usedSubtasks; t++) {
            final int take = t < subtasksWithExtraSplit ? baseSplitsPerSubtask + 1 : baseSplitsPerSubtask;
            final List<LocatableInputSplit> chunk;
            if (take == splitCount) {
                // a single subtask takes everything: reuse the host's list as-is
                chunk = hostSplits;
            } else {
                chunk = new ArrayList<LocatableInputSplit>(take);
                for (int end = cursor + take; cursor < end; cursor++) {
                    chunk.add(hostSplits.get(cursor));
                }
            }
            assignment[nextSlot++] = chunk;
        }
        hostIndex++;
    }
    return assignment;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JobException(org.apache.flink.runtime.JobException) LocatableInputSplit(org.apache.flink.core.io.LocatableInputSplit) List(java.util.List) InputSplit(org.apache.flink.core.io.InputSplit)

Example 3 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class TaskInputSplitProvider method getNextInputSplit.

/**
 * Asks the JobManager for the next input split of this execution and deserializes it.
 *
 * @param userCodeClassLoader class loader used to deserialize the split; checked with
 *        {@code Preconditions.checkNotNull}
 * @return the deserialized input split, or {@code null} if the response carries no split data
 * @throws InputSplitProviderException if waiting for the response fails, if the response is
 *         {@code null} or not a {@code NextInputSplit} message, or if the split data cannot
 *         be deserialized
 */
@Override
public InputSplit getNextInputSplit(ClassLoader userCodeClassLoader) throws InputSplitProviderException {
    Preconditions.checkNotNull(userCodeClassLoader);
    final Future<Object> response = jobManager.ask(new JobManagerMessages.RequestNextInputSplit(jobID, vertexID, executionID), timeout);
    final Object result;
    try {
        result = Await.result(response, timeout);
    } catch (Exception e) {
        throw new InputSplitProviderException("Did not receive next input split from JobManager.", e);
    }

    if (!(result instanceof JobManagerMessages.NextInputSplit)) {
        // instanceof is also false for null, so do not call getClass() on the result blindly;
        // a null response is reported as the declared exception rather than a NullPointerException
        final String responseType = (result == null) ? "null" : String.valueOf(result.getClass());
        throw new InputSplitProviderException("RequestNextInputSplit requires a response of type " + "NextInputSplit. Instead response is of type " + responseType + '.');
    }

    final JobManagerMessages.NextInputSplit nextInputSplit = (JobManagerMessages.NextInputSplit) result;
    final byte[] serializedData = nextInputSplit.splitData();
    if (serializedData == null) {
        // no split data in the response: nothing to hand out
        return null;
    }

    final Object deserialized;
    try {
        deserialized = InstantiationUtil.deserializeObject(serializedData, userCodeClassLoader);
    } catch (Exception e) {
        throw new InputSplitProviderException("Could not deserialize the serialized input split.", e);
    }
    return (InputSplit) deserialized;
}
Also used : JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) InputSplitProviderException(org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException) InputSplit(org.apache.flink.core.io.InputSplit)

Example 4 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class DataSourceTask method getInputSplits.

/**
 * Creates an iterator that pulls input splits one at a time from the environment's
 * {@code InputSplitProvider}.
 *
 * <p>A split is requested only when {@code hasNext()} is called with nothing buffered.
 * Once the provider returns {@code null} the iterator is finished and makes no further
 * requests. Provider failures surface as {@link RuntimeException}; {@code remove()} is
 * not supported.
 *
 * @return an iterator over the input splits delivered by the provider
 */
private Iterator<InputSplit> getInputSplits() {
    final InputSplitProvider splitProvider = getEnvironment().getInputSplitProvider();
    return new Iterator<InputSplit>() {

        /** Split already fetched but not yet handed out; {@code null} if nothing is buffered. */
        private InputSplit buffered;

        /** Becomes {@code true} once the provider has returned {@code null}. */
        private boolean done;

        @Override
        public boolean hasNext() {
            if (done) {
                return false;
            }
            if (buffered == null) {
                // nothing buffered: ask the provider for another split
                final InputSplit fetched;
                try {
                    fetched = splitProvider.getNextInputSplit(getUserCodeClassLoader());
                } catch (InputSplitProviderException e) {
                    throw new RuntimeException("Could not retrieve next input split.", e);
                }
                if (fetched == null) {
                    done = true;
                    return false;
                }
                this.buffered = fetched;
            }
            return true;
        }

        @Override
        public InputSplit next() {
            if (this.buffered == null && !hasNext()) {
                throw new NoSuchElementException();
            }
            // hand out the buffered split and clear the buffer for the next fetch
            final InputSplit handedOut = this.buffered;
            this.buffered = null;
            return handedOut;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}
Also used : Iterator(java.util.Iterator) InputSplitProviderException(org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) InputSplit(org.apache.flink.core.io.InputSplit) NoSuchElementException(java.util.NoSuchElementException)

Example 5 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class ExecutionGraphConstructionTest method testSetupInputSplits.

/**
 * Builds a five-vertex job in which {@code vertex3} and {@code vertex5} each have a mocked
 * input split source, attaches it to an {@code ExecutionGraph}, and checks that the
 * corresponding execution job vertices expose the split assigners returned by those sources.
 */
@Test
public void testSetupInputSplits() {
    try {
        final InputSplit[] noSplits = new InputSplit[0];

        InputSplitAssigner firstAssigner = mock(InputSplitAssigner.class);
        InputSplitAssigner secondAssigner = mock(InputSplitAssigner.class);
        @SuppressWarnings("unchecked") InputSplitSource<InputSplit> firstSource = mock(InputSplitSource.class);
        @SuppressWarnings("unchecked") InputSplitSource<InputSplit> secondSource = mock(InputSplitSource.class);

        // each source returns the empty split array and, for that array, its own assigner
        when(firstSource.createInputSplits(Matchers.anyInt())).thenReturn(noSplits);
        when(firstSource.getInputSplitAssigner(noSplits)).thenReturn(firstAssigner);
        when(secondSource.createInputSplits(Matchers.anyInt())).thenReturn(noSplits);
        when(secondSource.getInputSplitAssigner(noSplits)).thenReturn(secondAssigner);

        JobVertex v1 = new JobVertex("vertex1");
        JobVertex v2 = new JobVertex("vertex2");
        JobVertex v3 = new JobVertex("vertex3");
        JobVertex v4 = new JobVertex("vertex4");
        JobVertex v5 = new JobVertex("vertex5");
        List<JobVertex> ordered = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3, v4, v5));

        // per-vertex parallelism, in the same order as the vertex list
        final int[] parallelism = { 5, 7, 2, 11, 4 };
        for (int i = 0; i < parallelism.length; i++) {
            JobVertex vertex = ordered.get(i);
            vertex.setParallelism(parallelism[i]);
            vertex.setInvokableClass(AbstractInvokable.class);
        }

        // topology: v1 -> v2 -> v4 -> v5, with v3 feeding both v4 and v5
        v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v4.connectNewDataSetAsInput(v3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v5.connectNewDataSetAsInput(v4, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v5.connectNewDataSetAsInput(v3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

        // only v3 and v5 get an input split source
        v3.setInputSplitSource(firstSource);
        v5.setInputSplitSource(secondSource);

        final JobID jobId = new JobID();
        final String jobName = "Test Job Sample Name";
        final Configuration cfg = new Configuration();
        ExecutionGraph graph = new ExecutionGraph(TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), jobId, jobName, cfg, new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), new NoRestartStrategy(), new Scheduler(TestingUtils.defaultExecutionContext()));
        try {
            graph.attachJobGraph(ordered);
        } catch (JobException e) {
            e.printStackTrace();
            fail("Job failed with exception: " + e.getMessage());
        }

        // the execution job vertices must expose the assigners provided by the mocked sources
        final InputSplitAssigner assignerOfV3 = graph.getAllVertices().get(v3.getID()).getSplitAssigner();
        final InputSplitAssigner assignerOfV5 = graph.getAllVertices().get(v5.getID()).getSplitAssigner();
        assertEquals(firstAssigner, assignerOfV3);
        assertEquals(secondAssigner, assignerOfV5);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Configuration(org.apache.flink.configuration.Configuration) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) JobException(org.apache.flink.runtime.JobException) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) InputSplit(org.apache.flink.core.io.InputSplit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

InputSplit (org.apache.flink.core.io.InputSplit): 21; Test (org.junit.Test): 12; HashSet (java.util.HashSet): 7; LocatableInputSplit (org.apache.flink.core.io.LocatableInputSplit): 6; LocatableInputSplitAssigner (org.apache.flink.api.common.io.LocatableInputSplitAssigner): 5; InputSplitProviderException (org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException): 4; ArrayList (java.util.ArrayList): 3; NoSuchElementException (java.util.NoSuchElementException): 3; GenericInputSplit (org.apache.flink.core.io.GenericInputSplit): 3; IOException (java.io.IOException): 2; Iterator (java.util.Iterator): 2; ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 2; JobID (org.apache.flink.api.common.JobID): 2; DefaultInputSplitAssigner (org.apache.flink.api.common.io.DefaultInputSplitAssigner): 2; InputFormat (org.apache.flink.api.common.io.InputFormat): 2; RichInputFormat (org.apache.flink.api.common.io.RichInputFormat): 2; GenericParameterValuesProvider (org.apache.flink.api.java.io.jdbc.split.GenericParameterValuesProvider): 2; ParameterValuesProvider (org.apache.flink.api.java.io.jdbc.split.ParameterValuesProvider): 2; InputSplitAssigner (org.apache.flink.core.io.InputSplitAssigner): 2; JobException (org.apache.flink.runtime.JobException): 2