use of org.apache.flink.core.io.InputSplit in project flink by apache.
the class JobTaskVertexTest method testInputFormatVertex.
/**
 * Configures an {@link InputFormatVertex} with a {@code TestInputFormat}, initializes it on the
 * master and checks that requesting 77 splits yields exactly one split of type {@code TestSplit}.
 */
@Test
public void testInputFormatVertex() {
    try {
        final TestInputFormat format = new TestInputFormat();
        final InputFormatVertex sourceVertex = new InputFormatVertex("Name");

        // register the input format as the vertex's user code stub
        final TaskConfig taskConfig = new TaskConfig(sourceVertex.getConfiguration());
        taskConfig.setStubWrapper(new UserCodeObjectWrapper<InputFormat<?, ?>>(format));

        sourceVertex.initializeOnMaster(getClass().getClassLoader());

        final InputSplit[] createdSplits = sourceVertex.getInputSplitSource().createInputSplits(77);
        assertNotNull(createdSplits);
        assertEquals(1, createdSplits.length);
        assertEquals(TestSplit.class, createdSplits[0].getClass());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
use of org.apache.flink.core.io.InputSplit in project flink by apache.
the class ExecutionJobVertex method computeLocalInputSplitsPerTask.
// --------------------------------------------------------------------------------------------
// Static / pre-assigned input splits
// --------------------------------------------------------------------------------------------
/**
 * Pre-assigns input splits to subtasks for strictly local assignment.
 *
 * <p>Every split must be a {@link LocatableInputSplit} carrying exactly one non-null host name.
 * Splits are grouped by host (keeping their relative order), hosts are processed in sorted
 * order, and each host's splits are divided into contiguous ranges over the subtasks
 * reserved for that host.
 *
 * @param splits the input splits to distribute
 * @return an array with one slot per subtask; slots of subtasks that received no splits
 *         are left {@code null}
 * @throws JobException if there are fewer splits than subtasks, a split is not a
 *         {@link LocatableInputSplit}, a split does not name exactly one non-null host, or
 *         there are more distinct hosts than subtasks
 */
private List<LocatableInputSplit>[] computeLocalInputSplitsPerTask(InputSplit[] splits) throws JobException {
    final int parallelism = getParallelism();

    // sanity check
    if (splits.length < parallelism) {
        throw new JobException("Strictly local assignment requires at least as many splits as subtasks.");
    }

    // bucket the splits per host; insertion order inside each bucket is preserved
    final Map<String, List<LocatableInputSplit>> byHost = new HashMap<String, List<LocatableInputSplit>>();
    for (int s = 0; s < splits.length; s++) {
        final InputSplit candidate = splits[s];
        if (!(candidate instanceof LocatableInputSplit)) {
            throw new JobException("Invalid InputSplit type " + candidate.getClass().getCanonicalName()
                    + ". Strictly local assignment requires LocatableInputSplit");
        }

        final LocatableInputSplit locatable = (LocatableInputSplit) candidate;
        final String[] hostnames = locatable.getHostnames();
        if (hostnames == null) {
            throw new JobException("LocatableInputSplit has no host information. "
                    + "Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
        }
        if (hostnames.length != 1) {
            throw new JobException("Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
        }

        final String hostName = hostnames[0];
        if (hostName == null) {
            throw new JobException("For strictly local input split assignment, no null host names are allowed.");
        }

        List<LocatableInputSplit> bucket = byHost.get(hostName);
        if (bucket == null) {
            bucket = new ArrayList<LocatableInputSplit>();
            byHost.put(hostName, bucket);
        }
        bucket.add(locatable);
    }

    final int hostCount = byHost.size();
    if (hostCount > parallelism) {
        throw new JobException("Strictly local split assignment requires at least as "
                + "many parallel subtasks as distinct split hosts. Please increase the parallelism "
                + "of DataSource " + this.getJobVertex().getName() + " to at least " + hostCount + ".");
    }

    // sorting the host names makes the assignment deterministic
    final List<String> sortedHosts = new ArrayList<String>(byHost.keySet());
    Collections.sort(sortedHosts);

    @SuppressWarnings("unchecked")
    final List<LocatableInputSplit>[] assignment = (List<LocatableInputSplit>[]) new List<?>[parallelism];

    final int baseSubtasksPerHost = parallelism / hostCount;
    final int hostsWithExtraSubtask = parallelism % hostCount;

    int nextSubtask = 0;
    int hostIndex = 0;
    for (String host : sortedHosts) {
        final List<LocatableInputSplit> hostSplits = byHost.get(host);
        final int splitCount = hostSplits.size();

        // the first (parallelism % hostCount) hosts get one additional subtask
        int subtaskQuota = baseSubtasksPerHost;
        if (hostIndex < hostsWithExtraSubtask) {
            subtaskQuota++;
        }
        hostIndex++;

        // NOTE: a host with fewer splits than its quota uses fewer subtasks,
        // so some subtasks will not get anything.
        final int usedSubtasks = Math.min(splitCount, subtaskQuota);
        final int baseShare = splitCount / usedSubtasks;
        final int subtasksWithExtraSplit = splitCount % usedSubtasks;

        // hand each used subtask a contiguous sub-range of this host's splits
        int cursor = 0;
        for (int i = 0; i < usedSubtasks; i++) {
            final int share = i < subtasksWithExtraSplit ? baseShare + 1 : baseShare;

            if (share == splitCount) {
                // a single subtask takes everything: reuse the host's list as is
                assignment[nextSubtask++] = hostSplits;
            } else {
                assignment[nextSubtask++] =
                        new ArrayList<LocatableInputSplit>(hostSplits.subList(cursor, cursor + share));
                cursor += share;
            }
        }
    }

    return assignment;
}
use of org.apache.flink.core.io.InputSplit in project flink by apache.
the class TaskInputSplitProvider method getNextInputSplit.
/**
 * Asks the JobManager for the next input split of this task and deserializes it with the
 * given class loader.
 *
 * @param userCodeClassLoader class loader used to deserialize the split; must not be null
 * @return the next input split, or {@code null} if the response carries no split data
 * @throws InputSplitProviderException if no response is received, the response has an
 *         unexpected type, or the split data cannot be deserialized
 */
@Override
public InputSplit getNextInputSplit(ClassLoader userCodeClassLoader) throws InputSplitProviderException {
    Preconditions.checkNotNull(userCodeClassLoader);

    final JobManagerMessages.RequestNextInputSplit request =
            new JobManagerMessages.RequestNextInputSplit(jobID, vertexID, executionID);
    final Future<Object> pendingReply = jobManager.ask(request, timeout);

    final Object reply;
    try {
        reply = Await.result(pendingReply, timeout);
    } catch (Exception e) {
        throw new InputSplitProviderException("Did not receive next input split from JobManager.", e);
    }

    // anything other than a NextInputSplit message is a protocol error
    if (!(reply instanceof JobManagerMessages.NextInputSplit)) {
        throw new InputSplitProviderException("RequestNextInputSplit requires a response of type "
                + "NextInputSplit. Instead response is of type " + reply.getClass() + '.');
    }

    final byte[] splitBytes = ((JobManagerMessages.NextInputSplit) reply).splitData();
    if (splitBytes == null) {
        return null;
    }

    final Object split;
    try {
        split = InstantiationUtil.deserializeObject(splitBytes, userCodeClassLoader);
    } catch (Exception e) {
        throw new InputSplitProviderException("Could not deserialize the serialized input split.", e);
    }
    // the cast is deliberately outside the try block so a type mismatch is not wrapped
    return (InputSplit) split;
}
use of org.apache.flink.core.io.InputSplit in project flink by apache.
the class DataSourceTask method getInputSplits.
/**
 * Returns an iterator that pulls input splits one at a time from the environment's
 * {@link InputSplitProvider}.
 *
 * <p>A split is requested from the provider only when {@code hasNext()} needs one. Once the
 * provider returns {@code null}, the iterator is exhausted and never queries it again.
 * A failure to fetch a split surfaces as a {@link RuntimeException}; {@code remove()} is
 * not supported.
 */
private Iterator<InputSplit> getInputSplits() {
    final InputSplitProvider splitProvider = getEnvironment().getInputSplitProvider();

    return new Iterator<InputSplit>() {

        /** Split fetched by hasNext() but not yet handed out by next(). */
        private InputSplit pending;

        /** Set once the provider has signalled that there are no more splits. */
        private boolean exhausted;

        @Override
        public boolean hasNext() {
            if (exhausted) {
                return false;
            }
            if (pending == null) {
                try {
                    pending = splitProvider.getNextInputSplit(getUserCodeClassLoader());
                } catch (InputSplitProviderException e) {
                    throw new RuntimeException("Could not retrieve next input split.", e);
                }
                exhausted = (pending == null);
            }
            return !exhausted;
        }

        @Override
        public InputSplit next() {
            if (pending != null || hasNext()) {
                final InputSplit result = pending;
                pending = null;
                return result;
            }
            throw new NoSuchElementException();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}
use of org.apache.flink.core.io.InputSplit in project flink by apache.
the class ExecutionGraphConstructionTest method testSetupInputSplits.
/**
 * Builds a five-vertex job graph in which two vertices (v3 and v5) carry mocked input split
 * sources, attaches it to an {@link ExecutionGraph}, and checks that each of those vertices
 * ends up with the split assigner returned by its own source.
 */
@Test
public void testSetupInputSplits() {
    try {
        final InputSplit[] noSplits = new InputSplit[0];

        // first mocked split source and the assigner it hands out
        InputSplitAssigner firstAssigner = mock(InputSplitAssigner.class);
        @SuppressWarnings("unchecked")
        InputSplitSource<InputSplit> firstSource = mock(InputSplitSource.class);
        when(firstSource.createInputSplits(Matchers.anyInt())).thenReturn(noSplits);
        when(firstSource.getInputSplitAssigner(noSplits)).thenReturn(firstAssigner);

        // second mocked split source and the assigner it hands out
        InputSplitAssigner secondAssigner = mock(InputSplitAssigner.class);
        @SuppressWarnings("unchecked")
        InputSplitSource<InputSplit> secondSource = mock(InputSplitSource.class);
        when(secondSource.createInputSplits(Matchers.anyInt())).thenReturn(noSplits);
        when(secondSource.getInputSplitAssigner(noSplits)).thenReturn(secondAssigner);

        final JobID jobId = new JobID();
        final String jobName = "Test Job Sample Name";
        final Configuration cfg = new Configuration();

        JobVertex v1 = new JobVertex("vertex1");
        JobVertex v2 = new JobVertex("vertex2");
        JobVertex v3 = new JobVertex("vertex3");
        JobVertex v4 = new JobVertex("vertex4");
        JobVertex v5 = new JobVertex("vertex5");
        List<JobVertex> orderedVertices = new ArrayList<JobVertex>(Arrays.asList(v1, v2, v3, v4, v5));

        v1.setParallelism(5);
        v2.setParallelism(7);
        v3.setParallelism(2);
        v4.setParallelism(11);
        v5.setParallelism(4);

        for (JobVertex vertex : orderedVertices) {
            vertex.setInvokableClass(AbstractInvokable.class);
        }

        // wire up the data flow: v1 -> v2 -> v4 -> v5, with v3 feeding both v4 and v5
        v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v4.connectNewDataSetAsInput(v3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v5.connectNewDataSetAsInput(v4, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v5.connectNewDataSetAsInput(v3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

        v3.setInputSplitSource(firstSource);
        v5.setInputSplitSource(secondSource);

        ExecutionGraph graph = new ExecutionGraph(
                TestingUtils.defaultExecutor(),
                TestingUtils.defaultExecutor(),
                jobId,
                jobName,
                cfg,
                new SerializedValue<>(new ExecutionConfig()),
                AkkaUtils.getDefaultTimeout(),
                new NoRestartStrategy(),
                new Scheduler(TestingUtils.defaultExecutionContext()));

        try {
            graph.attachJobGraph(orderedVertices);
        } catch (JobException e) {
            e.printStackTrace();
            fail("Job failed with exception: " + e.getMessage());
        }

        // each source vertex must expose the assigner created by its own split source
        assertEquals(firstAssigner, graph.getAllVertices().get(v3.getID()).getSplitAssigner());
        assertEquals(secondAssigner, graph.getAllVertices().get(v5.getID()).getSplitAssigner());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Aggregations