Search in sources :

Example 6 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class MockInputSplitProvider method addInputSplits.

/**
 * Generates a set of input splits from an input path and assigns them to the
 * {@code inputSplits} field of this provider, replacing any previously stored splits.
 *
 * <p>The file is cut into {@code noSplits} contiguous ranges. The first {@code noSplits - 1}
 * ranges all have the length {@code fileLength / noSplits} (integer division); the last range
 * covers whatever remains up to the end of the file. Every split is tagged with the single
 * host name {@code "localhost"}.
 *
 * @param path
 *        the path of the local file to generate the input splits from
 * @param noSplits
 *        the number of input splits to be generated from the given input file, must be positive
 * @throws IllegalArgumentException
 *         if {@code noSplits} is not positive, or if {@code path} cannot be parsed as a URI
 */
public void addInputSplits(final String path, final int noSplits) {
    // Reject non-positive counts up front; otherwise the division below fails with an
    // ArithmeticException (0) or the array allocation with a NegativeArraySizeException (< 0).
    if (noSplits < 1) {
        throw new IllegalArgumentException("The number of input splits must be positive, but was " + noSplits + ".");
    }
    final String localPath;
    try {
        localPath = new URI(path).getPath();
    } catch (URISyntaxException e) {
        // keep the parser's diagnosis attached as the cause
        throw new IllegalArgumentException("Path URI can not be transformed to local path.", e);
    }
    final String[] hosts = { "localhost" };
    // Read the length exactly once so that all splits are computed from the same value.
    final long fileLength = new File(localPath).length();
    final long splitLength = fileLength / noSplits;
    final InputSplit[] tmp = new InputSplit[noSplits];
    long pos = 0;
    for (int i = 0; i < noSplits - 1; i++) {
        tmp[i] = new FileInputSplit(i, new Path(path), pos, splitLength, hosts);
        pos += splitLength;
    }
    // the last split absorbs the remainder of the integer division
    tmp[noSplits - 1] = new FileInputSplit(noSplits - 1, new Path(path), pos, fileLength - pos, hosts);
    this.inputSplits = tmp;
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) URISyntaxException(java.net.URISyntaxException) InputSplit(org.apache.flink.core.io.InputSplit) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) URI(java.net.URI) File(java.io.File)

Example 7 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class GenericDataSourceBase method executeOnCollections.

// --------------------------------------------------------------------------------------------
/**
 * Reads every record produced by this source's input format into a list.
 *
 * <p>The input format is configured with this operator's parameters, asked for its input
 * splits (with a requested minimum of one split), and each split is then opened, drained and
 * closed in turn. Non-null records are copied with the output type's serializer before being
 * added to the result.
 *
 * <p>An opened split is closed even if reading from it fails, and a {@link RichInputFormat}
 * that was opened via {@code openInputFormat()} is closed via {@code closeInputFormat()} even
 * if split creation or reading fails.
 *
 * @param ctx
 *        the runtime context handed to the input format if it is a {@link RichInputFormat}
 * @param executionConfig
 *        the execution config used to create the serializer for the output type
 * @return the list of all non-null records read from all splits, in reading order
 * @throws Exception
 *         any exception raised by the input format or the serializer
 */
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    @SuppressWarnings("unchecked") InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
    //configure the input format
    inputFormat.configure(this.parameters);
    //open the input format
    final boolean isRichFormat = inputFormat instanceof RichInputFormat;
    if (isRichFormat) {
        ((RichInputFormat) inputFormat).setRuntimeContext(ctx);
        ((RichInputFormat) inputFormat).openInputFormat();
    }
    List<OUT> result = new ArrayList<OUT>();
    try {
        // splits
        InputSplit[] splits = inputFormat.createInputSplits(1);
        TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);
        for (InputSplit split : splits) {
            inputFormat.open(split);
            try {
                while (!inputFormat.reachedEnd()) {
                    OUT next = inputFormat.nextRecord(serializer.createInstance());
                    if (next != null) {
                        result.add(serializer.copy(next));
                    }
                }
            } finally {
                // release the split even when reading it failed
                inputFormat.close();
            }
        }
    } finally {
        //close the input format, also on failure, to pair with openInputFormat() above
        if (isRichFormat) {
            ((RichInputFormat) inputFormat).closeInputFormat();
        }
    }
    return result;
}
Also used : RichInputFormat(org.apache.flink.api.common.io.RichInputFormat) InputFormat(org.apache.flink.api.common.io.InputFormat) RichInputFormat(org.apache.flink.api.common.io.RichInputFormat) ArrayList(java.util.ArrayList) InputSplit(org.apache.flink.core.io.InputSplit)

Example 8 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class DefaultSplitAssignerTest method testSerialSplitAssignment.

/**
 * Hands 50 generic splits to a {@code DefaultInputSplitAssigner} and requests them one by one
 * from a single thread, checking that each returned split is one of the registered ones, that
 * all of them are returned, and that the assigner yields {@code null} afterwards.
 */
@Test
public void testSerialSplitAssignment() {
    try {
        final int splitCount = 50;
        Set<InputSplit> remaining = new HashSet<InputSplit>();
        for (int id = 0; id < splitCount; id++) {
            remaining.add(new GenericInputSplit(id, splitCount));
        }
        DefaultInputSplitAssigner assigner = new DefaultInputSplitAssigner(remaining);
        // drain the assigner; every split it returns must still be outstanding
        for (InputSplit next = assigner.getNextInputSplit("", 0); next != null; next = assigner.getNextInputSplit("", 0)) {
            assertTrue(remaining.remove(next));
        }
        // everything was handed out, and the assigner stays exhausted
        assertTrue(remaining.isEmpty());
        assertNull(assigner.getNextInputSplit("", 0));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : GenericInputSplit(org.apache.flink.core.io.GenericInputSplit) DefaultInputSplitAssigner(org.apache.flink.api.common.io.DefaultInputSplitAssigner) GenericInputSplit(org.apache.flink.core.io.GenericInputSplit) InputSplit(org.apache.flink.core.io.InputSplit) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 9 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class DefaultSplitAssignerTest method testConcurrentSplitAssignment.

/**
 * Lets 10 threads pull from one {@code DefaultInputSplitAssigner} holding 500 generic splits
 * and verifies, via the number of retrieved splits and the sum of their split numbers, that
 * the expected total was handed out and that the assigner is exhausted afterwards.
 */
@Test
public void testConcurrentSplitAssignment() {
    try {
        final int threadCount = 10;
        final int splitCount = 500;
        // sum of the split numbers 0 .. splitCount - 1
        final int expectedIdSum = (splitCount - 1) * (splitCount) / 2;
        Set<InputSplit> allSplits = new HashSet<InputSplit>();
        for (int id = 0; id < splitCount; id++) {
            allSplits.add(new GenericInputSplit(id, splitCount));
        }
        final DefaultInputSplitAssigner assigner = new DefaultInputSplitAssigner(allSplits);
        final AtomicInteger retrievedCount = new AtomicInteger(0);
        final AtomicInteger retrievedIdSum = new AtomicInteger(0);
        Runnable puller = new Runnable() {

            @Override
            public void run() {
                final String host = "";
                GenericInputSplit current = (GenericInputSplit) assigner.getNextInputSplit(host, 0);
                while (current != null) {
                    retrievedCount.incrementAndGet();
                    retrievedIdSum.addAndGet(current.getSplitNumber());
                    current = (GenericInputSplit) assigner.getNextInputSplit(host, 0);
                }
            }
        };
        // create the threads
        Thread[] workers = new Thread[threadCount];
        for (int t = 0; t < workers.length; t++) {
            Thread worker = new Thread(puller);
            worker.setDaemon(true);
            workers[t] = worker;
        }
        // launch concurrently
        for (Thread worker : workers) {
            worker.start();
        }
        // sync: wait up to five seconds for each thread
        for (Thread worker : workers) {
            worker.join(5000);
        }
        // verify that no thread is still running
        for (Thread worker : workers) {
            if (worker.isAlive()) {
                fail("The concurrency test case is erroneous, the thread did not respond in time.");
            }
        }
        assertEquals(splitCount, retrievedCount.get());
        assertEquals(expectedIdSum, retrievedIdSum.get());
        // nothing left
        assertNull(assigner.getNextInputSplit("", 0));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : GenericInputSplit(org.apache.flink.core.io.GenericInputSplit) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DefaultInputSplitAssigner(org.apache.flink.api.common.io.DefaultInputSplitAssigner) GenericInputSplit(org.apache.flink.core.io.GenericInputSplit) InputSplit(org.apache.flink.core.io.InputSplit) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 10 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class LocatableSplitAssignerTest method testSerialSplitAssignmentSomeForRemoteHost.

/**
 * Registers 20 splits located on "host1", 10 on "host2" and 30 on "remoteHost", then requests
 * splits round-robin on behalf of "host1", "host2" and "host3" until the
 * {@code LocatableInputSplitAssigner} is exhausted. Expects every split to be handed out, with
 * 30 assignments counted as local and 30 as remote.
 */
@Test
public void testSerialSplitAssignmentSomeForRemoteHost() {
    try {
        final String[] requestingHosts = { "host1", "host2", "host3" };
        final int host1SplitCount = 20;
        final int host2SplitCount = 10;
        final int remoteSplitCount = 30;
        final int localSplitCount = host1SplitCount + host2SplitCount;
        // split numbers are assigned consecutively across all three groups
        int nextSplitId = 0;
        Set<LocatableInputSplit> outstanding = new HashSet<LocatableInputSplit>();
        // splits stored on host1
        for (int end = nextSplitId + host1SplitCount; nextSplitId < end; nextSplitId++) {
            outstanding.add(new LocatableInputSplit(nextSplitId, "host1"));
        }
        // splits stored on host2
        for (int end = nextSplitId + host2SplitCount; nextSplitId < end; nextSplitId++) {
            outstanding.add(new LocatableInputSplit(nextSplitId, "host2"));
        }
        // splits stored on a host that never requests anything
        for (int end = nextSplitId + remoteSplitCount; nextSplitId < end; nextSplitId++) {
            outstanding.add(new LocatableInputSplit(nextSplitId, "remoteHost"));
        }
        // request splits for the three hosts in turn until none is left
        LocatableInputSplitAssigner assigner = new LocatableInputSplitAssigner(outstanding);
        int request = 0;
        InputSplit assigned = assigner.getNextInputSplit(requestingHosts[request % requestingHosts.length], 0);
        while (assigned != null) {
            assertTrue(outstanding.remove(assigned));
            request++;
            assigned = assigner.getNextInputSplit(requestingHosts[request % requestingHosts.length], 0);
        }
        // check we had all
        assertTrue(outstanding.isEmpty());
        assertNull(assigner.getNextInputSplit("anotherHost", 0));
        assertEquals(remoteSplitCount, assigner.getNumberOfRemoteAssignments());
        assertEquals(localSplitCount, assigner.getNumberOfLocalAssignments());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : LocatableInputSplit(org.apache.flink.core.io.LocatableInputSplit) LocatableInputSplitAssigner(org.apache.flink.api.common.io.LocatableInputSplitAssigner) InputSplit(org.apache.flink.core.io.InputSplit) LocatableInputSplit(org.apache.flink.core.io.LocatableInputSplit) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

InputSplit (org.apache.flink.core.io.InputSplit): 21, Test (org.junit.Test): 12, HashSet (java.util.HashSet): 7, LocatableInputSplit (org.apache.flink.core.io.LocatableInputSplit): 6, LocatableInputSplitAssigner (org.apache.flink.api.common.io.LocatableInputSplitAssigner): 5, InputSplitProviderException (org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException): 4, ArrayList (java.util.ArrayList): 3, NoSuchElementException (java.util.NoSuchElementException): 3, GenericInputSplit (org.apache.flink.core.io.GenericInputSplit): 3, IOException (java.io.IOException): 2, Iterator (java.util.Iterator): 2, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 2, JobID (org.apache.flink.api.common.JobID): 2, DefaultInputSplitAssigner (org.apache.flink.api.common.io.DefaultInputSplitAssigner): 2, InputFormat (org.apache.flink.api.common.io.InputFormat): 2, RichInputFormat (org.apache.flink.api.common.io.RichInputFormat): 2, GenericParameterValuesProvider (org.apache.flink.api.java.io.jdbc.split.GenericParameterValuesProvider): 2, ParameterValuesProvider (org.apache.flink.api.java.io.jdbc.split.ParameterValuesProvider): 2, InputSplitAssigner (org.apache.flink.core.io.InputSplitAssigner): 2, JobException (org.apache.flink.runtime.JobException): 2