Search in sources :

Example 11 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class LocatableSplitAssignerTest method testSerialSplitAssignmentWithNullHost.

/**
 * Drains a {@link LocatableInputSplitAssigner} using only {@code null}-host requests and
 * checks that every split is handed out exactly once and counted as a remote assignment.
 *
 * <p>The splits cycle through three host configurations: a single host name, an empty
 * host array, and a {@code null} host array.
 *
 * <p>There is deliberately no catch-all around the body: an unexpected exception
 * propagates to the test runner, which reports it together with its full stack trace
 * (a {@code fail(e.getMessage())} wrapper would lose the trace and may report a
 * {@code null} message).
 */
@Test
public void testSerialSplitAssignmentWithNullHost() {
    final int NUM_SPLITS = 50;
    final String[][] hosts = new String[][] { new String[] { "localhost" }, new String[0], null };

    // load some splits
    Set<LocatableInputSplit> splits = new HashSet<LocatableInputSplit>();
    for (int i = 0; i < NUM_SPLITS; i++) {
        splits.add(new LocatableInputSplit(i, hosts[i % 3]));
    }

    // get all available splits; each one must be a split we created and not yet seen
    LocatableInputSplitAssigner ia = new LocatableInputSplitAssigner(splits);
    InputSplit is = null;
    while ((is = ia.getNextInputSplit(null, 0)) != null) {
        assertTrue(splits.remove(is));
    }

    // check we had all
    assertTrue(splits.isEmpty());
    assertNull(ia.getNextInputSplit("", 0));

    // every request was issued with a null host, so all assignments are expected to be remote
    assertEquals(NUM_SPLITS, ia.getNumberOfRemoteAssignments());
    assertEquals(0, ia.getNumberOfLocalAssignments());
}
Also used : LocatableInputSplit(org.apache.flink.core.io.LocatableInputSplit) LocatableInputSplitAssigner(org.apache.flink.api.common.io.LocatableInputSplitAssigner) InputSplit(org.apache.flink.core.io.InputSplit) LocatableInputSplit(org.apache.flink.core.io.LocatableInputSplit) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 12 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class LocatableSplitAssignerTest method testSerialSplitAssignmentMixedLocalHost.

/**
 * Drains a {@link LocatableInputSplitAssigner} by cycling the requesting host through the
 * same host list the splits were created with, and checks that every split is handed out
 * exactly once and counted as a local assignment.
 *
 * <p>There is deliberately no catch-all around the body: an unexpected exception
 * propagates to the test runner, which reports it together with its full stack trace
 * (a {@code fail(e.getMessage())} wrapper would lose the trace and may report a
 * {@code null} message).
 */
@Test
public void testSerialSplitAssignmentMixedLocalHost() {
    final String[] hosts = { "host1", "host1", "host1", "host2", "host2", "host3" };
    final int NUM_SPLITS = 10 * hosts.length;

    // load some splits
    Set<LocatableInputSplit> splits = new HashSet<LocatableInputSplit>();
    for (int i = 0; i < NUM_SPLITS; i++) {
        splits.add(new LocatableInputSplit(i, hosts[i % hosts.length]));
    }

    // get all available splits, rotating the requesting host on every call
    LocatableInputSplitAssigner ia = new LocatableInputSplitAssigner(splits);
    InputSplit is = null;
    int i = 0;
    while ((is = ia.getNextInputSplit(hosts[i++ % hosts.length], 0)) != null) {
        assertTrue(splits.remove(is));
    }

    // check we had all
    assertTrue(splits.isEmpty());
    assertNull(ia.getNextInputSplit("anotherHost", 0));

    // the test expects every split to have been served to a host it is located on
    assertEquals(0, ia.getNumberOfRemoteAssignments());
    assertEquals(NUM_SPLITS, ia.getNumberOfLocalAssignments());
}
Also used : LocatableInputSplit(org.apache.flink.core.io.LocatableInputSplit) LocatableInputSplitAssigner(org.apache.flink.api.common.io.LocatableInputSplitAssigner) InputSplit(org.apache.flink.core.io.InputSplit) LocatableInputSplit(org.apache.flink.core.io.LocatableInputSplit) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 13 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class JDBCInputFormatTest method testJDBCInputFormatWithParallelismAndNumericColumnSplitting.

/**
 * Reads the books table through a {@link NumericBetweenParametersProvider} with a fetch
 * size of 1 and verifies the number of splits, the Java type of each non-null field, and
 * the field values against {@code testData}.
 *
 * @throws IOException if the input format fails to open, read or close
 * @throws InstantiationException declared by the test signature
 * @throws IllegalAccessException declared by the test signature
 */
@Test
public void testJDBCInputFormatWithParallelismAndNumericColumnSplitting() throws IOException, InstantiationException, IllegalAccessException {
    final int fetchSize = 1;
    // Long.valueOf(String) replaces the deprecated Long(String) constructor; it parses the same text.
    final Long min = Long.valueOf(JDBCTestBase.testData[0][0] + "");
    final Long max = Long.valueOf(JDBCTestBase.testData[JDBCTestBase.testData.length - fetchSize][0] + "");
    ParameterValuesProvider paramProvider = new NumericBetweenParametersProvider(fetchSize, min, max);
    jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat().setDrivername(DRIVER_CLASS).setDBUrl(DB_URL).setQuery(JDBCTestBase.SELECT_ALL_BOOKS_SPLIT_BY_ID).setRowTypeInfo(rowTypeInfo).setParametersProvider(paramProvider).setResultSetType(ResultSet.TYPE_SCROLL_INSENSITIVE).finish();
    jdbcInputFormat.openInputFormat();
    InputSplit[] splits = jdbcInputFormat.createInputSplits(1);
    // this query exploits parallelism (1 split for every id)
    Assert.assertEquals(testData.length, splits.length);
    int recordCount = 0;
    // single Row instance passed to every nextRecord call
    Row row = new Row(5);
    for (int i = 0; i < splits.length; i++) {
        jdbcInputFormat.open(splits[i]);
        while (!jdbcInputFormat.reachedEnd()) {
            Row next = jdbcInputFormat.nextRecord(row);
            if (next == null) {
                break;
            }
            // type checks are skipped for null fields
            if (next.getField(0) != null) {
                Assert.assertEquals("Field 0 should be int", Integer.class, next.getField(0).getClass());
            }
            if (next.getField(1) != null) {
                Assert.assertEquals("Field 1 should be String", String.class, next.getField(1).getClass());
            }
            if (next.getField(2) != null) {
                Assert.assertEquals("Field 2 should be String", String.class, next.getField(2).getClass());
            }
            if (next.getField(3) != null) {
                Assert.assertEquals("Field 3 should be float", Double.class, next.getField(3).getClass());
            }
            if (next.getField(4) != null) {
                Assert.assertEquals("Field 4 should be int", Integer.class, next.getField(4).getClass());
            }
            // compare values against the expected row; null expectations are not checked
            for (int x = 0; x < 5; x++) {
                if (testData[recordCount][x] != null) {
                    Assert.assertEquals(testData[recordCount][x], next.getField(x));
                }
            }
            recordCount++;
        }
        jdbcInputFormat.close();
    }
    jdbcInputFormat.closeInputFormat();
    Assert.assertEquals(testData.length, recordCount);
}
Also used : NumericBetweenParametersProvider(org.apache.flink.api.java.io.jdbc.split.NumericBetweenParametersProvider) GenericParameterValuesProvider(org.apache.flink.api.java.io.jdbc.split.GenericParameterValuesProvider) ParameterValuesProvider(org.apache.flink.api.java.io.jdbc.split.ParameterValuesProvider) Row(org.apache.flink.types.Row) InputSplit(org.apache.flink.core.io.InputSplit) Test(org.junit.Test)

Example 14 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class JDBCInputFormatTest method testJDBCInputFormatWithParallelismAndGenericSplitting.

/**
 * Runs the author-parameterised books query through a {@link GenericParameterValuesProvider}
 * with two parameter rows, then verifies the number of splits, the Java type of each
 * non-null field, and the total number of records read.
 *
 * @throws IOException if the input format fails to open, read or close
 * @throws InstantiationException declared by the test signature
 * @throws IllegalAccessException declared by the test signature
 */
@Test
public void testJDBCInputFormatWithParallelismAndGenericSplitting() throws IOException, InstantiationException, IllegalAccessException {
    // one parameter row per author
    final Serializable[][] queryParameters = new String[][] { { "Kumar" }, { "Tan Ah Teck" } };
    final ParameterValuesProvider paramProvider = new GenericParameterValuesProvider(queryParameters);

    jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat()
        .setDrivername(DRIVER_CLASS)
        .setDBUrl(DB_URL)
        .setQuery(JDBCTestBase.SELECT_ALL_BOOKS_SPLIT_BY_AUTHOR)
        .setRowTypeInfo(rowTypeInfo)
        .setParametersProvider(paramProvider)
        .setResultSetType(ResultSet.TYPE_SCROLL_INSENSITIVE)
        .finish();
    jdbcInputFormat.openInputFormat();

    final InputSplit[] splits = jdbcInputFormat.createInputSplits(1);
    // this query exploits parallelism (1 split for every queryParameters row)
    Assert.assertEquals(queryParameters.length, splits.length);

    // expected Java class per column, paired with the failure message for that column
    final String[] typeMessages = {
        "Field 0 should be int",
        "Field 1 should be String",
        "Field 2 should be String",
        "Field 3 should be float",
        "Field 4 should be int"
    };
    final Class<?>[] expectedTypes = { Integer.class, String.class, String.class, Double.class, Integer.class };

    int recordCount = 0;
    // single Row instance passed to every nextRecord call
    final Row reuse = new Row(5);
    for (InputSplit split : splits) {
        jdbcInputFormat.open(split);
        while (!jdbcInputFormat.reachedEnd()) {
            final Row next = jdbcInputFormat.nextRecord(reuse);
            if (next == null) {
                break;
            }
            // type checks are skipped for null fields
            for (int field = 0; field < expectedTypes.length; field++) {
                final Object value = next.getField(field);
                if (value != null) {
                    Assert.assertEquals(typeMessages[field], expectedTypes[field], value.getClass());
                }
            }
            recordCount++;
        }
        jdbcInputFormat.close();
    }

    Assert.assertEquals(3, recordCount);
    jdbcInputFormat.closeInputFormat();
}
Also used : GenericParameterValuesProvider(org.apache.flink.api.java.io.jdbc.split.GenericParameterValuesProvider) ParameterValuesProvider(org.apache.flink.api.java.io.jdbc.split.ParameterValuesProvider) Row(org.apache.flink.types.Row) InputSplit(org.apache.flink.core.io.InputSplit) GenericParameterValuesProvider(org.apache.flink.api.java.io.jdbc.split.GenericParameterValuesProvider) Test(org.junit.Test)

Example 15 with InputSplit

use of org.apache.flink.core.io.InputSplit in project flink by apache.

the class ReplicatingInputSplitAssigner method getNextInputSplit.

/**
 * Returns the next split for the given task, or {@code null} once that task has received
 * every split. Each task id has its own position in {@code inputSplits}, tracked in
 * {@code assignCounts}.
 *
 * <p>If {@code taskId} lies beyond the current counter array, the array is grown and the
 * grown array is stored back into the field. Previously the enlarged copy was discarded,
 * so the subsequent write to {@code assignCounts[taskId]} threw
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param host the requesting host; not used by this method
 * @param taskId index of the requesting task
 * @return the next split for {@code taskId}, or {@code null} if none remain for it
 */
@Override
public InputSplit getNextInputSplit(String host, int taskId) {
    // NOTE(review): assumes the assignCounts field is not declared final — confirm.
    if (taskId >= this.assignCounts.length) {
        // Grow to at least double the size, and always far enough to hold index taskId
        // (taskId + 1 slots; the old code sized it to taskId, one slot short).
        final int newSize = Math.max(this.assignCounts.length * 2, taskId + 1);
        // Arrays.copyOf zero-pads the new tail, so new tasks start at count 0.
        this.assignCounts = Arrays.copyOf(this.assignCounts, newSize);
    }

    // get assignment count
    final int assignCnt = this.assignCounts[taskId];
    if (assignCnt >= this.inputSplits.length) {
        // all splits for this task have been assigned
        return null;
    }

    // return next split and advance this task's position
    this.assignCounts[taskId] = assignCnt + 1;
    return this.inputSplits[assignCnt];
}
Also used : InputSplit(org.apache.flink.core.io.InputSplit)

Aggregations

InputSplit (org.apache.flink.core.io.InputSplit)21 Test (org.junit.Test)12 HashSet (java.util.HashSet)7 LocatableInputSplit (org.apache.flink.core.io.LocatableInputSplit)6 LocatableInputSplitAssigner (org.apache.flink.api.common.io.LocatableInputSplitAssigner)5 InputSplitProviderException (org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException)4 ArrayList (java.util.ArrayList)3 NoSuchElementException (java.util.NoSuchElementException)3 GenericInputSplit (org.apache.flink.core.io.GenericInputSplit)3 IOException (java.io.IOException)2 Iterator (java.util.Iterator)2 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)2 JobID (org.apache.flink.api.common.JobID)2 DefaultInputSplitAssigner (org.apache.flink.api.common.io.DefaultInputSplitAssigner)2 InputFormat (org.apache.flink.api.common.io.InputFormat)2 RichInputFormat (org.apache.flink.api.common.io.RichInputFormat)2 GenericParameterValuesProvider (org.apache.flink.api.java.io.jdbc.split.GenericParameterValuesProvider)2 ParameterValuesProvider (org.apache.flink.api.java.io.jdbc.split.ParameterValuesProvider)2 InputSplitAssigner (org.apache.flink.core.io.InputSplitAssigner)2 JobException (org.apache.flink.runtime.JobException)2