Search in sources:

Example 11 with FileSourceSplit

Usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.

Class: OrcColumnarRowInputFormatTest — method: testReadFileAndRestoreWithFilter.

@Test
public void testReadFileAndRestoreWithFilter() throws IOException {
    // Pushed-down ORC predicate:
    //   _col0 BETWEEN 0 AND 975000  OR  _col0 = 980001  OR  _col0 BETWEEN 990000 AND 1800000.
    List<Predicate> filters =
            Collections.singletonList(
                    new Or(
                            new Between("_col0", PredicateLeaf.Type.LONG, 0L, 975000L),
                            new Equals("_col0", PredicateLeaf.Type.LONG, 980001L),
                            new Between("_col0", PredicateLeaf.Type.LONG, 990000L, 1800000L)));
    OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(FLAT_FILE_TYPE, new int[] {0, 1}, filters);
    // Use the whole file as a single split (createSplits with split count 1).
    FileSourceSplit wholeFileSplit = createSplits(flatFile, 1).get(0);
    // Break off mid-read, then restore and verify total row count and _col0 checksum.
    int breakCnt = 975001;
    int expectedCnt = 1795000;
    long expectedTotalF0 = 1615113397500L;
    innerTestRestore(format, wholeFileSplit, breakCnt, expectedCnt, expectedTotalF0);
}
Also used : Equals(org.apache.flink.orc.OrcFilters.Equals) Assert.assertEquals(org.junit.Assert.assertEquals) Or(org.apache.flink.orc.OrcFilters.Or) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) Between(org.apache.flink.orc.OrcFilters.Between) Predicate(org.apache.flink.orc.OrcFilters.Predicate) Test(org.junit.Test)

Example 12 with FileSourceSplit

Usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.

Class: OrcColumnarRowInputFormatTest — method: testReadDecimalTypeFile.

@Test
public void testReadDecimalTypeFile() throws IOException {
    // Project the decimal file onto its single decimal column.
    OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(DECIMAL_FILE_TYPE, new int[] {0});
    AtomicInteger rowCount = new AtomicInteger(0);
    AtomicInteger nullRows = new AtomicInteger(0);
    // Read the file through all four splits.
    for (FileSourceSplit split : createSplits(decimalFile, 4)) {
        forEach(format, split, row -> {
            if (rowCount.get() == 0) {
                // The very first row carries a known decimal value.
                assertNotNull(row);
                assertEquals(1, row.getArity());
                assertEquals(
                        DecimalDataUtils.castFrom(-1000.5d, 10, 5),
                        row.getDecimal(0, 10, 5));
            } else if (row.isNullAt(0)) {
                nullRows.incrementAndGet();
            } else {
                assertNotNull(row.getDecimal(0, 10, 5));
            }
            rowCount.incrementAndGet();
        });
    }
    // Expected totals for the fixture file: 6000 rows, 2000 of them null.
    assertEquals(6000, rowCount.get());
    assertEquals(2000, nullRows.get());
}
Also used : FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Test(org.junit.Test)

Example 13 with FileSourceSplit

Usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.

Class: OrcColumnarRowInputFormatTest — method: testReadFileInSplits.

@Test
public void testReadFileInSplits() throws IOException {
    // Project the flat file onto (_col0: int, _col1: string).
    OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(FLAT_FILE_TYPE, new int[] {0, 1});
    AtomicInteger cnt = new AtomicInteger(0);
    AtomicLong totalF0 = new AtomicLong(0);
    // Read all rows across four splits, summing the first column as a checksum.
    for (FileSourceSplit split : createSplits(flatFile, 4)) {
        forEach(format, split, row -> {
            // Neither projected column may be null in this fixture.
            Assert.assertFalse(row.isNullAt(0));
            Assert.assertFalse(row.isNullAt(1));
            totalF0.addAndGet(row.getInt(0));
            Assert.assertNotNull(row.getString(1).toString());
            cnt.incrementAndGet();
        });
    }
    // All rows read, and the checksum over _col0 matches the known total.
    // (Assertions qualified with Assert.* consistently; the original mixed
    // qualified and statically-imported assertion styles in one method.)
    Assert.assertEquals(1920800, cnt.get());
    Assert.assertEquals(1844737280400L, totalF0.get());
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Test(org.junit.Test)

Example 14 with FileSourceSplit

Usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.

Class: StaticFileSplitEnumeratorTest — method: testSplitRequestForRegisteredReader.

@Test
public void testSplitRequestForRegisteredReader() throws Exception {
    final TestingSplitEnumeratorContext<FileSourceSplit> context =
            new TestingSplitEnumeratorContext<>(4);
    final FileSourceSplit randomSplit = createRandomSplit();
    final StaticFileSplitEnumerator enumerator = createEnumerator(context, randomSplit);

    // Register a reader on subtask 3 and let it ask for work.
    final int subtask = 3;
    final String host = "somehost";
    context.registerReader(subtask, host);
    enumerator.addReader(subtask);
    enumerator.handleSplitRequest(subtask, host);

    // The split was handed out: no splits remain in checkpoint state,
    // and the split is assigned to the requesting subtask.
    assertThat(enumerator.snapshotState(1L).getSplits(), empty());
    assertThat(context.getSplitAssignments().get(subtask).getAssignedSplits(), contains(randomSplit));
}
Also used : FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) TestingSplitEnumeratorContext(org.apache.flink.connector.testutils.source.reader.TestingSplitEnumeratorContext) Test(org.junit.Test)

Example 15 with FileSourceSplit

Usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.

Class: LocalityAwareSplitAssignerTest — method: testAssignmentOfManySplitsRandomly.

@Test
public void testAssignmentOfManySplitsRandomly() {
    // Seed derives from the wall clock, so generated host assignments vary per run.
    final long seed = Calendar.getInstance().getTimeInMillis();
    final Random rnd = new Random(seed);

    final int numSplits = 1000;
    final int numHosts = 256;

    // Hosts that store split data are all "local"; requesters alternate local/remote.
    final String[] splitHosts = new String[numHosts];
    final String[] requestingHosts = new String[numHosts];
    for (int i = 0; i < numHosts; i++) {
        splitHosts[i] = "localHost" + i;
        requestingHosts[i] = (i % 2 == 0 ? "localHost" : "remoteHost") + i;
    }

    final Set<String> candidateHosts = new HashSet<>();
    final Set<FileSourceSplit> splits = new HashSet<>();
    for (int i = 0; i < numSplits; i++) {
        // Pick at least three distinct storage hosts for each split.
        while (candidateHosts.size() < 3) {
            candidateHosts.add(splitHosts[rnd.nextInt(splitHosts.length)]);
        }
        splits.add(createSplit(i, candidateHosts.toArray(new String[0])));
        candidateHosts.clear();
    }

    final LocalityAwareSplitAssigner assigner = new LocalityAwareSplitAssigner(splits);

    // Every request from a random host must be served, and every split handed out once.
    for (int i = 0; i < numSplits; i++) {
        final Optional<FileSourceSplit> next =
                assigner.getNext(requestingHosts[rnd.nextInt(requestingHosts.length)]);
        assertTrue(next.isPresent());
        assertTrue(splits.remove(next.get()));
    }
    assertTrue(splits.isEmpty());
    // With all splits exhausted, further requests yield nothing.
    assertFalse(assigner.getNext("testHost").isPresent());
}
Also used : Random(java.util.Random) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

FileSourceSplit (org.apache.flink.connector.file.src.FileSourceSplit)50 Test (org.junit.Test)32 Path (org.apache.flink.core.fs.Path)20 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)11 BulkFormat (org.apache.flink.connector.file.src.reader.BulkFormat)11 Configuration (org.apache.flink.configuration.Configuration)10 ArrayList (java.util.ArrayList)9 TestingSplitEnumeratorContext (org.apache.flink.connector.testutils.source.reader.TestingSplitEnumeratorContext)7 IOException (java.io.IOException)6 RowData (org.apache.flink.table.data.RowData)6 LogicalType (org.apache.flink.table.types.logical.LogicalType)6 LinkedHashMap (java.util.LinkedHashMap)5 TestingFileSystem (org.apache.flink.connector.file.src.testutils.TestingFileSystem)5 FileStatus (org.apache.flink.core.fs.FileStatus)5 AtomicLong (java.util.concurrent.atomic.AtomicLong)4 BigIntType (org.apache.flink.table.types.logical.BigIntType)4 DoubleType (org.apache.flink.table.types.logical.DoubleType)4 IntType (org.apache.flink.table.types.logical.IntType)4 SmallIntType (org.apache.flink.table.types.logical.SmallIntType)4 TinyIntType (org.apache.flink.table.types.logical.TinyIntType)4