Search in sources :

Example 16 with FileSourceSplit

use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.

the class LocalityAwareSplitAssignerTest method testAssignmentMultiLocalHost.

/**
 * Builds four groups of ten splits whose host lists contain three, two, one and zero of the
 * "local" hosts, then requests every split from a {@link LocalityAwareSplitAssigner} while
 * cycling through the requesting hosts {@code local3, local2, local1, other}.
 *
 * <p>The test asserts which host list each requester receives: {@code local1} gets the
 * one-local-host splits, {@code local2} the two-local-host splits, {@code local3} the
 * three-local-host splits and {@code other} the splits without any local host. Finally it checks
 * that the assigner is drained and that the local/remote assignment counters match the number of
 * splits created per category.
 */
@SuppressWarnings("UnnecessaryLocalVariable")
@Test
public void testAssignmentMultiLocalHost() {
    final String[] localHosts = { "local1", "local2", "local3" };
    final String[] remoteHosts = { "remote1", "remote2", "remote3" };
    final String[] requestingHosts = { "local3", "local2", "local1", "other" };
    final int numThreeLocalSplits = 10;
    final int numTwoLocalSplits = 10;
    final int numOneLocalSplits = 10;
    // derived totals, so they cannot drift from the per-category counts above
    final int numLocalSplits = numThreeLocalSplits + numTwoLocalSplits + numOneLocalSplits;
    final int numRemoteSplits = 10;
    final int numSplits = numLocalSplits + numRemoteSplits;
    final String[] threeLocalHosts = localHosts;
    final String[] twoLocalHosts = { localHosts[0], localHosts[1], remoteHosts[0] };
    final String[] oneLocalHost = { localHosts[0], remoteHosts[0], remoteHosts[1] };
    final String[] noLocalHost = remoteHosts;
    int splitCnt = 0;
    final Set<FileSourceSplit> splits = new HashSet<>();
    // add splits with three local hosts
    for (int i = 0; i < numThreeLocalSplits; i++) {
        splits.add(createSplit(splitCnt++, threeLocalHosts));
    }
    // add splits with two local hosts
    for (int i = 0; i < numTwoLocalSplits; i++) {
        splits.add(createSplit(splitCnt++, twoLocalHosts));
    }
    // add splits with one local host
    for (int i = 0; i < numOneLocalSplits; i++) {
        splits.add(createSplit(splitCnt++, oneLocalHost));
    }
    // add splits with no local host (remote hosts only)
    for (int i = 0; i < numRemoteSplits; i++) {
        splits.add(createSplit(splitCnt++, noLocalHost));
    }
    final LocalityAwareSplitAssigner ia = new LocalityAwareSplitAssigner(splits);
    // get all available splits, cycling through the requesting hosts
    for (int i = 0; i < numSplits; i++) {
        final String host = requestingHosts[i % requestingHosts.length];
        final Optional<FileSourceSplit> ois = ia.getNext(host);
        assertTrue(ois.isPresent());
        final FileSourceSplit is = ois.get();
        // every returned split must be one we created and must be handed out only once
        assertTrue(splits.remove(is));
        // check priority of split (expected host list first, actual second)
        if (host.equals(localHosts[0])) {
            assertArrayEquals(oneLocalHost, is.hostnames());
        } else if (host.equals(localHosts[1])) {
            assertArrayEquals(twoLocalHosts, is.hostnames());
        } else if (host.equals(localHosts[2])) {
            assertArrayEquals(threeLocalHosts, is.hostnames());
        } else {
            assertArrayEquals(noLocalHost, is.hostnames());
        }
    }
    // check we had all
    assertTrue(splits.isEmpty());
    assertFalse(ia.getNext("anotherHost").isPresent());
    assertEquals(numRemoteSplits, ia.getNumberOfRemoteAssignments());
    assertEquals(numLocalSplits, ia.getNumberOfLocalAssignments());
}
Also used : FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 17 with FileSourceSplit

use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.

the class NonSplittingRecursiveEnumeratorTest method testFilesWithNoBlockInfo.

/**
 * Registers a test file system holding a single file of length 12345 (the status is created via
 * {@code forFileWithBlocks} with no block arguments) and checks that enumerating its parent
 * directory yields exactly one split spanning the whole file.
 */
@Test
public void testFilesWithNoBlockInfo() throws Exception {
    final Path file = new Path("testfs:///dir/file1");
    testFs =
            TestingFileSystem.createForFileStatus(
                    "testfs",
                    TestingFileSystem.TestFileStatus.forFileWithBlocks(file, 12345L));
    testFs.register();

    final Path[] roots = { new Path("testfs:///dir") };
    final NonSplittingRecursiveEnumerator splitEnumerator = createEnumerator();
    final Collection<FileSourceSplit> found = splitEnumerator.enumerateSplits(roots, 0);

    // exactly one split is expected for the single file
    assertEquals(1, found.size());

    // the split id is a placeholder ("ignoredId"); comparison is delegated to assertSplitsEqual
    final FileSourceSplit expected =
            new FileSourceSplit("ignoredId", file, 0L, 12345L, 0, 12345L);
    final FileSourceSplit actual = found.iterator().next();
    assertSplitsEqual(expected, actual);
}
Also used : Path(org.apache.flink.core.fs.Path) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) Test(org.junit.Test)

Example 18 with FileSourceSplit

use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.

the class NonSplittingRecursiveEnumeratorTest method testDefaultHiddenFilesFilter.

/**
 * Registers a test file system with one regularly named file, one file whose name starts with
 * {@code '.'} and one whose name starts with {@code '_'}, and checks that enumerating the root
 * returns only the regularly named file.
 */
@Test
public void testDefaultHiddenFilesFilter() throws Exception {
    final Path[] allFiles = {
        new Path("testfs:///visiblefile"),
        new Path("testfs:///.hiddenfile1"),
        new Path("testfs:///_hiddenfile2")
    };
    testFs = TestingFileSystem.createWithFiles("testfs", allFiles);
    testFs.register();

    final Path[] roots = { new Path("testfs:///") };
    final NonSplittingRecursiveEnumerator splitEnumerator = createEnumerator();
    final Collection<FileSourceSplit> found = splitEnumerator.enumerateSplits(roots, 1);

    // only the file without a '.' or '_' prefix is expected in the result
    assertEquals(
            Collections.singletonList(new Path("testfs:///visiblefile")),
            toPaths(found));
}
Also used : Path(org.apache.flink.core.fs.Path) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) Test(org.junit.Test)

Example 19 with FileSourceSplit

use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.

the class ContinuousFileSplitEnumeratorTest method testDiscoverSplitWhenNoReaderRegistered.

/**
 * Adds one split to the file enumerator while no reader has been registered, runs all pending
 * context actions, and checks that the split shows up in the enumerator's snapshot state.
 */
@Test
public void testDiscoverSplitWhenNoReaderRegistered() throws Exception {
    final TestingSplitEnumeratorContext<FileSourceSplit> enumContext =
            new TestingSplitEnumeratorContext<>(4);
    final TestingFileEnumerator files = new TestingFileEnumerator();
    final ContinuousFileSplitEnumerator splitEnumerator = createEnumerator(files, enumContext);

    // publish a single split, then run the context's pending actions (periodic discovery)
    final FileSourceSplit discovered = createRandomSplit();
    files.addSplits(discovered);
    enumContext.triggerAllActions();

    // no reader was registered in this test, so the split is expected in the checkpoint
    assertThat(
            splitEnumerator.snapshotState(1L).getSplits(),
            contains(discovered));
}
Also used : TestingFileEnumerator(org.apache.flink.connector.file.src.testutils.TestingFileEnumerator) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) TestingSplitEnumeratorContext(org.apache.flink.connector.testutils.source.reader.TestingSplitEnumeratorContext) Test(org.junit.Test)

Example 20 with FileSourceSplit

use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.

the class ContinuousFileSplitEnumeratorTest method testDiscoverWhenReaderRegistered.

/**
 * Registers reader 2 on "localhost" and lets it request a split before any split exists, then
 * adds one split and runs all pending context actions. Checks that the snapshot state holds no
 * splits and that the split was assigned to reader 2.
 */
@Test
public void testDiscoverWhenReaderRegistered() throws Exception {
    final int readerId = 2;
    final TestingSplitEnumeratorContext<FileSourceSplit> enumContext =
            new TestingSplitEnumeratorContext<>(4);
    final TestingFileEnumerator files = new TestingFileEnumerator();
    final ContinuousFileSplitEnumerator splitEnumerator = createEnumerator(files, enumContext);

    // the reader registers and asks for work before any split is available
    enumContext.registerReader(readerId, "localhost");
    splitEnumerator.addReader(readerId);
    splitEnumerator.handleSplitRequest(readerId, "localhost");

    // publish a single split, then run the context's pending actions (periodic discovery)
    final FileSourceSplit discovered = createRandomSplit();
    files.addSplits(discovered);
    enumContext.triggerAllActions();

    // nothing is left pending in the checkpoint; the split is in the reader's assignment
    assertThat(splitEnumerator.snapshotState(1L).getSplits(), empty());
    assertThat(
            enumContext.getSplitAssignments().get(readerId).getAssignedSplits(),
            contains(discovered));
}
Also used : TestingFileEnumerator(org.apache.flink.connector.file.src.testutils.TestingFileEnumerator) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) TestingSplitEnumeratorContext(org.apache.flink.connector.testutils.source.reader.TestingSplitEnumeratorContext) Test(org.junit.Test)

Aggregations

FileSourceSplit (org.apache.flink.connector.file.src.FileSourceSplit)50 Test (org.junit.Test)32 Path (org.apache.flink.core.fs.Path)20 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)11 BulkFormat (org.apache.flink.connector.file.src.reader.BulkFormat)11 Configuration (org.apache.flink.configuration.Configuration)10 ArrayList (java.util.ArrayList)9 TestingSplitEnumeratorContext (org.apache.flink.connector.testutils.source.reader.TestingSplitEnumeratorContext)7 IOException (java.io.IOException)6 RowData (org.apache.flink.table.data.RowData)6 LogicalType (org.apache.flink.table.types.logical.LogicalType)6 LinkedHashMap (java.util.LinkedHashMap)5 TestingFileSystem (org.apache.flink.connector.file.src.testutils.TestingFileSystem)5 FileStatus (org.apache.flink.core.fs.FileStatus)5 AtomicLong (java.util.concurrent.atomic.AtomicLong)4 BigIntType (org.apache.flink.table.types.logical.BigIntType)4 DoubleType (org.apache.flink.table.types.logical.DoubleType)4 IntType (org.apache.flink.table.types.logical.IntType)4 SmallIntType (org.apache.flink.table.types.logical.SmallIntType)4 TinyIntType (org.apache.flink.table.types.logical.TinyIntType)4