Example usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.
Source: class OrcColumnarRowInputFormatTest, method testReadFileAndRestoreWithFilter.
@Test
public void testReadFileAndRestoreWithFilter() throws IOException {
    // Predicate pushed into the ORC reader:
    // _col0 in [0, 975000]  OR  _col0 == 980001  OR  _col0 in [990000, 1800000]
    final List<Predicate> filter =
            Collections.singletonList(
                    new Or(
                            new Between("_col0", PredicateLeaf.Type.LONG, 0L, 975000L),
                            new Equals("_col0", PredicateLeaf.Type.LONG, 980001L),
                            new Between("_col0", PredicateLeaf.Type.LONG, 990000L, 1800000L)));

    final OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(FLAT_FILE_TYPE, new int[] {0, 1}, filter);

    // read the whole file as a single split
    final FileSourceSplit split = createSplits(flatFile, 1).get(0);

    // interrupt mid-read, then restore and verify the totals over the filtered rows
    final int breakCnt = 975001;
    final int expectedCnt = 1795000;
    final long expectedTotalF0 = 1615113397500L;
    innerTestRestore(format, split, breakCnt, expectedCnt, expectedTotalF0);
}
Example usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.
Source: class OrcColumnarRowInputFormatTest, method testReadDecimalTypeFile.
@Test
public void testReadDecimalTypeFile() throws IOException {
    final OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(DECIMAL_FILE_TYPE, new int[] {0});

    final AtomicInteger rowCount = new AtomicInteger(0);
    final AtomicInteger nullCount = new AtomicInteger(0);

    // consume every split of the decimal file
    for (FileSourceSplit split : createSplits(decimalFile, 4)) {
        forEach(
                format,
                split,
                row -> {
                    if (rowCount.getAndIncrement() == 0) {
                        // the very first row has a known value: -1000.5 as DECIMAL(10, 5)
                        assertNotNull(row);
                        assertEquals(1, row.getArity());
                        assertEquals(
                                DecimalDataUtils.castFrom(-1000.5d, 10, 5),
                                row.getDecimal(0, 10, 5));
                    } else if (row.isNullAt(0)) {
                        nullCount.incrementAndGet();
                    } else {
                        assertNotNull(row.getDecimal(0, 10, 5));
                    }
                });
    }

    // all rows read, with the expected share of NULL decimals
    assertEquals(6000, rowCount.get());
    assertEquals(2000, nullCount.get());
}
Example usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.
Source: class OrcColumnarRowInputFormatTest, method testReadFileInSplits.
@Test
public void testReadFileInSplits() throws IOException {
    final OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(FLAT_FILE_TYPE, new int[] {0, 1});

    final AtomicInteger rowCount = new AtomicInteger(0);
    final AtomicLong sumF0 = new AtomicLong(0);

    // consume the flat file as four splits
    for (FileSourceSplit split : createSplits(flatFile, 4)) {
        forEach(
                format,
                split,
                row -> {
                    // both projected columns must be populated in every row
                    Assert.assertFalse(row.isNullAt(0));
                    Assert.assertFalse(row.isNullAt(1));
                    sumF0.addAndGet(row.getInt(0));
                    Assert.assertNotNull(row.getString(1).toString());
                    rowCount.incrementAndGet();
                });
    }

    // every row was read exactly once, and the first column sums to the known total
    assertEquals(1920800, rowCount.get());
    assertEquals(1844737280400L, sumF0.get());
}
Example usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.
Source: class StaticFileSplitEnumeratorTest, method testSplitRequestForRegisteredReader.
@Test
public void testSplitRequestForRegisteredReader() throws Exception {
    final TestingSplitEnumeratorContext<FileSourceSplit> context =
            new TestingSplitEnumeratorContext<>(4);
    final FileSourceSplit split = createRandomSplit();
    final StaticFileSplitEnumerator enumerator = createEnumerator(context, split);

    // register reader 3 with the context before it asks for work
    context.registerReader(3, "somehost");
    enumerator.addReader(3);
    enumerator.handleSplitRequest(3, "somehost");

    // the split must have left the enumerator's checkpoint state
    // and landed in the assignment for reader 3
    assertThat(enumerator.snapshotState(1L).getSplits(), empty());
    assertThat(context.getSplitAssignments().get(3).getAssignedSplits(), contains(split));
}
Example usage of org.apache.flink.connector.file.src.FileSourceSplit in the Apache Flink project.
Source: class LocalityAwareSplitAssignerTest, method testAssignmentOfManySplitsRandomly.
@Test
public void testAssignmentOfManySplitsRandomly() {
    // Clock-based seed so each run exercises a different random assignment.
    // System.currentTimeMillis() replaces the legacy Calendar-based lookup; the seed
    // is included in assertion messages so a failing run can be reproduced.
    final long seed = System.currentTimeMillis();

    final int numSplits = 1000;
    final String[] splitHosts = new String[256];
    final String[] requestingHosts = new String[256];
    final Random rand = new Random(seed);

    // all split locations are "local" hosts
    for (int i = 0; i < splitHosts.length; i++) {
        splitHosts[i] = "localHost" + i;
    }
    // requesters alternate between hosts that can match a split location and hosts
    // that never will, so both local and remote assignment paths are exercised
    for (int i = 0; i < requestingHosts.length; i++) {
        if (i % 2 == 0) {
            requestingHosts[i] = "localHost" + i;
        } else {
            requestingHosts[i] = "remoteHost" + i;
        }
    }

    // build the splits, each placed on 3 distinct randomly chosen hosts
    Set<String> hosts = new HashSet<>();
    Set<FileSourceSplit> splits = new HashSet<>();
    for (int i = 0; i < numSplits; i++) {
        while (hosts.size() < 3) {
            hosts.add(splitHosts[rand.nextInt(splitHosts.length)]);
        }
        splits.add(createSplit(i, hosts.toArray(new String[0])));
        hosts.clear();
    }

    final LocalityAwareSplitAssigner ia = new LocalityAwareSplitAssigner(splits);

    // every request must be answered, and each split handed out exactly once
    for (int i = 0; i < numSplits; i++) {
        final Optional<FileSourceSplit> split =
                ia.getNext(requestingHosts[rand.nextInt(requestingHosts.length)]);
        assertTrue("no split returned (seed=" + seed + ")", split.isPresent());
        assertTrue("split assigned twice (seed=" + seed + ")", splits.remove(split.get()));
    }
    // all splits consumed; further requests yield nothing
    assertTrue(splits.isEmpty());
    assertFalse(ia.getNext("testHost").isPresent());
}
Aggregations