Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class LimitableBulkFormatTest, method testLimitOverBatches.
@Test
public void testLimitOverBatches() throws IOException {
    // set limit
    Long limit = 2048L;
    // configuration for small batches
    Configuration conf = new Configuration();
    conf.set(StreamFormat.FETCH_IO_SIZE, MemorySize.parse("4k"));
    // read
    BulkFormat<String, FileSourceSplit> format =
            LimitableBulkFormat.create(
                    new StreamFormatAdapter<>(new TextLineInputFormat()), limit);
    BulkFormat.Reader<String> reader =
            format.createReader(
                    conf,
                    new FileSourceSplit(
                            "id",
                            new Path(file.toURI()),
                            0,
                            file.length(),
                            file.lastModified(),
                            file.length()));
    // check
    AtomicInteger i = new AtomicInteger(0);
    Utils.forEachRemaining(reader, s -> i.incrementAndGet());
    Assert.assertEquals(limit.intValue(), i.get());
}
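For orientation, here is a minimal sketch of the limiting behavior this test exercises: a shared counter caps the total number of records emitted, so reading stops at the limit even when the underlying format still has batches left. This is an illustrative plain-Java analogue, not Flink's actual LimitableBulkFormat implementation.
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicLong;

// Illustrative only: a record-limiting wrapper around any iterator. The shared
// AtomicLong plays the role of the global counter a limitable format would
// keep across all of its readers.
final class LimitingIterator<T> implements Iterator<T> {
    private final Iterator<T> delegate;
    private final AtomicLong emitted; // shared across all readers of the format
    private final long limit;

    LimitingIterator(Iterator<T> delegate, AtomicLong emitted, long limit) {
        this.delegate = delegate;
        this.emitted = emitted;
        this.limit = limit;
    }

    @Override
    public boolean hasNext() {
        // stop as soon as the global budget is spent, even mid-batch
        return emitted.get() < limit && delegate.hasNext();
    }

    @Override
    public T next() {
        emitted.incrementAndGet();
        return delegate.next();
    }
}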
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class OrcColumnarRowInputFormatTest, method testReadFileWithPartitionFields.
@Test
public void testReadFileWithPartitionFields() throws IOException {
    LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();
    partSpec.put("f1", "1");
    partSpec.put("f3", "3");
    partSpec.put("f5", "f5");
    partSpec.put("f8", BigDecimal.valueOf(5.333).toString());
    partSpec.put("f13", "f13");
    final Path flatFile = copyFileFromResource("test-data-flat.orc", partSpec);
    RowType tableType =
            RowType.of(
                    /* 0 */ DataTypes.INT().getLogicalType(),
                    /* 1 */ DataTypes.INT().getLogicalType(), // part-1
                    /* 2 */ DataTypes.STRING().getLogicalType(),
                    /* 3 */ DataTypes.BIGINT().getLogicalType(), // part-2
                    /* 4 */ DataTypes.STRING().getLogicalType(),
                    /* 5 */ DataTypes.STRING().getLogicalType(), // part-3
                    /* 6 */ DataTypes.STRING().getLogicalType(),
                    /* 7 */ DataTypes.INT().getLogicalType(),
                    /* 8 */ DataTypes.DECIMAL(10, 5).getLogicalType(), // part-4
                    /* 9 */ DataTypes.STRING().getLogicalType(),
                    /* 10 */ DataTypes.INT().getLogicalType(),
                    /* 11 */ DataTypes.INT().getLogicalType(),
                    /* 12 */ DataTypes.INT().getLogicalType(),
                    /* 13 */ DataTypes.STRING().getLogicalType(), // part-5
                    /* 14 */ DataTypes.INT().getLogicalType());
    int[] projectedFields = { 8, 1, 3, 0, 5, 2 };
    OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createPartitionFormat(tableType, new ArrayList<>(partSpec.keySet()), projectedFields);
    AtomicInteger cnt = new AtomicInteger(0);
    AtomicLong totalF0 = new AtomicLong(0);
    // read all splits
    for (FileSourceSplit split : createSplits(flatFile, 4)) {
        forEach(format, split, row -> {
            // data values
            Assert.assertFalse(row.isNullAt(3));
            Assert.assertFalse(row.isNullAt(5));
            totalF0.addAndGet(row.getInt(3));
            Assert.assertNotNull(row.getString(5).toString());
            // part values
            Assert.assertFalse(row.isNullAt(0));
            Assert.assertFalse(row.isNullAt(1));
            Assert.assertFalse(row.isNullAt(2));
            Assert.assertFalse(row.isNullAt(4));
            Assert.assertEquals(DecimalDataUtils.castFrom(5.333, 10, 5), row.getDecimal(0, 10, 5));
            Assert.assertEquals(1, row.getInt(1));
            Assert.assertEquals(3, row.getLong(2));
            Assert.assertEquals("f5", row.getString(4).toString());
            cnt.incrementAndGet();
        });
    }
    // check that all rows have been read
    assertEquals(1920800, cnt.get());
    assertEquals(1844737280400L, totalF0.get());
}
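The partition values asserted above never come out of the ORC file itself; they are derived from the Hive-style key=value directory segments the test helper encodes into the file's path. A hedged sketch of that convention follows; the actual layout is produced by the test's own copyFileFromResource helper, and this standalone version only illustrates the idea.
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.StringJoiner;

// Illustrative Hive-style partition-path handling: "f1=1/f3=3/f5=f5/..."
final class PartitionPaths {

    // {f1=1, f3=3} -> "f1=1/f3=3"
    static String toPath(LinkedHashMap<String, String> partSpec) {
        StringJoiner joiner = new StringJoiner("/");
        for (Map.Entry<String, String> e : partSpec.entrySet()) {
            joiner.add(e.getKey() + "=" + e.getValue());
        }
        return joiner.toString();
    }

    // "f1=1/f3=3" -> {f1=1, f3=3}; segments without '=' are ignored
    static LinkedHashMap<String, String> fromPath(String path) {
        LinkedHashMap<String, String> spec = new LinkedHashMap<>();
        for (String segment : path.split("/")) {
            int eq = segment.indexOf('=');
            if (eq > 0) {
                spec.put(segment.substring(0, eq), segment.substring(eq + 1));
            }
        }
        return spec;
    }
}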
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class OrcColumnarRowInputFormatTest, method testReadFileAndRestore.
@Test
public void testReadFileAndRestore() throws IOException {
    OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(FLAT_FILE_TYPE, new int[] { 0, 1 });
    // pick a middle split
    FileSourceSplit split = createSplits(flatFile, 3).get(1);
    int expectedCnt = 660000;
    innerTestRestore(format, split, expectedCnt / 2, expectedCnt, 656700330000L);
}
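The restore contract this test checks is the one RecordAndPosition describes: a position is a byte offset (where the current batch starts) plus the number of records of that batch already emitted, so a restored reader seeks to the offset and discards that many records before producing new ones. A tiny sketch of that bookkeeping; the field names here are illustrative, not Flink's API.
// Illustrative: the two numbers a checkpointed read position boils down to.
final class ReadPosition {
    final long offset;        // byte offset where the current batch starts
    final long recordsToSkip; // records of that batch already emitted

    ReadPosition(long offset, long recordsToSkip) {
        this.offset = offset;
        this.recordsToSkip = recordsToSkip;
    }

    // After seeking to 'offset', a restored reader must discard this many
    // records before handing any to the consumer.
    long recordsToDiscardAfterSeek() {
        return recordsToSkip;
    }
}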
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class OrcColumnarRowInputFormatTest, method testReadFileWithSelectFields.
@Test
public void testReadFileWithSelectFields() throws IOException {
    OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(FLAT_FILE_TYPE, new int[] { 2, 0, 1 });
    AtomicInteger cnt = new AtomicInteger(0);
    AtomicLong totalF0 = new AtomicLong(0);
    // read all splits
    for (FileSourceSplit split : createSplits(flatFile, 4)) {
        forEach(format, split, row -> {
            Assert.assertFalse(row.isNullAt(0));
            Assert.assertFalse(row.isNullAt(1));
            Assert.assertFalse(row.isNullAt(2));
            Assert.assertNotNull(row.getString(0).toString());
            totalF0.addAndGet(row.getInt(1));
            Assert.assertNotNull(row.getString(2).toString());
            cnt.incrementAndGet();
        });
    }
    // check that all rows have been read
    assertEquals(1920800, cnt.get());
    assertEquals(1844737280400L, totalF0.get());
}
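Note how the assertions track the projection: with selected fields { 2, 0, 1 }, position i of each produced row holds source column selectedFields[i], which is why row.getInt(1) reads the original first column (the INT summed into totalF0). A minimal plain-Java illustration of that index mapping, not Flink's reader internals:
// Illustrative projection mapping: output position i holds input column
// selectedFields[i].
final class Projection {
    static Object[] project(Object[] sourceRow, int[] selectedFields) {
        Object[] projected = new Object[selectedFields.length];
        for (int i = 0; i < selectedFields.length; i++) {
            projected[i] = sourceRow[selectedFields[i]];
        }
        return projected;
    }
}
So project(new Object[] { 0, "a", "b" }, new int[] { 2, 0, 1 }) yields { "b", 0, "a" }: source column 0 lands at projected position 1, matching the getInt(1) call above.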
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class OrcColumnarRowInputFormatTest, method innerTestRestore.
private void innerTestRestore(
        OrcColumnarRowInputFormat<?, FileSourceSplit> format,
        FileSourceSplit split,
        int breakCnt,
        int expectedCnt,
        long expectedTotalF0)
        throws IOException {
    AtomicInteger cnt = new AtomicInteger(0);
    AtomicLong totalF0 = new AtomicLong(0);
    Consumer<RowData> consumer = row -> {
        Assert.assertFalse(row.isNullAt(0));
        Assert.assertFalse(row.isNullAt(1));
        totalF0.addAndGet(row.getInt(0));
        assertNotNull(row.getString(1).toString());
        cnt.incrementAndGet();
    };
    // ---------- restore reading ---------------
    long offset = -1;
    long recordSkipCount = -1;
    try (BulkFormat.Reader<RowData> reader = createReader(format, split)) {
        while (cnt.get() < breakCnt) {
            BulkFormat.RecordIterator<RowData> batch = reader.readBatch();
            Assert.assertNotNull(batch);
            RecordAndPosition<RowData> record;
            while ((record = batch.next()) != null && cnt.get() < breakCnt) {
                consumer.accept(record.getRecord());
                offset = record.getOffset();
                recordSkipCount = record.getRecordSkipCount();
            }
            batch.releaseBatch();
        }
    }
    Utils.forEachRemaining(restoreReader(format, split, offset, recordSkipCount), consumer);
    // ---------- end restore reading ---------------
    // the results should be the same as:
    // forEach(format, split, consumer);
    // check that all rows have been read
    assertEquals(expectedCnt, cnt.get());
    assertEquals(expectedTotalF0, totalF0.get());
}
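The Utils.forEachRemaining call that finishes the restored read does essentially what the manual loop above does, minus the position tracking. A hedged sketch of such a drain loop against the BulkFormat.Reader API (closing the reader is left to the caller here; Flink's own utility also takes care of that):
import java.io.IOException;
import java.util.function.Consumer;

import org.apache.flink.connector.file.src.reader.BulkFormat;
import org.apache.flink.connector.file.src.util.RecordAndPosition;

// Sketch of a drain loop over a BulkFormat.Reader: fetch batches until the
// reader is exhausted, hand every record to the consumer, and release each
// batch before fetching the next one.
final class ReaderDrain {
    static <T> void drain(BulkFormat.Reader<T> reader, Consumer<? super T> consumer)
            throws IOException {
        BulkFormat.RecordIterator<T> batch;
        while ((batch = reader.readBatch()) != null) { // null means end of input
            RecordAndPosition<T> record;
            while ((record = batch.next()) != null) {
                consumer.accept(record.getRecord());
            }
            batch.releaseBatch(); // recycle the batch's resources
        }
    }
}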