Search in sources :

Example 1 with RecordAndPosition

Use of org.apache.flink.connector.file.src.util.RecordAndPosition in the Apache Flink project.

From the class OrcColumnarRowInputFormatTest, method innerTestRestore.

/**
 * Reads a split in two phases and checks the combined result.
 *
 * <p>Phase one consumes rows from a reader obtained via {@code createReader} until
 * {@code breakCnt} rows have been seen, remembering the offset and record skip count of the
 * last consumed record. Phase two passes that remembered position to {@code restoreReader} and
 * drains whatever the returned reader still yields. Every row must have non-null fields 0 and 1.
 *
 * @param format the input format used to create both readers
 * @param split the file split to read
 * @param breakCnt number of rows to consume before switching to the restored reader
 * @param expectedCnt total number of rows expected across both phases
 * @param expectedTotalF0 expected sum of field 0 over all consumed rows
 * @throws IOException if reading from either reader fails
 */
private void innerTestRestore(OrcColumnarRowInputFormat<?, FileSourceSplit> format, FileSourceSplit split, int breakCnt, int expectedCnt, long expectedTotalF0) throws IOException {
    final AtomicInteger rowsSeen = new AtomicInteger(0);
    final AtomicLong sumOfF0 = new AtomicLong(0);

    // Validates a single row and folds it into the running row count and field-0 sum.
    final Consumer<RowData> rowChecker = rowData -> {
        Assert.assertFalse(rowData.isNullAt(0));
        Assert.assertFalse(rowData.isNullAt(1));
        sumOfF0.addAndGet(rowData.getInt(0));
        assertNotNull(rowData.getString(1).toString());
        rowsSeen.incrementAndGet();
    };

    // Phase 1: read until breakCnt rows were consumed, tracking the last consumed position.
    // Both stay at -1 when no row is consumed (e.g. breakCnt <= 0).
    long lastOffset = -1;
    long lastSkipCount = -1;
    try (BulkFormat.Reader<RowData> firstReader = createReader(format, split)) {
        while (rowsSeen.get() < breakCnt) {
            final BulkFormat.RecordIterator<RowData> currentBatch = firstReader.readBatch();
            // A null batch here means the reader returned nothing before breakCnt was reached.
            assertNotNull(currentBatch);
            // next() is evaluated before the count check, exactly once per loop test.
            for (RecordAndPosition<RowData> rec = currentBatch.next();
                    rec != null && rowsSeen.get() < breakCnt;
                    rec = currentBatch.next()) {
                rowChecker.accept(rec.getRecord());
                lastOffset = rec.getOffset();
                lastSkipCount = rec.getRecordSkipCount();
            }
            currentBatch.releaseBatch();
        }
    }

    // Phase 2: continue from the remembered position and drain the remaining rows.
    Utils.forEachRemaining(restoreReader(format, split, lastOffset, lastSkipCount), rowChecker);

    // Both phases together must have covered every row exactly once.
    assertEquals(expectedCnt, rowsSeen.get());
    assertEquals(expectedTotalF0, sumOfF0.get());
}
Also used : PartitionFieldExtractor(org.apache.flink.connector.file.table.PartitionFieldExtractor) DecimalDataUtils(org.apache.flink.table.data.DecimalDataUtils) Between(org.apache.flink.orc.OrcFilters.Between) RowType(org.apache.flink.table.types.logical.RowType) CheckpointedPosition(org.apache.flink.connector.file.src.util.CheckpointedPosition) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Equals(org.apache.flink.orc.OrcFilters.Equals) BigDecimal(java.math.BigDecimal) DecimalType(org.apache.flink.table.types.logical.DecimalType) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) Path(org.apache.flink.core.fs.Path) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Predicate(org.apache.flink.orc.OrcFilters.Predicate) Configuration(org.apache.hadoop.conf.Configuration) Or(org.apache.flink.orc.OrcFilters.Or) ClassRule(org.junit.ClassRule) Utils(org.apache.flink.connector.file.src.util.Utils) FileStatus(org.apache.flink.core.fs.FileStatus) RowData(org.apache.flink.table.data.RowData) IOUtils(org.apache.flink.util.IOUtils) Assert.assertNotNull(org.junit.Assert.assertNotNull) FileOutputStream(java.io.FileOutputStream) DataTypes(org.apache.flink.table.api.DataTypes) Test(org.junit.Test) IOException(java.io.IOException) UUID(java.util.UUID) File(java.io.File) PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) Consumer(java.util.function.Consumer) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) InternalTypeInfo(org.apache.flink.table.runtime.typeutils.InternalTypeInfo) LogicalType(org.apache.flink.table.types.logical.LogicalType) RecordAndPosition(org.apache.flink.connector.file.src.util.RecordAndPosition) OrcShim(org.apache.flink.orc.shim.OrcShim) PartitionPathUtils.generatePartitionPath(org.apache.flink.table.utils.PartitionPathUtils.generatePartitionPath) BulkFormat(org.apache.flink.connector.file.src.reader.BulkFormat) Assert(org.junit.Assert) Collections(java.util.Collections) 
TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) AtomicLong(java.util.concurrent.atomic.AtomicLong) RowData(org.apache.flink.table.data.RowData) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BulkFormat(org.apache.flink.connector.file.src.reader.BulkFormat)

Aggregations

File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 BigDecimal (java.math.BigDecimal)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 UUID (java.util.UUID)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 Consumer (java.util.function.Consumer)1 FileSourceSplit (org.apache.flink.connector.file.src.FileSourceSplit)1 BulkFormat (org.apache.flink.connector.file.src.reader.BulkFormat)1 CheckpointedPosition (org.apache.flink.connector.file.src.util.CheckpointedPosition)1 RecordAndPosition (org.apache.flink.connector.file.src.util.RecordAndPosition)1 Utils (org.apache.flink.connector.file.src.util.Utils)1 PartitionFieldExtractor (org.apache.flink.connector.file.table.PartitionFieldExtractor)1 FileStatus (org.apache.flink.core.fs.FileStatus)1 Path (org.apache.flink.core.fs.Path)1