Search in sources :

Example 16 with MergeOnReadInputSplit

Use of org.apache.hudi.table.format.mor.MergeOnReadInputSplit in the Apache Hudi project.

From the class TestStreamReadOperator, method testWriteRecords.

/**
 * Writes two batches of test data and checks what the reader harness emits after each one.
 *
 * <p>After the first batch ({@code DATA_SET_INSERT}) the output must equal that data set.
 * After the second batch ({@code DATA_SET_UPDATE_INSERT}) the output must equal the first
 * data set followed by the second, i.e. the output accumulates like an append-only log.
 * Each batch is expected to produce exactly 4 input splits.
 *
 * @throws Exception if writing the data, driving the harness or running the mailbox fails
 */
@Test
void testWriteRecords() throws Exception {
    TestData.writeData(TestData.DATA_SET_INSERT, conf);
    try (OneInputStreamOperatorTestHarness<MergeOnReadInputSplit, RowData> harness = createReader()) {
        harness.setup();
        harness.open();
        SteppingMailboxProcessor processor = createLocalMailbox(harness);
        StreamReadMonitoringFunction func = TestUtils.getMonitorFunc(conf);

        // First batch: plain inserts.
        List<MergeOnReadInputSplit> splits = generateSplits(func);
        assertThat("Should have 4 splits", splits.size(), is(4));
        processSplits(harness, processor, splits);
        // Assert the output has expected elements.
        TestData.assertRowDataEquals(harness.extractOutputValues(), TestData.DATA_SET_INSERT);

        // Second batch: updates mixed with new inserts.
        TestData.writeData(TestData.DATA_SET_UPDATE_INSERT, conf);
        final List<MergeOnReadInputSplit> splits2 = generateSplits(func);
        assertThat("Should have 4 splits", splits2.size(), is(4));
        processSplits(harness, processor, splits2);
        // The result set behaves like append only: DATA_SET_INSERT + DATA_SET_UPDATE_INSERT
        List<RowData> expected = new ArrayList<>(TestData.DATA_SET_INSERT);
        expected.addAll(TestData.DATA_SET_UPDATE_INSERT);
        TestData.assertRowDataEquals(harness.extractOutputValues(), expected);
    }
}

/**
 * Feeds every split to the harness and runs one mailbox step per split, asserting that
 * each step reports success.
 *
 * @param harness   the operator test harness that receives the splits
 * @param processor the mailbox processor that is stepped once per split
 * @param splits    the splits to process, in iteration order
 * @throws Exception if processing an element or running a mailbox step fails
 */
private void processSplits(
        OneInputStreamOperatorTestHarness<MergeOnReadInputSplit, RowData> harness,
        SteppingMailboxProcessor processor,
        List<MergeOnReadInputSplit> splits) throws Exception {
    for (MergeOnReadInputSplit split : splits) {
        // Process this element to enqueue to mail-box.
        harness.processElement(split, -1);
        // Run the mail-box once to read all records from the given split.
        assertThat("Should process 1 split", processor.runMailboxStep());
    }
}
Also used : MergeOnReadInputSplit(org.apache.hudi.table.format.mor.MergeOnReadInputSplit) RowData(org.apache.flink.table.data.RowData) SteppingMailboxProcessor(org.apache.flink.streaming.runtime.tasks.mailbox.SteppingMailboxProcessor) ArrayList(java.util.ArrayList) Test(org.junit.jupiter.api.Test)

Aggregations

MergeOnReadInputSplit (org.apache.hudi.table.format.mor.MergeOnReadInputSplit)16 ArrayList (java.util.ArrayList)8 Test (org.junit.jupiter.api.Test)8 List (java.util.List)7 Collectors (java.util.stream.Collectors)6 Configuration (org.apache.flink.configuration.Configuration)6 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)6 FlinkOptions (org.apache.hudi.configuration.FlinkOptions)6 File (java.io.File)5 Comparator (java.util.Comparator)5 CountDownLatch (java.util.concurrent.CountDownLatch)5 TimeUnit (java.util.concurrent.TimeUnit)5 SourceFunction (org.apache.flink.streaming.api.functions.source.SourceFunction)5 StreamSource (org.apache.flink.streaming.api.operators.StreamSource)5 Watermark (org.apache.flink.streaming.api.watermark.Watermark)5 AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)5 RowData (org.apache.flink.table.data.RowData)5 StreamerUtil (org.apache.hudi.util.StreamerUtil)5 TestConfigurations (org.apache.hudi.utils.TestConfigurations)5 TestData (org.apache.hudi.utils.TestData)5