Example 71 with RowData

Use of org.apache.flink.table.data.RowData in project flink by apache.

From the class ArrowReaderWriterTest, method getTestData:

@Override
public RowData[] getTestData() {
    // Fully populated row backed by GenericRowData.
    RowData row1 = StreamRecordUtils.row(
            (byte) 1, (short) 2, 3, 4L, true, 1.0f, 1.0, "hello", "hello".getBytes(),
            DecimalData.fromUnscaledLong(1, 10, 3), 100, 3600000, 3600000, 3600000, 3600000,
            TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000),
            TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000, 100000),
            TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000),
            TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000, 100000),
            new GenericArrayData(new StringData[] {
                    StringData.fromString("hello"), StringData.fromString("中文"), null }),
            GenericRowData.of(1, StringData.fromString("hello"),
                    new GenericArrayData(new StringData[] { StringData.fromString("hello") }),
                    TimestampData.fromEpochMillis(3600000),
                    GenericRowData.of(1, StringData.fromString("hello"))));
    // BinaryRowData variant: binaryrow pairs each timestamp with its precision
    // and each nested value with its serializer.
    BinaryRowData row2 = StreamRecordUtils.binaryrow(
            (byte) 1, (short) 2, 3, 4L, false, 1.0f, 1.0, "中文", "中文".getBytes(),
            DecimalData.fromUnscaledLong(1, 10, 3), 100, 3600000, 3600000, 3600000, 3600000,
            Tuple2.of(TimestampData.fromEpochMillis(3600000), 0),
            Tuple2.of(TimestampData.fromEpochMillis(3600000), 2),
            Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 4),
            Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 8),
            Tuple2.of(TimestampData.fromEpochMillis(3600000), 0),
            Tuple2.of(TimestampData.fromEpochMillis(3600000), 2),
            Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 4),
            Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 8),
            Tuple2.of(new GenericArrayData(new String[] { null, null, null }),
                    new ArrayDataSerializer(new VarCharType())),
            Tuple2.of(GenericRowData.of(1, null,
                            new GenericArrayData(new StringData[] { StringData.fromString("hello") }),
                            null, GenericRowData.of(1, StringData.fromString("hello"))),
                    new RowDataSerializer(rowFieldType)));
    // Row whose first field is null.
    RowData row3 = StreamRecordUtils.row(
            null, (short) 2, 3, 4L, false, 1.0f, 1.0, "中文", "中文".getBytes(),
            DecimalData.fromUnscaledLong(1, 10, 3), 100, 3600000, 3600000, 3600000, 3600000,
            TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000),
            TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000, 100000),
            TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000),
            TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000, 100000),
            new GenericArrayData(new String[] { null, null, null }),
            GenericRowData.of(1, null,
                    new GenericArrayData(new StringData[] { StringData.fromString("hello") }),
                    null, null));
    // Binary row whose second field is null.
    BinaryRowData row4 = StreamRecordUtils.binaryrow(
            (byte) 1, null, 3, 4L, true, 1.0f, 1.0, "hello", "hello".getBytes(),
            DecimalData.fromUnscaledLong(1, 10, 3), 100, 3600000, 3600000, 3600000, 3600000,
            Tuple2.of(TimestampData.fromEpochMillis(3600000), 0),
            Tuple2.of(TimestampData.fromEpochMillis(3600000), 2),
            Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 4),
            Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 8),
            Tuple2.of(TimestampData.fromEpochMillis(3600000), 0),
            Tuple2.of(TimestampData.fromEpochMillis(3600000), 2),
            Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 4),
            Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 8),
            Tuple2.of(new GenericArrayData(new StringData[] {
                            StringData.fromString("hello"), StringData.fromString("中文"), null }),
                    new ArrayDataSerializer(new VarCharType())),
            Tuple2.of(GenericRowData.of(1, null,
                            new GenericArrayData(new StringData[] { StringData.fromString("hello") }),
                            null, null),
                    new RowDataSerializer(rowFieldType)));
    // Rows in which every field is null.
    RowData row5 = StreamRecordUtils.row(new Object[fieldTypes.size()]);
    BinaryRowData row6 = StreamRecordUtils.binaryrow(new Object[fieldTypes.size()]);
    return new RowData[] { row1, row2, row3, row4, row5, row6 };
}
Also used: GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) GenericArrayData(org.apache.flink.table.data.GenericArrayData) VarCharType(org.apache.flink.table.types.logical.VarCharType) StringData(org.apache.flink.table.data.StringData) ArrayDataSerializer(org.apache.flink.table.runtime.typeutils.ArrayDataSerializer) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)
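
StreamRecordUtils.row and StreamRecordUtils.binaryrow are Flink-internal test helpers; conceptually they build the two physical RowData implementations exercised above. A minimal sketch of constructing each directly, without the helpers (the two-field schema here is invented for illustration, not the test's actual schema):

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.binary.BinaryRowData;
import org.apache.flink.table.data.writer.BinaryRowWriter;

public class RowDataConstructionSketch {
    public static void main(String[] args) {
        // Heap-backed row: fields are held as plain Java objects.
        GenericRowData generic = GenericRowData.of(1, StringData.fromString("hello"));

        // Binary row: fields are written into a compact binary layout.
        BinaryRowData binary = new BinaryRowData(2);
        BinaryRowWriter writer = new BinaryRowWriter(binary);
        writer.writeInt(0, 1);
        writer.writeString(1, StringData.fromString("hello"));
        writer.complete();

        // Both implementations expose the same RowData read API, which is
        // why getTestData can mix them in a single array.
        System.out.println(generic.getInt(0) == binary.getInt(0)); // true
    }
}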

Example 72 with RowData

Use of org.apache.flink.table.data.RowData in project flink by apache.

From the class ArrowSourceFunctionTestBase, method testRestore:

@Test
public void testRestore() throws Exception {
    Tuple2<List<RowData>, Integer> testData = getTestData();
    final ArrowSourceFunction arrowSourceFunction = createTestArrowSourceFunction(testData.f0, testData.f1);
    final AbstractStreamOperatorTestHarness<RowData> testHarness = new AbstractStreamOperatorTestHarness<>(new StreamSource<>(arrowSourceFunction), 1, 1, 0);
    testHarness.open();
    final Throwable[] error = new Throwable[1];
    final MultiShotLatch latch = new MultiShotLatch();
    final AtomicInteger numOfEmittedElements = new AtomicInteger(0);
    final List<RowData> results = new ArrayList<>();
    final DummySourceContext<RowData> sourceContext = new DummySourceContext<RowData>() {

        @Override
        public void collect(RowData element) {
            if (numOfEmittedElements.get() == 2) {
                latch.trigger();
                // fail the source function once two elements have been emitted
                throw new RuntimeException("Fail the arrow source");
            }
            results.add(typeSerializer.copy(element));
            numOfEmittedElements.incrementAndGet();
        }
    };
    // run the source asynchronously
    Thread runner = new Thread(() -> {
        try {
            arrowSourceFunction.run(sourceContext);
        } catch (Throwable t) {
            // Record only unexpected failures (null-safe comparison).
            if (!"Fail the arrow source".equals(t.getMessage())) {
                error[0] = t;
            }
        }
    });
    runner.start();
    if (!latch.isTriggered()) {
        latch.await();
    }
    OperatorSubtaskState snapshot;
    synchronized (sourceContext.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0, 0);
    }
    runner.join();
    testHarness.close();
    final ArrowSourceFunction arrowSourceFunction2 = createTestArrowSourceFunction(testData.f0, testData.f1);
    AbstractStreamOperatorTestHarness<RowData> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(new StreamSource<>(arrowSourceFunction2), 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();
    // run the source asynchronously
    Thread runner2 = new Thread(() -> {
        try {
            arrowSourceFunction2.run(new DummySourceContext<RowData>() {

                @Override
                public void collect(RowData element) {
                    results.add(typeSerializer.copy(element));
                    if (numOfEmittedElements.incrementAndGet() == testData.f0.size()) {
                        latch.trigger();
                    }
                }
            });
        } catch (Throwable t) {
            error[0] = t;
        }
    });
    runner2.start();
    if (!latch.isTriggered()) {
        latch.await();
    }
    runner2.join();
    Assert.assertNull(error[0]);
    Assert.assertEquals(testData.f0.size(), numOfEmittedElements.get());
    checkElementsEquals(results, testData.f0);
}
Also used: MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) ArrayList(java.util.ArrayList) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RowData(org.apache.flink.table.data.RowData) List(java.util.List) Test(org.junit.Test)
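
DummySourceContext is a small test double that this snippet references but does not define. A minimal sketch of what such a context could look like, assuming it simply adapts SourceFunction.SourceContext so that subclasses only override collect (the actual helper in the Flink test sources may differ):

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.watermark.Watermark;

abstract class DummySourceContext<T> implements SourceFunction.SourceContext<T> {

    private final Object checkpointLock = new Object();

    // Subclasses override collect(T) to observe the emitted elements.

    @Override
    public void collectWithTimestamp(T element, long timestamp) {
        // Event timestamps are irrelevant for this test.
        collect(element);
    }

    @Override
    public void emitWatermark(Watermark mark) {
        // Ignored.
    }

    @Override
    public void markAsTemporarilyIdle() {
        // Ignored.
    }

    @Override
    public Object getCheckpointLock() {
        return checkpointLock;
    }

    @Override
    public void close() {
    }
}

The checkpoint lock is the important part: testRestore synchronizes on it before taking the snapshot, so the snapshot cannot interleave with an in-flight collect call.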

Example 73 with RowData

Use of org.apache.flink.table.data.RowData in project flink by apache.

From the class PythonStreamGroupWindowAggregateOperatorTest, method testGroupWindowAggregateFunction:

@Test
public void testGroupWindowAggregateFunction() throws Exception {
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = getTestHarness(new Configuration());
    long initialTime = 0L;
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    testHarness.open();
    testHarness.processElement(newRecord(true, initialTime + 1, "c1", "c2", 0L, 0L));
    testHarness.processElement(newRecord(true, initialTime + 2, "c1", "c4", 1L, 6000L));
    testHarness.processElement(newRecord(true, initialTime + 3, "c1", "c6", 2L, 10000L));
    testHarness.processElement(newRecord(true, initialTime + 4, "c2", "c8", 3L, 0L));
    testHarness.processElement(newRecord(true, initialTime + 5, "c3", "c8", 3L, 0L));
    testHarness.processElement(newRecord(false, initialTime + 6, "c3", "c8", 3L, 0L));
    testHarness.processWatermark(Long.MAX_VALUE);
    testHarness.close();
    expectedOutput.add(newWindowRecord(-5000L, 5000L, "c1", 0L));
    expectedOutput.add(newStateCleanupRecord(-5000L, 5000L, "c1"));
    expectedOutput.add(newStateCleanupRecord(-5000L, 5000L, "c3"));
    expectedOutput.add(newWindowRecord(-5000L, 5000L, "c2", 3L));
    expectedOutput.add(newStateCleanupRecord(-5000L, 5000L, "c2"));
    expectedOutput.add(newWindowRecord(0L, 10000L, "c1", 0L));
    expectedOutput.add(newStateCleanupRecord(0L, 10000L, "c1"));
    expectedOutput.add(newWindowRecord(0L, 10000L, "c2", 3L));
    expectedOutput.add(newStateCleanupRecord(0L, 10000L, "c2"));
    expectedOutput.add(newStateCleanupRecord(0L, 10000L, "c3"));
    expectedOutput.add(newWindowRecord(5000L, 15000L, "c1", 1L));
    expectedOutput.add(newStateCleanupRecord(5000L, 15000L, "c1"));
    expectedOutput.add(newWindowRecord(10000L, 20000L, "c1", 2L));
    expectedOutput.add(newStateCleanupRecord(10000L, 20000L, "c1"));
    expectedOutput.add(new Watermark(Long.MAX_VALUE));
    assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
}
Also used: RowData(org.apache.flink.table.data.RowData) Configuration(org.apache.flink.configuration.Configuration) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)
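
The newRecord, newWindowRecord, and newStateCleanupRecord helpers belong to the test class and are not shown here. As a hypothetical reconstruction of newRecord only: the leading boolean plausibly switches between an accumulate and a retract message, and the field order and types below are assumptions chosen to illustrate the shape of the input rows:

import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.TimestampData;
import org.apache.flink.types.RowKind;

// Hypothetical: wraps the payload in a GenericRowData and tags it with
// RowKind.INSERT or RowKind.DELETE based on the leading boolean.
private StreamRecord<RowData> newRecord(
        boolean insert, long timestamp, String f0, String f1, long agg, long rowtime) {
    GenericRowData row = GenericRowData.of(
            StringData.fromString(f0),
            StringData.fromString(f1),
            agg,
            TimestampData.fromEpochMillis(rowtime));
    row.setRowKind(insert ? RowKind.INSERT : RowKind.DELETE);
    return new StreamRecord<>(row, timestamp);
}

The expected output implies 10-second windows sliding by 5 seconds; the retract record at initialTime + 6 cancels the only "c3" contribution, which is why "c3" appears solely in state-cleanup records.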

Example 74 with RowData

Use of org.apache.flink.table.data.RowData in project flink by apache.

From the class AbstractBatchArrowPythonAggregateFunctionOperatorTest, method getTestHarness:

public OneInputStreamOperatorTestHarness<RowData, RowData> getTestHarness(Configuration config) throws Exception {
    RowType inputType = getInputType();
    RowType outputType = getOutputType();
    AbstractArrowPythonAggregateFunctionOperator operator = getTestOperator(config, new PythonFunctionInfo[] { new PythonFunctionInfo(PythonScalarFunctionOperatorTestBase.DummyPythonFunction.INSTANCE, new Integer[] { 0 }) }, inputType, outputType, new int[] { 0 }, new int[] { 2 });
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = new OneInputStreamOperatorTestHarness<>(operator);
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.PYTHON, 0.5);
    testHarness.setup(new RowDataSerializer(outputType));
    return testHarness;
}
Also used: PythonFunctionInfo(org.apache.flink.table.functions.python.PythonFunctionInfo) RowData(org.apache.flink.table.data.RowData) AbstractArrowPythonAggregateFunctionOperator(org.apache.flink.table.runtime.operators.python.aggregate.arrow.AbstractArrowPythonAggregateFunctionOperator) RowType(org.apache.flink.table.types.logical.RowType) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)
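
A minimal usage sketch for this harness, to be read inside a test method declared throws Exception; the input row below is a placeholder, since the concrete schema comes from the subclass's getInputType:

OneInputStreamOperatorTestHarness<RowData, RowData> harness = getTestHarness(new Configuration());
harness.open();
// Feed one element; the exact field layout depends on getInputType().
harness.processElement(new StreamRecord<>(GenericRowData.of(1, StringData.fromString("c1"), 2L), 0L));
harness.close();
// Emitted records and watermarks accumulate in harness.getOutput().

The 0.5 managed-memory fraction configured above stands in for the slice of managed memory the Python worker would be granted in a real job.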

Example 75 with RowData

Use of org.apache.flink.table.data.RowData in project flink by apache.

From the class AbstractStreamArrowPythonAggregateFunctionOperatorTest, method getTestHarness:

public OneInputStreamOperatorTestHarness<RowData, RowData> getTestHarness(Configuration config) throws Exception {
    RowType inputType = getInputType();
    RowType outputType = getOutputType();
    AbstractArrowPythonAggregateFunctionOperator operator = getTestOperator(config, new PythonFunctionInfo[] { new PythonFunctionInfo(PythonScalarFunctionOperatorTestBase.DummyPythonFunction.INSTANCE, new Integer[] { 0 }) }, inputType, outputType, new int[] { 0 }, new int[] { 2 });
    int[] grouping = new int[] { 0 };
    RowDataKeySelector keySelector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(getInputType()));
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(operator, keySelector, keySelector.getProducedType());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.PYTHON, 0.5);
    testHarness.setup(new RowDataSerializer(outputType));
    return testHarness;
}
Also used: PythonFunctionInfo(org.apache.flink.table.functions.python.PythonFunctionInfo) RowData(org.apache.flink.table.data.RowData) AbstractArrowPythonAggregateFunctionOperator(org.apache.flink.table.runtime.operators.python.aggregate.arrow.AbstractArrowPythonAggregateFunctionOperator) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) RowType(org.apache.flink.table.types.logical.RowType) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)
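
The only difference from the batch variant above is the keyed harness. A small sketch of what the key selector yields, reusing the same two-argument getRowDataSelector call as the test and read inside a method declared throws Exception (the row contents are illustrative, assuming an INT grouping column):

// The selector projects grouping column 0 out of each row; rows that share
// their first field therefore land in the same keyed state.
RowDataKeySelector keySelector =
        KeySelectorUtil.getRowDataSelector(new int[] { 0 }, InternalTypeInfo.of(getInputType()));
RowData keyA = keySelector.getKey(GenericRowData.of(1, StringData.fromString("x"), 2L));
RowData keyB = keySelector.getKey(GenericRowData.of(1, StringData.fromString("y"), 3L));
// keyA.equals(keyB) holds: both rows carry the grouping value 1.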

Aggregations

RowData (org.apache.flink.table.data.RowData): 602
Test (org.junit.Test): 201
GenericRowData (org.apache.flink.table.data.GenericRowData): 178
ArrayList (java.util.ArrayList): 109
RowType (org.apache.flink.table.types.logical.RowType): 105
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 90
Watermark (org.apache.flink.streaming.api.watermark.Watermark): 84
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 72
Transformation (org.apache.flink.api.dag.Transformation): 70
Configuration (org.apache.flink.configuration.Configuration): 68
BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 67
List (java.util.List): 65
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 54
DataType (org.apache.flink.table.types.DataType): 52
Map (java.util.Map): 42
LogicalType (org.apache.flink.table.types.logical.LogicalType): 41
TableException (org.apache.flink.table.api.TableException): 34
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 33
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector): 32
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 31