
Example 1 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

From class BatchArrowPythonGroupWindowAggregateFunctionOperator, method bufferInput:

@Override
public void bufferInput(RowData input) throws Exception {
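    // groupKeyProjection typically returns a reused BinaryRowData backed by
    // shared memory segments, hence the copy() before the key is retained.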
    BinaryRowData currentKey = groupKeyProjection.apply(input).copy();
    if (isNewKey(currentKey)) {
        if (lastGroupKey != null) {
            invokeCurrentBatch();
        }
        lastGroupKey = currentKey;
        lastGroupSet = groupSetProjection.apply(input).copy();
    }
}
Also used: BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData)
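The copy() calls are the load-bearing detail here: key projections hand back a reused row. Below is a minimal sketch of the hazard they guard against, using only the public BinaryRowData/BinaryRowWriter API (the one-field layout is invented for illustration):

import org.apache.flink.table.data.binary.BinaryRowData;
import org.apache.flink.table.data.writer.BinaryRowWriter;

public class CopySketch {
    public static void main(String[] args) {
        // One reused row instance, as a projection would produce per input record.
        BinaryRowData reused = new BinaryRowData(1);
        BinaryRowWriter writer = new BinaryRowWriter(reused);

        writer.writeInt(0, 42);
        writer.complete();
        BinaryRowData held = reused.copy(); // detach from the reused backing segments

        writer.reset();                     // the next record overwrites the same segments
        writer.writeInt(0, 7);
        writer.complete();

        System.out.println(held.getInt(0));   // 42: the copy is stable
        System.out.println(reused.getInt(0)); // 7: the reused instance was overwritten
    }
}

Without the copy(), lastGroupKey would silently change as soon as the next record is projected.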

Example 2 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

From class BatchArrowPythonGroupWindowAggregateFunctionOperator, method triggerWindowProcess:

private void triggerWindowProcess() throws Exception {
    while (windowsGrouping.hasTriggerWindow()) {
        RowIterator<BinaryRowData> elementIterator = windowsGrouping.buildTriggerWindowElementsIterator();
        while (elementIterator.advanceNext()) {
            BinaryRowData winElement = elementIterator.getRow();
            arrowSerializer.write(getFunctionInput(winElement));
            currentBatchCount++;
        }
        if (currentBatchCount > 0) {
            TimeWindow currentWindow = windowsGrouping.getTriggerWindow();
            inputKeyAndWindow.add(Tuple2.of(lastGroupSet, currentWindow));
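            // finishCurrentBatch() flushes the pending Arrow batch into baos,
            // whose bytes are then handed to the Python worker in one call.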
            arrowSerializer.finishCurrentBatch();
            pythonFunctionRunner.process(baos.toByteArray());
            elementCount += currentBatchCount;
            checkInvokeFinishBundleByCount();
            currentBatchCount = 0;
            baos.reset();
            arrowSerializer.resetWriter();
        }
    }
}
Also used: BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) TimeWindow(org.apache.flink.table.runtime.operators.window.TimeWindow)
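The iterator above follows the RowIterator contract: advanceNext() moves the cursor, getRow() exposes the current row, and the returned row may be reused between calls. A toy list-backed implementation, assuming only the two-method org.apache.flink.table.runtime.util.RowIterator interface (hypothetical, for illustration):

import java.util.List;
import org.apache.flink.table.data.binary.BinaryRowData;
import org.apache.flink.table.runtime.util.RowIterator;

// Mirrors the contract of buildTriggerWindowElementsIterator(): call
// advanceNext() before each getRow(), and treat the row as possibly reused.
public class ListRowIterator implements RowIterator<BinaryRowData> {
    private final List<BinaryRowData> rows;
    private int pos = -1;

    public ListRowIterator(List<BinaryRowData> rows) {
        this.rows = rows;
    }

    @Override
    public boolean advanceNext() {
        return ++pos < rows.size();
    }

    @Override
    public BinaryRowData getRow() {
        return rows.get(pos);
    }
}

This contract is why the window loop serializes each winElement immediately rather than collecting the rows: a later advanceNext() may invalidate earlier ones.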

Example 3 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

From class BatchArrowPythonGroupAggregateFunctionOperator, method bufferInput:

@Override
public void bufferInput(RowData input) throws Exception {
    BinaryRowData currentKey = groupKeyProjection.apply(input).copy();
    if (isNewKey(currentKey)) {
        if (lastGroupKey != null) {
            invokeCurrentBatch();
        }
        lastGroupKey = currentKey;
        lastGroupSet = groupSetProjection.apply(input).copy();
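        // lastGroupSet was copied above, so it can safely wait in the queue
        // until results for this group are emitted.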
        forwardedInputQueue.add(lastGroupSet);
    }
}
Also used: BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData)

Example 4 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

From class ArrowReaderWriterTest, method getTestData:

@Override
public RowData[] getTestData() {
    RowData row1 = StreamRecordUtils.row((byte) 1, (short) 2, 3, 4L, true, 1.0f, 1.0, "hello", "hello".getBytes(), DecimalData.fromUnscaledLong(1, 10, 3), 100, 3600000, 3600000, 3600000, 3600000, TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000, 100000), new GenericArrayData(new StringData[] { StringData.fromString("hello"), StringData.fromString("中文"), null }), GenericRowData.of(1, StringData.fromString("hello"), new GenericArrayData(new StringData[] { StringData.fromString("hello") }), TimestampData.fromEpochMillis(3600000), GenericRowData.of(1, StringData.fromString("hello"))));
    BinaryRowData row2 = StreamRecordUtils.binaryrow((byte) 1, (short) 2, 3, 4L, false, 1.0f, 1.0, "中文", "中文".getBytes(), DecimalData.fromUnscaledLong(1, 10, 3), 100, 3600000, 3600000, 3600000, 3600000, Tuple2.of(TimestampData.fromEpochMillis(3600000), 0), Tuple2.of(TimestampData.fromEpochMillis(3600000), 2), Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 4), Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 8), Tuple2.of(TimestampData.fromEpochMillis(3600000), 0), Tuple2.of(TimestampData.fromEpochMillis(3600000), 2), Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 4), Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 8), Tuple2.of(new GenericArrayData(new String[] { null, null, null }), new ArrayDataSerializer(new VarCharType())), Tuple2.of(GenericRowData.of(1, null, new GenericArrayData(new StringData[] { StringData.fromString("hello") }), null, GenericRowData.of(1, StringData.fromString("hello"))), new RowDataSerializer(rowFieldType)));
    RowData row3 = StreamRecordUtils.row(null, (short) 2, 3, 4L, false, 1.0f, 1.0, "中文", "中文".getBytes(), DecimalData.fromUnscaledLong(1, 10, 3), 100, 3600000, 3600000, 3600000, 3600000, TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000), TimestampData.fromEpochMillis(3600000, 100000), TimestampData.fromEpochMillis(3600000, 100000), new GenericArrayData(new String[] { null, null, null }), GenericRowData.of(1, null, new GenericArrayData(new StringData[] { StringData.fromString("hello") }), null, null));
    BinaryRowData row4 = StreamRecordUtils.binaryrow((byte) 1, null, 3, 4L, true, 1.0f, 1.0, "hello", "hello".getBytes(), DecimalData.fromUnscaledLong(1, 10, 3), 100, 3600000, 3600000, 3600000, 3600000, Tuple2.of(TimestampData.fromEpochMillis(3600000), 0), Tuple2.of(TimestampData.fromEpochMillis(3600000), 2), Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 4), Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 8), Tuple2.of(TimestampData.fromEpochMillis(3600000), 0), Tuple2.of(TimestampData.fromEpochMillis(3600000), 2), Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 4), Tuple2.of(TimestampData.fromEpochMillis(3600000, 100000), 8), Tuple2.of(new GenericArrayData(new StringData[] { StringData.fromString("hello"), StringData.fromString("中文"), null }), new ArrayDataSerializer(new VarCharType())), Tuple2.of(GenericRowData.of(1, null, new GenericArrayData(new StringData[] { StringData.fromString("hello") }), null, null), new RowDataSerializer(rowFieldType)));
    RowData row5 = StreamRecordUtils.row(new Object[fieldTypes.size()]);
    BinaryRowData row6 = StreamRecordUtils.binaryrow(new Object[fieldTypes.size()]);
    return new RowData[] { row1, row2, row3, row4, row5, row6 };
}
Also used: GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) GenericArrayData(org.apache.flink.table.data.GenericArrayData) VarCharType(org.apache.flink.table.types.logical.VarCharType) StringData(org.apache.flink.table.data.StringData) ArrayDataSerializer(org.apache.flink.table.runtime.typeutils.ArrayDataSerializer) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)
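StreamRecordUtils.binaryrow(...) is a test helper; under the hood a BinaryRowData of this kind has to be assembled field by field through a BinaryRowWriter. A minimal sketch of that construction, with an invented three-field schema (TINYINT, nullable SMALLINT, STRING):

import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.binary.BinaryRowData;
import org.apache.flink.table.data.writer.BinaryRowWriter;

public class BinaryRowSketch {
    // Builds a TINYINT / SMALLINT / STRING row with a null in the middle,
    // the same kind of shape the test rows above exercise.
    static BinaryRowData buildRow() {
        BinaryRowData row = new BinaryRowData(3);
        BinaryRowWriter writer = new BinaryRowWriter(row);
        writer.writeByte(0, (byte) 1);
        writer.setNullAt(1); // null SMALLINT, as in row4 above
        writer.writeString(2, StringData.fromString("中文"));
        writer.complete(); // finalize the row; required before it is read or copied
        return row;
    }
}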

Example 5 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

From class UpsertKafkaDynamicTableFactoryTest, method testBufferedTableSink:

@SuppressWarnings("rawtypes")
@Test
public void testBufferedTableSink() {
    // Construct table sink using options and table sink factory.
    final DynamicTableSink actualSink = createTableSink(SINK_SCHEMA, getModifiedOptions(getFullSinkOptions(), options -> {
        options.put("sink.buffer-flush.max-rows", "100");
        options.put("sink.buffer-flush.interval", "1s");
    }));
    final DynamicTableSink expectedSink = createExpectedSink(SINK_SCHEMA.toPhysicalRowDataType(), keyEncodingFormat, valueEncodingFormat, SINK_KEY_FIELDS, SINK_VALUE_FIELDS, null, SINK_TOPIC, UPSERT_KAFKA_SINK_PROPERTIES, DeliveryGuarantee.AT_LEAST_ONCE, new SinkBufferFlushMode(100, 1000L), null);
    // Test sink format.
    final KafkaDynamicSink actualUpsertKafkaSink = (KafkaDynamicSink) actualSink;
    assertEquals(expectedSink, actualSink);
    // Test kafka producer.
    DynamicTableSink.SinkRuntimeProvider provider = actualUpsertKafkaSink.getSinkRuntimeProvider(new SinkRuntimeProviderContext(false));
    assertThat(provider, instanceOf(DataStreamSinkProvider.class));
    final DataStreamSinkProvider sinkProvider = (DataStreamSinkProvider) provider;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
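    // A one-field BinaryRowData is just a dummy element to give the stream a type.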
    sinkProvider.consumeDataStream(n -> Optional.empty(), env.fromElements(new BinaryRowData(1)));
    final StreamOperatorFactory<?> sinkOperatorFactory = env.getStreamGraph().getStreamNodes().stream().filter(n -> n.getOperatorName().contains("Writer")).findFirst().orElseThrow(() -> new RuntimeException("Expected operator with name Sink in stream graph.")).getOperatorFactory();
    assertThat(sinkOperatorFactory, instanceOf(SinkWriterOperatorFactory.class));
    org.apache.flink.api.connector.sink2.Sink sink = ((SinkWriterOperatorFactory) sinkOperatorFactory).getSink();
    assertThat(sink, instanceOf(ReducingUpsertSink.class));
}
Also used: DataType(org.apache.flink.table.types.DataType) AtomicDataType(org.apache.flink.table.types.AtomicDataType) Arrays(java.util.Arrays) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) DataStreamScanProvider(org.apache.flink.table.connector.source.DataStreamScanProvider) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf) DecodingFormat(org.apache.flink.table.connector.format.DecodingFormat) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) FactoryMocks.createTableSink(org.apache.flink.table.factories.utils.FactoryMocks.createTableSink) ConfluentRegistryAvroSerializationSchema(org.apache.flink.formats.avro.registry.confluent.ConfluentRegistryAvroSerializationSchema) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) DynamicTableSink(org.apache.flink.table.connector.sink.DynamicTableSink) FlinkMatchers.containsCause(org.apache.flink.core.testutils.FlinkMatchers.containsCause) AVRO_CONFLUENT(org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptionsUtil.AVRO_CONFLUENT) AvroRowDataSerializationSchema(org.apache.flink.formats.avro.AvroRowDataSerializationSchema) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) DataStreamSinkProvider(org.apache.flink.table.connector.sink.DataStreamSinkProvider) ValidationException(org.apache.flink.table.api.ValidationException) Optional(java.util.Optional) ScanRuntimeProviderContext(org.apache.flink.table.runtime.connector.source.ScanRuntimeProviderContext) SerializationSchema(org.apache.flink.api.common.serialization.SerializationSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) TestFormatFactory(org.apache.flink.table.factories.TestFormatFactory) DeliveryGuarantee(org.apache.flink.connector.base.DeliveryGuarantee) EncodingFormat(org.apache.flink.table.connector.format.EncodingFormat) Sink(org.apache.flink.api.connector.sink2.Sink) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) Column(org.apache.flink.table.catalog.Column) HashMap(java.util.HashMap) RowType(org.apache.flink.table.types.logical.RowType) ScanTableSource(org.apache.flink.table.connector.source.ScanTableSource) SinkV2Provider(org.apache.flink.table.connector.sink.SinkV2Provider) KafkaSink(org.apache.flink.connector.kafka.sink.KafkaSink) RowDataToAvroConverters(org.apache.flink.formats.avro.RowDataToAvroConverters) FactoryMocks.createTableSource(org.apache.flink.table.factories.utils.FactoryMocks.createTableSource) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SinkWriterOperatorFactory(org.apache.flink.streaming.runtime.operators.sink.SinkWriterOperatorFactory) ExpectedException(org.junit.rules.ExpectedException) RowData(org.apache.flink.table.data.RowData) Properties(java.util.Properties) Assert.assertTrue(org.junit.Assert.assertTrue) DataTypes(org.apache.flink.table.api.DataTypes) VarCharType(org.apache.flink.table.types.logical.VarCharType) Test(org.junit.Test) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) KafkaSourceEnumState(org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumState) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema) Consumer(java.util.function.Consumer) StartupMode(org.apache.flink.streaming.connectors.kafka.config.StartupMode) Rule(org.junit.Rule) KafkaSource(org.apache.flink.connector.kafka.source.KafkaSource) UniqueConstraint(org.apache.flink.table.catalog.UniqueConstraint) SinkRuntimeProviderContext(org.apache.flink.table.runtime.connector.sink.SinkRuntimeProviderContext) FactoryMocks(org.apache.flink.table.factories.utils.FactoryMocks) KafkaPartitionSplit(org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit) Transformation(org.apache.flink.api.dag.Transformation) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) AvroSchemaConverter(org.apache.flink.formats.avro.typeutils.AvroSchemaConverter)
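The two options the test sets map straight onto the expected SinkBufferFlushMode(100, 1000L). A small sketch of just that mapping (the option keys come from the test; the class and main method are illustrative scaffolding):

import java.util.HashMap;
import java.util.Map;

public class BufferFlushOptionsSketch {
    public static void main(String[] args) {
        // The buffering knobs exercised by the test: flush the upsert buffer
        // after 100 rows or after 1 second, whichever comes first.
        Map<String, String> options = new HashMap<>();
        options.put("sink.buffer-flush.max-rows", "100");
        options.put("sink.buffer-flush.interval", "1s"); // becomes 1000 ms in SinkBufferFlushMode
        System.out.println(options);
    }
}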

Aggregations

BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 173
Test (org.junit.Test): 81
BinaryRowWriter (org.apache.flink.table.data.writer.BinaryRowWriter): 54
RowData (org.apache.flink.table.data.RowData): 31
ArrayList (java.util.ArrayList): 30
MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 22
UniformBinaryRowGenerator (org.apache.flink.table.runtime.util.UniformBinaryRowGenerator): 21
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 16
MemorySegment (org.apache.flink.core.memory.MemorySegment): 15
MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 14
GenericRowData (org.apache.flink.table.data.GenericRowData): 13
Random (java.util.Random): 12
BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer): 12
HashMap (java.util.HashMap): 9
RowDataSerializer (org.apache.flink.table.runtime.typeutils.RowDataSerializer): 9
Map (java.util.Map): 7
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 7
StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator): 7
RandomAccessInputView (org.apache.flink.runtime.io.disk.RandomAccessInputView): 6
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord): 6