Search in sources :

Example 1 with Record

use of org.apache.inlong.sort.flink.Record in project incubator-inlong by apache.

the class RecordTransformerTest method testRecordMatchSerializer.

/**
 * A record carrying a two-field row, checked against the serializer registered for a data flow
 * whose sink declares two fields, must come back as the very same instance.
 */
@Test
public void testRecordMatchSerializer() throws Exception {
    final long dataFlowId = 1L;

    // Register a data flow whose sink declares a long field followed by a string field.
    final RecordTransformer recordTransformer = new RecordTransformer(1024);
    final FieldInfo[] sinkFields = {
        new FieldInfo("field1", new LongFormatInfo()),
        new FieldInfo("field2", new StringFormatInfo())
    };
    recordTransformer.addDataFlow(
            new DataFlowInfo(dataFlowId, new EmptySourceInfo(), new TestingSinkInfo(sinkFields)));
    final RowSerializer serializer = recordTransformer.getRowSerializers().get(dataFlowId);

    // Build a record whose row has exactly the two values the sink declares.
    final Row twoFieldRow = new Row(2);
    twoFieldRow.setField(0, 1024L);
    twoFieldRow.setField(1, "9527");
    final Record input = new Record(dataFlowId, System.currentTimeMillis(), twoFieldRow);

    assertSame(input, recordTransformer.matchRecordAndSerializerField(input, serializer));
}
Also used : RowSerializer(org.apache.flink.api.java.typeutils.runtime.RowSerializer) SerializedRecord(org.apache.inlong.sort.flink.SerializedRecord) Record(org.apache.inlong.sort.flink.Record) LongFormatInfo(org.apache.inlong.sort.formats.common.LongFormatInfo) TestingSinkInfo(org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo) Row(org.apache.flink.types.Row) EmptySourceInfo(org.apache.inlong.sort.util.TestingUtils.EmptySourceInfo) StringFormatInfo(org.apache.inlong.sort.formats.common.StringFormatInfo) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo) DataFlowInfo(org.apache.inlong.sort.protocol.DataFlowInfo) Test(org.junit.Test)

Example 2 with Record

use of org.apache.inlong.sort.flink.Record in project incubator-inlong by apache.

the class RecordTransformerTest method testRecordNotMatchSerializer.

/**
 * Records whose row arity differs from the two fields declared by the sink (one field too few,
 * one field too many) must both come out of {@code matchRecordAndSerializerField} with a row of
 * arity 2.
 */
@Test
public void testRecordNotMatchSerializer() throws Exception {
    final long dataFlowId = 1L;

    // Register a data flow whose sink declares a long field followed by a string field.
    final RecordTransformer recordTransformer = new RecordTransformer(1024);
    final FieldInfo[] sinkFields = {
        new FieldInfo("field1", new LongFormatInfo()),
        new FieldInfo("field2", new StringFormatInfo())
    };
    recordTransformer.addDataFlow(
            new DataFlowInfo(dataFlowId, new EmptySourceInfo(), new TestingSinkInfo(sinkFields)));
    final Map<Long, RowSerializer> serializersByFlow = recordTransformer.getRowSerializers();

    // Case 1: the row has fewer fields than the sink declares.
    final Row shortRow = new Row(1);
    shortRow.setField(0, 1024L);
    final Record shortRecord = new Record(dataFlowId, System.currentTimeMillis(), shortRow);
    assertEquals(
            2,
            recordTransformer
                    .matchRecordAndSerializerField(shortRecord, serializersByFlow.get(dataFlowId))
                    .getRow()
                    .getArity());

    // Case 2: the row has more fields than the sink declares.
    final Row longRow = new Row(3);
    longRow.setField(0, 1024L);
    longRow.setField(1, "9527");
    longRow.setField(2, 2048);
    final Record longRecord = new Record(dataFlowId, System.currentTimeMillis(), longRow);
    assertEquals(
            2,
            recordTransformer
                    .matchRecordAndSerializerField(longRecord, serializersByFlow.get(dataFlowId))
                    .getRow()
                    .getArity());
}
Also used : RowSerializer(org.apache.flink.api.java.typeutils.runtime.RowSerializer) SerializedRecord(org.apache.inlong.sort.flink.SerializedRecord) Record(org.apache.inlong.sort.flink.Record) LongFormatInfo(org.apache.inlong.sort.formats.common.LongFormatInfo) TestingSinkInfo(org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo) Row(org.apache.flink.types.Row) EmptySourceInfo(org.apache.inlong.sort.util.TestingUtils.EmptySourceInfo) StringFormatInfo(org.apache.inlong.sort.formats.common.StringFormatInfo) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo) DataFlowInfo(org.apache.inlong.sort.protocol.DataFlowInfo) Test(org.junit.Test)

Example 3 with Record

use of org.apache.inlong.sort.flink.Record in project incubator-inlong by apache.

the class RecordTransformerTest method testSerializerNotMatchRecord.

/**
 * Keeps one two-field record fixed and varies the sink definition registered for its data flow:
 * first a single-field sink (expected arity 1), then a three-field sink registered under the same
 * data flow id (expected arity 3).
 */
@Test
public void testSerializerNotMatchRecord() throws Exception {
    final long dataFlowId = 1L;

    // The record under test always carries two values.
    final Row twoFieldRow = new Row(2);
    twoFieldRow.setField(0, 1024L);
    twoFieldRow.setField(1, "9527");
    final Record input = new Record(dataFlowId, System.currentTimeMillis(), twoFieldRow);

    final RecordTransformer recordTransformer = new RecordTransformer(1024);

    // Step 1: the sink declares a single field.
    final FieldInfo[] narrowSinkFields = {new FieldInfo("field1", new LongFormatInfo())};
    recordTransformer.addDataFlow(
            new DataFlowInfo(dataFlowId, new EmptySourceInfo(), new TestingSinkInfo(narrowSinkFields)));
    final Map<Long, RowSerializer> narrowSerializers = recordTransformer.getRowSerializers();
    assertEquals(
            1,
            recordTransformer
                    .matchRecordAndSerializerField(input, narrowSerializers.get(dataFlowId))
                    .getRow()
                    .getArity());

    // Step 2: the same data flow id is registered again, now with three sink fields.
    final FieldInfo[] wideSinkFields = {
        new FieldInfo("field1", new LongFormatInfo()),
        new FieldInfo("field2", new LongFormatInfo()),
        new FieldInfo("field3", new LongFormatInfo())
    };
    recordTransformer.addDataFlow(
            new DataFlowInfo(dataFlowId, new EmptySourceInfo(), new TestingSinkInfo(wideSinkFields)));
    final Map<Long, RowSerializer> wideSerializers = recordTransformer.getRowSerializers();
    assertEquals(
            3,
            recordTransformer
                    .matchRecordAndSerializerField(input, wideSerializers.get(dataFlowId))
                    .getRow()
                    .getArity());
}
Also used : RowSerializer(org.apache.flink.api.java.typeutils.runtime.RowSerializer) SerializedRecord(org.apache.inlong.sort.flink.SerializedRecord) Record(org.apache.inlong.sort.flink.Record) Row(org.apache.flink.types.Row) LongFormatInfo(org.apache.inlong.sort.formats.common.LongFormatInfo) TestingSinkInfo(org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo) EmptySourceInfo(org.apache.inlong.sort.util.TestingUtils.EmptySourceInfo) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo) DataFlowInfo(org.apache.inlong.sort.protocol.DataFlowInfo) Test(org.junit.Test)

Example 4 with Record

use of org.apache.inlong.sort.flink.Record in project incubator-inlong by apache.

the class FieldMappingTransformer method transform.

/**
 * Builds a new record whose row is laid out according to the sink field infos registered for the
 * source record's data flow.
 *
 * <p>Sink fields are visited in order. A {@code BuiltInFieldInfo} gets its value from
 * {@code transformBuiltInField}; any other field consumes the next unread source field, starting
 * at index {@code SOURCE_FIELD_SKIP_STEP}, as long as the source row still has one. Whenever the
 * resulting value is {@code null}, {@code getDefaultValue} supplies the value instead.
 *
 * @param sourceRecord the record to map; its data flow id selects the sink field infos
 * @return a new record with the same data flow id and timestamp and the mapped row
 * @throws Exception if no sink field infos are registered for the record's data flow
 */
public Record transform(Record sourceRecord) throws Exception {
    final FieldInfo[] targetFields = sinkFieldInfoMap.get(sourceRecord.getDataflowId());
    if (targetFields == null) {
        throw new Exception("No field info found for data flow:" + sourceRecord.getDataflowId());
    }

    final Row input = sourceRecord.getRow();
    final Row output = new Row(targetFields.length);

    // Cursor into the source row; only advanced when a non-built-in sink field consumes a value.
    int sourceCursor = SOURCE_FIELD_SKIP_STEP;
    int sinkPosition = 0;
    for (FieldInfo targetField : targetFields) {
        Object value = null;
        if (targetField instanceof BuiltInFieldInfo) {
            value = transformBuiltInField(
                    (BuiltInFieldInfo) targetField, sourceRecord.getTimestampMillis());
        } else if (sourceCursor < input.getArity()) {
            value = input.getField(sourceCursor);
            sourceCursor++;
        }

        // Covers both an exhausted source row and a null value produced by either branch above.
        if (value == null) {
            value = getDefaultValue(sourceRecord.getDataflowId(), targetField.getFormatInfo());
        }

        output.setField(sinkPosition, value);
        sinkPosition++;
    }

    return new Record(sourceRecord.getDataflowId(), sourceRecord.getTimestampMillis(), output);
}
Also used : BuiltInFieldInfo(org.apache.inlong.sort.protocol.BuiltInFieldInfo) Record(org.apache.inlong.sort.flink.Record) Row(org.apache.flink.types.Row) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo) BuiltInFieldInfo(org.apache.inlong.sort.protocol.BuiltInFieldInfo)

Example 5 with Record

use of org.apache.inlong.sort.flink.Record in project incubator-inlong by apache.

the class RecordTransformer method toRecord.

/**
 * Deserializes the payload of a {@link SerializedRecord} into a {@link Row} using the
 * {@link RowSerializer} looked up for the record's data flow, and wraps the row in a
 * {@link Record} carrying the same data flow id and timestamp.
 *
 * @param serializedRecord the serialized record whose data is to be deserialized
 * @return the deserialized record
 * @throws Exception if deserialization fails, or (only when JVM assertions are enabled) if bytes
 *     remain unread afterwards; the original failure is attached as the cause. Exceptions raised
 *     by the serializer lookup itself propagate unchanged.
 */
public Record toRecord(SerializedRecord serializedRecord) throws Exception {
    final long dataFlowId = serializedRecord.getDataFlowId();
    dataInputDeserializer.setBuffer(serializedRecord.getData());
    RowSerializer rowSerializer = getRowSerializer(dataFlowId);
    final Row row;
    try {
        row = rowSerializer.deserialize(dataInputDeserializer);
        // NOTE(review): this leftover-bytes check is an `assert`, so it only runs with -ea.
        assert dataInputDeserializer.available() == 0;
    } catch (Exception | AssertionError e) {
        // Keep the underlying failure as the cause so the real reason is not lost.
        throw new Exception("Schema not match for data flow: " + dataFlowId, e);
    } finally {
        // Always drop the buffer reference, whether deserialization succeeded or not.
        dataInputDeserializer.releaseArrays();
    }
    return new Record(dataFlowId, serializedRecord.getTimestampMillis(), row);
}
Also used : RowSerializer(org.apache.flink.api.java.typeutils.runtime.RowSerializer) Record(org.apache.inlong.sort.flink.Record) SerializedRecord(org.apache.inlong.sort.flink.SerializedRecord) Row(org.apache.flink.types.Row)

Aggregations

Record (org.apache.inlong.sort.flink.Record)15 SerializedRecord (org.apache.inlong.sort.flink.SerializedRecord)11 Row (org.apache.flink.types.Row)10 FieldInfo (org.apache.inlong.sort.protocol.FieldInfo)9 LongFormatInfo (org.apache.inlong.sort.formats.common.LongFormatInfo)8 DataFlowInfo (org.apache.inlong.sort.protocol.DataFlowInfo)8 Test (org.junit.Test)8 TestingSinkInfo (org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo)7 StringFormatInfo (org.apache.inlong.sort.formats.common.StringFormatInfo)6 RowSerializer (org.apache.flink.api.java.typeutils.runtime.RowSerializer)5 EmptySourceInfo (org.apache.inlong.sort.util.TestingUtils.EmptySourceInfo)5 BuiltInFieldInfo (org.apache.inlong.sort.protocol.BuiltInFieldInfo)3 InLongMsgCsvDeserializationInfo (org.apache.inlong.sort.protocol.deserialization.InLongMsgCsvDeserializationInfo)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 Timestamp (java.sql.Timestamp)2 Configuration (org.apache.inlong.sort.configuration.Configuration)2 InLongMsgMixedSerializedRecord (org.apache.inlong.sort.flink.InLongMsgMixedSerializedRecord)2 SinkInfo (org.apache.inlong.sort.protocol.sink.SinkInfo)2 SourceInfo (org.apache.inlong.sort.protocol.source.SourceInfo)2 TestingSourceInfo (org.apache.inlong.sort.util.TestingUtils.TestingSourceInfo)2