use of org.apache.inlong.sort.flink.SerializedRecord in project incubator-inlong by apache.
the class MultiTopicPulsarSourceFunctionTest method testPulsarSourceFunction.
/**
 * Runs the source on a background thread, collects five records through the
 * {@code drain} helper and checks that exactly five were returned.
 *
 * <p>The source is cancelled and closed in a {@code finally} block so a failed
 * drain or assertion does not leave the source and its runner thread behind.
 */
@Test
public void testPulsarSourceFunction() throws Exception {
    MultiTopicPulsarSourceFunction source = new MultiTopicPulsarSourceFunction(testMetaManagerUtil.getConfig());
    TestSourceContext<SerializedRecord> sourceContext = new TestSourceContext<>();
    source.setRuntimeContext(new MockStreamingRuntimeContext(false, 1, 0));
    source.initializeState(new MockFunctionInitializationContext(false, new MockOperatorStateStore(null, null)));
    source.open(new Configuration());
    final CheckedThread runThread = new CheckedThread() {
        @Override
        public void go() throws Exception {
            source.run(sourceContext);
        }
    };
    runThread.start();
    try {
        List<SerializedRecord> records = drain(sourceContext, 5);
        assertEquals(5, records.size());
    } finally {
        // Always tear down, even when the drain or the assertion above fails.
        source.cancel();
        source.close();
        // NOTE(review): Thread.stop() is deprecated; if cancel() reliably ends run(),
        // consider waiting for the thread (e.g. CheckedThread#sync) instead - confirm.
        runThread.stop();
    }
}
use of org.apache.inlong.sort.flink.SerializedRecord in project incubator-inlong by apache.
the class RecordTransformer method toRecord.
/**
 * Deserialize a {@link SerializedRecord} into a {@link Record}.
 *
 * <p>The row serializer is looked up by the record's data flow id. The shared
 * input deserializer's arrays are released whether or not deserialization succeeds.
 *
 * @param serializedRecord serialized record to be deserialized
 * @return record carrying the same data flow id and timestamp plus the deserialized row
 * @throws Exception if deserialization fails or, when JVM assertions are enabled,
 *         bytes remain in the buffer after the row was read; the original failure
 *         is attached as the cause
 */
public Record toRecord(SerializedRecord serializedRecord) throws Exception {
    final long dataFlowId = serializedRecord.getDataFlowId();
    dataInputDeserializer.setBuffer(serializedRecord.getData());
    RowSerializer rowSerializer = getRowSerializer(dataFlowId);
    final Row row;
    try {
        row = rowSerializer.deserialize(dataInputDeserializer);
        // Only checked when assertions are enabled (-ea): the whole buffer must be consumed.
        assert dataInputDeserializer.available() == 0;
    } catch (Exception | AssertionError e) {
        // Keep the root cause so a schema mismatch can actually be diagnosed.
        throw new Exception("Schema not match for data flow: " + dataFlowId, e);
    } finally {
        dataInputDeserializer.releaseArrays();
    }
    return new Record(dataFlowId, serializedRecord.getTimestampMillis(), row);
}
use of org.apache.inlong.sort.flink.SerializedRecord in project incubator-inlong by apache.
the class RecordTransformer method toSerializedRecord.
/**
 * Serialize Record.
 *
 * <p>The output serializer is created lazily on first use and cleared after
 * every call, whether or not serialization succeeds.
 *
 * @param record record to be serialized
 * @return serialized record carrying the same data flow id and timestamp
 * @throws Exception if the row cannot be serialized with the serializer of its
 *         data flow; the original failure is attached as the cause
 */
public SerializedRecord toSerializedRecord(Record record) throws Exception {
    if (dataOutputSerializer == null) {
        dataOutputSerializer = new DataOutputSerializer(serializationInitialBufferSize);
    }
    final long dataFlowId = record.getDataflowId();
    RowSerializer rowSerializer = getRowSerializer(dataFlowId);
    Record newRecord = matchRecordAndSerializerField(record, rowSerializer);
    final SerializedRecord serializedRecord;
    try {
        rowSerializer.serialize(newRecord.getRow(), dataOutputSerializer);
        // Copy the buffer out before it is cleared in the finally block.
        serializedRecord = new SerializedRecord(dataFlowId, record.getTimestampMillis(), dataOutputSerializer.getCopyOfBuffer());
    } catch (Exception e) {
        // Keep the root cause so a schema mismatch can actually be diagnosed.
        throw new Exception("Schema not match for data flow: " + dataFlowId, e);
    } finally {
        dataOutputSerializer.clear();
    }
    return serializedRecord;
}
use of org.apache.inlong.sort.flink.SerializedRecord in project incubator-inlong by apache.
the class DeserializationSchema method processElement.
/**
 * Deserializes one incoming record, maps every resulting record to its sink
 * layout, re-serializes it and forwards it to {@code collector}.
 *
 * <p>Metrics are written to the {@code METRIC_DATA_OUTPUT_TAG} side output when
 * output metrics are enabled. Any exception raised while processing is logged and
 * the record is abandoned rather than rethrown.
 *
 * @param serializedRecord incoming serialized record
 * @param context          context used for the metric side output
 * @param collector        receives the re-serialized sink records
 */
@Override
public void processElement(SerializedRecord serializedRecord, Context context, Collector<SerializedRecord> collector) throws Exception {
    try {
        // Sources cannot have side outputs, so the source-side success metric is emitted here.
        // For a tube source the package-number metric is not emitted.
        if (enableOutputMetrics && !config.getString(Constants.SOURCE_TYPE).equals(Constants.SOURCE_TYPE_TUBE)) {
            context.output(METRIC_DATA_OUTPUT_TAG,
                    new MetricData(MetricSource.SOURCE, MetricType.SUCCESSFUL, serializedRecord.getTimestampMillis(), serializedRecord.getDataFlowId(), "", 1L));
        }

        // Invoked once per record produced by the deserializer below.
        final CallbackCollector<Record> transformCollector = new CallbackCollector<>(deserialized -> {
            final Record mapped = fieldMappingTransformer.transform(deserialized);
            if (enableOutputMetrics) {
                // TODO, outputs this metric in Sink Function
                context.output(METRIC_DATA_OUTPUT_TAG,
                        new MetricData(MetricSource.SINK, MetricType.SUCCESSFUL, mapped.getTimestampMillis(), mapped.getDataflowId(), "", 1));
            }
            final SerializedRecord outgoing = recordTransformer.toSerializedRecord(mapped);
            if (auditImp != null) {
                final Pair<String, String> ids = inLongGroupIdAndStreamIdMap.getOrDefault(serializedRecord.getDataFlowId(), Pair.of("", ""));
                auditImp.add(Constants.METRIC_AUDIT_ID_FOR_INPUT, ids.getLeft(), ids.getRight(), mapped.getTimestampMillis(), 1, outgoing.getData().length);
            }
            collector.collect(outgoing);
        });

        // Both deserializers run under the same schema lock; only the target differs.
        synchronized (schemaLock) {
            if (serializedRecord instanceof InLongMsgMixedSerializedRecord) {
                multiTenancyInLongMsgMixedDeserializer.deserialize((InLongMsgMixedSerializedRecord) serializedRecord, transformCollector);
            } else {
                multiTenancyDeserializer.deserialize(serializedRecord, transformCollector);
            }
        }
    } catch (Exception e) {
        if (enableOutputMetrics && !config.getString(Constants.SOURCE_TYPE).equals(Constants.SOURCE_TYPE_TUBE)) {
            final String message = e.getMessage();
            // Fall back to a fixed text when the exception carries no message.
            final String reason = (message == null || message.isEmpty()) ? "Exception caught" : message;
            context.output(METRIC_DATA_OUTPUT_TAG,
                    new MetricData(MetricSource.DESERIALIZATION, MetricType.ABANDONED, serializedRecord.getTimestampMillis(), serializedRecord.getDataFlowId(), reason, 1));
        }
        LOG.warn("Abandon data", e);
    }
}
use of org.apache.inlong.sort.flink.SerializedRecord in project incubator-inlong by apache.
the class MultiTenancyDeserializer method updateDataFlow.
/**
 * Builds a deserializer from the source fields and deserialization info of the
 * given data flow and stores it in {@code deserializers} under the data flow id,
 * replacing any previous entry for that id.
 *
 * @param dataFlowInfo data flow whose deserializer is (re)generated
 */
@Override
public void updateDataFlow(DataFlowInfo dataFlowInfo) {
    final DeserializationInfo info = dataFlowInfo.getSourceInfo().getDeserializationInfo();
    deserializers.put(
            dataFlowInfo.getId(),
            generateDeserializer(dataFlowInfo.getSourceInfo().getFields(), info));
}
Aggregations