Use of org.apache.inlong.sort.flink.metrics.MetricData in project incubator-inlong by Apache.
The class Entrance, method main.
/**
 * Entrance of a Flink job.
 */
public static void main(String[] args) throws Exception {
    final ParameterTool parameter = ParameterTool.fromArgs(args);
    final Configuration config = parameter.getConfiguration();
    final String clusterId = checkNotNull(config.getString(Constants.CLUSTER_ID));
    final String sourceType = checkNotNull(config.getString(Constants.SOURCE_TYPE));
    final String sinkType = checkNotNull(config.getString(Constants.SINK_TYPE));
    final int sourceParallelism = config.getInteger(Constants.SOURCE_PARALLELISM);
    final int deserializationParallelism = config.getInteger(Constants.DESERIALIZATION_PARALLELISM);
    final int sinkParallelism = config.getInteger(Constants.SINK_PARALLELISM);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // Checkpointing: periodic checkpoints with a minimum pause, a timeout,
    // and at most one checkpoint in flight at a time
    env.enableCheckpointing(config.getInteger(Constants.CHECKPOINT_INTERVAL_MS));
    env.getCheckpointConfig()
            .setMinPauseBetweenCheckpoints(config.getInteger(Constants.MIN_PAUSE_BETWEEN_CHECKPOINTS_MS));
    env.getCheckpointConfig().setCheckpointTimeout(config.getInteger(Constants.CHECKPOINT_TIMEOUT_MS));
    env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);

    // Data stream
    DataStream<SerializedRecord> sourceStream;
    if (sourceType.equals(Constants.SOURCE_TYPE_TUBE)) {
        sourceStream = env.addSource(new MultiTopicTubeSourceFunction(config))
                .setParallelism(sourceParallelism)
                .uid(Constants.SOURCE_UID)
                .name("TubeMQ source")
                .rebalance();
    } else if (sourceType.equals(Constants.SOURCE_TYPE_PULSAR)) {
        sourceStream = env.addSource(new MultiTopicPulsarSourceFunction(config))
                .setParallelism(sourceParallelism)
                .uid(Constants.SOURCE_UID)
                .name("Pulsar source")
                .rebalance();
    } else {
        throw new IllegalArgumentException("Unsupported source type " + sourceType);
    }

    final SingleOutputStreamOperator<SerializedRecord> deserializationStream = sourceStream
            .process(new DeserializationSchema(config))
            .setParallelism(deserializationParallelism)
            .uid(Constants.DESERIALIZATION_SCHEMA_UID)
            .name("Deserialization");

    if (sinkType.equals(Constants.SINK_TYPE_CLICKHOUSE)) {
        deserializationStream.process(new ClickHouseMultiSinkFunction(config))
                .setParallelism(sinkParallelism)
                .uid(Constants.SINK_UID)
                .name("Clickhouse Sink");
    } else if (sinkType.equals(Constants.SINK_TYPE_DORIS)) {
        deserializationStream.process(new DorisMultiSinkFunction(config))
                .uid(Constants.SINK_UID)
                .name("Doris Sink")
                .setParallelism(sinkParallelism);
    } else if (sinkType.equals(Constants.SINK_TYPE_HIVE)) {
        // The Hive sink is a two-stage pipeline: a writer followed by a committer
        deserializationStream.process(new HiveMultiTenantWriter(config))
                .name("Hive Sink")
                .uid(Constants.SINK_UID)
                .setParallelism(sinkParallelism)
                .process(new HiveMultiTenantCommitter(config))
                .name("hive Committer")
                .setParallelism(config.getInteger(Constants.COMMITTER_PARALLELISM));
    } else {
        throw new IllegalArgumentException("Unsupported sink type " + sinkType);
    }

    // Metric stream
    final boolean enableOutputMetrics = config.getBoolean(Constants.METRICS_ENABLE_OUTPUT);
    if (enableOutputMetrics) {
        final int metricsAggregatorParallelism = config.getInteger(Constants.METRICS_AGGREGATOR_PARALLELISM);
        final int metricsTimestampWatermarkAssignerParallelism =
                config.getInteger(Constants.METRICS_TIMESTAMP_WATERMARK_ASSIGNER_PARALLELISM);
        final int metricsMySQLSinkParallelism = config.getInteger(Constants.METRICS_SINK_PARALLELISM);
        // Anonymous subclass so that Flink can infer the side output's generic type
        final OutputTag<MetricData> outputTag = new OutputTag<MetricData>(Constants.METRIC_DATA_OUTPUT_TAG_ID) {
        };
        final DataStream<MetricData> metricsDataStream = deserializationStream.getSideOutput(outputTag)
                .assignTimestampsAndWatermarks(new MetricsAssignerWithPeriodicWatermarks())
                .setParallelism(metricsTimestampWatermarkAssignerParallelism)
                .uid(Constants.METRICS_TIMESTAMP_AND_WATERMARK_ASSIGNER_UID)
                .name("Metrics timestamp/watermark assigner");
        final DataStream<MetricData> metricsAggregatorStream = metricsDataStream
                .keyBy((KeySelector<MetricData, String>) MetricData::getKey)
                .window(TumblingEventTimeWindows.of(
                        Time.minutes(config.getInteger(Constants.METRICS_AGGREGATOR_WINDOW_SIZE))))
                .allowedLateness(Time.milliseconds(Long.MAX_VALUE))
                .aggregate(new MetricsAggregateFunction(), new MetricsProcessWindowFunction())
                .setParallelism(metricsAggregatorParallelism)
                .uid(Constants.METRICS_AGGREGATOR_UID)
                .name("Metrics aggregator");
        metricsAggregatorStream.addSink(new MetricsLogSink())
                .setParallelism(metricsMySQLSinkParallelism)
                .uid(Constants.METRICS_SINK_UID)
                .name("Metrics sink");
    }

    env.execute(clusterId);
}
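The metric stream above sums per-key MetricData inside tumbling event-time windows and then lets MetricsProcessWindowFunction decorate the result with window metadata. As a rough illustration of the first half of that pair, here is a minimal sketch of a count-summing AggregateFunction; the getCount() accessor on MetricData is an assumption made for this example (mirroring the count argument of its constructor), and the real MetricsAggregateFunction in incubator-inlong may aggregate more fields.

import org.apache.flink.api.common.functions.AggregateFunction;

// A minimal sketch, NOT the actual MetricsAggregateFunction from
// incubator-inlong. Assumes a hypothetical MetricData.getCount() accessor.
public class MetricsCountAggregateFunction implements AggregateFunction<MetricData, Long, Long> {

    @Override
    public Long createAccumulator() {
        // Each key/window pair starts from a zero count
        return 0L;
    }

    @Override
    public Long add(MetricData value, Long accumulator) {
        // Fold the count carried by each MetricData element into the accumulator
        return accumulator + value.getCount();
    }

    @Override
    public Long getResult(Long accumulator) {
        return accumulator;
    }

    @Override
    public Long merge(Long a, Long b) {
        // Invoked when Flink merges window state, e.g. for mergeable windows
        return a + b;
    }
}

In the .aggregate(aggFn, windowFn) form used in main, Flink aggregates incrementally and hands only the final accumulated value per key and window to the window function, so window state stays small.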
Use of org.apache.inlong.sort.flink.metrics.MetricData in project incubator-inlong by Apache.
The class DeserializationSchema, method processElement.
@Override
public void processElement(SerializedRecord serializedRecord, Context context,
        Collector<SerializedRecord> collector) throws Exception {
    try {
        if (enableOutputMetrics && !config.getString(Constants.SOURCE_TYPE).equals(Constants.SOURCE_TYPE_TUBE)) {
            // If the source is TubeMQ, we do not output package-count metrics.
            // Since the source cannot emit side outputs, its metrics are emitted here instead.
            final MetricData metricData = new MetricData(
                    MetricSource.SOURCE,
                    MetricType.SUCCESSFUL,
                    serializedRecord.getTimestampMillis(),
                    serializedRecord.getDataFlowId(),
                    "",
                    1L);
            context.output(METRIC_DATA_OUTPUT_TAG, metricData);
        }
        final CallbackCollector<Record> transformCollector = new CallbackCollector<>(sourceRecord -> {
            final Record sinkRecord = fieldMappingTransformer.transform(sourceRecord);
            if (enableOutputMetrics) {
                // TODO: output this metric in the sink function instead
                MetricData metricData = new MetricData(
                        MetricSource.SINK,
                        MetricType.SUCCESSFUL,
                        sinkRecord.getTimestampMillis(),
                        sinkRecord.getDataflowId(),
                        "",
                        1);
                context.output(METRIC_DATA_OUTPUT_TAG, metricData);
            }
            SerializedRecord serializedSinkRecord = recordTransformer.toSerializedRecord(sinkRecord);
            if (auditImp != null) {
                Pair<String, String> groupIdAndStreamId = inLongGroupIdAndStreamIdMap
                        .getOrDefault(serializedRecord.getDataFlowId(), Pair.of("", ""));
                auditImp.add(
                        Constants.METRIC_AUDIT_ID_FOR_INPUT,
                        groupIdAndStreamId.getLeft(),
                        groupIdAndStreamId.getRight(),
                        sinkRecord.getTimestampMillis(),
                        1,
                        serializedSinkRecord.getData().length);
            }
            collector.collect(serializedSinkRecord);
        });
        if (serializedRecord instanceof InLongMsgMixedSerializedRecord) {
            final InLongMsgMixedSerializedRecord inlongmsgRecord = (InLongMsgMixedSerializedRecord) serializedRecord;
            synchronized (schemaLock) {
                multiTenancyInLongMsgMixedDeserializer.deserialize(inlongmsgRecord, transformCollector);
            }
        } else {
            synchronized (schemaLock) {
                multiTenancyDeserializer.deserialize(serializedRecord, transformCollector);
            }
        }
    } catch (Exception e) {
        if (enableOutputMetrics && !config.getString(Constants.SOURCE_TYPE).equals(Constants.SOURCE_TYPE_TUBE)) {
            MetricData metricData = new MetricData(
                    MetricSource.DESERIALIZATION,
                    MetricType.ABANDONED,
                    serializedRecord.getTimestampMillis(),
                    serializedRecord.getDataFlowId(),
                    (e.getMessage() == null || e.getMessage().isEmpty()) ? "Exception caught" : e.getMessage(),
                    1);
            context.output(METRIC_DATA_OUTPUT_TAG, metricData);
        }
        LOG.warn("Abandon data", e);
    }
}
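processElement funnels every deserialized record through a CallbackCollector, which is not shown on this page. One plausible shape for it, assuming it simply forwards each collected element to a callback that may throw, is sketched below; the actual class in incubator-inlong may differ.

import org.apache.flink.util.Collector;

// A minimal sketch of a callback-based Collector, assuming the callback may
// throw checked exceptions. NOT the actual incubator-inlong implementation.
public class CallbackCollector<T> implements Collector<T> {

    @FunctionalInterface
    public interface ThrowingConsumer<T> {
        void accept(T value) throws Exception;
    }

    private final ThrowingConsumer<T> callback;

    public CallbackCollector(ThrowingConsumer<T> callback) {
        this.callback = callback;
    }

    @Override
    public void collect(T record) {
        try {
            // Forward each element to the caller-supplied callback
            callback.accept(record);
        } catch (Exception e) {
            // Collector.collect declares no checked exceptions, so wrap and rethrow
            throw new RuntimeException(e);
        }
    }

    @Override
    public void close() {
        // No resources to release in this sketch
    }
}

Wrapping the checked exception keeps collect() compatible with Flink's Collector interface, whose collect method does not declare any checked exceptions.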