Use of org.apache.flink.util.OutputTag in project beam by apache.
The class DoFnOperatorTest, method keyedParDoSideInputCheckpointing.
@Test
public void keyedParDoSideInputCheckpointing() throws Exception {
    sideInputCheckpointing(() -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> coder =
                WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());
        TupleTag<String> outputTag = new TupleTag<>("main-output");
        KeySelector<WindowedValue<String>, ByteBuffer> keySelector =
                e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
                ImmutableMap.<Integer, PCollectionView<?>>builder()
                        .put(1, view1)
                        .put(2, view2)
                        .build();
        DoFnOperator<String, String> doFnOperator = new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(
                        outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                FlinkPipelineOptions.defaults(),
                keyCoder,
                keySelector,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());
        // we use a dummy key for the second input since it is considered to be broadcast
        return new KeyedTwoInputStreamOperatorTestHarness<>(
                doFnOperator,
                keySelector,
                null,
                new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
    });
}
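A note on why a Beam test appears on this page: the tag visible in the test body is Beam's TupleTag, while the Flink OutputTag import comes in through DoFnOperator's MultiOutputOutputManagerFactory, which (as far as the runner internals go) routes additional tagged outputs to Flink side outputs. For comparison, the bare Flink idiom for declaring a tag is a one-liner; this minimal sketch is not part of the test above:

// Declared as an anonymous subclass so that Flink can extract the element
// type (String) from the subclass signature despite Java type erasure.
OutputTag<String> flinkSideOutput = new OutputTag<String>("side-output") {};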
Use of org.apache.flink.util.OutputTag in project incubator-inlong by apache.
The class Entrance, method main.
/**
 * Entrance of a Flink job.
 */
public static void main(String[] args) throws Exception {
    final ParameterTool parameter = ParameterTool.fromArgs(args);
    final Configuration config = parameter.getConfiguration();
    final String clusterId = checkNotNull(config.getString(Constants.CLUSTER_ID));
    final String sourceType = checkNotNull(config.getString(Constants.SOURCE_TYPE));
    final String sinkType = checkNotNull(config.getString(Constants.SINK_TYPE));
    final int sourceParallelism = config.getInteger(Constants.SOURCE_PARALLELISM);
    final int deserializationParallelism = config.getInteger(Constants.DESERIALIZATION_PARALLELISM);
    final int sinkParallelism = config.getInteger(Constants.SINK_PARALLELISM);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.enableCheckpointing(config.getInteger(Constants.CHECKPOINT_INTERVAL_MS));
    env.getCheckpointConfig().setMinPauseBetweenCheckpoints(config.getInteger(Constants.MIN_PAUSE_BETWEEN_CHECKPOINTS_MS));
    env.getCheckpointConfig().setCheckpointTimeout(config.getInteger(Constants.CHECKPOINT_TIMEOUT_MS));
    env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
    // Data stream
    DataStream<SerializedRecord> sourceStream;
    if (sourceType.equals(Constants.SOURCE_TYPE_TUBE)) {
        sourceStream = env.addSource(new MultiTopicTubeSourceFunction(config))
                .setParallelism(sourceParallelism)
                .uid(Constants.SOURCE_UID)
                .name("TubeMQ source")
                .rebalance();
    } else if (sourceType.equals(Constants.SOURCE_TYPE_PULSAR)) {
        sourceStream = env.addSource(new MultiTopicPulsarSourceFunction(config))
                .setParallelism(sourceParallelism)
                .uid(Constants.SOURCE_UID)
                .name("Pulsar source")
                .rebalance();
    } else {
        throw new IllegalArgumentException("Unsupported source type " + sourceType);
    }
    final SingleOutputStreamOperator<SerializedRecord> deserializationStream = sourceStream
            .process(new DeserializationSchema(config))
            .setParallelism(deserializationParallelism)
            .uid(Constants.DESERIALIZATION_SCHEMA_UID)
            .name("Deserialization");
    if (sinkType.equals(Constants.SINK_TYPE_CLICKHOUSE)) {
        deserializationStream.process(new ClickHouseMultiSinkFunction(config))
                .setParallelism(sinkParallelism)
                .uid(Constants.SINK_UID)
                .name("Clickhouse Sink");
    } else if (sinkType.equals(SINK_TYPE_DORIS)) {
        deserializationStream.process(new DorisMultiSinkFunction(config))
                .uid(Constants.SINK_UID)
                .name("Doris Sink")
                .setParallelism(sinkParallelism);
    } else if (sinkType.equals(SINK_TYPE_HIVE)) {
        deserializationStream.process(new HiveMultiTenantWriter(config))
                .name("Hive Sink")
                .uid(Constants.SINK_UID)
                .setParallelism(sinkParallelism)
                .process(new HiveMultiTenantCommitter(config))
                .name("hive Committer")
                .setParallelism(config.getInteger(Constants.COMMITTER_PARALLELISM));
    } else {
        throw new IllegalArgumentException("Unsupported sink type " + sinkType);
    }
    // Metric stream
    final boolean enableOutputMetrics = config.getBoolean(Constants.METRICS_ENABLE_OUTPUT);
    if (enableOutputMetrics) {
        final int metricsAggregatorParallelism = config.getInteger(Constants.METRICS_AGGREGATOR_PARALLELISM);
        final int metricsTimestampWatermarkAssignerParallelism = config.getInteger(Constants.METRICS_TIMESTAMP_WATERMARK_ASSIGNER_PARALLELISM);
        final int metricsMySQLSinkParallelism = config.getInteger(Constants.METRICS_SINK_PARALLELISM);
        final OutputTag<MetricData> outputTag =
                new OutputTag<MetricData>(Constants.METRIC_DATA_OUTPUT_TAG_ID) {};
        final DataStream<MetricData> metricsDataStream = deserializationStream
                .getSideOutput(outputTag)
                .assignTimestampsAndWatermarks(new MetricsAssignerWithPeriodicWatermarks())
                .setParallelism(metricsTimestampWatermarkAssignerParallelism)
                .uid(Constants.METRICS_TIMESTAMP_AND_WATERMARK_ASSIGNER_UID)
                .name("Metrics timestamp/watermark assigner");
        final DataStream<MetricData> metricsAggregatorStream = metricsDataStream
                .keyBy((KeySelector<MetricData, String>) MetricData::getKey)
                .window(TumblingEventTimeWindows.of(Time.minutes(config.getInteger(Constants.METRICS_AGGREGATOR_WINDOW_SIZE))))
                .allowedLateness(Time.milliseconds(Long.MAX_VALUE))
                .aggregate(new MetricsAggregateFunction(), new MetricsProcessWindowFunction())
                .setParallelism(metricsAggregatorParallelism)
                .uid(Constants.METRICS_AGGREGATOR_UID)
                .name("Metrics aggregator");
        metricsAggregatorStream.addSink(new MetricsLogSink())
                .setParallelism(metricsMySQLSinkParallelism)
                .uid(Constants.METRICS_SINK_UID)
                .name("Metrics sink");
    }
    env.execute(clusterId);
}
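The metric stream above only consumes the side output; the records are emitted inside DeserializationSchema, which is not shown in this snippet. A minimal, self-contained sketch of the producer half, with String standing in for SerializedRecord and MetricData, and a hypothetical tag id (the real code uses Constants.METRIC_DATA_OUTPUT_TAG_ID and InLong's own types):

import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class RecordsWithMetrics extends ProcessFunction<String, String> {

    // The tag id must match what the consumer passes to getSideOutput(...).
    public static final OutputTag<String> METRICS_TAG = new OutputTag<String>("metric_data") {};

    @Override
    public void processElement(String record, Context ctx, Collector<String> out) {
        out.collect(record);                        // main output: pass the record through
        ctx.output(METRICS_TAG, "seen:" + record);  // side output: one metric event per record
    }
}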
Use of org.apache.flink.util.OutputTag in project flink-mirror by flink-ci.
The class CEPITCase, method testFlatSelectSerializationWithAnonymousClass.
@Test
public void testFlatSelectSerializationWithAnonymousClass() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(envConfiguration);
    DataStreamSource<Integer> elements = env.fromElements(1, 2, 3);
    OutputTag<Integer> outputTag = new OutputTag<Integer>("AAA") {};
    CEP.pattern(elements, Pattern.begin("A"))
            .inProcessingTime()
            .flatSelect(
                    outputTag,
                    new PatternFlatTimeoutFunction<Integer, Integer>() {
                        @Override
                        public void timeout(Map<String, List<Integer>> pattern, long timeoutTimestamp, Collector<Integer> out) throws Exception {
                        }
                    },
                    new PatternFlatSelectFunction<Integer, Object>() {
                        @Override
                        public void flatSelect(Map<String, List<Integer>> pattern, Collector<Object> out) throws Exception {
                        }
                    });
    env.execute();
}
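Here flatSelect is called only for its side effects; the point of the test is that the anonymous OutputTag and function classes serialize. In real use, flatSelect with a timeout tag returns a SingleOutputStreamOperator whose side output carries the timed-out partial matches. A minimal sketch of retrieving them, assuming the two anonymous functions above were first bound to variables timeoutFunction and selectFunction (names illustrative):

// Sketch: keep the returned stream and read the timed-out matches back.
SingleOutputStreamOperator<Object> matches = CEP.pattern(elements, Pattern.begin("A"))
        .inProcessingTime()
        .flatSelect(outputTag, timeoutFunction, selectFunction);
DataStream<Integer> timedOutPartialMatches = matches.getSideOutput(outputTag);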
Use of org.apache.flink.util.OutputTag in project flink-mirror by flink-ci.
The class CEPOperatorTest, method testKeyedAdvancingTimeWithoutElements.
/**
 * Tests that the internal time of a CEP operator advances only when given watermarks. See FLINK-5033.
 */
@Test
public void testKeyedAdvancingTimeWithoutElements() throws Exception {
    final Event startEvent = new Event(42, "start", 1.0);
    final long watermarkTimestamp1 = 5L;
    final long watermarkTimestamp2 = 13L;
    final Map<String, List<Event>> expectedSequence = new HashMap<>(2);
    expectedSequence.put("start", Collections.<Event>singletonList(startEvent));
    final OutputTag<Tuple2<Map<String, List<Event>>, Long>> timedOut =
            new OutputTag<Tuple2<Map<String, List<Event>>, Long>>("timedOut") {};
    final KeyedOneInputStreamOperatorTestHarness<Integer, Event, Map<String, List<Event>>> harness =
            new KeyedOneInputStreamOperatorTestHarness<>(
                    new CepOperator<>(
                            Event.createTypeSerializer(),
                            false,
                            new NFAFactory(true),
                            null,
                            null,
                            new TimedOutProcessFunction(timedOut),
                            null),
                    new KeySelector<Event, Integer>() {
                        private static final long serialVersionUID = 7219185117566268366L;

                        @Override
                        public Integer getKey(Event value) throws Exception {
                            return value.getId();
                        }
                    },
                    BasicTypeInfo.INT_TYPE_INFO);
    try {
        String rocksDbPath = tempFolder.newFolder().getAbsolutePath();
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(new MemoryStateBackend());
        rocksDBStateBackend.setDbStoragePath(rocksDbPath);
        harness.setStateBackend(rocksDBStateBackend);
        harness.setup(new KryoSerializer<>((Class<Map<String, List<Event>>>) (Object) Map.class, new ExecutionConfig()));
        harness.open();
        harness.processElement(new StreamRecord<>(startEvent, 3L));
        harness.processWatermark(new Watermark(watermarkTimestamp1));
        harness.processWatermark(new Watermark(watermarkTimestamp2));
        Queue<Object> result = harness.getOutput();
        Queue<StreamRecord<Tuple2<Map<String, List<Event>>, Long>>> sideOutput = harness.getSideOutput(timedOut);
        assertEquals(2L, result.size());
        assertEquals(1L, sideOutput.size());
        Object watermark1 = result.poll();
        assertTrue(watermark1 instanceof Watermark);
        assertEquals(watermarkTimestamp1, ((Watermark) watermark1).getTimestamp());
        Tuple2<Map<String, List<Event>>, Long> leftResult = sideOutput.poll().getValue();
        assertEquals(watermarkTimestamp2, (long) leftResult.f1);
        assertEquals(expectedSequence, leftResult.f0);
        Object watermark2 = result.poll();
        assertTrue(watermark2 instanceof Watermark);
        assertEquals(watermarkTimestamp2, ((Watermark) watermark2).getTimestamp());
    } finally {
        harness.close();
    }
}
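The TimedOutProcessFunction wired into the CepOperator above is a test helper whose body is not shown here. A hypothetical sketch of how such a function can be written against Flink's public CEP API: a PatternProcessFunction that also implements TimedOutPartialMatchHandler and routes timed-out partial matches to the side output (Event is the test's event type; the class name is illustrative):

import java.util.List;
import java.util.Map;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.cep.functions.PatternProcessFunction;
import org.apache.flink.cep.functions.TimedOutPartialMatchHandler;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

class TimedOutToSideOutput extends PatternProcessFunction<Event, Map<String, List<Event>>>
        implements TimedOutPartialMatchHandler<Event> {

    private final OutputTag<Tuple2<Map<String, List<Event>>, Long>> timedOutTag;

    TimedOutToSideOutput(OutputTag<Tuple2<Map<String, List<Event>>, Long>> timedOutTag) {
        this.timedOutTag = timedOutTag;
    }

    @Override
    public void processMatch(Map<String, List<Event>> match, Context ctx, Collector<Map<String, List<Event>>> out) {
        out.collect(match); // complete matches go to the main output
    }

    @Override
    public void processTimedOutMatch(Map<String, List<Event>> match, Context ctx) {
        // timed-out partial matches go to the side output, paired with the timeout timestamp
        ctx.output(timedOutTag, Tuple2.of(match, ctx.timestamp()));
    }
}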
Use of org.apache.flink.util.OutputTag in project flink-mirror by flink-ci.
The class SideOutputITCase, method testCoProcessFunctionSideOutputWithMultipleConsumers.
/**
* Test CoProcessFunction side output with multiple consumers.
*/
@Test
public void testCoProcessFunctionSideOutputWithMultipleConsumers() throws Exception {
    final OutputTag<String> sideOutputTag1 = new OutputTag<String>("side1") {};
    final OutputTag<String> sideOutputTag2 = new OutputTag<String>("side2") {};
    TestListResultSink<String> sideOutputResultSink1 = new TestListResultSink<>();
    TestListResultSink<String> sideOutputResultSink2 = new TestListResultSink<>();
    TestListResultSink<Integer> resultSink = new TestListResultSink<>();
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(3);
    DataStream<Integer> ds1 = see.fromCollection(elements);
    DataStream<Integer> ds2 = see.fromCollection(elements);
    SingleOutputStreamOperator<Integer> passThroughStream = ds1.connect(ds2)
            .process(new CoProcessFunction<Integer, Integer, Integer>() {
                @Override
                public void processElement1(Integer value, Context ctx, Collector<Integer> out) throws Exception {
                    if (value < 4) {
                        out.collect(value);
                        ctx.output(sideOutputTag1, "sideout1-" + String.valueOf(value));
                    }
                }

                @Override
                public void processElement2(Integer value, Context ctx, Collector<Integer> out) throws Exception {
                    if (value >= 4) {
                        out.collect(value);
                        ctx.output(sideOutputTag2, "sideout2-" + String.valueOf(value));
                    }
                }
            });
    passThroughStream.getSideOutput(sideOutputTag1).addSink(sideOutputResultSink1);
    passThroughStream.getSideOutput(sideOutputTag2).addSink(sideOutputResultSink2);
    passThroughStream.addSink(resultSink);
    see.execute();
    assertEquals(Arrays.asList("sideout1-1", "sideout1-2", "sideout1-3"), sideOutputResultSink1.getSortedResult());
    assertEquals(Arrays.asList("sideout2-4", "sideout2-5"), sideOutputResultSink2.getSortedResult());
    assertEquals(Arrays.asList(1, 2, 3, 4, 5), resultSink.getSortedResult());
}
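The same split can be expressed with a one-input ProcessFunction; ctx.output(...) works identically there. A minimal sketch reusing the tags and elements from the test above ("routed" and the combined "sideout-" prefix are illustrative, not part of the original):

// Sketch: equivalent routing with a one-input ProcessFunction.
SingleOutputStreamOperator<Integer> routed = see.fromCollection(elements)
        .process(new ProcessFunction<Integer, Integer>() {
            @Override
            public void processElement(Integer value, Context ctx, Collector<Integer> out) {
                out.collect(value); // every element also goes to the main output
                ctx.output(value < 4 ? sideOutputTag1 : sideOutputTag2, "sideout-" + value);
            }
        });
routed.getSideOutput(sideOutputTag1).print();
routed.getSideOutput(sideOutputTag2).print();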