use of org.apache.flink.util.OutputTag in project flink by splunk.
the class CEPOperatorTest method testCEPOperatorSideOutputLateElementsEventTime.
@Test
public void testCEPOperatorSideOutputLateElementsEventTime() throws Exception {
    Event startEvent = new Event(41, "c", 1.0);
    Event middle1Event1 = new Event(41, "a", 2.0);
    Event middle1Event2 = new Event(41, "a", 3.0);
    Event middle1Event3 = new Event(41, "a", 4.0);
    OutputTag<Event> lateDataTag = new OutputTag<Event>("late-data", TypeInformation.of(Event.class));
    CepOperator<Event, Integer, Map<String, List<Event>>> operator = CepOperatorTestUtilities.getKeyedCepOperator(false, new ComplexNFAFactory(), null, lateDataTag);
    try (OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness = CepOperatorTestUtilities.getCepTestHarness(operator)) {
        harness.open();
        harness.processWatermark(new Watermark(Long.MIN_VALUE));
        harness.processElement(new StreamRecord<>(startEvent, 6));
        verifyWatermark(harness.getOutput().poll(), Long.MIN_VALUE);
        harness.processWatermark(new Watermark(6L));
        verifyWatermark(harness.getOutput().poll(), 6L);
        harness.processElement(new StreamRecord<>(middle1Event1, 4));
        harness.processElement(new StreamRecord<>(middle1Event2, 5));
        harness.processElement(new StreamRecord<>(middle1Event3, 7));
        List<Event> late = new ArrayList<>();
        while (!harness.getSideOutput(lateDataTag).isEmpty()) {
            StreamRecord<Event> eventStreamRecord = harness.getSideOutput(lateDataTag).poll();
            late.add(eventStreamRecord.getValue());
        }
        List<Event> expected = Lists.newArrayList(middle1Event1, middle1Event2);
        Assert.assertArrayEquals(expected.toArray(), late.toArray());
    }
}
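Once the watermark has advanced to 6, the records stamped 4 and 5 arrive behind it and are routed to the side output registered under lateDataTag, while the record stamped 7 passes through; the test then drains exactly those two late events. Outside the harness, the same behavior is reached through the public CEP API. The sketch below is a minimal illustration of that wiring; the events stream and pattern variables are assumptions, not part of the test above.

// Hedged sketch: routing late elements to a side output via the CEP API.
final OutputTag<Event> lateDataTag = new OutputTag<Event>("late-data") {};

SingleOutputStreamOperator<String> matches = CEP.pattern(events, pattern)  // 'events' and 'pattern' are assumed to exist
        .sideOutputLateData(lateDataTag)  // late records go to the tag instead of being silently dropped
        .select(new PatternSelectFunction<Event, String>() {
            @Override
            public String select(Map<String, List<Event>> match) {
                return match.toString();
            }
        });

// Records whose timestamps are behind the current watermark end up here.
DataStream<Event> lateEvents = matches.getSideOutput(lateDataTag);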
use of org.apache.flink.util.OutputTag in project flink by splunk.
the class OperatorChain method createChainedSources.
@SuppressWarnings("rawtypes")
private Map<StreamConfig.SourceInputConfig, ChainedSource> createChainedSources(StreamTask<OUT, OP> containingTask, StreamConfig.InputConfig[] configuredInputs, Map<Integer, StreamConfig> chainedConfigs, ClassLoader userCodeClassloader, List<StreamOperatorWrapper<?, ?>> allOpWrappers) {
    if (Arrays.stream(configuredInputs).noneMatch(input -> input instanceof StreamConfig.SourceInputConfig)) {
        return Collections.emptyMap();
    }
    checkState(mainOperatorWrapper.getStreamOperator() instanceof MultipleInputStreamOperator, "Creating chained input is only supported with MultipleInputStreamOperator and MultipleInputStreamTask");
    Map<StreamConfig.SourceInputConfig, ChainedSource> chainedSourceInputs = new HashMap<>();
    MultipleInputStreamOperator<?> multipleInputOperator = (MultipleInputStreamOperator<?>) mainOperatorWrapper.getStreamOperator();
    List<Input> operatorInputs = multipleInputOperator.getInputs();
    // chained sources receive input gate indices after all network input gates
    int sourceInputGateIndex = Arrays.stream(containingTask.getEnvironment().getAllInputGates()).mapToInt(IndexedInputGate::getInputGateIndex).max().orElse(-1) + 1;
    for (int inputId = 0; inputId < configuredInputs.length; inputId++) {
        if (!(configuredInputs[inputId] instanceof StreamConfig.SourceInputConfig)) {
            continue;
        }
        StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInputs[inputId];
        int sourceEdgeId = sourceInput.getInputEdge().getSourceId();
        StreamConfig sourceInputConfig = chainedConfigs.get(sourceEdgeId);
        // the tag on the input edge selects which output of the chained source this input consumes
        OutputTag outputTag = sourceInput.getInputEdge().getOutputTag();
        WatermarkGaugeExposingOutput chainedSourceOutput = createChainedSourceOutput(containingTask, sourceInputConfig, userCodeClassloader, getFinishedOnRestoreInputOrDefault(operatorInputs.get(inputId)), multipleInputOperator.getMetricGroup(), outputTag);
        SourceOperator<?, ?> sourceOperator = (SourceOperator<?, ?>) createOperator(containingTask, sourceInputConfig, userCodeClassloader, (WatermarkGaugeExposingOutput<StreamRecord<OUT>>) chainedSourceOutput, allOpWrappers, true);
        chainedSourceInputs.put(sourceInput, new ChainedSource(chainedSourceOutput, this.isTaskDeployedAsFinished() ? new StreamTaskFinishedOnRestoreSourceInput<>(sourceOperator, sourceInputGateIndex++, inputId) : new StreamTaskSourceInput<>(sourceOperator, sourceInputGateIndex++, inputId)));
    }
    return chainedSourceInputs;
}
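Here the OutputTag comes from the chained source's input edge: a null tag means the edge consumes the main output of the chained source, while a non-null tag selects one of its side outputs, and createChainedSourceOutput uses it to build an output that forwards only matching records. As a hedged illustration of that filtering idea (this is not the actual Flink class, whose logic lives in OperatorChain internals):

// Illustrative sketch only; names and structure are assumptions.
final class TagFilteringOutput<T> {
    // tag configured on the StreamEdge; null means "main output"
    private final OutputTag<T> edgeTag;

    TagFilteringOutput(OutputTag<T> edgeTag) {
        this.edgeTag = edgeTag;
    }

    // side-output path: recordTag is the tag the record was emitted under
    <X> void collect(OutputTag<X> recordTag, StreamRecord<X> record) {
        // OutputTag.isResponsibleFor matches tags by their id
        if (OutputTag.isResponsibleFor(edgeTag, recordTag)) {
            // forward 'record' to the downstream (chained) input here
        }
    }
}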
use of org.apache.flink.util.OutputTag in project flink by splunk.
the class StreamOperatorChainingTest method testMultiChainingWithSplit.
/**
 * Verify that multi-chaining works with a split.
 */
private void testMultiChainingWithSplit(StreamExecutionEnvironment env) throws Exception {
    // set parallelism to 2 to avoid chaining with the source when only one processor is available
    env.setParallelism(2);
    // the actual elements will not be used
    DataStream<Integer> input = env.fromElements(1, 2, 3);
    sink1Results = new ArrayList<>();
    sink2Results = new ArrayList<>();
    sink3Results = new ArrayList<>();
    input = input.map(value -> value);
    OutputTag<Integer> oneOutput = new OutputTag<Integer>("one") {};
    OutputTag<Integer> otherOutput = new OutputTag<Integer>("other") {};
    SingleOutputStreamOperator<Object> split = input.process(new ProcessFunction<Integer, Object>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Object> out) throws Exception {
            if (value.equals(1)) {
                ctx.output(oneOutput, value);
            } else {
                ctx.output(otherOutput, value);
            }
        }
    });
    split.getSideOutput(oneOutput).map(value -> "First 1: " + value).addSink(new SinkFunction<String>() {
        @Override
        public void invoke(String value, Context ctx) throws Exception {
            sink1Results.add(value);
        }
    });
    split.getSideOutput(oneOutput).map(value -> "First 2: " + value).addSink(new SinkFunction<String>() {
        @Override
        public void invoke(String value, Context ctx) throws Exception {
            sink2Results.add(value);
        }
    });
    split.getSideOutput(otherOutput).map(value -> "Second: " + value).addSink(new SinkFunction<String>() {
        @Override
        public void invoke(String value, Context ctx) throws Exception {
            sink3Results.add(value);
        }
    });
    // we build our own StreamTask and OperatorChain
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    Assert.assertTrue(jobGraph.getVerticesSortedTopologicallyFromSources().size() == 2);
    JobVertex chainedVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    Configuration configuration = chainedVertex.getConfiguration();
    StreamConfig streamConfig = new StreamConfig(configuration);
    StreamMap<Integer, Integer> headOperator = streamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader());
    try (MockEnvironment environment = createMockEnvironment(chainedVertex.getName())) {
        StreamTask<Integer, StreamMap<Integer, Integer>> mockTask = createMockTask(streamConfig, environment);
        OperatorChain<Integer, StreamMap<Integer, Integer>> operatorChain = createOperatorChain(streamConfig, environment, mockTask);
        headOperator.setup(mockTask, streamConfig, operatorChain.getMainOperatorOutput());
        operatorChain.initializeStateAndOpenOperators(null);
        headOperator.processElement(new StreamRecord<>(1));
        headOperator.processElement(new StreamRecord<>(2));
        headOperator.processElement(new StreamRecord<>(3));
        assertThat(sink1Results, contains("First 1: 1"));
        assertThat(sink2Results, contains("First 2: 1"));
        assertThat(sink3Results, contains("Second: 2", "Second: 3"));
    }
}
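Two of the sinks above deliberately consume the same side output ("one"): subscribing to a tag more than once is allowed, and every subscriber receives the records emitted under that tag, which is what the final assertions check after pushing 1, 2, 3 through the head operator. A self-contained sketch of the same pattern with the regular API (class and job names are illustrative):

import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class MultiConsumerSideOutputSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final OutputTag<Integer> one = new OutputTag<Integer>("one") {};
        final OutputTag<Integer> other = new OutputTag<Integer>("other") {};
        SingleOutputStreamOperator<Object> split = env.fromElements(1, 2, 3)
                .process(new ProcessFunction<Integer, Object>() {
                    @Override
                    public void processElement(Integer value, Context ctx, Collector<Object> out) {
                        ctx.output(value.equals(1) ? one : other, value);
                    }
                });
        split.getSideOutput(one).print();    // first consumer of "one"
        split.getSideOutput(one).print();    // second consumer of the same tag also gets the records
        split.getSideOutput(other).print();
        env.execute("multi-consumer side outputs");
    }
}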
use of org.apache.flink.util.OutputTag in project flink by splunk.
the class IterateITCase method testmultipleHeadsTailsWithTailPartitioning.
@Test
public void testmultipleHeadsTailsWithTailPartitioning() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source1 = env.fromElements(1, 2, 3, 4, 5).shuffle().map(noOpIntMap);
    DataStream<Integer> source2 = env.fromElements(1, 2, 3, 4, 5).map(noOpIntMap);
    IterativeStream<Integer> iter1 = source1.union(source2).iterate();
    DataStream<Integer> head1 = iter1.map(noOpIntMap).name("map1");
    DataStream<Integer> head2 = iter1.map(noOpIntMap).setParallelism(parallelism / 2).name("shuffle").rebalance();
    DataStreamSink<Integer> head3 = iter1.map(noOpIntMap).setParallelism(parallelism / 2).addSink(new ReceiveCheckNoOpSink<Integer>());
    DataStreamSink<Integer> head4 = iter1.map(noOpIntMap).addSink(new ReceiveCheckNoOpSink<Integer>());
    OutputTag<Integer> even = new OutputTag<Integer>("even") {};
    OutputTag<Integer> odd = new OutputTag<Integer>("odd") {};
    SingleOutputStreamOperator<Object> source3 = env.fromElements(1, 2, 3, 4, 5).map(noOpIntMap).name("split").process(new ProcessFunction<Integer, Object>() {
        @Override
        public void processElement(Integer value, Context ctx, Collector<Object> out) throws Exception {
            if (value % 2 == 0) {
                ctx.output(even, value);
            } else {
                ctx.output(odd, value);
            }
        }
    });
    iter1.closeWith(source3.getSideOutput(even).union(head1.map(noOpIntMap).name("bc").broadcast(), head2.map(noOpIntMap).shuffle()));
    StreamGraph graph = env.getStreamGraph();
    JobGraph jg = graph.getJobGraph();
    assertEquals(1, graph.getIterationSourceSinkPairs().size());
    Tuple2<StreamNode, StreamNode> sourceSinkPair = graph.getIterationSourceSinkPairs().iterator().next();
    StreamNode itSource = sourceSinkPair.f0;
    StreamNode itSink = sourceSinkPair.f1;
    assertEquals(4, itSource.getOutEdges().size());
    assertEquals(3, itSink.getInEdges().size());
    assertEquals(itSource.getParallelism(), itSink.getParallelism());
    for (StreamEdge edge : itSource.getOutEdges()) {
        if (graph.getTargetVertex(edge).getOperatorName().equals("map1")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
            assertEquals(4, graph.getTargetVertex(edge).getParallelism());
        } else if (graph.getTargetVertex(edge).getOperatorName().equals("shuffle")) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
            assertEquals(2, graph.getTargetVertex(edge).getParallelism());
        }
    }
    for (StreamEdge edge : itSink.getInEdges()) {
        String tailName = graph.getSourceVertex(edge).getOperatorName();
        if (tailName.equals("split")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        } else if (tailName.equals("bc")) {
            assertTrue(edge.getPartitioner() instanceof BroadcastPartitioner);
        } else if (tailName.equals("shuffle")) {
            assertTrue(edge.getPartitioner() instanceof ShufflePartitioner);
        }
    }
    // Test co-location
    JobVertex itSource1 = null;
    JobVertex itSink1 = null;
    for (JobVertex vertex : jg.getVertices()) {
        if (vertex.getName().contains("IterationSource")) {
            itSource1 = vertex;
        } else if (vertex.getName().contains("IterationSink")) {
            itSink1 = vertex;
        }
    }
    assertTrue(itSource1.getCoLocationGroup() != null);
    assertTrue(itSink1.getCoLocationGroup() != null);
    assertEquals(itSource1.getCoLocationGroup(), itSink1.getCoLocationGroup());
}
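The closeWith call is the OutputTag-relevant part: the even side output of the "split" operator is unioned with the broadcast and shuffled heads and fed back into the iteration, which is why the iteration sink ends up with three in-edges carrying three different partitioners. A minimal sketch of feeding a side output back into a loop, with illustrative names and logic:

// Hedged sketch: even values are routed to a feedback tag and re-enter the loop.
final OutputTag<Integer> feedbackTag = new OutputTag<Integer>("feedback") {};

IterativeStream<Integer> loop = input.iterate();  // 'input' is an assumed DataStream<Integer>
SingleOutputStreamOperator<Integer> body = loop.process(new ProcessFunction<Integer, Integer>() {
    @Override
    public void processElement(Integer value, Context ctx, Collector<Integer> out) {
        if (value % 2 == 0) {
            ctx.output(feedbackTag, value / 2);  // goes back to the iteration head
        } else {
            out.collect(value);                  // leaves the loop
        }
    }
});
loop.closeWith(body.getSideOutput(feedbackTag));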
use of org.apache.flink.util.OutputTag in project flink by splunk.
the class SideOutputITCase method testSideOutputNameClash.
@Test
public void testSideOutputNameClash() throws Exception {
    final OutputTag<String> sideOutputTag1 = new OutputTag<String>("side") {};
    final OutputTag<Integer> sideOutputTag2 = new OutputTag<Integer>("side") {};
    TestListResultSink<String> sideOutputResultSink1 = new TestListResultSink<>();
    TestListResultSink<Integer> sideOutputResultSink2 = new TestListResultSink<>();
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(3);
    DataStream<Integer> dataStream = see.fromCollection(elements);
    SingleOutputStreamOperator<Integer> passThroughStream = dataStream.process(new ProcessFunction<Integer, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            out.collect(value);
            ctx.output(sideOutputTag1, "sideout-" + String.valueOf(value));
            ctx.output(sideOutputTag2, 13);
        }
    });
    passThroughStream.getSideOutput(sideOutputTag1).addSink(sideOutputResultSink1);
    // requesting a second tag with the same id ("side") but a different type must fail
    expectedException.expect(UnsupportedOperationException.class);
    passThroughStream.getSideOutput(sideOutputTag2).addSink(sideOutputResultSink2);
}
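The clash is expected because OutputTag equality is based on the tag's id, not its element type: both tags above are named "side", so once the String tag has been requested, asking for an Integer tag with the same id throws UnsupportedOperationException. Giving each tag a distinct id resolves it; a hedged fragment with illustrative names:

// Distinct ids, so both side outputs can be consumed (names are assumptions).
final OutputTag<String> sideStrings = new OutputTag<String>("side-strings") {};
final OutputTag<Integer> sideInts = new OutputTag<Integer>("side-ints") {};

// inside processElement(...):
//     ctx.output(sideStrings, "sideout-" + value);
//     ctx.output(sideInts, 13);

passThroughStream.getSideOutput(sideStrings).addSink(sideOutputResultSink1);
passThroughStream.getSideOutput(sideInts).addSink(sideOutputResultSink2);  // no clash now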