Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project beam by apache.
The class FlinkStreamingPortablePipelineTranslator, method translateExecutableStage.
private <InputT, OutputT> void translateExecutableStage(
        String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
    // TODO: Fail on splittable DoFns.
    // TODO: Special-case single outputs to avoid multiplexing PCollections.
    RunnerApi.Components components = pipeline.getComponents();
    RunnerApi.PTransform transform = components.getTransformsOrThrow(id);
    Map<String, String> outputs = transform.getOutputsMap();
    final RunnerApi.ExecutableStagePayload stagePayload;
    try {
        stagePayload = RunnerApi.ExecutableStagePayload.parseFrom(transform.getSpec().getPayload());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String inputPCollectionId = stagePayload.getInput();
    final TransformedSideInputs transformedSideInputs;
    if (stagePayload.getSideInputsCount() > 0) {
        transformedSideInputs = transformSideInputs(stagePayload, components, context);
    } else {
        transformedSideInputs = new TransformedSideInputs(Collections.emptyMap(), null);
    }
    Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags = Maps.newLinkedHashMap();
    Map<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders = Maps.newLinkedHashMap();
    // TODO: does it matter which output we designate as "main"
    final TupleTag<OutputT> mainOutputTag =
        outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());
    // associate output tags with ids, output manager uses these Integer ids to serialize state
    BiMap<String, Integer> outputIndexMap = createOutputMap(outputs.keySet());
    Map<String, Coder<WindowedValue<?>>> outputCoders = Maps.newHashMap();
    Map<TupleTag<?>, Integer> tagsToIds = Maps.newHashMap();
    Map<String, TupleTag<?>> collectionIdToTupleTag = Maps.newHashMap();
    // order output names for deterministic mapping
    for (String localOutputName : new TreeMap<>(outputIndexMap).keySet()) {
        String collectionId = outputs.get(localOutputName);
        Coder<WindowedValue<?>> windowCoder = (Coder) instantiateCoder(collectionId, components);
        outputCoders.put(localOutputName, windowCoder);
        TupleTag<?> tupleTag = new TupleTag<>(localOutputName);
        CoderTypeInformation<WindowedValue<?>> typeInformation =
            new CoderTypeInformation(windowCoder, context.getPipelineOptions());
        tagsToOutputTags.put(tupleTag, new OutputTag<>(localOutputName, typeInformation));
        tagsToCoders.put(tupleTag, windowCoder);
        tagsToIds.put(tupleTag, outputIndexMap.get(localOutputName));
        collectionIdToTupleTag.put(collectionId, tupleTag);
    }
    final SingleOutputStreamOperator<WindowedValue<OutputT>> outputStream;
    DataStream<WindowedValue<InputT>> inputDataStream = context.getDataStreamOrThrow(inputPCollectionId);
    CoderTypeInformation<WindowedValue<OutputT>> outputTypeInformation =
        !outputs.isEmpty()
            ? new CoderTypeInformation(outputCoders.get(mainOutputTag.getId()), context.getPipelineOptions())
            : null;
    ArrayList<TupleTag<?>> additionalOutputTags = Lists.newArrayList();
    for (TupleTag<?> tupleTag : tagsToCoders.keySet()) {
        if (!mainOutputTag.getId().equals(tupleTag.getId())) {
            additionalOutputTags.add(tupleTag);
        }
    }
    final Coder<WindowedValue<InputT>> windowedInputCoder = instantiateCoder(inputPCollectionId, components);
    final boolean stateful = stagePayload.getUserStatesCount() > 0 || stagePayload.getTimersCount() > 0;
    final boolean hasSdfProcessFn =
        stagePayload.getComponents().getTransformsMap().values().stream()
            .anyMatch(
                pTransform ->
                    pTransform.getSpec().getUrn()
                        .equals(PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN));
    Coder keyCoder = null;
    KeySelector<WindowedValue<InputT>, ?> keySelector = null;
    if (stateful || hasSdfProcessFn) {
        // Stateful/SDF stages are only allowed for KV inputs.
        Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) windowedInputCoder).getValueCoder();
        if (!(valueCoder instanceof KvCoder)) {
            throw new IllegalStateException(
                String.format(
                    Locale.ENGLISH,
                    "The element coder for stateful DoFn '%s' must be KvCoder but is: %s",
                    inputPCollectionId,
                    valueCoder.getClass().getSimpleName()));
        }
        if (stateful) {
            keyCoder = ((KvCoder) valueCoder).getKeyCoder();
            keySelector =
                new KvToByteBufferKeySelector(
                    keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
        } else {
            // For a splittable DoFn the input element is expected to be
            // KV<KV<element, restriction>, size>; the element part of the inner KV is used
            // as the key.
            if (!(((KvCoder) valueCoder).getKeyCoder() instanceof KvCoder)) {
                throw new IllegalStateException(
                    String.format(
                        Locale.ENGLISH,
                        "The element coder for splittable DoFn '%s' must be KVCoder(KvCoder, DoubleCoder) but is: %s",
                        inputPCollectionId,
                        valueCoder.getClass().getSimpleName()));
            }
            keyCoder = ((KvCoder) ((KvCoder) valueCoder).getKeyCoder()).getKeyCoder();
            keySelector =
                new SdfByteBufferKeySelector(
                    keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
        }
        inputDataStream = inputDataStream.keyBy(keySelector);
    }
    DoFnOperator.MultiOutputOutputManagerFactory<OutputT> outputManagerFactory =
        new DoFnOperator.MultiOutputOutputManagerFactory<>(
            mainOutputTag,
            tagsToOutputTags,
            tagsToCoders,
            tagsToIds,
            new SerializablePipelineOptions(context.getPipelineOptions()));
    DoFnOperator<InputT, OutputT> doFnOperator =
        new ExecutableStageDoFnOperator<>(
            transform.getUniqueName(),
            windowedInputCoder,
            Collections.emptyMap(),
            mainOutputTag,
            additionalOutputTags,
            outputManagerFactory,
            transformedSideInputs.unionTagToView,
            new ArrayList<>(transformedSideInputs.unionTagToView.values()),
            getSideInputIdToPCollectionViewMap(stagePayload, components),
            context.getPipelineOptions(),
            stagePayload,
            context.getJobInfo(),
            FlinkExecutableStageContextFactory.getInstance(),
            collectionIdToTupleTag,
            getWindowingStrategy(inputPCollectionId, components),
            keyCoder,
            keySelector);
    final String operatorName = generateNameFromStagePayload(stagePayload);
    if (transformedSideInputs.unionTagToView.isEmpty()) {
        outputStream = inputDataStream.transform(operatorName, outputTypeInformation, doFnOperator);
    } else {
        DataStream<RawUnionValue> sideInputStream = transformedSideInputs.unionedSideInputs.broadcast();
        if (stateful || hasSdfProcessFn) {
            // We have to manually construct the two-input transform because we're not
            // allowed to have only one input keyed, normally. Since Flink 1.5.0 it's
            // possible to use the Broadcast State Pattern which provides a more elegant
            // way to process keyed main input with broadcast state, but it's not feasible
            // here because it breaks the DoFnOperator abstraction.
            TwoInputTransformation<WindowedValue<KV<?, InputT>>, RawUnionValue, WindowedValue<OutputT>> rawFlinkTransform =
                new TwoInputTransformation(
                    inputDataStream.getTransformation(),
                    sideInputStream.getTransformation(),
                    transform.getUniqueName(),
                    doFnOperator,
                    outputTypeInformation,
                    inputDataStream.getParallelism());
            rawFlinkTransform.setStateKeyType(((KeyedStream) inputDataStream).getKeyType());
            rawFlinkTransform.setStateKeySelectors(((KeyedStream) inputDataStream).getKeySelector(), null);
            // we have to cheat around the ctor being protected
            outputStream =
                new SingleOutputStreamOperator(inputDataStream.getExecutionEnvironment(), rawFlinkTransform) {};
        } else {
            outputStream =
                inputDataStream.connect(sideInputStream).transform(operatorName, outputTypeInformation, doFnOperator);
        }
    }
    // Assign a unique but consistent id to re-map operator state
    outputStream.uid(transform.getUniqueName());
    if (mainOutputTag != null) {
        context.addDataStream(outputs.get(mainOutputTag.getId()), outputStream);
    }
    for (TupleTag<?> tupleTag : additionalOutputTags) {
        context.addDataStream(
            outputs.get(tupleTag.getId()), outputStream.getSideOutput(tagsToOutputTags.get(tupleTag)));
    }
}
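The translation above leans on Flink's OutputTag mechanism: the SingleOutputStreamOperator returned by transform(...) carries the main output, gets a stable uid for operator-state re-mapping, and exposes every additional output through getSideOutput(...). For reference, here is a minimal, self-contained sketch of that same wiring against the plain Flink DataStream API, without the Beam wrappers; the class name SideOutputSketch, the "errors" tag, and the sample data are illustrative, not part of the Beam translator.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class SideOutputSketch {

    // Hypothetical tag for the secondary output; the anonymous subclass preserves the type information.
    private static final OutputTag<String> ERRORS = new OutputTag<String>("errors") {};

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        SingleOutputStreamOperator<Integer> parsed =
            env.fromElements("1", "2", "x")
                .process(new ProcessFunction<String, Integer>() {
                    @Override
                    public void processElement(String value, Context ctx, Collector<Integer> out) {
                        try {
                            out.collect(Integer.parseInt(value)); // main output
                        } catch (NumberFormatException e) {
                            ctx.output(ERRORS, value); // additional output, multiplexed by tag
                        }
                    }
                })
                .uid("parse-ints"); // stable uid, as the translator does for the executable stage
        parsed.print();
        DataStream<String> errors = parsed.getSideOutput(ERRORS); // pull the tagged output back out
        errors.print();
        env.execute("side-output sketch");
    }
}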
Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
The class TypeFillTest, method test.
@Test
public void test() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    try {
        env.addSource(new TestSource<Integer>()).print();
        fail();
    } catch (Exception ignored) {
    }
    DataStream<Long> source = env.generateSequence(1, 10);
    try {
        source.map(new TestMap<Long, Long>()).print();
        fail();
    } catch (Exception ignored) {
    }
    try {
        source.flatMap(new TestFlatMap<Long, Long>()).print();
        fail();
    } catch (Exception ignored) {
    }
    try {
        source.connect(source).map(new TestCoMap<Long, Long, Integer>()).print();
        fail();
    } catch (Exception ignored) {
    }
    try {
        source.connect(source).flatMap(new TestCoFlatMap<Long, Long, Integer>()).print();
        fail();
    } catch (Exception ignored) {
    }
    try {
        source.keyBy(new TestKeySelector<Long, String>()).print();
        fail();
    } catch (Exception ignored) {
    }
    try {
        source.connect(source).keyBy(new TestKeySelector<Long, String>(), new TestKeySelector<>());
        fail();
    } catch (Exception ignored) {
    }
    try {
        source.coGroup(source).where(new TestKeySelector<>()).equalTo(new TestKeySelector<>());
        fail();
    } catch (Exception ignored) {
    }
    try {
        source.join(source).where(new TestKeySelector<>()).equalTo(new TestKeySelector<>());
        fail();
    } catch (Exception ignored) {
    }
    try {
        source.keyBy((in) -> in)
            .intervalJoin(source.keyBy((in) -> in))
            .between(Time.milliseconds(10L), Time.milliseconds(10L))
            .process(new TestProcessJoinFunction<>())
            .print();
        fail();
    } catch (Exception ignored) {
    }
    env.addSource(new TestSource<Integer>()).returns(Integer.class);
    source.map(new TestMap<Long, Long>()).returns(Long.class).print();
    source.flatMap(new TestFlatMap<Long, Long>()).returns(new TypeHint<Long>() {}).print();
    source.connect(source).map(new TestCoMap<Long, Long, Integer>()).returns(BasicTypeInfo.INT_TYPE_INFO).print();
    source.connect(source).flatMap(new TestCoFlatMap<Long, Long, Integer>()).returns(BasicTypeInfo.INT_TYPE_INFO).print();
    source.connect(source).keyBy(new TestKeySelector<>(), new TestKeySelector<>(), Types.STRING);
    source.coGroup(source).where(new TestKeySelector<>(), Types.STRING).equalTo(new TestKeySelector<>(), Types.STRING);
    source.join(source).where(new TestKeySelector<>(), Types.STRING).equalTo(new TestKeySelector<>(), Types.STRING);
    source.keyBy((in) -> in)
        .intervalJoin(source.keyBy((in) -> in))
        .between(Time.milliseconds(10L), Time.milliseconds(10L))
        .process(new TestProcessJoinFunction<Long, Long, String>())
        .returns(Types.STRING);
    source.keyBy((in) -> in)
        .intervalJoin(source.keyBy((in) -> in))
        .between(Time.milliseconds(10L), Time.milliseconds(10L))
        .process(new TestProcessJoinFunction<>(), Types.STRING);
    assertEquals(BasicTypeInfo.LONG_TYPE_INFO, source.map(new TestMap<Long, Long>()).returns(Long.class).getType());
    SingleOutputStreamOperator<String> map =
        source.map(new MapFunction<Long, String>() {
            @Override
            public String map(Long value) throws Exception {
                return null;
            }
        });
    map.print();
    try {
        map.returns(String.class);
        fail();
    } catch (Exception ignored) {
    }
}
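The test exercises Flink's type-extraction fallback: when an operator's output type cannot be derived from a generic function, the transformation only becomes usable after the caller supplies the type with returns(...). Below is a minimal sketch of the same situation, assuming a hypothetical Widen mapper whose output type variable is independent of its input and therefore not inferable.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ReturnsSketch {

    // Hypothetical mapper whose output type variable O is independent of the input,
    // so it cannot be inferred once generics are erased.
    static class Widen<I, O> implements MapFunction<I, O> {
        @Override
        @SuppressWarnings("unchecked")
        public O map(I value) {
            return (O) value;
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        SingleOutputStreamOperator<Long> mapped =
            env.fromElements(1L, 2L, 3L)
                .map(new Widen<Long, Long>())
                // Without this hint, using the stream (e.g. print()) fails because the
                // output type is unknown; that is the situation the fail() branches above expect.
                .returns(Types.LONG);
        mapped.print();
        env.execute("returns sketch");
    }
}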
Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
The class StateDescriptorPassingTest, method testReduceWindowAllState.
@Test
public void testReduceWindowAllState() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);
    // simulate ingestion time
    DataStream<File> src =
        env.fromElements(new File("/"))
            .assignTimestampsAndWatermarks(
                WatermarkStrategy.<File>forMonotonousTimestamps()
                    .withTimestampAssigner((file, ts) -> System.currentTimeMillis()));
    SingleOutputStreamOperator<?> result =
        src.windowAll(TumblingEventTimeWindows.of(Time.milliseconds(1000)))
            .reduce(new ReduceFunction<File>() {
                @Override
                public File reduce(File value1, File value2) {
                    return null;
                }
            });
    validateStateDescriptorConfigured(result);
}
Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
The class StateDescriptorPassingTest, method testProcessAllWindowState.
@Test
public void testProcessAllWindowState() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);
    // simulate ingestion time
    DataStream<File> src =
        env.fromElements(new File("/"))
            .assignTimestampsAndWatermarks(
                WatermarkStrategy.<File>forMonotonousTimestamps()
                    .withTimestampAssigner((file, ts) -> System.currentTimeMillis()));
    SingleOutputStreamOperator<?> result =
        src.windowAll(TumblingEventTimeWindows.of(Time.milliseconds(1000)))
            .process(new ProcessAllWindowFunction<File, String, TimeWindow>() {
                @Override
                public void process(Context ctx, Iterable<File> input, Collector<String> out) {
                }
            });
    validateListStateDescriptorConfigured(result);
}
Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
The class SortingBoundedInputITCase, method testTwoInputOperator.
@Test
public void testTwoInputOperator() {
    long numberOfRecords = 500_000;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    DataStreamSource<Tuple2<Integer, byte[]>> elements1 =
        env.fromParallelCollection(
            new InputGenerator(numberOfRecords),
            new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    DataStreamSource<Tuple2<Integer, byte[]>> elements2 =
        env.fromParallelCollection(
            new InputGenerator(numberOfRecords),
            new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    SingleOutputStreamOperator<Long> counts =
        elements1.connect(elements2)
            .keyBy(element -> element.f0, element -> element.f0)
            .transform("Asserting operator", BasicTypeInfo.LONG_TYPE_INFO, new AssertingTwoInputOperator());
    long sum =
        CollectionUtil.iteratorToList(DataStreamUtils.collect(counts)).stream()
            .mapToLong(l -> l)
            .sum();
    assertThat(sum, equalTo(numberOfRecords * 2));
}
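The AssertingTwoInputOperator above is internal to Flink's test code. As a rough sketch of the same shape, under the assumption that a simple counting KeyedCoProcessFunction stands in for the asserting operator and tiny illustrative inputs replace the generated ones: two bounded sources are connected, keyed on both sides, executed in BATCH mode, and the per-record counts are pulled back with executeAndCollect().

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction;
import org.apache.flink.util.CloseableIterator;
import org.apache.flink.util.Collector;

public class TwoInputBatchSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Bounded inputs run in BATCH mode, as the test configures via ExecutionOptions.RUNTIME_MODE.
        env.setRuntimeMode(RuntimeExecutionMode.BATCH);

        DataStream<Integer> left = env.fromElements(1, 2, 3, 1);
        DataStream<Integer> right = env.fromElements(2, 3, 3);

        // Key both inputs the same way and emit one count per record that reaches the operator.
        SingleOutputStreamOperator<Long> counts =
            left.connect(right)
                .keyBy(v -> v, v -> v)
                .process(new KeyedCoProcessFunction<Integer, Integer, Integer, Long>() {
                    @Override
                    public void processElement1(Integer value, Context ctx, Collector<Long> out) {
                        out.collect(1L);
                    }

                    @Override
                    public void processElement2(Integer value, Context ctx, Collector<Long> out) {
                        out.collect(1L);
                    }
                });

        long total = 0;
        try (CloseableIterator<Long> it = counts.executeAndCollect()) {
            while (it.hasNext()) {
                total += it.next();
            }
        }
        System.out.println(total); // 7: every record from both inputs was counted once
    }
}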