Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class CommonExecLegacySink, method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<T> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (tableSink instanceof StreamTableSink) {
        final Transformation<T> transform;
        if (tableSink instanceof RetractStreamTableSink) {
            transform = translateToTransformation(planner, config, true);
        } else if (tableSink instanceof UpsertStreamTableSink) {
            UpsertStreamTableSink<T> upsertSink = (UpsertStreamTableSink<T>) tableSink;
            final boolean isAppendOnlyTable = !needRetraction;
            upsertSink.setIsAppendOnly(isAppendOnlyTable);
            if (upsertKeys != null) {
                upsertSink.setKeyFields(upsertKeys);
            } else {
                if (isAppendOnlyTable) {
                    upsertSink.setKeyFields(null);
                } else {
                    throw new TableException("UpsertStreamTableSink requires that Table has a full primary keys if it is updated.");
                }
            }
            transform = translateToTransformation(planner, config, true);
        } else if (tableSink instanceof AppendStreamTableSink) {
            // verify table is an insert-only (append-only) table
            if (needRetraction) {
                throw new TableException("AppendStreamTableSink requires that Table has only insert changes.");
            }
            transform = translateToTransformation(planner, config, false);
        } else {
            if (isStreaming) {
                throw new TableException("Stream Tables can only be emitted by AppendStreamTableSink, " + "RetractStreamTableSink, or UpsertStreamTableSink.");
            } else {
                transform = translateToTransformation(planner, config, false);
            }
        }
        final DataStream<T> dataStream = new DataStream<T>(planner.getExecEnv(), transform);
        final DataStreamSink<T> dsSink = (DataStreamSink<T>) ((StreamTableSink<T>) tableSink).consumeDataStream(dataStream);
        if (dsSink == null) {
            throw new TableException(String.format("The StreamTableSink#consumeDataStream(DataStream) must be implemented " + "and return the sink transformation DataStreamSink. " + "However, %s doesn't implement this method.", tableSink.getClass().getCanonicalName()));
        }
        return dsSink.getLegacyTransformation();
    } else if (tableSink instanceof DataStreamTableSink) {
        // not a real table sink, so we just need to translate its input to a Transformation
        return translateToTransformation(planner, config, ((DataStreamTableSink<T>) tableSink).withChangeFlag());
    } else {
        throw new TableException(String.format("Only Support StreamTableSink! However %s is not a StreamTableSink.", tableSink.getClass().getCanonicalName()));
    }
}
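The planner code above only requires that a legacy StreamTableSink return a non-null DataStreamSink from consumeDataStream. As a point of reference, the following is a minimal, hypothetical AppendStreamTableSink that satisfies this contract by printing rows; the class name and its trivial schema handling are illustrative assumptions, not part of Flink.

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.sinks.AppendStreamTableSink;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.types.Row;

// Hypothetical example sink for append-only tables; not part of the snippet above.
public class PrintingAppendSink implements AppendStreamTableSink<Row> {

    private String[] fieldNames;
    private TypeInformation<?>[] fieldTypes;

    @Override
    public DataStreamSink<?> consumeDataStream(DataStream<Row> dataStream) {
        // Must return the resulting DataStreamSink; returning null would trigger the
        // TableException thrown in translateToPlanInternal above.
        return dataStream.print();
    }

    @Override
    public TableSchema getTableSchema() {
        return new TableSchema(fieldNames, fieldTypes);
    }

    @Override
    public TypeInformation<Row> getOutputType() {
        return new RowTypeInfo(fieldTypes, fieldNames);
    }

    @Override
    public TableSink<Row> configure(String[] fieldNames, TypeInformation<?>[] fieldTypes) {
        PrintingAppendSink configured = new PrintingAppendSink();
        configured.fieldNames = fieldNames;
        configured.fieldTypes = fieldTypes;
        return configured;
    }
}

With such a sink, translateToPlanInternal takes the AppendStreamTableSink branch: it first verifies that the input produces only insert changes and then translates without a change flag.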
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class DataStreamBatchExecutionITCase, method batchBroadcastExecution.
/**
* Verifies that all broadcast input is processed before regular input.
*/
@Test
public void batchBroadcastExecution() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> bcInput = env.fromElements(Tuple2.of("bc1", 1), Tuple2.of("bc2", 2), Tuple2.of("bc3", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput = env.fromElements(Tuple2.of("regular1", 1), Tuple2.of("regular1", 2), Tuple2.of("regular1", 3), Tuple2.of("regular1", 4), Tuple2.of("regular1", 3), Tuple2.of("regular1", 5), Tuple2.of("regular1", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    BroadcastStream<Tuple2<String, Integer>> broadcastStream = bcInput.broadcast(STATE_DESCRIPTOR);
    DataStream<String> result = regularInput.connect(broadcastStream).process(new TestBroadcastFunction());
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        // regular, that is non-keyed, input is not sorted by timestamp. For keyed inputs
        // this is a by-product of the grouping/sorting we use to get the keyed groups.
        assertThat(results, equalTo(Arrays.asList("(regular1,1): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,2): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,4): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,5): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]")));
    }
}
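STATE_DESCRIPTOR and TestBroadcastFunction are helpers defined elsewhere in DataStreamBatchExecutionITCase and are not part of this excerpt. A plausible sketch of what they could look like, assuming the descriptor is a MapStateDescriptor<String, String> and the function echoes each regular element together with the broadcast state contents (as the expected output strings suggest):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.util.Collector;

// Hypothetical reconstruction of the test helpers; the real ones may differ in detail.
public class BroadcastTestHelpers {

    static final MapStateDescriptor<String, String> STATE_DESCRIPTOR =
            new MapStateDescriptor<>("bc-state", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

    static class TestBroadcastFunction
            extends BroadcastProcessFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String> {

        @Override
        public void processElement(Tuple2<String, Integer> value, ReadOnlyContext ctx, Collector<String> out)
                throws Exception {
            // In BATCH execution all broadcast elements are processed first, so every
            // regular element already sees bc1..bc3 in the broadcast state.
            List<String> entries = new ArrayList<>();
            for (Map.Entry<String, String> entry : ctx.getBroadcastState(STATE_DESCRIPTOR).immutableEntries()) {
                entries.add(entry.getKey() + "=" + entry.getValue());
            }
            out.collect(value + ": " + entries);
        }

        @Override
        public void processBroadcastElement(Tuple2<String, Integer> value, Context ctx, Collector<String> out)
                throws Exception {
            ctx.getBroadcastState(STATE_DESCRIPTOR).put(value.f0, value.f0);
        }
    }
}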
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class DataStreamBatchExecutionITCase, method batchNonKeyedKeyedTwoInputOperator.
/**
* Verifies that all regular input is processed before keyed input.
*
* <p>Here, the first input is not keyed while the second input is keyed.
*/
@Test
public void batchNonKeyedKeyedTwoInputOperator() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> keyedInput = env.fromElements(Tuple2.of("regular2", 4), Tuple2.of("regular1", 3), Tuple2.of("regular1", 2), Tuple2.of("regular2", 1)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput = env.fromElements(Tuple2.of("regular4", 4), Tuple2.of("regular3", 3), Tuple2.of("regular3", 2), Tuple2.of("regular4", 1)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<String> result = regularInput.connect(keyedInput.keyBy(in -> in.f0)).transform("operator", BasicTypeInfo.STRING_TYPE_INFO, new TwoInputIdentityOperator());
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList("(regular4,4)", "(regular3,3)", "(regular3,2)", "(regular4,1)", "(regular1,2)", "(regular1,3)", "(regular2,1)", "(regular2,4)")));
    }
}
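TwoInputIdentityOperator is likewise defined in the test class and not shown here. A minimal sketch with the same observable behavior, forwarding every element from either input as its String representation, might look like this; the actual implementation in Flink's tests may differ.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

// Hypothetical sketch: emits every element from both inputs as a String.
public class TwoInputIdentityOperator extends AbstractStreamOperator<String>
        implements TwoInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, String> {

    @Override
    public void processElement1(StreamRecord<Tuple2<String, Integer>> element) {
        output.collect(new StreamRecord<>(element.getValue().toString()));
    }

    @Override
    public void processElement2(StreamRecord<Tuple2<String, Integer>> element) {
        output.collect(new StreamRecord<>(element.getValue().toString()));
    }
}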
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class DataStreamBatchExecutionITCase, method batchKeyedBroadcastExecution.
/**
* Verifies that all broadcast input is processed before keyed input.
*/
@Test
public void batchKeyedBroadcastExecution() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> bcInput = env.fromElements(Tuple2.of("bc1", 1), Tuple2.of("bc2", 2), Tuple2.of("bc3", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput = env.fromElements(Tuple2.of("regular1", 1), Tuple2.of("regular1", 2), Tuple2.of("regular2", 2), Tuple2.of("regular1", 3), Tuple2.of("regular1", 4), Tuple2.of("regular1", 3), Tuple2.of("regular2", 5), Tuple2.of("regular1", 5), Tuple2.of("regular2", 3), Tuple2.of("regular1", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    BroadcastStream<Tuple2<String, Integer>> broadcastStream = bcInput.broadcast(STATE_DESCRIPTOR);
    DataStream<String> result = regularInput.keyBy((input) -> input.f0).connect(broadcastStream).process(new TestKeyedBroadcastFunction());
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList("(regular1,1): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,2): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,4): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,5): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,2): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,5): [bc2=bc2, bc1=bc1, bc3=bc3]")));
    }
}
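TestKeyedBroadcastFunction is the keyed counterpart of the broadcast function sketched earlier and is also not shown in this excerpt. Assuming the same broadcast-state handling, a hypothetical keyed variant could look as follows; note the extra key type parameter and the keyed ReadOnlyContext.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.util.Collector;

// Hypothetical sketch; STATE_DESCRIPTOR refers to the MapStateDescriptor assumed in the earlier sketch.
public class TestKeyedBroadcastFunction
        extends KeyedBroadcastProcessFunction<String, Tuple2<String, Integer>, Tuple2<String, Integer>, String> {

    @Override
    public void processElement(Tuple2<String, Integer> value, ReadOnlyContext ctx, Collector<String> out)
            throws Exception {
        List<String> entries = new ArrayList<>();
        for (Map.Entry<String, String> entry : ctx.getBroadcastState(BroadcastTestHelpers.STATE_DESCRIPTOR).immutableEntries()) {
            entries.add(entry.getKey() + "=" + entry.getValue());
        }
        // ctx.getCurrentKey() would return value.f0 here, e.g. "regular1" or "regular2".
        out.collect(value + ": " + entries);
    }

    @Override
    public void processBroadcastElement(Tuple2<String, Integer> value, Context ctx, Collector<String> out)
            throws Exception {
        ctx.getBroadcastState(BroadcastTestHelpers.STATE_DESCRIPTOR).put(value.f0, value.f0);
    }
}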
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class ReinterpretDataStreamAsKeyedStreamITCase, method testReinterpretAsKeyedStream.
/**
* This test checks that reinterpreting a data stream as a keyed stream works as expected. This
* test consists of two jobs. The first job materializes a keyBy into files, one file per
* partition. The second job opens the files created by the first job as sources (doing the
* correct assignment of files to partitions) and reinterprets the sources as keyed, because we
* know they have been partitioned in a keyBy from the first job.
*/
@Test
public void testReinterpretAsKeyedStream() throws Exception {
    final int maxParallelism = 8;
    final int numEventsPerInstance = 100;
    final int parallelism = 3;
    final int numTotalEvents = numEventsPerInstance * parallelism;
    final int numUniqueKeys = 100;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setMaxParallelism(maxParallelism);
    env.setParallelism(parallelism);
    env.enableCheckpointing(100);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));
    final List<File> partitionFiles = new ArrayList<>(parallelism);
    for (int i = 0; i < parallelism; ++i) {
        File partitionFile = temporaryFolder.newFile();
        partitionFiles.add(i, partitionFile);
    }
    env.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys)).keyBy(0).addSink(new ToPartitionFileSink(partitionFiles));
    env.execute();
    DataStream<Tuple2<Integer, Integer>> source = env.addSource(new FromPartitionFileSource(partitionFiles)).assignTimestampsAndWatermarks(IngestionTimeWatermarkStrategy.create());
    DataStreamUtils.reinterpretAsKeyedStream(source, (KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0, TypeInformation.of(Integer.class))
            // test that also timers and aggregated state work as expected
            .window(TumblingEventTimeWindows.of(Time.seconds(1)))
            .reduce((ReduceFunction<Tuple2<Integer, Integer>>) (value1, value2) -> new Tuple2<>(value1.f0, value1.f1 + value2.f1))
            .addSink(new ValidatingSink(numTotalEvents))
            .setParallelism(1);
    env.execute();
}
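IngestionTimeWatermarkStrategy is a small helper defined in the same test file and not included in this excerpt. Conceptually it assigns the current system time as each record's timestamp, which makes monotonously increasing watermarks safe. A rough, hypothetical equivalent built only from the public WatermarkStrategy API (the helper class name below is illustrative):

import org.apache.flink.api.common.eventtime.WatermarkStrategy;

// Hypothetical sketch: every record gets its ingestion (system) time as timestamp,
// so forMonotonousTimestamps() yields correct watermarks.
public final class IngestionTimeWatermarks {

    public static <T> WatermarkStrategy<T> create() {
        return WatermarkStrategy.<T>forMonotonousTimestamps()
                .withTimestampAssigner((event, previousTimestamp) -> System.currentTimeMillis());
    }

    private IngestionTimeWatermarks() {}
}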