Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
From the class CompressionFactoryITCase, method testWriteCompressedFile.
@Test
public void testWriteCompressedFile() throws Exception {
    final File folder = TEMPORARY_FOLDER.newFolder();
    final Path testPath = Path.fromLocalFile(folder);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    // checkpointing is required: StreamingFileSink commits part files on checkpoint completion
    env.enableCheckpointing(100);
    DataStream<String> stream =
            env.addSource(new FiniteTestSource<>(testData), TypeInformation.of(String.class));
    stream.map(str -> str)
            .addSink(
                    StreamingFileSink.forBulkFormat(
                                    testPath,
                                    CompressWriters.forExtractor(new DefaultExtractor<String>())
                                            .withHadoopCompression(TEST_CODEC_NAME))
                            .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                            .build());
    env.execute();
    validateResults(folder, testData,
            new CompressionCodecFactory(configuration).getCodecByName(TEST_CODEC_NAME));
}
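The snippet relies on fixtures declared elsewhere in CompressionFactoryITCase. A minimal sketch of what they could look like; the concrete codec name and data values below are illustrative assumptions, not the actual test's definitions:

@ClassRule
public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();
// hypothetical values: any Hadoop codec name and string data set would do
private static final String TEST_CODEC_NAME = "Bzip2";
private static final Configuration configuration = new Configuration(); // org.apache.hadoop.conf.Configuration
private static final List<String> testData = Arrays.asList("line1", "line2", "line3");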
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
From the class StreamingJobGraphGeneratorTest, method createStreamGraphForSlotSharingTest.
/**
* Create a StreamGraph as below.
*
* <p>source1 --(rebalance & pipelined)--> Map1
*
* <p>source2 --(rebalance & blocking)--> Map2
*/
private StreamGraph createStreamGraphForSlotSharingTest(Configuration config) {
    final StreamExecutionEnvironment env =
            StreamExecutionEnvironment.getExecutionEnvironment(config);
    env.setBufferTimeout(-1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    // source1 --(rebalance & pipelined)--> map1
    final DataStream<Integer> source1 = env.fromElements(1, 2, 3).name("source1");
    source1.rebalance().map(v -> v).name("map1");
    // source2 --(rebalance & blocking)--> map2
    final DataStream<Integer> source2 = env.fromElements(4, 5, 6).name("source2");
    final DataStream<Integer> partitioned =
            new DataStream<>(
                    env,
                    new PartitionTransformation<>(
                            source2.getTransformation(),
                            new RebalancePartitioner<>(),
                            StreamExchangeMode.BATCH));
    partitioned.map(v -> v).name("map2");
    return env.getStreamGraph();
}
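A hedged usage sketch, not part of the original test class, showing how the returned StreamGraph could be compiled and its slot sharing inspected; JobVertex#getSlotSharingGroup is a standard accessor, but the grouping noted in the comment is only the expected outcome:

StreamGraph streamGraph = createStreamGraphForSlotSharingTest(new Configuration());
JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);
for (JobVertex vertex : jobGraph.getVerticesSortedTopologicallyFromSources()) {
    // with the blocking edge above, map2 is expected to end up in a different
    // slot sharing group than source1/map1
    System.out.println(vertex.getName() + " -> " + vertex.getSlotSharingGroup());
}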
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
From the class StreamingJobGraphGeneratorTest, method createJobGraphWithDescription.
private JobGraph createJobGraphWithDescription(
        StreamExecutionEnvironment env, String... inputNames) {
    env.setParallelism(1);
    DataStream<Long> source;
    if (inputNames.length == 1) {
        source = env.fromElements(1L, 2L, 3L).setDescription(inputNames[0]);
    } else {
        // several named sources are chained into a single multiple-input operator
        MultipleInputTransformation<Long> transform =
                new MultipleInputTransformation<>(
                        "mit", new UnusedOperatorFactory(), Types.LONG, env.getParallelism());
        transform.setDescription("operator chained with source");
        transform.setChainingStrategy(ChainingStrategy.HEAD_WITH_SOURCES);
        Arrays.stream(inputNames)
                .map(name ->
                        env.fromSource(
                                        new NumberSequenceSource(1, 2),
                                        WatermarkStrategy.noWatermarks(),
                                        name)
                                .setDescription(name)
                                .getTransformation())
                .forEach(transform::addInput);
        source = new DataStream<>(env, transform);
    }
    DataStream<Long> map1 = source.map(x -> x + 1).setDescription("x + 1");
    DataStream<Long> map2 = source.map(x -> x + 2).setDescription("x + 2");
    map1.print().setDescription("first print of map1");
    map1.print().setDescription("second print of map1");
    map2.print().setDescription("first print of map2");
    map2.print().setDescription("second print of map2");
    return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
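A usage sketch under the assumption that JobVertex#getOperatorPrettyName (available in recent Flink versions) is the way to read back the descriptions set above; "my input" is a hypothetical input name:

JobGraph jobGraph =
        createJobGraphWithDescription(
                StreamExecutionEnvironment.getExecutionEnvironment(), "my input");
for (JobVertex vertex : jobGraph.getVertices()) {
    // prints the chained operator descriptions, e.g. the "x + 1" / "x + 2" maps
    System.out.println(vertex.getOperatorPrettyName());
}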
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
From the class StreamingJobGraphGeneratorTest, method testCompatibleExchangeModeWithBufferTimeout.
private void testCompatibleExchangeModeWithBufferTimeout(StreamExchangeMode exchangeMode) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setBufferTimeout(100);
    DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);
    PartitionTransformation<Integer> transformation =
            new PartitionTransformation<>(
                    sourceDataStream.getTransformation(),
                    new RebalancePartitioner<>(),
                    exchangeMode);
    DataStream<Integer> partitionStream = new DataStream<>(env, transformation);
    partitionStream.map(value -> value).print();
    // expected not to throw: the exchange mode is compatible with a positive buffer timeout
    StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
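A hypothetical caller, assuming PIPELINED and UNDEFINED are the exchange modes compatible with a positive buffer timeout (BATCH forces blocking result partitions, for which a buffer timeout makes no sense):

@Test
public void testExchangeModesCompatibleWithBufferTimeout() {
    testCompatibleExchangeModeWithBufferTimeout(StreamExchangeMode.PIPELINED);
    testCompatibleExchangeModeWithBufferTimeout(StreamExchangeMode.UNDEFINED);
}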
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
From the class StreamingJobGraphGeneratorTest, method testExchangeModeBatch.
/**
* Test setting exchange mode to {@link StreamExchangeMode#BATCH}.
*/
@Test
public void testExchangeModeBatch() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    env.setBufferTimeout(-1);
    // fromElements -> Map -> Print
    DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);
    DataStream<Integer> partitionAfterSourceDataStream =
            new DataStream<>(
                    env,
                    new PartitionTransformation<>(
                            sourceDataStream.getTransformation(),
                            new ForwardPartitioner<>(),
                            StreamExchangeMode.BATCH));
    DataStream<Integer> mapDataStream =
            partitionAfterSourceDataStream.map(value -> value).setParallelism(1);
    DataStream<Integer> partitionAfterMapDataStream =
            new DataStream<>(
                    env,
                    new PartitionTransformation<>(
                            mapDataStream.getTransformation(),
                            new RescalePartitioner<>(),
                            StreamExchangeMode.BATCH));
    partitionAfterMapDataStream.print().setParallelism(2);
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
    List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
    // operators cannot be chained across a BATCH exchange, so each stays a separate vertex
    assertEquals(3, verticesSorted.size());
    JobVertex sourceVertex = verticesSorted.get(0);
    JobVertex mapVertex = verticesSorted.get(1);
    // the BATCH exchange mode is translated into a BLOCKING result partition
    assertEquals(
            ResultPartitionType.BLOCKING,
            sourceVertex.getProducedDataSets().get(0).getResultType());
    assertEquals(
            ResultPartitionType.BLOCKING,
            mapVertex.getProducedDataSets().get(0).getResultType());
}
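To make the non-chaining effect visible, a small hedged addition (not in the original test) could list the vertices; getName and getParallelism are standard JobVertex accessors:

for (JobVertex vertex : verticesSorted) {
    // expected: three separate vertices (source, map, print) because every
    // exchange is BLOCKING and thus breaks operator chaining
    System.out.println(vertex.getName() + " -> parallelism " + vertex.getParallelism());
}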