Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class StreamingJobGraphGeneratorTest, method testChainStartEndSetting.
/**
 * Verifies that the chain start/end is correctly set.
 */
@Test
public void testChainStartEndSetting() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // fromElements -> CHAIN(Map -> Print)
    env.fromElements(1, 2, 3).map(new MapFunction<Integer, Integer>() {

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    }).print();

    JobGraph jobGraph = new StreamingJobGraphGenerator(env.getStreamGraph(), 1).createJobGraph();

    List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
    JobVertex sourceVertex = verticesSorted.get(0);
    JobVertex mapPrintVertex = verticesSorted.get(1);

    assertEquals(ResultPartitionType.PIPELINED_BOUNDED, sourceVertex.getProducedDataSets().get(0).getResultType());
    assertEquals(ResultPartitionType.PIPELINED_BOUNDED, mapPrintVertex.getInputs().get(0).getSource().getResultType());

    StreamConfig sourceConfig = new StreamConfig(sourceVertex.getConfiguration());
    StreamConfig mapConfig = new StreamConfig(mapPrintVertex.getConfiguration());
    Map<Integer, StreamConfig> chainedConfigs = mapConfig.getTransitiveChainedTaskConfigs(getClass().getClassLoader());
    StreamConfig printConfig = chainedConfigs.values().iterator().next();

    assertTrue(sourceConfig.isChainStart());
    assertTrue(sourceConfig.isChainEnd());

    assertTrue(mapConfig.isChainStart());
    assertFalse(mapConfig.isChainEnd());

    assertFalse(printConfig.isChainStart());
    assertTrue(printConfig.isChainEnd());
}
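The identity mapper in this test is written as an anonymous inner class; it could equally be a standalone, reusable class. A minimal sketch (the class name IdentityMapper is illustrative and not part of the original test):

public static class IdentityMapper implements MapFunction<Integer, Integer> {

    private static final long serialVersionUID = 1L;

    @Override
    public Integer map(Integer value) throws Exception {
        // pass each element through unchanged
        return value;
    }
}

// usage equivalent to the anonymous class in the test
env.fromElements(1, 2, 3).map(new IdentityMapper()).print();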
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class ReplicatingDataSourceITCase, method testReplicatedSourceToCross.
@Test
public void testReplicatedSourceToCross() throws Exception {
    /*
     * Test replicated source going into cross
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> source1 = env.createInput(new ReplicatingInputFormat<Long, GenericInputSplit>(new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L))), BasicTypeInfo.LONG_TYPE_INFO).map(new ToTuple());
    DataSet<Tuple1<Long>> source2 = env.generateSequence(0L, 1000L).map(new ToTuple());

    DataSet<Tuple1<Long>> pairs = source1.cross(source2).filter(new FilterFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>>() {

        @Override
        public boolean filter(Tuple2<Tuple1<Long>, Tuple1<Long>> value) throws Exception {
            return value.f0.f0.equals(value.f1.f0);
        }
    }).map(new MapFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>, Tuple1<Long>>() {

        @Override
        public Tuple1<Long> map(Tuple2<Tuple1<Long>, Tuple1<Long>> value) throws Exception {
            return value.f0;
        }
    }).sum(0);

    List<Tuple1<Long>> result = pairs.collect();

    String expectedResult = "(500500)";

    compareResultAsText(result, expectedResult);
}
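The ToTuple mapper used for both sources is referenced but not shown in this snippet. A minimal sketch of what such a mapper could look like, assuming it simply wraps each Long into a Tuple1 (an assumption based on the declared types, not the original class):

public static class ToTuple implements MapFunction<Long, Tuple1<Long>> {

    private static final long serialVersionUID = 1L;

    @Override
    public Tuple1<Long> map(Long value) throws Exception {
        // wrap the raw Long so it can flow through tuple-based operators such as cross and sum
        return new Tuple1<>(value);
    }
}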
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class StreamOperatorChainingTest, method testMultiChaining.
/**
 * Verify that multi-chaining works.
 */
private void testMultiChaining(StreamExecutionEnvironment env) throws Exception {
    // the actual elements will not be used
    DataStream<Integer> input = env.fromElements(1, 2, 3);

    sink1Results = new ArrayList<>();
    sink2Results = new ArrayList<>();

    input = input.map(new MapFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    });

    input.map(new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "First: " + value;
        }
    }).addSink(new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink1Results.add(value);
        }
    });

    input.map(new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "Second: " + value;
        }
    }).addSink(new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink2Results.add(value);
        }
    });

    // we build our own StreamTask and OperatorChain
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    Assert.assertTrue(jobGraph.getVerticesSortedTopologicallyFromSources().size() == 2);

    JobVertex chainedVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);

    Configuration configuration = chainedVertex.getConfiguration();
    StreamConfig streamConfig = new StreamConfig(configuration);

    StreamMap<Integer, Integer> headOperator = streamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader());

    StreamTask<Integer, StreamMap<Integer, Integer>> mockTask = createMockTask(streamConfig, chainedVertex.getName());
    OperatorChain<Integer, StreamMap<Integer, Integer>> operatorChain = new OperatorChain<>(mockTask);

    headOperator.setup(mockTask, streamConfig, operatorChain.getChainEntryPoint());

    for (StreamOperator<?> operator : operatorChain.getAllOperators()) {
        if (operator != null) {
            operator.open();
        }
    }

    headOperator.processElement(new StreamRecord<>(1));
    headOperator.processElement(new StreamRecord<>(2));
    headOperator.processElement(new StreamRecord<>(3));

    assertThat(sink1Results, contains("First: 1", "First: 2", "First: 3"));
    assertThat(sink2Results, contains("Second: 1", "Second: 2", "Second: 3"));
}
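Because MapFunction and SinkFunction are single-method interfaces, the anonymous classes above could also be written as lambdas on a Java 8 capable Flink setup. A sketch under that assumption (for generic result types a .returns(...) type hint may additionally be required):

// identity map as a lambda
DataStream<Integer> mapped = input.map(value -> value);

// first branch: prefix with "First: " and collect into sink1Results
mapped.map(value -> "First: " + value).addSink(value -> sink1Results.add(value));

// second branch: prefix with "Second: " and collect into sink2Results
mapped.map(value -> "Second: " + value).addSink(value -> sink2Results.add(value));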
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class SavepointITCase, method testSavepointForJobWithIteration.
@Test
public void testSavepointForJobWithIteration() throws Exception {
    for (int i = 0; i < ITER_TEST_PARALLELISM; ++i) {
        ITER_TEST_SNAPSHOT_WAIT[i] = new OneShotLatch();
        ITER_TEST_RESTORE_WAIT[i] = new OneShotLatch();
        ITER_TEST_CHECKPOINT_VERIFY[i] = 0;
    }

    TemporaryFolder folder = new TemporaryFolder();
    folder.create();

    // Temporary directory for the file state backend
    final File tmpDir = folder.newFolder();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final IntegerStreamSource source = new IntegerStreamSource();

    IterativeStream<Integer> iteration = env.addSource(source).flatMap(new RichFlatMapFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Integer in, Collector<Integer> clctr) throws Exception {
            clctr.collect(in);
        }
    }).setParallelism(ITER_TEST_PARALLELISM).keyBy(new KeySelector<Integer, Object>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Object getKey(Integer value) throws Exception {
            return value;
        }
    }).flatMap(new DuplicateFilter()).setParallelism(ITER_TEST_PARALLELISM).iterate();

    DataStream<Integer> iterationBody = iteration.map(new MapFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    }).setParallelism(ITER_TEST_PARALLELISM);

    iteration.closeWith(iterationBody);

    StreamGraph streamGraph = env.getStreamGraph();
    streamGraph.setJobName("Test");

    JobGraph jobGraph = streamGraph.getJobGraph();

    Configuration config = new Configuration();
    config.addAll(jobGraph.getJobConfiguration());
    config.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1L);
    config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 2 * jobGraph.getMaximumParallelism());

    final File checkpointDir = new File(tmpDir, "checkpoints");
    final File savepointDir = new File(tmpDir, "savepoints");

    if (!checkpointDir.mkdir() || !savepointDir.mkdirs()) {
        fail("Test setup failed: failed to create temporary directories.");
    }

    config.setString(CoreOptions.STATE_BACKEND, "filesystem");
    config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointDir.toURI().toString());
    config.setString(FsStateBackendFactory.MEMORY_THRESHOLD_CONF_KEY, "0");
    config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());

    TestingCluster cluster = new TestingCluster(config, false);

    String savepointPath = null;
    try {
        cluster.start();

        // submit the job and wait until each parallel instance has taken a snapshot
        cluster.submitJobDetached(jobGraph);

        for (OneShotLatch latch : ITER_TEST_SNAPSHOT_WAIT) {
            latch.await();
        }

        savepointPath = cluster.triggerSavepoint(jobGraph.getJobID());

        source.cancel();

        // resubmit the job, restoring from the savepoint taken above
        jobGraph = streamGraph.getJobGraph();
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

        cluster.submitJobDetached(jobGraph);

        for (OneShotLatch latch : ITER_TEST_RESTORE_WAIT) {
            latch.await();
        }

        source.cancel();
    } finally {
        if (null != savepointPath) {
            cluster.disposeSavepoint(savepointPath);
        }
        cluster.stop();
        cluster.awaitTermination();
    }
}
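The IntegerStreamSource and DuplicateFilter helpers used above are not shown in this snippet. Since DuplicateFilter runs after keyBy, one plausible reading is a stateful flatMap that emits each key only once; a minimal sketch under the assumption that it uses keyed ValueState (the implementation details are assumed, not taken from the original test):

public static class DuplicateFilter extends RichFlatMapFunction<Integer, Integer> {

    private static final long serialVersionUID = 1L;

    private transient ValueState<Boolean> seen;

    @Override
    public void open(Configuration parameters) {
        // keyed state: one Boolean flag per key, recording whether the key was already emitted
        seen = getRuntimeContext().getState(new ValueStateDescriptor<>("seen", Boolean.class));
    }

    @Override
    public void flatMap(Integer value, Collector<Integer> out) throws Exception {
        if (seen.value() == null) {
            seen.update(true);
            out.collect(value);
        }
    }
}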
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class UserCodeType, method main.
public static void main(String[] args) throws Exception {
    String jarFile = args[0];
    String host = args[1];
    int port = Integer.parseInt(args[2]);

    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(host, port, jarFile);
    env.getConfig().disableSysoutLogging();

    DataSet<Integer> input = env.fromElements(1, 2, 3, 4, 5);

    DataSet<CustomType> customTypes = input.map(new MapFunction<Integer, CustomType>() {

        private static final long serialVersionUID = -5878758010124912128L;

        @Override
        public CustomType map(Integer integer) throws Exception {
            return new CustomType(integer);
        }
    }).rebalance();

    DataSet<Integer> result = customTypes.map(new MapFunction<CustomType, Integer>() {

        private static final long serialVersionUID = -7950126399899584991L;

        @Override
        public Integer map(CustomType value) throws Exception {
            return value.value;
        }
    });

    result.output(new DiscardingOutputFormat<Integer>());

    env.execute();
}
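The CustomType class used above is a user-defined type that is not included in this snippet. A minimal sketch of a POJO that would satisfy this pipeline, assuming it only carries the wrapped int value (the actual class in the example may differ):

public static class CustomType {

    // public field so Flink can treat the class as a POJO
    public int value;

    // Flink's POJO handling requires a public no-argument constructor
    public CustomType() {
    }

    public CustomType(int value) {
        this.value = value;
    }
}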