Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class SelfConnectionITCase, method differentDataStreamDifferentChain.
/**
* Connects two different data streams in different chains to a CoMap.
* (Despite the class name, this is not actually a self-connect.)
*/
@Test
public void differentDataStreamDifferentChain() {
TestListResultSink<String> resultSink = new TestListResultSink<String>();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(3);
DataStream<Integer> src = env.fromElements(1, 3, 5).disableChaining();
DataStream<String> stringMap = src.flatMap(new FlatMapFunction<Integer, String>() {
private static final long serialVersionUID = 1L;
@Override
public void flatMap(Integer value, Collector<String> out) throws Exception {
out.collect("x " + value);
}
}).keyBy(new KeySelector<String, Integer>() {
private static final long serialVersionUID = 1L;
@Override
public Integer getKey(String value) throws Exception {
return value.length();
}
});
DataStream<Long> longMap = src.map(new MapFunction<Integer, Long>() {
private static final long serialVersionUID = 1L;
@Override
public Long map(Integer value) throws Exception {
return (long) (value + 1);
}
}).keyBy(new KeySelector<Long, Integer>() {
private static final long serialVersionUID = 1L;
@Override
public Integer getKey(Long value) throws Exception {
return value.intValue();
}
});
stringMap.connect(longMap).map(new CoMapFunction<String, Long, String>() {
private static final long serialVersionUID = 1L;
@Override
public String map1(String value) {
return value;
}
@Override
public String map2(Long value) {
return value.toString();
}
}).addSink(resultSink);
try {
env.execute();
} catch (Exception e) {
// fail the test instead of only printing the stack trace, which would
// let a failed execution pass silently
fail(e.getMessage());
}
List<String> expected = Arrays.asList("x 1", "x 3", "x 5", "2", "4", "6");
List<String> result = resultSink.getResult();
Collections.sort(expected);
Collections.sort(result);
assertEquals(expected, result);
}
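Distilled from the test above, a minimal self-contained sketch of the connect/CoMapFunction pattern (the class name, element values, and output labels are illustrative, not taken from the Flink sources): connect() pairs two streams of different element types, and the CoMapFunction handles each input side with its own method while emitting a common output type.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;

public class ConnectSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<String> words = env.fromElements("a", "bb", "ccc");
        DataStream<Long> numbers = env.fromElements(1L, 2L, 3L);
        // map1 handles elements of the first stream, map2 those of the second;
        // both produce the unified output type of the connected stream
        words.connect(numbers).map(new CoMapFunction<String, Long, String>() {
            @Override
            public String map1(String value) {
                return "str: " + value;
            }

            @Override
            public String map2(Long value) {
                return "num: " + value;
            }
        }).print();
        env.execute("connect sketch");
    }
}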
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class DataStreamTest, method operatorTest.
@Test
public void operatorTest() {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<Long> src = env.generateSequence(0, 0);
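// note: this test never calls env.execute(); the functions below may safely
// return null because only the stream graph and the registered functions are inspected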
MapFunction<Long, Integer> mapFunction = new MapFunction<Long, Integer>() {
@Override
public Integer map(Long value) throws Exception {
return null;
}
};
DataStream<Integer> map = src.map(mapFunction);
map.addSink(new DiscardingSink<Integer>());
assertEquals(mapFunction, getFunctionForDataStream(map));
FlatMapFunction<Long, Integer> flatMapFunction = new FlatMapFunction<Long, Integer>() {
private static final long serialVersionUID = 1L;
@Override
public void flatMap(Long value, Collector<Integer> out) throws Exception {
}
};
DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
flatMap.addSink(new DiscardingSink<Integer>());
assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));
FilterFunction<Integer> filterFunction = new FilterFunction<Integer>() {
@Override
public boolean filter(Integer value) throws Exception {
return false;
}
};
DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);
unionFilter.addSink(new DiscardingSink<Integer>());
assertEquals(filterFunction, getFunctionForDataStream(unionFilter));
try {
env.getStreamGraph().getStreamEdges(map.getId(), unionFilter.getId());
} catch (RuntimeException e) {
fail(e.getMessage());
}
try {
env.getStreamGraph().getStreamEdges(flatMap.getId(), unionFilter.getId());
} catch (RuntimeException e) {
fail(e.getMessage());
}
OutputSelector<Integer> outputSelector = new OutputSelector<Integer>() {
@Override
public Iterable<String> select(Integer value) {
return null;
}
};
SplitStream<Integer> split = unionFilter.split(outputSelector);
split.select("dummy").addSink(new DiscardingSink<Integer>());
List<OutputSelector<?>> outputSelectors = env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
assertEquals(1, outputSelectors.size());
assertEquals(outputSelector, outputSelectors.get(0));
DataStream<Integer> select = split.select("a");
DataStreamSink<Integer> sink = select.print();
StreamEdge splitEdge = env.getStreamGraph().getStreamEdges(unionFilter.getId(), sink.getTransformation().getId()).get(0);
assertEquals("a", splitEdge.getSelectedNames().get(0));
ConnectedStreams<Integer, Integer> connect = map.connect(flatMap);
CoMapFunction<Integer, Integer, String> coMapper = new CoMapFunction<Integer, Integer, String>() {
private static final long serialVersionUID = 1L;
@Override
public String map1(Integer value) {
return null;
}
@Override
public String map2(Integer value) {
return null;
}
};
DataStream<String> coMap = connect.map(coMapper);
coMap.addSink(new DiscardingSink<String>());
assertEquals(coMapper, getFunctionForDataStream(coMap));
try {
env.getStreamGraph().getStreamEdges(map.getId(), coMap.getId());
} catch (RuntimeException e) {
fail(e.getMessage());
}
try {
env.getStreamGraph().getStreamEdges(flatMap.getId(), coMap.getId());
} catch (RuntimeException e) {
fail(e.getMessage());
}
}
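getFunctionForDataStream is a private helper of DataStreamTest that is not part of the snippet. A sketch of what such a helper can look like, assuming the stream's operator is an AbstractUdfStreamOperator wrapping the user function (the helper in the actual Flink sources may differ):

// Hedged sketch, not the verbatim Flink helper: look up the stream node for the
// DataStream's id in the stream graph and unwrap the user function from its operator.
// (Method lives inside the test class; needs org.apache.flink.api.common.functions.Function
// and org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator imports.)
private static Function getFunctionForDataStream(DataStream<?> dataStream) {
    AbstractUdfStreamOperator<?, ?> operator =
            (AbstractUdfStreamOperator<?, ?>) dataStream.getExecutionEnvironment()
                    .getStreamGraph()
                    .getStreamNode(dataStream.getId())
                    .getOperator();
    return operator.getUserFunction();
}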
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class StreamOperatorChainingTest, method testMultiChainingWithSplit.
/**
* Verify that multi-chaining works with a split. The execution environment
* is supplied by the caller, e.g. with or without object reuse enabled.
*/
private void testMultiChainingWithSplit(StreamExecutionEnvironment env) throws Exception {
// the elements from this source are never used; the chain is driven manually via processElement() below
DataStream<Integer> input = env.fromElements(1, 2, 3);
sink1Results = new ArrayList<>();
sink2Results = new ArrayList<>();
sink3Results = new ArrayList<>();
input = input.map(new MapFunction<Integer, Integer>() {
private static final long serialVersionUID = 1L;
@Override
public Integer map(Integer value) throws Exception {
return value;
}
});
SplitStream<Integer> split = input.split(new OutputSelector<Integer>() {
private static final long serialVersionUID = 1L;
@Override
public Iterable<String> select(Integer value) {
if (value.equals(1)) {
return Collections.singletonList("one");
} else {
return Collections.singletonList("other");
}
}
});
split.select("one").map(new MapFunction<Integer, String>() {
private static final long serialVersionUID = 1L;
@Override
public String map(Integer value) throws Exception {
return "First 1: " + value;
}
}).addSink(new SinkFunction<String>() {
private static final long serialVersionUID = 1L;
@Override
public void invoke(String value) throws Exception {
sink1Results.add(value);
}
});
split.select("one").map(new MapFunction<Integer, String>() {
private static final long serialVersionUID = 1L;
@Override
public String map(Integer value) throws Exception {
return "First 2: " + value;
}
}).addSink(new SinkFunction<String>() {
private static final long serialVersionUID = 1L;
@Override
public void invoke(String value) throws Exception {
sink2Results.add(value);
}
});
split.select("other").map(new MapFunction<Integer, String>() {
private static final long serialVersionUID = 1L;
@Override
public String map(Integer value) throws Exception {
return "Second: " + value;
}
}).addSink(new SinkFunction<String>() {
private static final long serialVersionUID = 1L;
@Override
public void invoke(String value) throws Exception {
sink3Results.add(value);
}
});
// here we build our own StreamTask and OperatorChain instead of executing the job
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
Assert.assertEquals(2, jobGraph.getVerticesSortedTopologicallyFromSources().size());
JobVertex chainedVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
Configuration configuration = chainedVertex.getConfiguration();
StreamConfig streamConfig = new StreamConfig(configuration);
StreamMap<Integer, Integer> headOperator = streamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader());
StreamTask<Integer, StreamMap<Integer, Integer>> mockTask = createMockTask(streamConfig, chainedVertex.getName());
OperatorChain<Integer, StreamMap<Integer, Integer>> operatorChain = new OperatorChain<>(mockTask);
headOperator.setup(mockTask, streamConfig, operatorChain.getChainEntryPoint());
for (StreamOperator<?> operator : operatorChain.getAllOperators()) {
if (operator != null) {
operator.open();
}
}
headOperator.processElement(new StreamRecord<>(1));
headOperator.processElement(new StreamRecord<>(2));
headOperator.processElement(new StreamRecord<>(3));
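// element 1 is selected into "one" and reaches both sinks on that branch;
// elements 2 and 3 are selected into "other" and reach only the third sink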
assertThat(sink1Results, contains("First 1: 1"));
assertThat(sink2Results, contains("First 2: 1"));
assertThat(sink3Results, contains("Second: 2", "Second: 3"));
}
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class GroupingSetsITCase, method compareSql.
private void compareSql(String query1, String query2) throws Exception {
// Function to map row to string
MapFunction<Row, String> mapFunction = new MapFunction<Row, String>() {
@Override
public String map(Row value) throws Exception {
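// represent a null row as the literal string "null" so both result sets compare consistently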
return value == null ? "null" : value.toString();
}
};
// Execute first query and store results
Table resultTable1 = tableEnv.sql(query1);
DataSet<Row> resultDataSet1 = tableEnv.toDataSet(resultTable1, Row.class);
List<String> results1 = resultDataSet1.map(mapFunction).collect();
// Execute second query and store results
Table resultTable2 = tableEnv.sql(query2);
DataSet<Row> resultDataSet2 = tableEnv.toDataSet(resultTable2, Row.class);
List<String> results2 = resultDataSet2.map(mapFunction).collect();
// Compare results
TestBaseUtils.compareResultCollections(results1, results2, new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
// null-safe ordering; the original nested ternary would throw an NPE
// when o1 is null and o2 is not
if (o1 == null) {
return o2 == null ? 0 : -1;
}
return o2 == null ? 1 : o1.compareTo(o2);
}
});
}
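Hypothetical usage of compareSql (the queries actually exercised by GroupingSetsITCase are not shown here; the table and column names below are placeholders): the helper lends itself to asserting that a GROUPING SETS query and its manual UNION ALL rewrite return the same rows.

// Hypothetical call; "tbl", f0, f1, f2 are placeholder identifiers, and the
// UNION ALL rewrite may need explicit casts on the null columns depending on the planner.
compareSql(
    "SELECT f1, f2, avg(f0) FROM tbl GROUP BY GROUPING SETS (f1, f2)",
    "SELECT f1, null, avg(f0) FROM tbl GROUP BY f1 " +
    "UNION ALL " +
    "SELECT null, f2, avg(f0) FROM tbl GROUP BY f2");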
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class SavepointITCase, method testCanRestoreWithModifiedStatelessOperators.
/**
* FLINK-5985
*
* This test ensures we can restore from a savepoint under modifications to the job graph that only concern
* stateless operators.
*/
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
// Config
int numTaskManagers = 2;
int numSlotsPerTaskManager = 2;
int parallelism = 2;
// Test deadline
final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
final File tmpDir = CommonTestUtils.createTempDirectory();
final File savepointDir = new File(tmpDir, "savepoints");
TestingCluster flink = null;
String savepointPath;
try {
// Flink configuration
final Configuration config = new Configuration();
config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
LOG.info("Flink configuration: " + config + ".");
// Start Flink
flink = new TestingCluster(config);
LOG.info("Starting Flink cluster.");
flink.start(true);
// Retrieve the job manager
LOG.info("Retrieving JobManager.");
ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
LOG.info("JobManager: " + jobManager + ".");
final StatefulCounter statefulCounter = new StatefulCounter();
StatefulCounter.resetForTest(parallelism);
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
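// original topology: source -> stateless x4 map -> stateful counter (with uid) -> stateless x2 map -> sink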
env.addSource(new InfiniteTestSource()).shuffle().map(new MapFunction<Integer, Integer>() {
@Override
public Integer map(Integer value) throws Exception {
return 4 * value;
}
}).shuffle().map(statefulCounter).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {
@Override
public Integer map(Integer value) throws Exception {
return 2 * value;
}
}).addSink(new DiscardingSink<Integer>());
JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();
JobSubmissionResult submissionResult = flink.submitJobDetached(originalJobGraph);
JobID jobID = submissionResult.getJobID();
// wait for the Tasks to be ready
StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
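// the savepoint contents are not inspected; resolving the future only verifies that the savepoint can be fetched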
((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
LOG.info("Retrieved savepoint: " + savepointPath + ".");
// Shut down the Flink cluster (thereby canceling the job)
LOG.info("Shutting down Flink cluster.");
flink.shutdown();
flink.awaitTermination();
} finally {
if (flink != null) {
// guard against an NPE that would mask the original failure if the
// cluster was never created
flink.shutdown();
flink.awaitTermination();
}
}
try {
LOG.info("Restarting Flink cluster.");
flink.start(true);
// Retrieve the job manager
LOG.info("Retrieving JobManager.");
ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
LOG.info("JobManager: " + jobManager + ".");
// Reset static test helpers
StatefulCounter.resetForTest(parallelism);
// Gather all task deployment descriptors
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
// generate a modified job graph in which only the stateless operators change:
// the scaling map before the stateful counter is dropped and the map after it
// becomes an identity map
env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter()).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {
@Override
public Integer map(Integer value) throws Exception {
return value;
}
}).addSink(new DiscardingSink<Integer>());
JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();
// Set the savepoint path
modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
LOG.info("Resubmitting job " + modifiedJobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
// Submit the job
flink.submitJobDetached(modifiedJobGraph);
// Await state is restored
StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
// Await some progress after restore
StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
} finally {
flink.shutdown();
flink.awaitTermination();
}
}
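The restore across a modified graph works because state in a savepoint is matched to operators by their uid: the stateful counter keeps the stable id "statefulCounter" in both graphs, so the surrounding stateless maps can be dropped or swapped freely. A minimal sketch of the convention, reusing the test's classes (the pipeline shape is illustrative):

// Give every stateful operator an explicit uid so its state can be matched
// when restoring a savepoint, even after the stateless parts of the graph change.
env.addSource(new InfiniteTestSource())
    .map(new StatefulCounter()).uid("statefulCounter") // stable id used for state assignment
    .addSink(new DiscardingSink<Integer>());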