Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class ConnectedComponentsTest, method getConnectedComponentsPlan.
private static Plan getConnectedComponentsPlan(int parallelism, int iterations, boolean solutionSetFirst) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    // Initially, every vertex is its own component: map each id to (id, id).
    DataSet<Tuple2<Long, Long>> verticesWithId = env.generateSequence(0, 1000).name("Vertices")
        .map(new MapFunction<Long, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        }).name("Assign Vertex Ids");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithId.iterateDelta(verticesWithId, iterations, 0).name("Connected Components Iteration");

    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(0L, 0L)).name("Edges");

    // Send each vertex's candidate component id to its neighbors and keep the minimum per vertex.
    DataSet<Tuple2<Long, Long>> minCandidateId = iteration.getWorkset()
        .join(edges).where(0).equalTo(0)
        .projectSecond(1).<Tuple2<Long, Long>>projectFirst(1).name("Join Candidate Id With Neighbor")
        .groupBy(0).min(1).name("Find Minimum Candidate Id");

    DataSet<Tuple2<Long, Long>> updateComponentId;
    if (solutionSetFirst) {
        // Variant with the solution set as the first join input.
        updateComponentId = iteration.getSolutionSet().join(minCandidateId).where(0).equalTo(0)
            .with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public void join(Tuple2<Long, Long> current, Tuple2<Long, Long> candidate, Collector<Tuple2<Long, Long>> out) {
                    // Emit only vertices whose component id improved.
                    if (candidate.f1 < current.f1) {
                        out.collect(candidate);
                    }
                }
            }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0").name("Update Component Id");
    } else {
        // Variant with the solution set as the second join input.
        updateComponentId = minCandidateId.join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public void join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> current, Collector<Tuple2<Long, Long>> out) {
                    if (candidate.f1 < current.f1) {
                        out.collect(candidate);
                    }
                }
            }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0").name("Update Component Id");
    }

    // The updated component ids serve as both the solution-set delta and the next workset.
    iteration.closeWith(updateComponentId, updateComponentId)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("Result");

    return env.createProgramPlan();
}
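A minimal sketch, not part of the original test, of how the returned Plan could be compiled further; the Optimizer and JobGraphGenerator calls mirror the JobGraphGeneratorTest snippet below, and the argument values are arbitrary:

// Compile the plan and translate it into a job graph, using only calls
// that appear in the JobGraphGeneratorTest example on this page.
Plan plan = getConnectedComponentsPlan(4, 100, true);
OptimizedPlan optimizedPlan = new Optimizer(new Configuration()).compile(plan);
JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);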
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class JobGraphGeneratorTest, method testResourcesForChainedOperators.
/**
 * Verifies that the resources are merged correctly for chained operators when
 * generating the job graph.
 */
@Test
public void testResourcesForChainedOperators() throws Exception {
    ResourceSpec resource1 = new ResourceSpec(0.1, 100);
    ResourceSpec resource2 = new ResourceSpec(0.2, 200);
    ResourceSpec resource3 = new ResourceSpec(0.3, 300);
    ResourceSpec resource4 = new ResourceSpec(0.4, 400);
    ResourceSpec resource5 = new ResourceSpec(0.5, 500);
    ResourceSpec resource6 = new ResourceSpec(0.6, 600);
    ResourceSpec resource7 = new ResourceSpec(0.7, 700);

    // setResources is not publicly exposed in this version, so invoke it via reflection.
    Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    opMethod.setAccessible(true);
    Method sinkMethod = DataSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    sinkMethod.setAccessible(true);

    MapFunction<Long, Long> mapFunction = new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return value;
        }
    };

    FilterFunction<Long> filterFunction = new FilterFunction<Long>() {
        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    };

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> input = env.fromElements(1L, 2L, 3L);
    opMethod.invoke(input, resource1);

    DataSet<Long> map1 = input.map(mapFunction);
    opMethod.invoke(map1, resource2);

    // CHAIN(Source -> Map -> Filter)
    DataSet<Long> filter1 = map1.filter(filterFunction);
    opMethod.invoke(filter1, resource3);

    IterativeDataSet<Long> startOfIteration = filter1.iterate(10);
    opMethod.invoke(startOfIteration, resource4);

    DataSet<Long> map2 = startOfIteration.map(mapFunction);
    opMethod.invoke(map2, resource5);

    // CHAIN(Map -> Filter)
    DataSet<Long> feedback = map2.filter(filterFunction);
    opMethod.invoke(feedback, resource6);

    DataSink<Long> sink = startOfIteration.closeWith(feedback).output(new DiscardingOutputFormat<Long>());
    sinkMethod.invoke(sink, resource7);

    Plan plan = env.createProgramPlan();
    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);

    JobVertex sourceMapFilterVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(0);
    JobVertex iterationHeadVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    JobVertex feedbackVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(2);
    JobVertex sinkVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(3);
    JobVertex iterationSyncVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(4);

    // Chained operators get merged resources; the iteration head and the
    // iteration sync both carry the resources set on the start of the iteration.
    assertTrue(sourceMapFilterVertex.getMinResources().equals(resource1.merge(resource2).merge(resource3)));
    assertTrue(iterationHeadVertex.getPreferredResources().equals(resource4));
    assertTrue(feedbackVertex.getMinResources().equals(resource5.merge(resource6)));
    assertTrue(sinkVertex.getPreferredResources().equals(resource7));
    assertTrue(iterationSyncVertex.getMinResources().equals(resource4));
}
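Since the same topologically sorted list is queried five times above, an equivalent variant (purely a readability sketch, using only calls shown in the test) fetches it once:

// Equivalent lookup, fetching the topologically sorted vertex list once.
List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
JobVertex sourceMapFilterVertex = vertices.get(0);
JobVertex iterationHeadVertex = vertices.get(1);
JobVertex feedbackVertex = vertices.get(2);
JobVertex sinkVertex = vertices.get(3);
JobVertex iterationSyncVertex = vertices.get(4);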
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class PartitionOperatorTest, method testRangePartitionOperatorPreservesFields2.
@Test
public void testRangePartitionOperatorPreservesFields2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));

        PartitionOperator<Tuple2<Long, Long>> rangePartitioned = data.partitionByRange(1);
        rangePartitioned.groupBy(1).reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>())
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        data.groupBy(0).aggregate(Aggregations.SUM, 1)
            .map(new MapFunction<Tuple2<Long, Long>, Long>() {
                @Override
                public Long map(Tuple2<Long, Long> value) throws Exception {
                    return value.f1;
                }
            }).output(new DiscardingOutputFormat<Long>());

        rangePartitioned.filter(new FilterFunction<Tuple2<Long, Long>>() {
            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return value.f0 % 2 == 0;
            }
        }).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        // Walk back from the sink: reducer <- partition node <- partition-id remover.
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());

        SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
        List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
        assertEquals(3, sourceOutgoingChannels.size());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(2).getShipStrategy());
        // The range-partitioned path requires a BATCH exchange; the others stay pipelined.
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(1).getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(2).getDataExchangeMode());

        List<Channel> partitionOutputChannels = partitionNode.getOutgoingChannels();
        assertEquals(2, partitionOutputChannels.size());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(1).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(1).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
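The compileNoStats call comes from the test's base class and is not shown in the snippet; a minimal stand-in, assuming the plain Optimizer setup used in the JobGraphGeneratorTest snippet above (the real helper in Flink's CompilerTestBase additionally runs without statistics):

// Minimal stand-in for the compileNoStats helper used above; an assumption
// sketched for illustration, not the original implementation.
private static OptimizedPlan compileNoStats(Plan p) {
    return new Optimizer(new Configuration()).compile(p);
}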
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class FastFailuresITCase, method testThis.
@Test
public void testThis() {
    Configuration config = new Configuration();
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 2);
    config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 2);

    LocalFlinkMiniCluster cluster = new LocalFlinkMiniCluster(config, false);
    cluster.start();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment(
        "localhost", cluster.getLeaderRPCPort());
    env.getConfig().disableSysoutLogging();
    env.setParallelism(4);
    env.enableCheckpointing(1000);
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(210, 0));

    // The source keeps failing in open() until NUM_FAILURES restarts have happened.
    DataStream<Tuple2<Integer, Integer>> input = env.addSource(new RichSourceFunction<Tuple2<Integer, Integer>>() {

        @Override
        public void open(Configuration parameters) {
            if (FAILURES_SO_FAR.incrementAndGet() <= NUM_FAILURES) {
                throw new RuntimeException("fail");
            }
        }

        @Override
        public void run(SourceContext<Tuple2<Integer, Integer>> ctx) {
        }

        @Override
        public void cancel() {
        }
    });

    input.keyBy(0)
        .map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {
            @Override
            public Integer map(Tuple2<Integer, Integer> value) {
                return value.f0;
            }
        })
        .addSink(new SinkFunction<Integer>() {
            @Override
            public void invoke(Integer value) {
            }
        });

    try {
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
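The source function above references two class-level counters that the snippet does not show; plausible declarations, inferred from their usage (the NUM_FAILURES value is an assumption, chosen to stay below the 210 restarts configured above):

// Class-level fields referenced by the failing source; sketched from usage,
// not copied from the original file. Requires java.util.concurrent.atomic.AtomicInteger.
private static final AtomicInteger FAILURES_SO_FAR = new AtomicInteger();
private static final int NUM_FAILURES = 200;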
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class StreamingOperatorsITCase, method testGroupedFoldOperation.
/**
 * Tests the proper functioning of the streaming fold operator. For this purpose, a stream
 * of Tuple2<Integer, Integer> is created. The stream is keyed by the first tuple
 * field, and each group is folded by summing up the second tuple field.
 *
 * This test relies on the hash function used by {@link DataStream#keyBy}, which is
 * assumed to be {@link MathUtils#murmurHash}.
 */
@Test
public void testGroupedFoldOperation() throws Exception {
    int numElements = 10;
    final int numKeys = 2;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Integer, Integer>> sourceStream = env.addSource(new TupleSource(numElements, numKeys));

    // Fold each key group to a running sum, re-attach the key, and split the
    // stream by key so each group can be checked separately.
    SplitStream<Tuple2<Integer, Integer>> splittedResult = sourceStream
        .keyBy(0)
        .fold(0, new FoldFunction<Tuple2<Integer, Integer>, Integer>() {
            private static final long serialVersionUID = 4875723041825726082L;

            @Override
            public Integer fold(Integer accumulator, Tuple2<Integer, Integer> value) throws Exception {
                return accumulator + value.f1;
            }
        })
        .map(new RichMapFunction<Integer, Tuple2<Integer, Integer>>() {
            private static final long serialVersionUID = 8538355101606319744L;

            int key = -1;

            @Override
            public Tuple2<Integer, Integer> map(Integer value) throws Exception {
                if (key == -1) {
                    key = MathUtils.murmurHash(value) % numKeys;
                }
                return new Tuple2<>(key, value);
            }
        })
        .split(new OutputSelector<Tuple2<Integer, Integer>>() {
            private static final long serialVersionUID = -8439325199163362470L;

            @Override
            public Iterable<String> select(Tuple2<Integer, Integer> value) {
                List<String> output = new ArrayList<>();
                output.add(value.f0 + "");
                return output;
            }
        });

    final MemorySinkFunction sinkFunction1 = new MemorySinkFunction(0);
    final List<Integer> actualResult1 = new ArrayList<>();
    MemorySinkFunction.registerCollection(0, actualResult1);

    splittedResult.select("0").map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {
        private static final long serialVersionUID = 2114608668010092995L;

        @Override
        public Integer map(Tuple2<Integer, Integer> value) throws Exception {
            return value.f1;
        }
    }).addSink(sinkFunction1);

    final MemorySinkFunction sinkFunction2 = new MemorySinkFunction(1);
    final List<Integer> actualResult2 = new ArrayList<>();
    MemorySinkFunction.registerCollection(1, actualResult2);

    splittedResult.select("1").map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {
        private static final long serialVersionUID = 5631104389744681308L;

        @Override
        public Integer map(Tuple2<Integer, Integer> value) throws Exception {
            return value.f1;
        }
    }).addSink(sinkFunction2);

    // Compute the expected running sums per key group.
    Collection<Integer> expected1 = new ArrayList<>(10);
    Collection<Integer> expected2 = new ArrayList<>(10);
    int counter1 = 0;
    int counter2 = 0;

    for (int i = 0; i < numElements; i++) {
        if (MathUtils.murmurHash(i) % numKeys == 0) {
            counter1 += i;
            expected1.add(counter1);
        } else {
            counter2 += i;
            expected2.add(counter2);
        }
    }

    env.execute();

    Collections.sort(actualResult1);
    Collections.sort(actualResult2);

    Assert.assertEquals(expected1, actualResult1);
    Assert.assertEquals(expected2, actualResult2);

    MemorySinkFunction.clear();
}
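MemorySinkFunction is a test helper that is not part of the snippet; a minimal sketch inferred from its usage above (the constructor takes a registry key, registerCollection binds a target collection, clear empties the registry):

// Minimal sketch of the MemorySinkFunction helper, inferred from its usage
// above; the real class is defined alongside this test. Requires
// java.util.Map and java.util.concurrent.ConcurrentHashMap.
private static class MemorySinkFunction implements SinkFunction<Integer> {
    private static final long serialVersionUID = 1L;
    private static final Map<Integer, Collection<Integer>> registry = new ConcurrentHashMap<>();
    private final int key;

    public MemorySinkFunction(int key) {
        this.key = key;
    }

    @Override
    public void invoke(Integer value) throws Exception {
        Collection<Integer> collection = registry.get(key);
        // Parallel sink instances share the collection, so guard access.
        synchronized (collection) {
            collection.add(value);
        }
    }

    public static void registerCollection(int key, Collection<Integer> collection) {
        registry.put(key, collection);
    }

    public static void clear() {
        registry.clear();
    }
}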