Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From the class NFAITCase, method testSimplePatternNFA.
@Test
public void testSimplePatternNFA() {
    List<StreamRecord<Event>> inputEvents = new ArrayList<>();

    Event startEvent = new Event(42, "start", 1.0);
    SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    Event endEvent = new Event(43, "end", 1.0);

    inputEvents.add(new StreamRecord<Event>(startEvent, 1));
    inputEvents.add(new StreamRecord<Event>(new Event(43, "foobar", 1.0), 2));
    inputEvents.add(new StreamRecord<Event>(new SubEvent(41, "barfoo", 1.0, 5.0), 3));
    inputEvents.add(new StreamRecord<Event>(middleEvent, 3));
    inputEvents.add(new StreamRecord<Event>(new Event(43, "start", 1.0), 4));
    inputEvents.add(new StreamRecord<Event>(endEvent, 5));

    // Pattern: a "start" event, followed by a SubEvent with volume > 5.0, followed by an "end" event.
    Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(new FilterFunction<Event>() {

        private static final long serialVersionUID = 5726188262756267490L;

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
        }
    }).followedBy("middle").subtype(SubEvent.class).where(new FilterFunction<SubEvent>() {

        private static final long serialVersionUID = 6215754202506583964L;

        @Override
        public boolean filter(SubEvent value) throws Exception {
            return value.getVolume() > 5.0;
        }
    }).followedBy("end").where(new FilterFunction<Event>() {

        private static final long serialVersionUID = 7056763917392056548L;

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
        }
    });

    NFA<Event> nfa = NFACompiler.compile(pattern, Event.createTypeSerializer(), false);

    // Feed each event through the NFA and collect all completed matches.
    List<Map<String, Event>> resultingPatterns = new ArrayList<>();
    for (StreamRecord<Event> inputEvent : inputEvents) {
        Collection<Map<String, Event>> patterns = nfa.process(inputEvent.getValue(), inputEvent.getTimestamp()).f0;
        resultingPatterns.addAll(patterns);
    }

    // Exactly one match is expected: startEvent -> middleEvent -> endEvent.
    assertEquals(1, resultingPatterns.size());

    Map<String, Event> patternMap = resultingPatterns.get(0);
    assertEquals(startEvent, patternMap.get("start"));
    assertEquals(middleEvent, patternMap.get("middle"));
    assertEquals(endEvent, patternMap.get("end"));
}
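Each where() clause above supplies the same FilterFunction contract: a single filter() method that returns true to keep an element and false to drop it. A minimal sketch of one such condition in isolation (the variable name startFilter is ours; the Event type and its getName() accessor come from the test):

FilterFunction<Event> startFilter = new FilterFunction<Event>() {

    private static final long serialVersionUID = 1L;

    @Override
    public boolean filter(Event value) throws Exception {
        // Keep only events named "start"; everything else is dropped.
        return value.getName().equals("start");
    }
};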
Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From the class PipelineBreakingTest, method testSimpleForwardPlan.
/**
* Tests that no pipeline breakers are inserted into a simple forward
* pipeline.
*
* <pre>
* (source) -> (map) -> (filter) -> (groupBy / reduce)
* </pre>
*/
@Test
public void testSimpleForwardPlan() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<String> dataSet = env.readTextFile("/never/accessed");
        dataSet.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) {
                return 0;
            }
        }).filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).groupBy(new IdentityKeyExtractor<Integer>())
          .reduceGroup(new Top1GroupReducer<Integer>())
          .output(new DiscardingOutputFormat<Integer>());

        DataSinkNode sinkNode = convertPlan(env.createProgramPlan()).get(0);

        SingleInputNode reduceNode = (SingleInputNode) sinkNode.getPredecessorNode();
        SingleInputNode keyExtractorNode = (SingleInputNode) reduceNode.getPredecessorNode();
        SingleInputNode filterNode = (SingleInputNode) keyExtractorNode.getPredecessorNode();
        SingleInputNode mapNode = (SingleInputNode) filterNode.getPredecessorNode();

        assertFalse(sinkNode.getInputConnection().isBreakingPipeline());
        assertFalse(reduceNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(keyExtractorNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(filterNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapNode.getIncomingConnection().isBreakingPipeline());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
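The map and filter in this test are deliberately trivial, since the plan is only compiled and inspected, never executed. A minimal runnable sketch of the same map-then-filter shape (an assumption on our part: a local ExecutionEnvironment with fromElements in place of the never-read text file):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.fromElements("a", "bb", "ccc")
   .map(new MapFunction<String, Integer>() {
       @Override
       public Integer map(String value) {
           // Map each string to its length.
           return value.length();
       }
   })
   .filter(new FilterFunction<Integer>() {
       @Override
       public boolean filter(Integer value) {
           // Keep only lengths greater than one.
           return value > 1;
       }
   })
   .output(new DiscardingOutputFormat<Integer>());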
Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From the class PipelineBreakingTest, method testBranchingPlanNotReJoined.
/**
 * Tests that branching plans, where the branches are not re-joined,
 * do not place pipeline breakers.
 *
 * <pre>
 *                      /---> (filter) -> (sink)
 *                     /
 *                    /
 * (source) -> (map) -----------------\
 *                    \               (join) -> (sink)
 *                     \   (source) --/
 *                      \
 *                       \
 *                        \-> (sink)
 * </pre>
 */
@Test
public void testBranchingPlanNotReJoined() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Integer> data = env.readTextFile("/never/accessed").map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) {
                return 0;
            }
        });

        // output 1 goes through a filter
        data.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).output(new DiscardingOutputFormat<Integer>());

        // output 2 goes through a join
        data.join(env.fromElements(1, 2, 3, 4))
            .where(new IdentityKeyExtractor<Integer>())
            .equalTo(new IdentityKeyExtractor<Integer>())
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        // output 3 is direct
        data.output(new DiscardingOutputFormat<Integer>());

        List<DataSinkNode> sinks = convertPlan(env.createProgramPlan());

        // gather the optimizer DAG nodes
        DataSinkNode sinkAfterFilter = sinks.get(0);
        DataSinkNode sinkAfterJoin = sinks.get(1);
        DataSinkNode sinkDirect = sinks.get(2);

        SingleInputNode filterNode = (SingleInputNode) sinkAfterFilter.getPredecessorNode();
        SingleInputNode mapNode = (SingleInputNode) filterNode.getPredecessorNode();

        TwoInputNode joinNode = (TwoInputNode) sinkAfterJoin.getPredecessorNode();
        SingleInputNode joinInput = (SingleInputNode) joinNode.getSecondPredecessorNode();

        // verify the non-pipeline-breaking status
        assertFalse(sinkAfterFilter.getInputConnection().isBreakingPipeline());
        assertFalse(sinkAfterJoin.getInputConnection().isBreakingPipeline());
        assertFalse(sinkDirect.getInputConnection().isBreakingPipeline());

        assertFalse(filterNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getSecondIncomingConnection().isBreakingPipeline());
        assertFalse(joinInput.getIncomingConnection().isBreakingPipeline());

        // some other sanity checks on the plan construction (cannot hurt)
        assertEquals(mapNode, ((SingleInputNode) joinNode.getFirstPredecessorNode()).getPredecessorNode());
        assertEquals(mapNode, sinkDirect.getPredecessorNode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
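The essential shape being tested is one intermediate DataSet fanning out to several sinks without the branches ever re-joining. A minimal sketch of such a fan-out (the variable name shared is ours):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Integer> shared = env.fromElements(1, 2, 3, 4);

// branch 1: filtered sink
shared.filter(new FilterFunction<Integer>() {
    @Override
    public boolean filter(Integer value) {
        return value % 2 == 0;
    }
}).output(new DiscardingOutputFormat<Integer>());

// branch 2: direct sink; since the branches never re-join, no pipeline breaker is needed
shared.output(new DiscardingOutputFormat<Integer>());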
Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From the class JobGraphGeneratorTest, method testResourcesForChainedOperators.
/**
 * Verifies that the resources are merged correctly for chained operators when
 * generating the job graph.
 */
@Test
public void testResourcesForChainedOperators() throws Exception {
    ResourceSpec resource1 = new ResourceSpec(0.1, 100);
    ResourceSpec resource2 = new ResourceSpec(0.2, 200);
    ResourceSpec resource3 = new ResourceSpec(0.3, 300);
    ResourceSpec resource4 = new ResourceSpec(0.4, 400);
    ResourceSpec resource5 = new ResourceSpec(0.5, 500);
    ResourceSpec resource6 = new ResourceSpec(0.6, 600);
    ResourceSpec resource7 = new ResourceSpec(0.7, 700);

    // access the non-public setResources setters via reflection
    Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    opMethod.setAccessible(true);

    Method sinkMethod = DataSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    sinkMethod.setAccessible(true);

    MapFunction<Long, Long> mapFunction = new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return value;
        }
    };

    FilterFunction<Long> filterFunction = new FilterFunction<Long>() {
        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    };

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> input = env.fromElements(1L, 2L, 3L);
    opMethod.invoke(input, resource1);

    DataSet<Long> map1 = input.map(mapFunction);
    opMethod.invoke(map1, resource2);

    // CHAIN(Source -> Map -> Filter)
    DataSet<Long> filter1 = map1.filter(filterFunction);
    opMethod.invoke(filter1, resource3);

    IterativeDataSet<Long> startOfIteration = filter1.iterate(10);
    opMethod.invoke(startOfIteration, resource4);

    DataSet<Long> map2 = startOfIteration.map(mapFunction);
    opMethod.invoke(map2, resource5);

    // CHAIN(Map -> Filter)
    DataSet<Long> feedback = map2.filter(filterFunction);
    opMethod.invoke(feedback, resource6);

    DataSink<Long> sink = startOfIteration.closeWith(feedback).output(new DiscardingOutputFormat<Long>());
    sinkMethod.invoke(sink, resource7);

    Plan plan = env.createProgramPlan();
    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);

    JobVertex sourceMapFilterVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(0);
    JobVertex iterationHeadVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    JobVertex feedbackVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(2);
    JobVertex sinkVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(3);
    JobVertex iterationSyncVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(4);

    assertTrue(sourceMapFilterVertex.getMinResources().equals(resource1.merge(resource2).merge(resource3)));
    assertTrue(iterationHeadVertex.getPreferredResources().equals(resource4));
    assertTrue(feedbackVertex.getMinResources().equals(resource5.merge(resource6)));
    assertTrue(sinkVertex.getPreferredResources().equals(resource7));
    assertTrue(iterationSyncVertex.getMinResources().equals(resource4));
}
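The assertions rely on ResourceSpec.merge combining the resources of all operators chained into one vertex. A minimal sketch of that arithmetic, assuming (as the constructor calls above suggest) the two arguments are CPU cores and heap memory in megabytes, and that merge sums them:

ResourceSpec merged = new ResourceSpec(0.1, 100)
        .merge(new ResourceSpec(0.2, 200))
        .merge(new ResourceSpec(0.3, 300));
// Under that assumption, merged describes 0.6 CPU cores and 600 MB of heap,
// which is what the test asserts for the chained Source -> Map -> Filter vertex.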
Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From the class PartitionOperatorTest, method testRangePartitionOperatorPreservesFields2.
@Test
public void testRangePartitionOperatorPreservesFields2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));

        PartitionOperator<Tuple2<Long, Long>> rangePartitioned = data.partitionByRange(1);

        // sink 1: group-reduce on the range-partitioned data
        rangePartitioned.groupBy(1)
            .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>())
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // sink 2: aggregate-then-map directly on the source data
        data.groupBy(0).aggregate(Aggregations.SUM, 1).map(new MapFunction<Tuple2<Long, Long>, Long>() {
            @Override
            public Long map(Tuple2<Long, Long> value) throws Exception {
                return value.f1;
            }
        }).output(new DiscardingOutputFormat<Long>());

        // sink 3: filter on the range-partitioned data
        rangePartitioned.filter(new FilterFunction<Tuple2<Long, Long>>() {
            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return value.f0 % 2 == 0;
            }
        }).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());

        SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
        List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
        assertEquals(3, sourceOutgoingChannels.size());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(2).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(1).getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(2).getDataExchangeMode());

        List<Channel> partitionOutputChannels = partitionNode.getOutgoingChannels();
        assertEquals(2, partitionOutputChannels.size());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(1).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(1).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
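Stripped of the optimizer assertions, the filter branch above reduces to the following shape (a minimal sketch reusing the test's Tuple2<Long, Long> element type, with fromElements in place of the singleton collection):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
   .partitionByRange(1)
   .filter(new FilterFunction<Tuple2<Long, Long>>() {
       @Override
       public boolean filter(Tuple2<Long, Long> value) {
           // Keep tuples whose first field is even.
           return value.f0 % 2 == 0;
       }
   })
   .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());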