
Example 6 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in the apache/flink project.

From the class NFAITCase, method testSimplePatternNFA.

@Test
public void testSimplePatternNFA() {
    List<StreamRecord<Event>> inputEvents = new ArrayList<>();
    Event startEvent = new Event(42, "start", 1.0);
    SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    Event endEvent = new Event(43, "end", 1.0);
    inputEvents.add(new StreamRecord<Event>(startEvent, 1));
    inputEvents.add(new StreamRecord<Event>(new Event(43, "foobar", 1.0), 2));
    inputEvents.add(new StreamRecord<Event>(new SubEvent(41, "barfoo", 1.0, 5.0), 3));
    inputEvents.add(new StreamRecord<Event>(middleEvent, 3));
    inputEvents.add(new StreamRecord<Event>(new Event(43, "start", 1.0), 4));
    inputEvents.add(new StreamRecord<Event>(endEvent, 5));
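    // Pattern: an Event named "start", followed by a SubEvent with volume > 5.0, followed by an Event named "end"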
    Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(new FilterFunction<Event>() {

        private static final long serialVersionUID = 5726188262756267490L;

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
        }
    }).followedBy("middle").subtype(SubEvent.class).where(new FilterFunction<SubEvent>() {

        private static final long serialVersionUID = 6215754202506583964L;

        @Override
        public boolean filter(SubEvent value) throws Exception {
            return value.getVolume() > 5.0;
        }
    }).followedBy("end").where(new FilterFunction<Event>() {

        private static final long serialVersionUID = 7056763917392056548L;

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
        }
    });
    NFA<Event> nfa = NFACompiler.compile(pattern, Event.createTypeSerializer(), false);
    List<Map<String, Event>> resultingPatterns = new ArrayList<>();
    for (StreamRecord<Event> inputEvent : inputEvents) {
        Collection<Map<String, Event>> patterns = nfa.process(inputEvent.getValue(), inputEvent.getTimestamp()).f0;
        resultingPatterns.addAll(patterns);
    }
    assertEquals(1, resultingPatterns.size());
    Map<String, Event> patternMap = resultingPatterns.get(0);
    assertEquals(startEvent, patternMap.get("start"));
    assertEquals(middleEvent, patternMap.get("middle"));
    assertEquals(endEvent, patternMap.get("end"));
}
Also used: FilterFunction (org.apache.flink.api.common.functions.FilterFunction), StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord), SubEvent (org.apache.flink.cep.SubEvent), ArrayList (java.util.ArrayList), Event (org.apache.flink.cep.Event), HashMap (java.util.HashMap), Map (java.util.Map), Test (org.junit.Test)
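Since FilterFunction has a single abstract method, the anonymous classes used as where() conditions above can also be written as lambdas when a FilterFunction is passed to a DataSet or DataStream filter() call. Below is a minimal, self-contained sketch of both forms; the class name, data, and predicates are illustrative and not taken from the Flink tests.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class FilterFunctionSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> names = env.fromElements("start", "middle", "end");

        // Anonymous-class form, matching the style of the tests above
        DataSet<String> starts = names.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) {
                return value.equals("start");
            }
        });

        // Equivalent lambda form (FilterFunction is a functional interface)
        DataSet<String> ends = names.filter(value -> value.equals("end"));

        // print() triggers execution and writes the matching elements to stdout
        starts.print();
        ends.print();
    }
}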

Example 7 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in the apache/flink project.

From the class PipelineBreakingTest, method testSimpleForwardPlan.

/**
	 * Tests that no pipeline breakers are inserted into a simple forward
	 * pipeline.
	 *
	 * <pre>
	 *     (source) -> (map) -> (filter) -> (groupBy / reduce)
	 * </pre>
	 */
@Test
public void testSimpleForwardPlan() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> dataSet = env.readTextFile("/never/accessed");
        dataSet.map(new MapFunction<String, Integer>() {

            @Override
            public Integer map(String value) {
                return 0;
            }
        }).filter(new FilterFunction<Integer>() {

            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).groupBy(new IdentityKeyExtractor<Integer>()).reduceGroup(new Top1GroupReducer<Integer>()).output(new DiscardingOutputFormat<Integer>());
        DataSinkNode sinkNode = convertPlan(env.createProgramPlan()).get(0);
        SingleInputNode reduceNode = (SingleInputNode) sinkNode.getPredecessorNode();
        SingleInputNode keyExtractorNode = (SingleInputNode) reduceNode.getPredecessorNode();
        SingleInputNode filterNode = (SingleInputNode) keyExtractorNode.getPredecessorNode();
        SingleInputNode mapNode = (SingleInputNode) filterNode.getPredecessorNode();
        assertFalse(sinkNode.getInputConnection().isBreakingPipeline());
        assertFalse(reduceNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(keyExtractorNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(filterNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapNode.getIncomingConnection().isBreakingPipeline());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: SingleInputNode (org.apache.flink.optimizer.dag.SingleInputNode), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), FilterFunction (org.apache.flink.api.common.functions.FilterFunction), Top1GroupReducer (org.apache.flink.optimizer.testfunctions.Top1GroupReducer), DataSinkNode (org.apache.flink.optimizer.dag.DataSinkNode), Test (org.junit.Test)

Example 8 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in the apache/flink project.

From the class PipelineBreakingTest, method testBranchingPlanNotReJoined.

/**
	 * Tests that branching plans, where the branches are not re-joined,
	 * do not place pipeline breakers.
	 * 
	 * <pre>
	 *                      /---> (filter) -> (sink)
	 *                     /
	 *                    /
	 * (source) -> (map) -----------------\
	 *                    \               (join) -> (sink)
	 *                     \   (source) --/
	 *                      \
	 *                       \
	 *                        \-> (sink)
	 * </pre>
	 */
@Test
public void testBranchingPlanNotReJoined() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Integer> data = env.readTextFile("/never/accessed").map(new MapFunction<String, Integer>() {

            @Override
            public Integer map(String value) {
                return 0;
            }
        });
        // output 1
        data.filter(new FilterFunction<Integer>() {

            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).output(new DiscardingOutputFormat<Integer>());
        // output 2 goes through a join before the sink
        data.join(env.fromElements(1, 2, 3, 4)).where(new IdentityKeyExtractor<Integer>()).equalTo(new IdentityKeyExtractor<Integer>()).output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
        // output 3 is direct
        data.output(new DiscardingOutputFormat<Integer>());
        List<DataSinkNode> sinks = convertPlan(env.createProgramPlan());
        // gather the optimizer DAG nodes
        DataSinkNode sinkAfterFilter = sinks.get(0);
        DataSinkNode sinkAfterJoin = sinks.get(1);
        DataSinkNode sinkDirect = sinks.get(2);
        SingleInputNode filterNode = (SingleInputNode) sinkAfterFilter.getPredecessorNode();
        SingleInputNode mapNode = (SingleInputNode) filterNode.getPredecessorNode();
        TwoInputNode joinNode = (TwoInputNode) sinkAfterJoin.getPredecessorNode();
        SingleInputNode joinInput = (SingleInputNode) joinNode.getSecondPredecessorNode();
        // verify the non-pipeline breaking status
        assertFalse(sinkAfterFilter.getInputConnection().isBreakingPipeline());
        assertFalse(sinkAfterJoin.getInputConnection().isBreakingPipeline());
        assertFalse(sinkDirect.getInputConnection().isBreakingPipeline());
        assertFalse(filterNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getSecondIncomingConnection().isBreakingPipeline());
        assertFalse(joinInput.getIncomingConnection().isBreakingPipeline());
        // some other sanity checks on the plan construction (cannot hurt)
        assertEquals(mapNode, ((SingleInputNode) joinNode.getFirstPredecessorNode()).getPredecessorNode());
        assertEquals(mapNode, sinkDirect.getPredecessorNode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: SingleInputNode (org.apache.flink.optimizer.dag.SingleInputNode), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), FilterFunction (org.apache.flink.api.common.functions.FilterFunction), IdentityKeyExtractor (org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor), DataSinkNode (org.apache.flink.optimizer.dag.DataSinkNode), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), TwoInputNode (org.apache.flink.optimizer.dag.TwoInputNode), Test (org.junit.Test)

Example 9 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in the apache/flink project.

From the class JobGraphGeneratorTest, method testResourcesForChainedOperators.

/**
	 * Verifies that the resources are merged correctly for chained operators
	 * when generating the job graph.
	 */
@Test
public void testResourcesForChainedOperators() throws Exception {
    ResourceSpec resource1 = new ResourceSpec(0.1, 100);
    ResourceSpec resource2 = new ResourceSpec(0.2, 200);
    ResourceSpec resource3 = new ResourceSpec(0.3, 300);
    ResourceSpec resource4 = new ResourceSpec(0.4, 400);
    ResourceSpec resource5 = new ResourceSpec(0.5, 500);
    ResourceSpec resource6 = new ResourceSpec(0.6, 600);
    ResourceSpec resource7 = new ResourceSpec(0.7, 700);
    Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    opMethod.setAccessible(true);
    Method sinkMethod = DataSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    sinkMethod.setAccessible(true);
    MapFunction<Long, Long> mapFunction = new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return value;
        }
    };
    FilterFunction<Long> filterFunction = new FilterFunction<Long>() {

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    };
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> input = env.fromElements(1L, 2L, 3L);
    opMethod.invoke(input, resource1);
    DataSet<Long> map1 = input.map(mapFunction);
    opMethod.invoke(map1, resource2);
    // CHAIN(Source -> Map -> Filter)
    DataSet<Long> filter1 = map1.filter(filterFunction);
    opMethod.invoke(filter1, resource3);
    IterativeDataSet<Long> startOfIteration = filter1.iterate(10);
    opMethod.invoke(startOfIteration, resource4);
    DataSet<Long> map2 = startOfIteration.map(mapFunction);
    opMethod.invoke(map2, resource5);
    // CHAIN(Map -> Filter)
    DataSet<Long> feedback = map2.filter(filterFunction);
    opMethod.invoke(feedback, resource6);
    DataSink<Long> sink = startOfIteration.closeWith(feedback).output(new DiscardingOutputFormat<Long>());
    sinkMethod.invoke(sink, resource7);
    Plan plan = env.createProgramPlan();
    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);
    JobVertex sourceMapFilterVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(0);
    JobVertex iterationHeadVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    JobVertex feedbackVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(2);
    JobVertex sinkVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(3);
    JobVertex iterationSyncVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(4);
    assertTrue(sourceMapFilterVertex.getMinResources().equals(resource1.merge(resource2).merge(resource3)));
    assertTrue(iterationHeadVertex.getPreferredResources().equals(resource4));
    assertTrue(feedbackVertex.getMinResources().equals(resource5.merge(resource6)));
    assertTrue(sinkVertex.getPreferredResources().equals(resource7));
    assertTrue(iterationSyncVertex.getMinResources().equals(resource4));
}
Also used: FilterFunction (org.apache.flink.api.common.functions.FilterFunction), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Configuration (org.apache.flink.configuration.Configuration), Optimizer (org.apache.flink.optimizer.Optimizer), ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec), Method (java.lang.reflect.Method), MapFunction (org.apache.flink.api.common.functions.MapFunction), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), Test (org.junit.Test)
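The assertions in this test rely on ResourceSpec.merge combining the specs of operators that end up in the same chained vertex. Below is a small worked sketch of that arithmetic, assuming (as the two-argument ResourceSpec(cpuCores, heapMemoryInMB) constructor used in the test suggests) that merge adds the values per dimension; the class name is illustrative.

import org.apache.flink.api.common.operators.ResourceSpec;

public class ResourceMergeSketch {
    public static void main(String[] args) {
        // Same specs as the chained Source -> Map -> Filter vertex in the test
        ResourceSpec source = new ResourceSpec(0.1, 100);
        ResourceSpec map = new ResourceSpec(0.2, 200);
        ResourceSpec filter = new ResourceSpec(0.3, 300);

        // Assumption: merge() sums each dimension, so the chained vertex should
        // request 0.1 + 0.2 + 0.3 = 0.6 CPU cores and 100 + 200 + 300 = 600 MB heap
        ResourceSpec chained = source.merge(map).merge(filter);
        System.out.println(chained);
    }
}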

Example 10 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in the apache/flink project.

From the class PartitionOperatorTest, method testRangePartitionOperatorPreservesFields2.

@Test
public void testRangePartitionOperatorPreservesFields2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));
        PartitionOperator<Tuple2<Long, Long>> rangePartitioned = data.partitionByRange(1);
        rangePartitioned.groupBy(1).reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        data.groupBy(0).aggregate(Aggregations.SUM, 1).map(new MapFunction<Tuple2<Long, Long>, Long>() {

            @Override
            public Long map(Tuple2<Long, Long> value) throws Exception {
                return value.f1;
            }
        }).output(new DiscardingOutputFormat<Long>());
        rangePartitioned.filter(new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return value.f0 % 2 == 0;
            }
        }).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());
        SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
        List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
        assertEquals(3, sourceOutgoingChannels.size());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(2).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(1).getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(2).getDataExchangeMode());
        List<Channel> partitionOutputChannels = partitionNode.getOutgoingChannels();
        assertEquals(2, partitionOutputChannels.size());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(1).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(1).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), FilterFunction (org.apache.flink.api.common.functions.FilterFunction), Channel (org.apache.flink.optimizer.plan.Channel), MapFunction (org.apache.flink.api.common.functions.MapFunction), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), Test (org.junit.Test)
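The FilterFunction in this last example keeps only tuples whose first field is even. Below is a minimal, self-contained sketch of that predicate on its own, written as a lambda over made-up input data; the class name and values are illustrative.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class EvenKeyFilterSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.fromElements(
                Tuple2.of(0L, 10L), Tuple2.of(1L, 20L), Tuple2.of(2L, 30L));

        // Keep only tuples whose first field is even, mirroring the test's condition
        data.filter(value -> value.f0 % 2 == 0)
            .print(); // expected output: (0,10) and (2,30)
    }
}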

Aggregations

Classes used together with FilterFunction across the collected examples, with usage counts:

FilterFunction (org.apache.flink.api.common.functions.FilterFunction): 35
Test (org.junit.Test): 29
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 15
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 15
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 14
HashMap (java.util.HashMap): 5
MapFunction (org.apache.flink.api.common.functions.MapFunction): 5
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 5
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 5
ArrayList (java.util.ArrayList): 4
Map (java.util.Map): 4
Plan (org.apache.flink.api.common.Plan): 4
Event (org.apache.flink.cep.Event): 4
SubEvent (org.apache.flink.cep.SubEvent): 4
Edge (org.apache.flink.graph.Edge): 4
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 4
Method (java.lang.reflect.Method): 3
HashSet (java.util.HashSet): 3
ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec): 3
Configuration (org.apache.flink.configuration.Configuration): 3