Example 11 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From class NFACompiler, method compileFactory.

/**
	 * Compiles the given pattern into a {@link NFAFactory}. The NFA factory can be used to create
	 * multiple NFAs.
	 *
	 * @param pattern Definition of sequence pattern
	 * @param inputTypeSerializer Serializer for the input type
	 * @param timeoutHandling True if the NFA shall return timed out event patterns
	 * @param <T> Type of the input events
	 * @return Factory for NFAs corresponding to the given pattern
	 */
@SuppressWarnings("unchecked")
public static <T> NFAFactory<T> compileFactory(Pattern<T, ?> pattern, TypeSerializer<T> inputTypeSerializer, boolean timeoutHandling) {
    if (pattern == null) {
        // return a factory for empty NFAs
        return new NFAFactoryImpl<T>(inputTypeSerializer, 0, Collections.<State<T>>emptyList(), timeoutHandling);
    } else {
        // set of all generated states
        Map<String, State<T>> states = new HashMap<>();
        long windowTime;
        // this is used to enforce pattern name uniqueness.
        Set<String> patternNames = new HashSet<>();
        Pattern<T, ?> succeedingPattern;
        State<T> succeedingState;
        Pattern<T, ?> currentPattern = pattern;
        // we're traversing the pattern from the end to the beginning --> the first state is the final state
        State<T> currentState = new State<>(currentPattern.getName(), State.StateType.Final);
        patternNames.add(currentPattern.getName());
        states.put(currentPattern.getName(), currentState);
        windowTime = currentPattern.getWindowTime() != null ? currentPattern.getWindowTime().toMilliseconds() : 0L;
        while (currentPattern.getPrevious() != null) {
            succeedingPattern = currentPattern;
            succeedingState = currentState;
            currentPattern = currentPattern.getPrevious();
            if (!patternNames.add(currentPattern.getName())) {
                throw new MalformedPatternException("Duplicate pattern name: " + currentPattern.getName() + ". " + "Pattern names must be unique.");
            }
            Time currentWindowTime = currentPattern.getWindowTime();
            if (currentWindowTime != null && currentWindowTime.toMilliseconds() < windowTime) {
                // the window time is the global minimum of all window times of each state
                windowTime = currentWindowTime.toMilliseconds();
            }
            if (states.containsKey(currentPattern.getName())) {
                currentState = states.get(currentPattern.getName());
            } else {
                currentState = new State<>(currentPattern.getName(), State.StateType.Normal);
                states.put(currentState.getName(), currentState);
            }
            currentState.addStateTransition(new StateTransition<T>(StateTransitionAction.TAKE, succeedingState, (FilterFunction<T>) succeedingPattern.getFilterFunction()));
            if (succeedingPattern instanceof FollowedByPattern) {
                // a followed-by pattern entails a reflexive ignore transition
                currentState.addStateTransition(new StateTransition<T>(StateTransitionAction.IGNORE, currentState, null));
            }
        }
        // add the beginning state
        final State<T> beginningState;
        if (states.containsKey(BEGINNING_STATE_NAME)) {
            beginningState = states.get(BEGINNING_STATE_NAME);
        } else {
            beginningState = new State<>(BEGINNING_STATE_NAME, State.StateType.Start);
            states.put(BEGINNING_STATE_NAME, beginningState);
        }
        beginningState.addStateTransition(new StateTransition<T>(StateTransitionAction.TAKE, currentState, (FilterFunction<T>) currentPattern.getFilterFunction()));
        return new NFAFactoryImpl<T>(inputTypeSerializer, windowTime, new HashSet<>(states.values()), timeoutHandling);
    }
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) HashMap(java.util.HashMap) Time(org.apache.flink.streaming.api.windowing.time.Time) FollowedByPattern(org.apache.flink.cep.pattern.FollowedByPattern) State(org.apache.flink.cep.nfa.State) HashSet(java.util.HashSet)
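
Since the factory can create multiple NFAs from one compiled pattern, a typical caller compiles once and then instantiates per key or per task. A minimal sketch, assuming the factory exposes a createNFA() method as in the 1.2-era NFAFactory interface; the pattern below is a hypothetical example, not taken from the sources above:

// Sketch: compile the pattern once, then create independent NFA instances.
Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(new FilterFunction<Event>() {

    @Override
    public boolean filter(Event value) throws Exception {
        // hypothetical predicate, for illustration only
        return value.getName().equals("start");
    }
});
NFACompiler.NFAFactory<Event> factory =
    NFACompiler.compileFactory(pattern, Event.createTypeSerializer(), false);
NFA<Event> nfaForKeyA = factory.createNFA();
NFA<Event> nfaForKeyB = factory.createNFA();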

Example 12 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From class CEPITCase, method testSimpleOrFilterPatternCEP.

/**
	 * Checks that a certain event sequence is recognized with an OR filter.
	 *
	 * @throws Exception if the test job fails
	 */
@Test
public void testSimpleOrFilterPatternCEP() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input = env.fromElements(new Event(1, "start", 1.0), new Event(2, "middle", 2.0), new Event(3, "end", 3.0), new Event(4, "start", 4.0), new Event(5, "middle", 5.0), new Event(6, "end", 6.0));
    Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
        }
    }).followedBy("middle").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getPrice() == 2.0;
        }
    }).or(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getPrice() == 5.0;
        }
    }).followedBy("end").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
        }
    });
    DataStream<String> result = CEP.pattern(input, pattern).select(new PatternSelectFunction<Event, String>() {

        @Override
        public String select(Map<String, Event> pattern) {
            StringBuilder builder = new StringBuilder();
            builder.append(pattern.get("start").getId()).append(",").append(pattern.get("middle").getId()).append(",").append(pattern.get("end").getId());
            return builder.toString();
        }
    });
    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    // expected sequence of matching event ids
    expected = "1,5,6\n1,2,3\n4,5,6\n1,2,6";
    env.execute();
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
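
The or(...) call combines the preceding where(...) predicate with a second FilterFunction as a logical disjunction, so the "middle" stage accepts an event when either price matches; the expected output above contains matches through both id 2 (price 2.0) and id 5 (price 5.0). A minimal sketch of the equivalent condition written as a single FilterFunction, as an illustration rather than code from the test above:

// One FilterFunction expressing where(f).or(g): accept if f OR g accepts.
FilterFunction<Event> middleCondition = new FilterFunction<Event>() {

    @Override
    public boolean filter(Event value) throws Exception {
        return value.getPrice() == 2.0 || value.getPrice() == 5.0;
    }
};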

Example 13 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From class NamesTest, method testDefaultName.

@Test
public void testDefaultName() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> strs = env.fromCollection(Arrays.asList("a", "b"));
    // WARNING: The test will fail if this line is moved within the file (the line number asserted below is hard-coded)
    strs.filter(new FilterFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(String value) throws Exception {
            return value.equals("a");
        }
    }).output(new DiscardingOutputFormat<String>());
    Plan plan = env.createProgramPlan();
    testForName("Filter at testDefaultName(NamesTest.java:55)", plan);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Plan(org.apache.flink.api.common.Plan) Test(org.junit.Test)
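
The hard-coded call site in the assertion exists because, when no name is set explicitly, Flink derives the default operator name from the location of the API call. Setting a name explicitly removes that fragility; a minimal sketch using the DataSet API's name(...) method, where the name string "filter-a" is arbitrary:

// An explicit operator name decouples the plan from source line numbers.
strs.filter(new FilterFunction<String>() {

    private static final long serialVersionUID = 1L;

    @Override
    public boolean filter(String value) throws Exception {
        return value.equals("a");
    }
}).name("filter-a").output(new DiscardingOutputFormat<String>());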

Example 14 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From class NFAITCase, method testSimplePatternNFA.

@Test
public void testSimplePatternNFA() {
    List<StreamRecord<Event>> inputEvents = new ArrayList<>();
    Event startEvent = new Event(42, "start", 1.0);
    SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    Event endEvent = new Event(43, "end", 1.0);
    inputEvents.add(new StreamRecord<Event>(startEvent, 1));
    inputEvents.add(new StreamRecord<Event>(new Event(43, "foobar", 1.0), 2));
    inputEvents.add(new StreamRecord<Event>(new SubEvent(41, "barfoo", 1.0, 5.0), 3));
    inputEvents.add(new StreamRecord<Event>(middleEvent, 3));
    inputEvents.add(new StreamRecord<Event>(new Event(43, "start", 1.0), 4));
    inputEvents.add(new StreamRecord<Event>(endEvent, 5));
    Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(new FilterFunction<Event>() {

        private static final long serialVersionUID = 5726188262756267490L;

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
        }
    }).followedBy("middle").subtype(SubEvent.class).where(new FilterFunction<SubEvent>() {

        private static final long serialVersionUID = 6215754202506583964L;

        @Override
        public boolean filter(SubEvent value) throws Exception {
            return value.getVolume() > 5.0;
        }
    }).followedBy("end").where(new FilterFunction<Event>() {

        private static final long serialVersionUID = 7056763917392056548L;

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
        }
    });
    NFA<Event> nfa = NFACompiler.compile(pattern, Event.createTypeSerializer(), false);
    List<Map<String, Event>> resultingPatterns = new ArrayList<>();
    for (StreamRecord<Event> inputEvent : inputEvents) {
        Collection<Map<String, Event>> patterns = nfa.process(inputEvent.getValue(), inputEvent.getTimestamp()).f0;
        resultingPatterns.addAll(patterns);
    }
    assertEquals(1, resultingPatterns.size());
    Map<String, Event> patternMap = resultingPatterns.get(0);
    assertEquals(startEvent, patternMap.get("start"));
    assertEquals(middleEvent, patternMap.get("middle"));
    assertEquals(endEvent, patternMap.get("end"));
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) SubEvent(org.apache.flink.cep.SubEvent) ArrayList(java.util.ArrayList) Event(org.apache.flink.cep.Event) SubEvent(org.apache.flink.cep.SubEvent) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)
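
The test reads only field f0 of the value returned by nfa.process, which holds the completed matches; the second field carries timed-out partial matches and is populated only when the NFA was compiled with timeoutHandling set to true. A sketch of draining both sides, with the exact tuple types assumed from the 1.2-era API:

// Sketch: compile with timeout handling enabled and inspect both result sides.
NFA<Event> nfaWithTimeouts = NFACompiler.compile(pattern, Event.createTypeSerializer(), true);
for (StreamRecord<Event> inputEvent : inputEvents) {
    Tuple2<Collection<Map<String, Event>>, Collection<Tuple2<Map<String, Event>, Long>>> result =
        nfaWithTimeouts.process(inputEvent.getValue(), inputEvent.getTimestamp());
    // completed matches
    Collection<Map<String, Event>> matches = result.f0;
    // partial matches that exceeded the pattern's time window
    Collection<Tuple2<Map<String, Event>, Long>> timedOut = result.f1;
}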

Example 15 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From class PipelineBreakingTest, method testSimpleForwardPlan.

/**
	 * Tests that no pipeline breakers are inserted into a simple forward
	 * pipeline.
	 *
	 * <pre>
	 *     (source) -> (map) -> (filter) -> (groupBy / reduce)
	 * </pre>
	 */
@Test
public void testSimpleForwardPlan() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> dataSet = env.readTextFile("/never/accessed");
        dataSet.map(new MapFunction<String, Integer>() {

            @Override
            public Integer map(String value) {
                return 0;
            }
        }).filter(new FilterFunction<Integer>() {

            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).groupBy(new IdentityKeyExtractor<Integer>()).reduceGroup(new Top1GroupReducer<Integer>()).output(new DiscardingOutputFormat<Integer>());
        DataSinkNode sinkNode = convertPlan(env.createProgramPlan()).get(0);
        SingleInputNode reduceNode = (SingleInputNode) sinkNode.getPredecessorNode();
        SingleInputNode keyExtractorNode = (SingleInputNode) reduceNode.getPredecessorNode();
        SingleInputNode filterNode = (SingleInputNode) keyExtractorNode.getPredecessorNode();
        SingleInputNode mapNode = (SingleInputNode) filterNode.getPredecessorNode();
        assertFalse(sinkNode.getInputConnection().isBreakingPipeline());
        assertFalse(reduceNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(keyExtractorNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(filterNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapNode.getIncomingConnection().isBreakingPipeline());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : SingleInputNode(org.apache.flink.optimizer.dag.SingleInputNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Top1GroupReducer(org.apache.flink.optimizer.testfunctions.Top1GroupReducer) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) Test(org.junit.Test)
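
Because FilterFunction and MapFunction are single-method interfaces, the anonymous classes above can also be written as lambdas. The map lambda then needs a returns(...) hint, since type erasure hides its output type from Flink's type extractor, while the filter lambda needs none because filtering does not change the element type. A sketch under those assumptions:

// Sketch: the same forward pipeline written with lambdas.
dataSet.map(value -> 0).returns(Integer.class)
    .filter(value -> false)
    .groupBy(new IdentityKeyExtractor<Integer>())
    .reduceGroup(new Top1GroupReducer<Integer>())
    .output(new DiscardingOutputFormat<Integer>());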

Aggregations

FilterFunction (org.apache.flink.api.common.functions.FilterFunction): 35
Test (org.junit.Test): 29
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 15
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 15
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 14
HashMap (java.util.HashMap): 5
MapFunction (org.apache.flink.api.common.functions.MapFunction): 5
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 5
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 5
ArrayList (java.util.ArrayList): 4
Map (java.util.Map): 4
Plan (org.apache.flink.api.common.Plan): 4
Event (org.apache.flink.cep.Event): 4
SubEvent (org.apache.flink.cep.SubEvent): 4
Edge (org.apache.flink.graph.Edge): 4
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 4
Method (java.lang.reflect.Method): 3
HashSet (java.util.HashSet): 3
ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec): 3
Configuration (org.apache.flink.configuration.Configuration): 3