Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From class CEPITCase, method testSimplePatternCEP.
/**
 * Checks that a certain event sequence is recognized.
 *
 * @throws Exception
 */
@Test
public void testSimplePatternCEP() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input = env.fromElements(
        new Event(1, "barfoo", 1.0),
        new Event(2, "start", 2.0),
        new Event(3, "foobar", 3.0),
        new SubEvent(4, "foo", 4.0, 1.0),
        new Event(5, "middle", 5.0),
        new SubEvent(6, "middle", 6.0, 2.0),
        new SubEvent(7, "bar", 3.0, 3.0),
        new Event(42, "42", 42.0),
        new Event(8, "end", 1.0));
    Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
        }
    }).followedBy("middle").subtype(SubEvent.class).where(new FilterFunction<SubEvent>() {

        @Override
        public boolean filter(SubEvent value) throws Exception {
            return value.getName().equals("middle");
        }
    }).followedBy("end").where(new FilterFunction<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
        }
    });
    DataStream<String> result = CEP.pattern(input, pattern).select(new PatternSelectFunction<Event, String>() {

        @Override
        public String select(Map<String, Event> pattern) {
            StringBuilder builder = new StringBuilder();
            builder.append(pattern.get("start").getId()).append(",")
                   .append(pattern.get("middle").getId()).append(",")
                   .append(pattern.get("end").getId());
            return builder.toString();
        }
    });
    result.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    // expected sequence of matching event ids
    expected = "2,6,8";
    env.execute();
}
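FilterFunction is a single-method interface, so conditions like the ones above can also be written as lambdas. The following standalone sketch is illustrative only (it is not part of the Flink test above); it uses a plain String stream and the FilterLambdaSketch class name is made up for the example.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FilterLambdaSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<String> names = env.fromElements("barfoo", "start", "middle", "end");
        // FilterFunction is a single-method interface, so a lambda is a valid implementation.
        FilterFunction<String> isStart = value -> value.equals("start");
        DataStream<String> starts = names.filter(isStart);
        starts.print();
        env.execute("filter-lambda-sketch");
    }
}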
Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From class GraphOperationsITCase, method testFilterVertices.
@SuppressWarnings("serial")
@Test
public void testFilterVertices() throws Exception {
    /*
     * Test filterOnVertices:
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    Graph<Long, Long, Long> graph = Graph.fromDataSet(
        TestGraphUtils.getLongLongVertexData(env),
        TestGraphUtils.getLongLongEdgeData(env), env);
    DataSet<Edge<Long, Long>> data = graph.filterOnVertices(new FilterFunction<Vertex<Long, Long>>() {

        @Override
        public boolean filter(Vertex<Long, Long> vertex) throws Exception {
            return (vertex.getValue() > 2);
        }
    }).getEdges();
    List<Edge<Long, Long>> result = data.collect();
    expectedResult = "3,4,34\n" + "3,5,35\n" + "4,5,45\n";
    compareResultAsTuples(result, expectedResult);
}
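Gelly also offers the companion call filterOnEdges, which applies a FilterFunction to the edge values instead of the vertex values. The fragment below is a minimal, illustrative sketch that reuses the graph variable built in the test above; the threshold 34 is chosen arbitrarily for the example.

// Illustrative only: keep edges whose value exceeds 34 and materialize them locally.
DataSet<Edge<Long, Long>> heavyEdges = graph.filterOnEdges(new FilterFunction<Edge<Long, Long>>() {

    @Override
    public boolean filter(Edge<Long, Long> edge) throws Exception {
        return edge.getValue() > 34L;
    }
}).getEdges();
List<Edge<Long, Long>> heavyEdgeList = heavyEdges.collect();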
Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From class DataExchangeModeForwardTest, method verifySimpleForwardPlan.
private void verifySimpleForwardPlan(ExecutionMode execMode,
        DataExchangeMode toMap, DataExchangeMode toFilter, DataExchangeMode toKeyExtractor,
        DataExchangeMode toCombiner, DataExchangeMode toReduce, DataExchangeMode toSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);
        DataSet<String> dataSet = env.readTextFile("/never/accessed");
        dataSet.map(new MapFunction<String, Integer>() {

            @Override
            public Integer map(String value) {
                return 0;
            }
        }).filter(new FilterFunction<Integer>() {

            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).groupBy(new IdentityKeyExtractor<Integer>())
          .reduceGroup(new Top1GroupReducer<Integer>())
          .output(new DiscardingOutputFormat<Integer>());
        OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());
        SinkPlanNode sinkNode = optPlan.getDataSinks().iterator().next();
        SingleInputPlanNode reduceNode = (SingleInputPlanNode) sinkNode.getPredecessor();
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getPredecessor();
        SingleInputPlanNode keyExtractorNode = (SingleInputPlanNode) combineNode.getPredecessor();
        SingleInputPlanNode filterNode = (SingleInputPlanNode) keyExtractorNode.getPredecessor();
        SingleInputPlanNode mapNode = (SingleInputPlanNode) filterNode.getPredecessor();
        assertEquals(toMap, mapNode.getInput().getDataExchangeMode());
        assertEquals(toFilter, filterNode.getInput().getDataExchangeMode());
        assertEquals(toKeyExtractor, keyExtractorNode.getInput().getDataExchangeMode());
        assertEquals(toCombiner, combineNode.getInput().getDataExchangeMode());
        assertEquals(toReduce, reduceNode.getInput().getDataExchangeMode());
        assertEquals(toSink, sinkNode.getInput().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
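This helper is not itself a test; callers pass the execution mode together with the exchange mode they expect on each channel of the plan. The call below is a hypothetical sketch only, assuming a pipelined execution mode in which every forward channel stays pipelined; the real expectations for each mode live in the @Test methods of DataExchangeModeForwardTest.

// Illustrative invocation (not copied from the test class):
verifySimpleForwardPlan(ExecutionMode.PIPELINED,
    DataExchangeMode.PIPELINED, DataExchangeMode.PIPELINED, DataExchangeMode.PIPELINED,
    DataExchangeMode.PIPELINED, DataExchangeMode.PIPELINED, DataExchangeMode.PIPELINED);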
Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From class JobGraphGeneratorTest, method testResourcesForDeltaIteration.
/**
 * Verifies that resources are set correctly on each job vertex when generating
 * the job graph; this test covers the delta iteration case.
 */
@Test
public void testResourcesForDeltaIteration() throws Exception {
    ResourceSpec resource1 = new ResourceSpec(0.1, 100);
    ResourceSpec resource2 = new ResourceSpec(0.2, 200);
    ResourceSpec resource3 = new ResourceSpec(0.3, 300);
    ResourceSpec resource4 = new ResourceSpec(0.4, 400);
    ResourceSpec resource5 = new ResourceSpec(0.5, 500);
    ResourceSpec resource6 = new ResourceSpec(0.6, 600);
    Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    opMethod.setAccessible(true);
    Method deltaMethod = DeltaIteration.class.getDeclaredMethod("setResources", ResourceSpec.class);
    deltaMethod.setAccessible(true);
    Method sinkMethod = DataSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    sinkMethod.setAccessible(true);
    MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> mapFunction = new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        @Override
        public Tuple2<Long, Long> map(Tuple2<Long, Long> value) throws Exception {
            return value;
        }
    };
    FilterFunction<Tuple2<Long, Long>> filterFunction = new FilterFunction<Tuple2<Long, Long>>() {

        @Override
        public boolean filter(Tuple2<Long, Long> value) throws Exception {
            return false;
        }
    };
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.fromElements(new Tuple2<>(1L, 2L));
    opMethod.invoke(input, resource1);
    // CHAIN(Map -> Filter)
    DataSet<Tuple2<Long, Long>> map = input.map(mapFunction);
    opMethod.invoke(map, resource2);
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        map.iterateDelta(map, 100, 0).registerAggregator("test", new LongSumAggregator());
    deltaMethod.invoke(iteration, resource3);
    DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset().map(mapFunction);
    opMethod.invoke(delta, resource4);
    DataSet<Tuple2<Long, Long>> feedback = delta.filter(filterFunction);
    opMethod.invoke(feedback, resource5);
    DataSink<Tuple2<Long, Long>> sink = iteration.closeWith(delta, feedback)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    sinkMethod.invoke(sink, resource6);
    Plan plan = env.createProgramPlan();
    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);
    JobVertex sourceMapVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(0);
    JobVertex iterationHeadVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    JobVertex deltaVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(2);
    JobVertex iterationTailVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(3);
    JobVertex feedbackVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(4);
    JobVertex sinkVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(5);
    JobVertex iterationSyncVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(6);
    assertTrue(sourceMapVertex.getMinResources().equals(resource1.merge(resource2)));
    assertTrue(iterationHeadVertex.getPreferredResources().equals(resource3));
    assertTrue(deltaVertex.getMinResources().equals(resource4));
    // the iteration tail task is scheduled on the same instance as the iteration head
    // and currently has no resources set
    assertTrue(iterationTailVertex.getPreferredResources().equals(ResourceSpec.DEFAULT));
    assertTrue(feedbackVertex.getMinResources().equals(resource5));
    assertTrue(sinkVertex.getPreferredResources().equals(resource6));
    assertTrue(iterationSyncVertex.getMinResources().equals(resource3));
}
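The first assertion relies on ResourceSpec.merge combining the specs of operators that end up chained into a single job vertex. The fragment below is a small, illustrative sketch of that arithmetic, assuming merge sums CPU cores and heap memory, as the sourceMapVertex assertion above implies.

// Illustrative only: two chained operators' specs combined into one vertex spec.
ResourceSpec mapSpec = new ResourceSpec(0.1, 100);     // 0.1 CPU cores, 100 MB heap
ResourceSpec filterSpec = new ResourceSpec(0.2, 200);  // 0.2 CPU cores, 200 MB heap
ResourceSpec chainedSpec = mapSpec.merge(filterSpec);
// chainedSpec is expected to describe 0.3 CPU cores and 300 MB of heap,
// matching the pattern of the resource1.merge(resource2) assertion above.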
Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.
From class DanglingPageRankITCase, method testDanglingPageRank.
@Test
public void testDanglingPageRank() {
    try {
        final int NUM_ITERATIONS = 25;
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Boolean>> vertices = env.fromElements(
            new Tuple2<>(1L, false),
            new Tuple2<>(2L, false),
            new Tuple2<>(5L, false),
            new Tuple2<>(3L, true),
            new Tuple2<>(4L, false));
        DataSet<PageWithLinks> edges = env.fromElements(
            new PageWithLinks(2L, new long[] { 1 }),
            new PageWithLinks(5L, new long[] { 2, 4 }),
            new PageWithLinks(4L, new long[] { 3, 2 }),
            new PageWithLinks(1L, new long[] { 4, 2, 3 }));
        final long numVertices = vertices.count();
        final long numDanglingVertices = vertices.filter(new FilterFunction<Tuple2<Long, Boolean>>() {

            @Override
            public boolean filter(Tuple2<Long, Boolean> value) {
                return value.f1;
            }
        }).count();
        DataSet<PageWithRankAndDangling> verticesWithInitialRank =
            vertices.map(new MapFunction<Tuple2<Long, Boolean>, PageWithRankAndDangling>() {

                @Override
                public PageWithRankAndDangling map(Tuple2<Long, Boolean> value) {
                    return new PageWithRankAndDangling(value.f0, 1.0 / numVertices, value.f1);
                }
            });
        IterativeDataSet<PageWithRankAndDangling> iteration = verticesWithInitialRank.iterate(NUM_ITERATIONS);
        iteration.getAggregators().registerAggregationConvergenceCriterion(
            AGGREGATOR_NAME, new PageRankStatsAggregator(), new DiffL1NormConvergenceCriterion());
        DataSet<PageWithRank> partialRanks = iteration.join(edges).where("pageId").equalTo("pageId")
            .with(new FlatJoinFunction<PageWithRankAndDangling, PageWithLinks, PageWithRank>() {

                @Override
                public void join(PageWithRankAndDangling page, PageWithLinks links, Collector<PageWithRank> out) {
                    double rankToDistribute = page.rank / (double) links.targets.length;
                    PageWithRank output = new PageWithRank(0L, rankToDistribute);
                    for (long target : links.targets) {
                        output.pageId = target;
                        out.collect(output);
                    }
                }
            });
        DataSet<PageWithRankAndDangling> newRanks = iteration.coGroup(partialRanks).where("pageId").equalTo("pageId")
            .with(new RichCoGroupFunction<PageWithRankAndDangling, PageWithRank, PageWithRankAndDangling>() {

                private static final double BETA = 0.85;

                private final double randomJump = (1.0 - BETA) / numVertices;

                private PageRankStatsAggregator aggregator;

                private double danglingRankFactor;

                @Override
                public void open(Configuration parameters) throws Exception {
                    int currentIteration = getIterationRuntimeContext().getSuperstepNumber();
                    aggregator = getIterationRuntimeContext().getIterationAggregator(AGGREGATOR_NAME);
                    if (currentIteration == 1) {
                        danglingRankFactor = BETA * (double) numDanglingVertices
                            / ((double) numVertices * (double) numVertices);
                    } else {
                        PageRankStats previousAggregate =
                            getIterationRuntimeContext().getPreviousIterationAggregate(AGGREGATOR_NAME);
                        danglingRankFactor = BETA * previousAggregate.danglingRank() / (double) numVertices;
                    }
                }

                @Override
                public void coGroup(Iterable<PageWithRankAndDangling> currentPages, Iterable<PageWithRank> partialRanks,
                        Collector<PageWithRankAndDangling> out) {
                    // compute the next rank
                    long edges = 0;
                    double summedRank = 0;
                    for (PageWithRank partial : partialRanks) {
                        summedRank += partial.rank;
                        edges++;
                    }
                    double rank = BETA * summedRank + randomJump + danglingRankFactor;
                    // current rank, for stats and convergence
                    PageWithRankAndDangling currentPage = currentPages.iterator().next();
                    double currentRank = currentPage.rank;
                    boolean isDangling = currentPage.dangling;
                    // maintain statistics to compensate for probability loss on dangling nodes
                    double danglingRankToAggregate = isDangling ? rank : 0;
                    long danglingVerticesToAggregate = isDangling ? 1 : 0;
                    double diff = Math.abs(currentRank - rank);
                    aggregator.aggregate(diff, rank, danglingRankToAggregate, danglingVerticesToAggregate, 1, edges);
                    currentPage.rank = rank;
                    out.collect(currentPage);
                }
            });
        List<PageWithRankAndDangling> result = iteration.closeWith(newRanks).collect();
        double totalRank = 0.0;
        for (PageWithRankAndDangling r : result) {
            totalRank += r.rank;
            assertTrue(r.pageId >= 1 && r.pageId <= 5);
            assertTrue(r.pageId != 3 || r.dangling);
        }
        assertEquals(1.0, totalRank, 0.001);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
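The coGroup function above updates each page with rank = BETA * summedRank + randomJump + danglingRankFactor. The tiny, non-Flink fragment below only restates that arithmetic; the summedRank and danglingRankFactor values are made up for the example.

// Illustrative values only; this merely restates the update formula from the coGroup above.
double beta = 0.85;
long numVertices = 5;
double randomJump = (1.0 - beta) / numVertices;  // uniform teleport share, 0.03 here
double summedRank = 0.12;                        // sum of partial ranks received from in-neighbors (made up)
double danglingRankFactor = 0.03;                // redistributed rank of dangling pages (made up)
double nextRank = beta * summedRank + randomJump + danglingRankFactor;
// nextRank == 0.85 * 0.12 + 0.03 + 0.03 == 0.162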