Example 16 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From the class DataStreamTest, method operatorTest.

@Test
public void operatorTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Long> src = env.generateSequence(0, 0);
    MapFunction<Long, Integer> mapFunction = new MapFunction<Long, Integer>() {

        @Override
        public Integer map(Long value) throws Exception {
            return null;
        }
    };
    DataStream<Integer> map = src.map(mapFunction);
    map.addSink(new DiscardingSink<Integer>());
    assertEquals(mapFunction, getFunctionForDataStream(map));
    FlatMapFunction<Long, Integer> flatMapFunction = new FlatMapFunction<Long, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Long value, Collector<Integer> out) throws Exception {
        }
    };
    DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
    flatMap.addSink(new DiscardingSink<Integer>());
    assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));
    FilterFunction<Integer> filterFunction = new FilterFunction<Integer>() {

        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    };
    DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);
    unionFilter.addSink(new DiscardingSink<Integer>());
    assertEquals(filterFunction, getFunctionForDataStream(unionFilter));
    try {
        env.getStreamGraph().getStreamEdges(map.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    try {
        env.getStreamGraph().getStreamEdges(flatMap.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    OutputSelector<Integer> outputSelector = new OutputSelector<Integer>() {

        @Override
        public Iterable<String> select(Integer value) {
            return null;
        }
    };
    SplitStream<Integer> split = unionFilter.split(outputSelector);
    split.select("dummy").addSink(new DiscardingSink<Integer>());
    List<OutputSelector<?>> outputSelectors = env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
    assertEquals(1, outputSelectors.size());
    assertEquals(outputSelector, outputSelectors.get(0));
    DataStream<Integer> select = split.select("a");
    DataStreamSink<Integer> sink = select.print();
    StreamEdge splitEdge = env.getStreamGraph().getStreamEdges(unionFilter.getId(), sink.getTransformation().getId()).get(0);
    assertEquals("a", splitEdge.getSelectedNames().get(0));
    ConnectedStreams<Integer, Integer> connect = map.connect(flatMap);
    CoMapFunction<Integer, Integer, String> coMapper = new CoMapFunction<Integer, Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map1(Integer value) {
            return null;
        }

        @Override
        public String map2(Integer value) {
            return null;
        }
    };
    DataStream<String> coMap = connect.map(coMapper);
    coMap.addSink(new DiscardingSink<String>());
    assertEquals(coMapper, getFunctionForDataStream(coMap));
    try {
        env.getStreamGraph().getStreamEdges(map.getId(), coMap.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    try {
        env.getStreamGraph().getStreamEdges(flatMap.getId(), coMap.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) OutputSelector(org.apache.flink.streaming.api.collector.selector.OutputSelector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
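
In later Flink releases the split/OutputSelector API exercised above was deprecated and eventually removed in favor of side outputs. A minimal sketch of the equivalent routing with a ProcessFunction, continuing from the streams above (variable names are illustrative):

import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.OutputTag;

// An OutputTag names a side output; the anonymous subclass preserves the element type.
final OutputTag<Integer> dummyTag = new OutputTag<Integer>("dummy") {};

SingleOutputStreamOperator<Integer> routed = unionFilter.process(new ProcessFunction<Integer, Integer>() {

    @Override
    public void processElement(Integer value, Context ctx, Collector<Integer> out) {
        // route the element to the "dummy" side output, analogous to select("dummy")
        ctx.output(dummyTag, value);
        out.collect(value);
    }
});

// Retrieve the side output as a regular stream and terminate it.
routed.getSideOutput(dummyTag).addSink(new DiscardingSink<Integer>());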

Example 17 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From the class FilterLambda1, method main.

public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> input = env.fromElements("Please filter", "the words", "but not this");
    FilterFunction<String> filter = (v) -> WordFilter.filter(v);
    DataSet<String> output = input.filter(filter);
    // print() on a DataSet eagerly triggers execution, so no separate
    // env.execute() call is needed here; calling execute() again would
    // fail because no new sinks are defined after print() ran the job
    output.print();
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) DataSet(org.apache.flink.api.java.DataSet) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment)
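
Because FilterFunction is a single-abstract-method interface, the lambda above can also be written as a method reference; since filter() never changes the element type, no returns() type hint is needed. A minimal sketch, assuming WordFilter.filter is the static predicate referenced above:

DataSet<String> output = input.filter(WordFilter::filter);
output.print();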

Example 18 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From the class TPCHQuery3, method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    if (!params.has("lineitem") && !params.has("customer") && !params.has("orders")) {
        System.err.println("  This program expects data from the TPC-H benchmark as input data.");
        System.err.println("  Due to legal restrictions, we can not ship generated data.");
        System.out.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.out.println("  Usage: TPCHQuery3 --lineitem <path> --customer <path> --orders <path> [--output <path>]");
        return;
    }
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);
    // get input data
    DataSet<Lineitem> lineitems = getLineitemDataSet(env, params.get("lineitem"));
    DataSet<Order> orders = getOrdersDataSet(env, params.get("orders"));
    DataSet<Customer> customers = getCustomerDataSet(env, params.get("customer"));
    // Filter market segment "AUTOMOBILE"
    customers = customers.filter(new FilterFunction<Customer>() {

        @Override
        public boolean filter(Customer c) {
            return c.getMktsegment().equals("AUTOMOBILE");
        }
    });
    // Filter all Orders with o_orderdate < 1995-03-12
    orders = orders.filter(new FilterFunction<Order>() {

        private final DateFormat format = new SimpleDateFormat("yyyy-MM-dd");

        private final Date date = format.parse("1995-03-12");

        @Override
        public boolean filter(Order o) throws ParseException {
            return format.parse(o.getOrderdate()).before(date);
        }
    });
    // Filter all Lineitems with l_shipdate > 1995-03-12
    lineitems = lineitems.filter(new FilterFunction<Lineitem>() {

        private final DateFormat format = new SimpleDateFormat("yyyy-MM-dd");

        private final Date date = format.parse("1995-03-12");

        @Override
        public boolean filter(Lineitem l) throws ParseException {
            return format.parse(l.getShipdate()).after(date);
        }
    });
    // Join customers with orders and package them into a ShippingPriorityItem
    DataSet<ShippingPriorityItem> customerWithOrders = customers.join(orders).where(0).equalTo(1).with(new JoinFunction<Customer, Order, ShippingPriorityItem>() {

        @Override
        public ShippingPriorityItem join(Customer c, Order o) {
            return new ShippingPriorityItem(o.getOrderKey(), 0.0, o.getOrderdate(), o.getShippriority());
        }
    });
    // Join the last join result with Lineitems
    DataSet<ShippingPriorityItem> result = customerWithOrders.join(lineitems).where(0).equalTo(0).with(new JoinFunction<ShippingPriorityItem, Lineitem, ShippingPriorityItem>() {

        @Override
        public ShippingPriorityItem join(ShippingPriorityItem i, Lineitem l) {
            i.setRevenue(l.getExtendedprice() * (1 - l.getDiscount()));
            return i;
        }
    }).groupBy(0, 2, 3).aggregate(Aggregations.SUM, 1);
    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("TPCH Query 3 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Date(java.util.Date) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat)
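
SimpleDateFormat is mutable and not thread-safe, and its parse() throws a checked ParseException, which is why the filters above resort to field initializers. A sketch of the orders filter using java.time instead, assuming getOrderdate() returns an ISO yyyy-MM-dd string as the code above suggests:

import java.time.LocalDate;

orders = orders.filter(new FilterFunction<Order>() {

    // LocalDate is immutable and thread-safe, and parse() throws no checked exception
    private final LocalDate cutoff = LocalDate.of(1995, 3, 12);

    @Override
    public boolean filter(Order o) {
        return LocalDate.parse(o.getOrderdate()).isBefore(cutoff);
    }
});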

Example 19 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From the class TPCHQuery10, method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (!params.has("customer") && !params.has("orders") && !params.has("lineitem") && !params.has("nation")) {
        System.err.println("  This program expects data from the TPC-H benchmark as input data.");
        System.err.println("  Due to legal restrictions, we can not ship generated data.");
        System.err.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.err.println("  Usage: TPCHQuery10 --customer <path> --orders <path> --lineitem <path> --nation <path> [--output <path>]");
        return;
    }
    // get customer data set: (custkey, name, address, nationkey, acctbal) 
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers = getCustomerDataSet(env, params.get("customer"));
    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders = getOrdersDataSet(env, params.get("orders"));
    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems = getLineitemDataSet(env, params.get("lineitem"));
    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations = getNationsDataSet(env, params.get("nation"));
    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear = orders.filter(new FilterFunction<Tuple3<Integer, Integer, String>>() {

        @Override
        public boolean filter(Tuple3<Integer, Integer, String> o) {
            return Integer.parseInt(o.f2.substring(0, 4)) > 1990;
        }
    }).project(0, 1);
    // lineitems filtered by flag: (orderkey, revenue)
    DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag = lineitems.filter(new FilterFunction<Tuple4<Integer, Double, Double, String>>() {

        @Override
        public boolean filter(Tuple4<Integer, Double, Double, String> l) {
            return l.f3.equals("R");
        }
    }).map(new MapFunction<Tuple4<Integer, Double, Double, String>, Tuple2<Integer, Double>>() {

        @Override
        public Tuple2<Integer, Double> map(Tuple4<Integer, Double, Double, String> l) {
            // revenue per item = l_extendedprice * (1 - l_discount)
            return new Tuple2<Integer, Double>(l.f0, l.f1 * (1 - l.f2));
        }
    });
    // join orders with lineitems: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueByCustomer = ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag).where(0).equalTo(0).projectFirst(1).projectSecond(1);
    revenueByCustomer = revenueByCustomer.groupBy(0).aggregate(Aggregations.SUM, 1);
    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers.joinWithTiny(nations).where(3).equalTo(0).projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);
    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> result = customerWithNation.join(revenueByCustomer).where(0).equalTo(0).projectFirst(0, 1, 2, 3, 4).projectSecond(1);
    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("TPCH Query 10 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple5(org.apache.flink.api.java.tuple.Tuple5) Tuple6(org.apache.flink.api.java.tuple.Tuple6) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3)
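
The filter-then-map chain on lineitems can be compressed with Java 8 lambdas; note that a lambda returning a tuple loses its generic parameters to erasure, so the DataSet API needs a returns() hint for the map step. A minimal sketch under that assumption:

import org.apache.flink.api.common.typeinfo.TypeHint;

DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag = lineitems
    // keep only returned items (l_returnflag == "R")
    .filter(l -> l.f3.equals("R"))
    // revenue per item = l_extendedprice * (1 - l_discount)
    .map(l -> new Tuple2<>(l.f0, l.f1 * (1 - l.f2)))
    .returns(new TypeHint<Tuple2<Integer, Double>>() {});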

Example 20 with FilterFunction

Use of org.apache.flink.api.common.functions.FilterFunction in project flink by apache.

From the class MusicProfiles, method main.

public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    /**
     * Read the user-song-play triplets.
     */
    DataSet<Tuple3<String, String, Integer>> triplets = getUserSongTripletsData(env);
    /**
     * Read the mismatches dataset and extract the songIDs.
     */
    DataSet<Tuple1<String>> mismatches = getMismatchesData(env).map(new ExtractMismatchSongIds());
    /**
     * Filter out the mismatches from the triplets dataset.
     */
    DataSet<Tuple3<String, String, Integer>> validTriplets = triplets.coGroup(mismatches).where(1).equalTo(0).with(new FilterOutMismatches());
    /**
     * Create a user -> song weighted bipartite graph where the edge weights
     * correspond to play counts.
     */
    Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(validTriplets, env);
    /**
     * Get the top track (most listened) for each user.
     */
    DataSet<Tuple2<String, String>> usersWithTopTrack = userSongGraph.groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT).filter(new FilterSongNodes());
    if (fileOutput) {
        usersWithTopTrack.writeAsCsv(topTracksOutputPath, "\n", "\t");
    } else {
        usersWithTopTrack.print();
    }
    /**
     * Create a user-user similarity graph, based on common songs, i.e. two
     * users that listen to the same song are connected. For each song, we
     * create an edge between each pair of its in-neighbors.
     */
    DataSet<Edge<String, NullValue>> similarUsers = userSongGraph.getEdges().filter(new FilterFunction<Edge<String, Integer>>() {

        @Override
        public boolean filter(Edge<String, Integer> edge) {
            return (edge.getValue() > playcountThreshold);
        }
    }).groupBy(1).reduceGroup(new CreateSimilarUserEdges()).distinct();
    Graph<String, Long, NullValue> similarUsersGraph = Graph.fromDataSet(similarUsers, new MapFunction<String, Long>() {

        @Override
        public Long map(String value) {
            return 1L;
        }
    }, env).getUndirected();
    /**
     * Detect user communities using the label propagation library method.
     */
    // Initialize each vertex with a unique numeric label and run the label propagation algorithm
    DataSet<Tuple2<String, Long>> idsWithInitialLabels = DataSetUtils.zipWithUniqueId(similarUsersGraph.getVertexIds()).map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {

        @Override
        public Tuple2<String, Long> map(Tuple2<Long, String> tuple2) throws Exception {
            return new Tuple2<String, Long>(tuple2.f1, tuple2.f0);
        }
    });
    DataSet<Vertex<String, Long>> verticesWithCommunity = similarUsersGraph.joinWithVertices(idsWithInitialLabels, new VertexJoinFunction<Long, Long>() {

        @Override
        public Long vertexJoin(Long vertexValue, Long inputValue) {
            return inputValue;
        }
    }).run(new LabelPropagation<String, Long, NullValue>(maxIterations));
    if (fileOutput) {
        verticesWithCommunity.writeAsCsv(communitiesOutputPath, "\n", "\t");
        // since file sinks are lazy, we trigger the execution explicitly
        env.execute();
    } else {
        verticesWithCommunity.print();
    }
}
Also used : VertexJoinFunction(org.apache.flink.graph.VertexJoinFunction) Vertex(org.apache.flink.graph.Vertex) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) NullValue(org.apache.flink.types.NullValue) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Edge(org.apache.flink.graph.Edge)
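
The edge filter above closes over the static playcountThreshold field; packaging the threshold into a small named FilterFunction makes the parameter explicit and keeps the function trivially serializable. A sketch under that assumption; the class name is illustrative, not part of the original:

public static final class PlaycountFilter implements FilterFunction<Edge<String, Integer>> {

    private final int threshold;

    public PlaycountFilter(int threshold) {
        this.threshold = threshold;
    }

    @Override
    public boolean filter(Edge<String, Integer> edge) {
        // keep only edges whose play count exceeds the configured threshold
        return edge.getValue() > threshold;
    }
}

// usage: userSongGraph.getEdges().filter(new PlaycountFilter(playcountThreshold))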

Aggregations

FilterFunction (org.apache.flink.api.common.functions.FilterFunction): 35
Test (org.junit.Test): 29
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 15
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 15
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 14
HashMap (java.util.HashMap): 5
MapFunction (org.apache.flink.api.common.functions.MapFunction): 5
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 5
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 5
ArrayList (java.util.ArrayList): 4
Map (java.util.Map): 4
Plan (org.apache.flink.api.common.Plan): 4
Event (org.apache.flink.cep.Event): 4
SubEvent (org.apache.flink.cep.SubEvent): 4
Edge (org.apache.flink.graph.Edge): 4
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 4
Method (java.lang.reflect.Method): 3
HashSet (java.util.HashSet): 3
ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec): 3
Configuration (org.apache.flink.configuration.Configuration): 3