
Example 11 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class AbstractEventTimeWindowCheckpointingITCase, method doTestTumblingTimeWindowWithKVState:

public void doTestTumblingTimeWindowWithKVState(int maxParallelism) {
    final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
    final int WINDOW_SIZE = windowSize();
    final int NUM_KEYS = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setMaxParallelism(maxParallelism);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
                .rebalance()
                .keyBy(0)
                .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS))
                .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

            private boolean open = false;

            private ValueState<Integer> count;

            @Override
            public void open(Configuration parameters) {
                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
                count = getRuntimeContext().getState(new ValueStateDescriptor<>("count", Integer.class, 0));
            }

            @Override
            public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> values, Collector<Tuple4<Long, Long, Long, IntType>> out) throws Exception {
                // seed the count with the key value so that each key produces different count results
                if (count.value() == 0) {
                    count.update(tuple.<Long>getField(0).intValue());
                }
                // validate that the function has been opened properly
                assertTrue(open);
                count.update(count.value() + 1);
                out.collect(new Tuple4<>(tuple.<Long>getField(0), window.getStart(), window.getEnd(), new IntType(count.value())));
            }
        }).addSink(new CountValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE)).setParallelism(1);
        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) IOException(java.io.IOException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple)
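
For reference, a minimal, self-contained sketch (illustrative only, not part of the test above) of how Tuple4 values like the ones emitted by the window function can be constructed and read, both through the typed fields f0..f3 and through the positional getField accessor used in the apply method:

import org.apache.flink.api.java.tuple.Tuple4;

public class Tuple4Sketch {

    public static void main(String[] args) {
        // key, window start, window end, count (Integer stands in for the test's IntType helper)
        Tuple4<Long, Long, Long, Integer> result = new Tuple4<>(42L, 0L, 100L, 7);

        // typed field access
        System.out.println("key=" + result.f0 + ", window=[" + result.f1 + ", " + result.f2 + "), count=" + result.f3);

        // positional access, as used with tuple.<Long>getField(0) in the window function above
        Long key = result.getField(0);
        System.out.println("key via getField(0): " + key);
    }
}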

Example 12 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class AbstractEventTimeWindowCheckpointingITCase, method testPreAggregatedSlidingTimeWindow:

@Test
public void testPreAggregatedSlidingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
    final int WINDOW_SIZE = windowSize();
    final int WINDOW_SLIDE = windowSlide();
    final int NUM_KEYS = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
                .rebalance()
                .keyBy(0)
                .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS), Time.of(WINDOW_SLIDE, MILLISECONDS))
                .reduce(new ReduceFunction<Tuple2<Long, IntType>>() {

            @Override
            public Tuple2<Long, IntType> reduce(Tuple2<Long, IntType> a, Tuple2<Long, IntType> b) {
                // pre-aggregate: sum the per-key values before the window function fires
                return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
            }
        }, new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

            private boolean open = false;

            @Override
            public void open(Configuration parameters) {
                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
            }

            @Override
            public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> input, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                // validate that the function has been opened properly
                assertTrue(open);
                for (Tuple2<Long, IntType> in : input) {
                    out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                }
            }
        }).addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SLIDE)).setParallelism(1);
        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) IOException(java.io.IOException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)
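
The ReduceFunction above pre-aggregates elements incrementally, so the window function only sees one combined value per key and window. A plain-Java sketch of that per-key combine step (hypothetical data, no Flink runtime involved):

import java.util.HashMap;
import java.util.Map;

public class PreAggregationSketch {

    public static void main(String[] args) {
        // (key, value) pairs standing in for Tuple2<Long, IntType> elements
        long[][] events = { { 1, 10 }, { 2, 5 }, { 1, 7 }, { 2, 3 } };

        Map<Long, Long> perKeySum = new HashMap<>();
        for (long[] e : events) {
            // same associative combine as reduce(a, b) -> new Tuple2<>(a.f0, a.f1 + b.f1)
            perKeySum.merge(e[0], e[1], Long::sum);
        }

        // the window function then emits one pre-aggregated value per key
        perKeySum.forEach((key, sum) -> System.out.println("key=" + key + " sum=" + sum));
    }
}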

Example 13 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class EventTimeAllWindowCheckpointingITCase, method testSlidingTimeWindow:

@Test
public void testSlidingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = 3000;
    final int WINDOW_SIZE = 1000;
    final int WINDOW_SLIDE = 100;
    final int NUM_KEYS = 1;
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
                .rebalance()
                .timeWindowAll(Time.of(WINDOW_SIZE, MILLISECONDS), Time.of(WINDOW_SLIDE, MILLISECONDS))
                .apply(new RichAllWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

            private boolean open = false;

            @Override
            public void open(Configuration parameters) {
                assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
            }

            @Override
            public void apply(TimeWindow window, Iterable<Tuple2<Long, IntType>> values, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                // validate that the function has been opened properly
                assertTrue(open);
                int sum = 0;
                long key = -1;
                for (Tuple2<Long, IntType> value : values) {
                    sum += value.f1.value;
                    key = value.f0;
                }
                out.collect(new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum)));
            }
        }).addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SLIDE)).setParallelism(1);
        tryExecute(env, "Sliding Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
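
To make the sliding-window parameters above concrete, the following standalone sketch (same 1000 ms size and 100 ms slide as the test, but not Flink code) lists the windows a single timestamped element is assigned to; with slide smaller than size, each element lands in size / slide windows, which is why the sink expects NUM_ELEMENTS_PER_KEY / WINDOW_SLIDE results:

public class SlidingWindowAssignmentSketch {

    public static void main(String[] args) {
        long size = 1000;   // WINDOW_SIZE in the test
        long slide = 100;   // WINDOW_SLIDE in the test
        long timestamp = 1234;

        // start of the most recent window that contains the timestamp
        long lastStart = timestamp - (timestamp % slide);

        // walk backwards over all windows that still cover the timestamp (10 windows here)
        for (long start = lastStart; start > timestamp - size; start -= slide) {
            System.out.println("window [" + start + ", " + (start + size) + ")");
        }
    }
}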

Example 14 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class GroupOrderTest, method testReduceWithGroupOrder:

@Test
public void testReduceWithGroupOrder() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple4<Long, Long, Long, Long>> set1 = env.readCsvFile("/tmp/fake.csv").types(Long.class, Long.class, Long.class, Long.class);
    set1.groupBy(1).sortGroup(3, Order.DESCENDING)
            .reduceGroup(new IdentityGroupReducer<Tuple4<Long, Long, Long, Long>>()).name("Reduce")
            .output(new DiscardingOutputFormat<Tuple4<Long, Long, Long, Long>>()).name("Sink");
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // silence the compiler
        return;
    }
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
    SinkPlanNode sinkNode = resolver.getNode("Sink");
    SingleInputPlanNode reducer = resolver.getNode("Reduce");
    // verify the strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());
    Channel c = reducer.getInput();
    Assert.assertEquals(LocalStrategy.SORT, c.getLocalStrategy());
    FieldList ship = new FieldList(1);
    FieldList local = new FieldList(1, 3);
    Assert.assertEquals(ship, c.getShipStrategyKeys());
    Assert.assertEquals(local, c.getLocalStrategyKeys());
    Assert.assertTrue(c.getLocalStrategySortOrder()[0] == reducer.getSortOrders(0)[0]);
    // check that we indeed sort descending
    Assert.assertEquals(false, c.getLocalStrategySortOrder()[1]);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) Tuple4(org.apache.flink.api.java.tuple.Tuple4) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IdentityGroupReducer(org.apache.flink.optimizer.testfunctions.IdentityGroupReducer) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
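
The plan above asks for groups keyed on field 1 with each group delivered sorted descending on field 3, which the optimizer realizes as a hash partitioning on field 1 plus a local sort on fields (1, 3). A plain-Java sketch of those grouping-with-sorted-groups semantics on made-up rows (not the optimizer test itself):

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class GroupOrderSketch {

    public static void main(String[] args) {
        // rows standing in for Tuple4<Long, Long, Long, Long>
        List<long[]> rows = Arrays.asList(
                new long[] { 1, 10, 0, 3 },
                new long[] { 2, 10, 0, 7 },
                new long[] { 3, 20, 0, 1 });

        // group by field 1, like groupBy(1)
        Map<Long, List<long[]>> groups = rows.stream().collect(Collectors.groupingBy(r -> r[1]));

        // sort each group descending on field 3, like sortGroup(3, Order.DESCENDING)
        groups.values().forEach(g -> g.sort((a, b) -> Long.compare(b[3], a[3])));

        groups.forEach((key, group) -> {
            System.out.print("group " + key + ":");
            group.forEach(r -> System.out.print(" " + Arrays.toString(r)));
            System.out.println();
        });
    }
}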

Example 15 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class TPCHQuery10, method main:

// *************************************************************************
//     PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // get customer data set: (custkey, name, address, nationkey, acctbal) 
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers = getCustomerDataSet(env);
    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders = getOrdersDataSet(env);
    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems = getLineitemDataSet(env);
    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations = getNationsDataSet(env);
    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear = orders
            // filter by year
            .filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
            .project(0, 1);
    // lineitems filtered by flag: (orderkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsFilteredByFlag = lineitems
            // filter by flag
            .filter(lineitem -> lineitem.f3.equals("R"))
            .project(0, 1, 2);
    // join orders with lineitems: (custkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsOfCustomerKey = ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag).where(0).equalTo(0).projectFirst(1).projectSecond(1, 2);
    // aggregate for revenue: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueOfCustomerKey = lineitemsOfCustomerKey.map(i -> new Tuple2<>(i.f0, i.f1 * (1 - i.f2))).groupBy(0).sum(1);
    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers.joinWithTiny(nations).where(3).equalTo(0).projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);
    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> customerWithRevenue = customerWithNation.join(revenueOfCustomerKey).where(0).equalTo(0).projectFirst(0, 1, 2, 3, 4).projectSecond(1);
    // emit result
    customerWithRevenue.writeAsCsv(outputPath);
    // execute program
    env.execute("TPCH Query 10 Example");
}
Also used : DataSet(org.apache.flink.api.java.DataSet) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple5(org.apache.flink.api.java.tuple.Tuple5) Tuple6(org.apache.flink.api.java.tuple.Tuple6)
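
The core of the query is the revenue term computed in map() and then summed per customer key via groupBy(0).sum(1). A small plain-Java sketch of that aggregation on made-up rows (not TPC-H data):

import java.util.HashMap;
import java.util.Map;

public class RevenueAggregationSketch {

    public static void main(String[] args) {
        // (custkey, extendedprice, discount) rows, as produced by the order/lineitem join
        Object[][] rows = { { 1, 1000.0, 0.05 }, { 1, 200.0, 0.10 }, { 2, 500.0, 0.00 } };

        Map<Integer, Double> revenuePerCustomer = new HashMap<>();
        for (Object[] r : rows) {
            int custkey = (Integer) r[0];
            // revenue = extendedprice * (1 - discount), as in the map() above
            double revenue = (Double) r[1] * (1 - (Double) r[2]);
            revenuePerCustomer.merge(custkey, revenue, Double::sum);
        }

        revenuePerCustomer.forEach((custkey, revenue) ->
                System.out.println("custkey=" + custkey + " revenue=" + revenue));
    }
}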

Aggregations

Tuple4 (org.apache.flink.api.java.tuple.Tuple4): 43 uses
Test (org.junit.Test): 34 uses
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 27 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 15 uses
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 12 uses
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 11 uses
Configuration (org.apache.flink.configuration.Configuration): 10 uses
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 10 uses
SuccessException (org.apache.flink.test.util.SuccessException): 10 uses
IOException (java.io.IOException): 6 uses
Tuple (org.apache.flink.api.java.tuple.Tuple): 5 uses
KeySelector (org.apache.flink.api.java.functions.KeySelector): 4 uses
ArrayList (java.util.ArrayList): 3 uses
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 3 uses
Plan (org.apache.flink.api.common.Plan): 2 uses
Tuple5 (org.apache.flink.api.java.tuple.Tuple5): 2 uses
Tuple6 (org.apache.flink.api.java.tuple.Tuple6): 2 uses
ParameterTool (org.apache.flink.api.java.utils.ParameterTool): 2 uses
Path (org.apache.flink.core.fs.Path): 2 uses
KvStateSnapshot (org.apache.flink.migration.runtime.state.KvStateSnapshot): 2 uses