Example 1 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class TPCHQuery10, method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // get customer data set: (custkey, name, address, nationkey, acctbal) 
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers = getCustomerDataSet(env);
    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders = getOrdersDataSet(env);
    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems = getLineitemDataSet(env);
    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations = getNationsDataSet(env);
    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
            orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990).project(0, 1);
    // lineitems filtered by flag: (orderkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsFilteredByFlag =
            lineitems.filter(lineitem -> lineitem.f3.equals("R")).project(0, 1, 2);
    // join orders with lineitems: (custkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsOfCustomerKey = ordersFilteredByYear
            .joinWithHuge(lineitemsFilteredByFlag)
            .where(0).equalTo(0)
            .projectFirst(1)
            .projectSecond(1, 2);
    // aggregate for revenue: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueOfCustomerKey = lineitemsOfCustomerKey
            .map(i -> new Tuple2<>(i.f0, i.f1 * (1 - i.f2)))
            .groupBy(0)
            .sum(1);
    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
            .joinWithTiny(nations)
            .where(3).equalTo(0)
            .projectFirst(0, 1, 2)
            .projectSecond(1)
            .projectFirst(4);
    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> customerWithRevenue = customerWithNation
            .join(revenueOfCustomerKey)
            .where(0).equalTo(0)
            .projectFirst(0, 1, 2, 3, 4)
            .projectSecond(1);
    // emit result
    customerWithRevenue.writeAsCsv(outputPath);
    // execute program
    env.execute("TPCH Query 10 Example");
}
Also used: DataSet (org.apache.flink.api.java.DataSet), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), Tuple4 (org.apache.flink.api.java.tuple.Tuple4), Tuple5 (org.apache.flink.api.java.tuple.Tuple5), Tuple6 (org.apache.flink.api.java.tuple.Tuple6)
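
The query is built entirely from the filter/project/join primitives of the batch DataSet API. As a quick reference, here is a minimal, self-contained sketch of the filter-then-project step on the same Tuple4 lineitem shape; the class name and sample rows are made up for illustration:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;

public class FilterProjectSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // (orderkey, extendedprice, discount, returnflag) -- same shape as the lineitem set above
        DataSet<Tuple4<Integer, Double, Double, String>> lineitems = env.fromElements(
                new Tuple4<>(1, 100.0, 0.05, "R"),
                new Tuple4<>(2, 200.0, 0.10, "N"),
                new Tuple4<>(3, 300.0, 0.00, "R"));
        // keep only returned items ("R"), then drop the flag column
        DataSet<Tuple3<Integer, Double, Double>> returned = lineitems
                .filter(li -> li.f3.equals("R"))
                .project(0, 1, 2);
        returned.print();
    }
}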

Example 2 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class MigrationV0ToV1Test, method testSavepointMigrationV0ToV1.

/**
 * Simple test of savepoint methods.
 */
@Test
public void testSavepointMigrationV0ToV1() throws Exception {
    String target = tmp.getRoot().getAbsolutePath();
    assertEquals(0, tmp.getRoot().listFiles().length);
    long checkpointId = ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE);
    int numTaskStates = 4;
    int numSubtaskStates = 16;
    Collection<org.apache.flink.migration.runtime.checkpoint.TaskState> expected = createTaskStatesOld(numTaskStates, numSubtaskStates);
    SavepointV0 savepoint = new SavepointV0(checkpointId, expected);
    assertEquals(SavepointV0.VERSION, savepoint.getVersion());
    assertEquals(checkpointId, savepoint.getCheckpointId());
    assertEquals(expected, savepoint.getOldTaskStates());
    assertFalse(savepoint.getOldTaskStates().isEmpty());
    Exception latestException = null;
    Path path = null;
    FSDataOutputStream fdos = null;
    FileSystem fs = null;
    try {
        // Try to create a FS output stream
        for (int attempt = 0; attempt < 10; attempt++) {
            path = new Path(target, FileUtils.getRandomFilename("savepoint-"));
            if (fs == null) {
                fs = FileSystem.get(path.toUri());
            }
            try {
                fdos = fs.create(path, false);
                break;
            } catch (Exception e) {
                latestException = e;
            }
        }
        if (fdos == null) {
            throw new IOException("Failed to create file output stream at " + path, latestException);
        }
        try (DataOutputStream dos = new DataOutputStream(fdos)) {
            dos.writeInt(SavepointStore.MAGIC_NUMBER);
            dos.writeInt(savepoint.getVersion());
            SavepointV0Serializer.INSTANCE.serializeOld(savepoint, dos);
        }
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Savepoint sp = SavepointStore.loadSavepoint(path.toString(), cl);
        int t = 0;
        for (TaskState taskState : sp.getTaskStates()) {
            for (int p = 0; p < taskState.getParallelism(); ++p) {
                SubtaskState subtaskState = taskState.getState(p);
                ChainedStateHandle<StreamStateHandle> legacyOperatorState = subtaskState.getLegacyOperatorState();
                for (int c = 0; c < legacyOperatorState.getLength(); ++c) {
                    StreamStateHandle stateHandle = legacyOperatorState.get(c);
                    try (InputStream is = stateHandle.openInputStream()) {
                        Tuple4<Integer, Integer, Integer, Integer> expTestState = new Tuple4<>(0, t, p, c);
                        Tuple4<Integer, Integer, Integer, Integer> actTestState;
                        //check function state
                        if (p % 4 != 0) {
                            assertEquals(1, is.read());
                            actTestState = InstantiationUtil.deserializeObject(is, cl);
                            assertEquals(expTestState, actTestState);
                        } else {
                            assertEquals(0, is.read());
                        }
                        //check operator state
                        expTestState.f0 = 1;
                        actTestState = InstantiationUtil.deserializeObject(is, cl);
                        assertEquals(expTestState, actTestState);
                    }
                }
                //check keyed state
                KeyGroupsStateHandle keyGroupsStateHandle = subtaskState.getManagedKeyedState();
                if (t % 3 != 0) {
                    assertEquals(1, keyGroupsStateHandle.getNumberOfKeyGroups());
                    assertEquals(p, keyGroupsStateHandle.getGroupRangeOffsets().getKeyGroupRange().getStartKeyGroup());
                    ByteStreamStateHandle stateHandle = (ByteStreamStateHandle) keyGroupsStateHandle.getDelegateStateHandle();
                    HashMap<String, KvStateSnapshot<?, ?, ?, ?>> testKeyedState = MigrationInstantiationUtil.deserializeObject(stateHandle.getData(), cl);
                    assertEquals(2, testKeyedState.size());
                    for (KvStateSnapshot<?, ?, ?, ?> snapshot : testKeyedState.values()) {
                        MemValueState.Snapshot<?, ?, ?> castedSnapshot = (MemValueState.Snapshot<?, ?, ?>) snapshot;
                        byte[] data = castedSnapshot.getData();
                        assertEquals(t, data[0]);
                        assertEquals(p, data[1]);
                    }
                } else {
                    assertEquals(null, keyGroupsStateHandle);
                }
            }
            ++t;
        }
        savepoint.dispose();
    } finally {
        // Dispose
        SavepointStore.removeSavepointFile(path.toString());
    }
}
Also used: FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream), DataOutputStream (java.io.DataOutputStream), ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle), KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle), StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle), SavepointV0 (org.apache.flink.migration.runtime.checkpoint.savepoint.SavepointV0), FileSystem (org.apache.flink.core.fs.FileSystem), Path (org.apache.flink.core.fs.Path), InputStream (java.io.InputStream), MemValueState (org.apache.flink.migration.runtime.state.memory.MemValueState), IOException (java.io.IOException), KvStateSnapshot (org.apache.flink.migration.runtime.state.KvStateSnapshot), Tuple4 (org.apache.flink.api.java.tuple.Tuple4), SubtaskState (org.apache.flink.runtime.checkpoint.SubtaskState), StreamTaskState (org.apache.flink.migration.streaming.runtime.tasks.StreamTaskState), TaskState (org.apache.flink.runtime.checkpoint.TaskState), Test (org.junit.Test)
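
The inner loop deserializes each Tuple4 test payload with InstantiationUtil and compares it against the expected value. The round trip itself is plain Java serialization; here is a minimal sketch with no savepoint machinery involved (the class name and values are arbitrary):

import java.io.ByteArrayInputStream;
import java.io.InputStream;

import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.util.InstantiationUtil;

public class Tuple4RoundTripSketch {

    public static void main(String[] args) throws Exception {
        // same four-int shape as the test state checked above
        Tuple4<Integer, Integer, Integer, Integer> expected = new Tuple4<>(0, 1, 2, 3);
        byte[] bytes = InstantiationUtil.serializeObject(expected);
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        try (InputStream is = new ByteArrayInputStream(bytes)) {
            Tuple4<Integer, Integer, Integer, Integer> actual =
                    InstantiationUtil.deserializeObject(is, cl);
            System.out.println(expected.equals(actual)); // true
        }
    }
}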

Example 3 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class TableEnvironmentITCase, method testAsFromTupleToPojo.

@Test
public void testAsFromTupleToPojo() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());
    List<Tuple4<String, Integer, Double, String>> data = new ArrayList<>();
    data.add(new Tuple4<>("Rofl", 1, 1.0, "Hi"));
    data.add(new Tuple4<>("lol", 2, 1.0, "Hi"));
    data.add(new Tuple4<>("Test me", 4, 3.33, "Hello world"));
    Table table = tableEnv.fromDataSet(env.fromCollection(data), "q, w, e, r").select("q as a, w as b, e as c, r as d");
    DataSet<SmallPojo2> ds = tableEnv.toDataSet(table, SmallPojo2.class);
    List<SmallPojo2> results = ds.collect();
    String expected = "Rofl,1,1.0,Hi\n" + "lol,2,1.0,Hi\n" + "Test me,4,3.33,Hello world\n";
    compareResultAsText(results, expected);
}
Also used: Tuple4 (org.apache.flink.api.java.tuple.Tuple4), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Table (org.apache.flink.table.api.Table), ArrayList (java.util.ArrayList), BatchTableEnvironment (org.apache.flink.table.api.java.BatchTableEnvironment), Test (org.junit.Test)
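
SmallPojo2 is declared elsewhere in the test class. For tableEnv.toDataSet to map the renamed columns onto it, it must satisfy Flink's POJO rules (public class, public no-arg constructor, public or getter/setter fields) with field names matching the aliases. A hypothetical stand-in declaration, assuming the actual definition in the Flink test sources looks roughly like this:

// Hypothetical stand-in for SmallPojo2: field names match the aliases a, b, c, d
// chosen in the select(...) call above, and the types match the tuple fields.
public static class SmallPojo2 {

    public String a;
    public int b;
    public double c;
    public String d;

    public SmallPojo2() {
        // Flink POJOs need a public no-arg constructor
    }

    @Override
    public String toString() {
        // comma-separated form, matching the expected strings compared above
        return a + "," + b + "," + c + "," + d;
    }
}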

Example 4 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class AbstractEventTimeWindowCheckpointingITCase, method testPreAggregatedTumblingTimeWindow.

@Test
public void testPreAggregatedTumblingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
    final int WINDOW_SIZE = windowSize();
    final int NUM_KEYS = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3)).rebalance().keyBy(0).timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS)).reduce(new ReduceFunction<Tuple2<Long, IntType>>() {

            @Override
            public Tuple2<Long, IntType> reduce(Tuple2<Long, IntType> a, Tuple2<Long, IntType> b) {
                return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
            }
        }, new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

            private boolean open = false;

            @Override
            public void open(Configuration parameters) {
                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
            }

            @Override
            public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> input, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                // validate that the function has been opened properly
                assertTrue(open);
                for (Tuple2<Long, IntType> in : input) {
                    out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                }
            }
        }).addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE)).setParallelism(1);
        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: Configuration (org.apache.flink.configuration.Configuration), TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow), SuccessException (org.apache.flink.test.util.SuccessException), IOException (java.io.IOException), Tuple4 (org.apache.flink.api.java.tuple.Tuple4), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Tuple (org.apache.flink.api.java.tuple.Tuple), Test (org.junit.Test)
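
FailingSource, ValidatingSink, and IntType are checkpointing test utilities; the Tuple4 output is simply (key, window start, window end, value). Here is a stripped-down sketch of the same reduce-plus-window-function pattern on a finite in-memory stream, without the failure injection; the class name, timestamps, and input values are made up:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

public class PreAggregatedWindowSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.fromElements(new Tuple2<>(1L, 10), new Tuple2<>(1L, 20), new Tuple2<>(2L, 5))
                // constant timestamps put everything into the first window;
                // the final watermark of the finite source then fires it
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple2<Long, Integer>>() {
                    @Override
                    public long extractAscendingTimestamp(Tuple2<Long, Integer> element) {
                        return 0L;
                    }
                })
                .keyBy(0)
                .timeWindow(Time.milliseconds(100))
                // pre-aggregate inside the window, as in the test above
                .reduce(new ReduceFunction<Tuple2<Long, Integer>>() {
                    @Override
                    public Tuple2<Long, Integer> reduce(Tuple2<Long, Integer> a, Tuple2<Long, Integer> b) {
                        return new Tuple2<>(a.f0, a.f1 + b.f1);
                    }
                }, new WindowFunction<Tuple2<Long, Integer>, Tuple4<Long, Long, Long, Integer>, Tuple, TimeWindow>() {
                    @Override
                    public void apply(Tuple key, TimeWindow window, Iterable<Tuple2<Long, Integer>> input,
                            Collector<Tuple4<Long, Long, Long, Integer>> out) {
                        // emit (key, windowStart, windowEnd, aggregatedValue)
                        for (Tuple2<Long, Integer> in : input) {
                            out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                        }
                    }
                })
                .print();
        env.execute("Pre-aggregated window sketch");
    }
}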

Example 5 with Tuple4

Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.

The class AbstractEventTimeWindowCheckpointingITCase, method doTestTumblingTimeWindowWithKVState.

public void doTestTumblingTimeWindowWithKVState(int maxParallelism) {
    final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
    final int WINDOW_SIZE = windowSize();
    final int NUM_KEYS = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setMaxParallelism(maxParallelism);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3)).rebalance().keyBy(0).timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS)).apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

            private boolean open = false;

            private ValueState<Integer> count;

            @Override
            public void open(Configuration parameters) {
                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
                count = getRuntimeContext().getState(new ValueStateDescriptor<>("count", Integer.class, 0));
            }

            @Override
            public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> values, Collector<Tuple4<Long, Long, Long, IntType>> out) throws Exception {
                // different count results for each key
                if (count.value() == 0) {
                    count.update(tuple.<Long>getField(0).intValue());
                }
                // validate that the function has been opened properly
                assertTrue(open);
                count.update(count.value() + 1);
                out.collect(new Tuple4<>(tuple.<Long>getField(0), window.getStart(), window.getEnd(), new IntType(count.value())));
            }
        }).addSink(new CountValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE)).setParallelism(1);
        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: Configuration (org.apache.flink.configuration.Configuration), TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow), SuccessException (org.apache.flink.test.util.SuccessException), IOException (java.io.IOException), Tuple4 (org.apache.flink.api.java.tuple.Tuple4), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Tuple (org.apache.flink.api.java.tuple.Tuple)
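
What distinguishes this example from the previous one is the keyed ValueState counter registered in open(). That pattern is independent of windows; here is a minimal sketch of the same descriptor-and-update sequence in a RichFlatMapFunction (the class name and input values are illustrative):

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class KeyedValueStateSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements(new Tuple2<>(1L, 10), new Tuple2<>(1L, 20), new Tuple2<>(2L, 5))
                .keyBy(0)
                .flatMap(new RichFlatMapFunction<Tuple2<Long, Integer>, Tuple2<Long, Integer>>() {

                    private transient ValueState<Integer> count;

                    @Override
                    public void open(Configuration parameters) {
                        // same descriptor style as the test above: name, type, default value
                        count = getRuntimeContext().getState(
                                new ValueStateDescriptor<>("count", Integer.class, 0));
                    }

                    @Override
                    public void flatMap(Tuple2<Long, Integer> in, Collector<Tuple2<Long, Integer>> out)
                            throws Exception {
                        // state is scoped to the current key, so each key counts independently
                        count.update(count.value() + 1);
                        out.collect(new Tuple2<>(in.f0, count.value()));
                    }
                })
                .print();
        env.execute("Keyed ValueState sketch");
    }
}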

Aggregations

Tuple4 (org.apache.flink.api.java.tuple.Tuple4): 57
Test (org.junit.Test): 44
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 31
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 21
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 18
Configuration (org.apache.flink.configuration.Configuration): 15
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 15
Tuple (org.apache.flink.api.java.tuple.Tuple): 13
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 12
IOException (java.io.IOException): 11
MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration): 9
FailingSource (org.apache.flink.test.checkpointing.utils.FailingSource): 9
IntType (org.apache.flink.test.checkpointing.utils.IntType): 9
ValidatingSink (org.apache.flink.test.checkpointing.utils.ValidatingSink): 9
SuccessException (org.apache.flink.test.util.SuccessException): 6
Plan (org.apache.flink.api.common.Plan): 5
KeySelector (org.apache.flink.api.java.functions.KeySelector): 5
ArrayList (java.util.ArrayList): 3
DataSet (org.apache.flink.api.java.DataSet): 3
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 3