
Example 46 with StreamExecutionEnvironment

Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

The class StreamingScalabilityAndLatency, method runPartitioningProgram.

private static void runPartitioningProgram(int jobManagerPort, int parallelism) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", jobManagerPort);
    env.setParallelism(parallelism);
    // Reuse objects between operators to cut allocation pressure.
    env.getConfig().enableObjectReuse();
    // A very low buffer timeout (5 ms) flushes network buffers quickly, trading throughput for latency.
    env.setBufferTimeout(5L);
    env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);
    env.addSource(new TimeStampingSource())
        .map(new IdMapper<Tuple2<Long, Long>>())
        // keyBy(0) forces a network shuffle between the mapper and the sink.
        .keyBy(0)
        .addSink(new TimestampingSink());
    env.execute("Partitioning Program");
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
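The helper classes TimeStampingSource, IdMapper, and TimestampingSink are not shown. As a minimal sketch, IdMapper is plausibly a trivial identity MapFunction, kept in the job only so that the keyBy shuffle sits between two operators (an assumption based on its name and usage, not on the class body itself):

import org.apache.flink.api.common.functions.MapFunction;

// Hypothetical reconstruction: forwards every record unchanged.
public static class IdMapper<T> implements MapFunction<T, T> {

    @Override
    public T map(T value) throws Exception {
        return value;
    }
}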

Example 47 with StreamExecutionEnvironment

Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

The class AbstractQueryableStateITCase, method testFoldingState.

/**
 * Tests a simple queryable state instance backed by folding state. Each
 * source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples,
 * which are then queried. The folding state sums the values up and maps
 * them to Strings. The test succeeds once each subtask index has been
 * queried and has returned n*(n+1)/2 (as a String).
 */
@Test
public void testFoldingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Folding state
        FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState = new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);
        QueryableStateStream<Integer, String> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("pumba", foldingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        String expected = Integer.toString(numElements * (numElements + 1) / 2);
        for (int key = 0; key < NUM_SLOTS; key++) {
            final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                Future<byte[]> future = getKvStateWithRetries(client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, false);
                byte[] serializedValue = Await.result(future, deadline.timeLeft());
                String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
                if (expected.equals(value)) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(50);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) FoldingStateDescriptor(org.apache.flink.api.common.state.FoldingStateDescriptor) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
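SumFold is referenced but not shown. Given the FoldingStateDescriptor above (input type Tuple2<Integer, Long>, String accumulator starting at "0"), a plausible sketch is a FoldFunction that parses the accumulator, adds the tuple's second field, and renders the sum back to a String; with values 0..numElements per key this produces the expected n*(n+1)/2:

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical sketch, consistent with the descriptor's types above.
private static class SumFold implements FoldFunction<Tuple2<Integer, Long>, String> {

    @Override
    public String fold(String accumulator, Tuple2<Integer, Long> value) throws Exception {
        long sum = Long.parseLong(accumulator) + value.f1;
        return Long.toString(sum);
    }
}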

Example 48 with StreamExecutionEnvironment

Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

The class AbstractQueryableStateITCase, method testReducingState.

/**
 * Tests a simple queryable state instance backed by reducing state. Each
 * source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples,
 * which are then queried. The reducing state instance sums the values up.
 * The test succeeds once each subtask index has been queried and has
 * returned n*(n+1)/2.
 */
@Test
public void testReducingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("jungle", reducingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        long expected = numElements * (numElements + 1) / 2;
        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
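SumReduce is likewise not shown. A plausible sketch, assuming it keeps the key and adds the second tuple fields, which matches both the descriptor's type (source.getType()) and the expected n*(n+1)/2 result:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical sketch: sums the f1 fields of two tuples with the same key.
private static class SumReduce implements ReduceFunction<Tuple2<Integer, Long>> {

    @Override
    public Tuple2<Integer, Long> reduce(Tuple2<Integer, Long> value1, Tuple2<Integer, Long> value2) throws Exception {
        return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
    }
}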

Example 49 with StreamExecutionEnvironment

Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

The class SqlITCase, method testFilter.

@Test
public void testFilter() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    StreamITCase.clear();
    DataStream<Tuple5<Integer, Long, Integer, String, Long>> ds = StreamTestData.get5TupleDataStream(env);
    tableEnv.registerDataStream("MyTable", ds, "a, b, c, d, e");
    String sqlQuery = "SELECT a, b, e FROM MyTable WHERE c < 4";
    Table result = tableEnv.sql(sqlQuery);
    DataStream<Row> resultSet = tableEnv.toDataStream(result, Row.class);
    resultSet.addSink(new StreamITCase.StringSink());
    env.execute();
    List<String> expected = new ArrayList<>();
    expected.add("1,1,1");
    expected.add("2,2,2");
    expected.add("2,3,1");
    expected.add("3,4,2");
    StreamITCase.compareWithList(expected);
}
Also used : Tuple5(org.apache.flink.api.java.tuple.Tuple5) Table(org.apache.flink.table.api.Table) StreamITCase(org.apache.flink.table.api.scala.stream.utils.StreamITCase) ArrayList(java.util.ArrayList) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)
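StreamTestData.get5TupleDataStream is not shown, but the expected output (fields a, b, e of every row where c < 4) pins down the numeric fields of the first four rows. A hypothetical drop-in replacement for that call inside testFilter, built with env.fromElements (the String field d is a guess and does not affect this query):

// Hypothetical stand-in for StreamTestData.get5TupleDataStream(env);
// only the rows with c = 0..3 are needed to reproduce the expected output.
DataStream<Tuple5<Integer, Long, Integer, String, Long>> ds = env.fromElements(
    new Tuple5<>(1, 1L, 0, "Hallo", 1L),
    new Tuple5<>(2, 2L, 1, "Hallo Welt", 2L),
    new Tuple5<>(2, 3L, 2, "Hallo Welt wie", 1L),
    new Tuple5<>(3, 4L, 3, "Hallo Welt wie gehts?", 2L));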

Example 50 with StreamExecutionEnvironment

Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

The class SqlITCase, method testUnion.

@Test
public void testUnion() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    StreamITCase.clear();
    DataStream<Tuple3<Integer, Long, String>> ds1 = StreamTestData.getSmall3TupleDataSet(env);
    Table t1 = tableEnv.fromDataStream(ds1, "a,b,c");
    tableEnv.registerTable("T1", t1);
    DataStream<Tuple5<Integer, Long, Integer, String, Long>> ds2 = StreamTestData.get5TupleDataStream(env);
    tableEnv.registerDataStream("T2", ds2, "a, b, d, c, e");
    String sqlQuery = "SELECT * FROM T1 " + "UNION ALL " + "(SELECT a, b, c FROM T2 WHERE a	< 3)";
    Table result = tableEnv.sql(sqlQuery);
    DataStream<Row> resultSet = tableEnv.toDataStream(result, Row.class);
    resultSet.addSink(new StreamITCase.StringSink());
    env.execute();
    List<String> expected = new ArrayList<>();
    expected.add("1,1,Hi");
    expected.add("2,2,Hello");
    expected.add("3,2,Hello world");
    expected.add("1,1,Hallo");
    expected.add("2,2,Hallo Welt");
    expected.add("2,3,Hallo Welt wie");
    StreamITCase.compareWithList(expected);
}
Also used : Table(org.apache.flink.table.api.Table) StreamITCase(org.apache.flink.table.api.scala.stream.utils.StreamITCase) ArrayList(java.util.ArrayList) Tuple5(org.apache.flink.api.java.tuple.Tuple5) Tuple3(org.apache.flink.api.java.tuple.Tuple3) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)
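Here too the inputs are implied by the expected output: the three T1 rows must be (1,1,Hi), (2,2,Hello), and (3,2,Hello world), while the "Hallo" rows come from T2 through the reordered field list in its registration. A hedged inline substitute for the getSmall3TupleDataSet call:

// Hypothetical stand-in for StreamTestData.getSmall3TupleDataSet(env),
// reconstructed from the expected union output above.
DataStream<Tuple3<Integer, Long, String>> ds1 = env.fromElements(
    new Tuple3<>(1, 1L, "Hi"),
    new Tuple3<>(2, 2L, "Hello"),
    new Tuple3<>(3, 2L, "Hello world"));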

Aggregations

StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 383 uses
Test (org.junit.Test): 286 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 192 uses
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 81 uses
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 75 uses
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 48 uses
EventTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger): 42 uses
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 34 uses
Properties (java.util.Properties): 32 uses
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 31 uses
TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows): 30 uses
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 27 uses
SuccessException (org.apache.flink.test.util.SuccessException): 27 uses
IOException (java.io.IOException): 24 uses
Configuration (org.apache.flink.configuration.Configuration): 24 uses
SlidingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows): 24 uses
KeySelector (org.apache.flink.api.java.functions.KeySelector): 22 uses
ProcessingTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.ProcessingTimeTrigger): 21 uses
ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor): 20 uses
MapFunction (org.apache.flink.api.common.functions.MapFunction): 19 uses