Example 61 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

From class SortPartitionITCase, method testSortPartitionWithKeySelector1.

@Test
public void testSortPartitionWithKeySelector1() throws Exception {
    /*
     * Test sort partition on an extracted key.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    List<Tuple1<Boolean>> result = ds
        .map(new IdMapper<Tuple3<Integer, Long, String>>())
        .setParallelism(4) // parallelize input
        .sortPartition(new KeySelector<Tuple3<Integer, Long, String>, Long>() {

            @Override
            public Long getKey(Tuple3<Integer, Long, String> value) throws Exception {
                return value.f1;
            }
        }, Order.ASCENDING)
        .mapPartition(new OrderCheckMapper<>(new Tuple3AscendingChecker()))
        .distinct()
        .collect();
    String expected = "(true)\n";
    compareResultAsText(result, expected);
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Tuple1 (org.apache.flink.api.java.tuple.Tuple1), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), KeySelector (org.apache.flink.api.java.functions.KeySelector), Test (org.junit.Test)
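
OrderCheckMapper and Tuple3AscendingChecker are test helpers defined elsewhere in SortPartitionITCase and not shown above. A minimal sketch of the idea behind them, collapsed into a single self-contained function (the class name is illustrative, not the actual Flink test utility):

import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;

// Hypothetical sketch: emits one Tuple1<Boolean> per partition, true iff the
// partition's elements arrive in ascending order of the Long field f1.
public static class AscendingF1Checker
        implements MapPartitionFunction<Tuple3<Integer, Long, String>, Tuple1<Boolean>> {

    @Override
    public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Tuple1<Boolean>> out) {
        long previous = Long.MIN_VALUE;
        boolean ordered = true;
        for (Tuple3<Integer, Long, String> t : values) {
            if (t.f1 < previous) {
                ordered = false;
                break;
            }
            previous = t.f1;
        }
        out.collect(Tuple1.of(ordered));
    }
}

With parallelism 4, each of the four partitions emits (true), and the trailing distinct() collapses them into the single expected "(true)" tuple.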

Example 62 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

From class CoGroupITCase, method testCoGroupWithMultipleKeyFieldsWithInnerClassKeyExtractorWithoutClosureCleaner.

@Test
public void testCoGroupWithMultipleKeyFieldsWithInnerClassKeyExtractorWithoutClosureCleaner() throws Exception {
    /*
     * CoGroup with multiple key fields; tests that disabling the closure
     * cleaner leads to an exception when inner classes are used.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableClosureCleaner();
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
    boolean correctExceptionTriggered = false;
    try {
        DataSet<Tuple3<Integer, Long, String>> coGrouped = ds1.coGroup(ds2).where(new KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>>() {

            @Override
            public Tuple2<Integer, Long> getKey(Tuple5<Integer, Long, Integer, String, Long> t) throws Exception {
                return new Tuple2<Integer, Long>(t.f0, t.f4);
            }
        }).equalTo(new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {

            @Override
            public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> t) {
                return new Tuple2<Integer, Long>(t.f0, t.f1);
            }
        }).with(new CoGroupFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {

            @Override
            public void coGroup(Iterable<Tuple5<Integer, Long, Integer, String, Long>> first, Iterable<Tuple3<Integer, Long, String>> second, Collector<Tuple3<Integer, Long, String>> out) {
                List<String> strs = new ArrayList<String>();
                for (Tuple5<Integer, Long, Integer, String, Long> t : first) {
                    strs.add(t.f3);
                }
                for (Tuple3<Integer, Long, String> t : second) {
                    for (String s : strs) {
                        out.collect(new Tuple3<Integer, Long, String>(t.f0, t.f1, s));
                    }
                }
            }
        });
    } catch (InvalidProgramException ex) {
        correctExceptionTriggered = (ex.getCause() instanceof java.io.NotSerializableException);
    }
    Assert.assertTrue(correctExceptionTriggered);
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), KeySelector (org.apache.flink.api.java.functions.KeySelector), InvalidProgramException (org.apache.flink.api.common.InvalidProgramException), ArrayList (java.util.ArrayList), List (java.util.List), IOException (java.io.IOException), Tuple5 (org.apache.flink.api.java.tuple.Tuple5), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), Test (org.junit.Test)
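
The cause chain is the point of this test: with the closure cleaner disabled, each anonymous inner class keeps an implicit reference to the enclosing, non-serializable test instance, so serializing the user function fails with a NotSerializableException wrapped in an InvalidProgramException. A sketch of one way to sidestep the problem, using a static nested class that carries no enclosing-instance reference (the class name is illustrative):

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple5;

// Hypothetical sketch: a static nested class has no hidden reference to the
// outer class, so it serializes even when the closure cleaner is disabled.
public static class CompositeKeyExtractor
        implements KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>> {

    @Override
    public Tuple2<Integer, Long> getKey(Tuple5<Integer, Long, Integer, String, Long> t) {
        return new Tuple2<>(t.f0, t.f4);
    }
}

// Usage: ds1.coGroup(ds2).where(new CompositeKeyExtractor()).equalTo(...)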

Example 63 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

From class FirstNITCase, method testFaultyCast.

/**
 * Regression test for FLINK-2135: building a plan whose grouping and
 * group-sorting KeySelectors return different key types must not fail
 * with a faulty cast.
 */
@Test
public void testFaultyCast() throws Exception {
    ExecutionEnvironment ee = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> b = ee.fromElements("a", "b");
    // The operator below is only constructed, never executed: the point is
    // that mixing a Long grouping key with a Double sorting key no longer
    // breaks plan construction.
    GroupReduceOperator<String, String> a = b.groupBy(new KeySelector<String, Long>() {

        @Override
        public Long getKey(String value) throws Exception {
            return 1L;
        }
    }).sortGroup(new KeySelector<String, Double>() {

        @Override
        public Double getKey(String value) throws Exception {
            return 1.0;
        }
    }, Order.DESCENDING).first(1);
    List<String> result = b.collect();
    String expected = "a\nb";
    compareResultAsText(result, expected);
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), KeySelector (org.apache.flink.api.java.functions.KeySelector), Test (org.junit.Test)
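
The test only constructs the operator; the sketch below shows what executing it would yield, assuming it ran inside the same method where a is in scope:

// Hypothetical sketch: executing the plan built above instead of only constructing it.
// With the constant group key there is a single group, so first(1) keeps exactly one element.
List<String> firstElement = a.collect();
// firstElement.size() == 1; which of "a"/"b" survives is unspecified, because
// every element has the same constant sort key.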

Example 64 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

From class AbstractQueryableStateITCase, method testQueryNonStartedJobState.

/**
 * Similar to {@link #testValueState()}, but issues one request before the
 * job is submitted; that first request is expected to fail.
 */
@Test
public void testQueryNonStartedJobState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Value state
        ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), null);
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("hakuna", valueState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        long expected = numElements;
        // Query once BEFORE the job is submitted; this first request fails
        // because the job is not running yet.
        client.getKvState(
                jobId,
                queryableState.getQueryableStateName(),
                0,
                KvStateRequestSerializer.serializeKeyAndNamespace(
                        0,
                        queryableState.getKeySerializer(),
                        VoidNamespace.INSTANCE,
                        VoidNamespaceSerializer.INSTANCE));
        cluster.submitJobDetached(jobGraph);
        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used: Deadline (scala.concurrent.duration.Deadline), QueryableStateClient (org.apache.flink.runtime.query.QueryableStateClient), KeySelector (org.apache.flink.api.java.functions.KeySelector), ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), AtomicLong (java.util.concurrent.atomic.AtomicLong), CancellationSuccess (org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
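
executeValueQuery and getKvStateWithRetries are test helpers whose bodies are not shown here. A rough sketch of the retry idea behind them, assuming the Scala-future-based client API used above (the helper name, back-off interval, and failure handling are illustrative):

// Hypothetical sketch: re-issue the query until it succeeds or the deadline expires.
private static byte[] queryWithRetries(
        QueryableStateClient client,
        JobID jobId,
        String queryableName,
        int key,
        byte[] serializedKey,
        Deadline deadline) throws Exception {
    while (deadline.hasTimeLeft()) {
        try {
            return Await.result(client.getKvState(jobId, queryableName, key, serializedKey), deadline.timeLeft());
        } catch (Exception e) {
            // The job or the state instance may not be registered yet; back off and retry.
            Thread.sleep(100);
        }
    }
    throw new java.util.concurrent.TimeoutException("Query did not succeed before the deadline");
}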

Example 65 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

From class AbstractQueryableStateITCase, method testValueStateDefault.

/**
 * Tests a simple queryable value state instance with a default value set.
 * Each source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples;
 * every record is mapped to key 1, but key 0 is queried, which should throw
 * an {@link UnknownKeyOrNamespace} exception.
 *
 * @throws UnknownKeyOrNamespace thrown due to querying a non-existent key
 */
@Test(expected = UnknownKeyOrNamespace.class)
public void testValueStateDefault() throws Exception, UnknownKeyOrNamespace {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Value state
        ValueStateDescriptor<Tuple2<Integer, Long>> valueState =
                new ValueStateDescriptor<>("any", source.getType(), Tuple2.of(0, 1337L));
        // only expose key "1"
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return 1;
            }
        }).asQueryableState("hakuna", valueState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query key 0, which was never written (every record maps to key 1),
        // so the lookup is expected to fail with UnknownKeyOrNamespace.
        int key = 0;
        final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
                key,
                queryableState.getKeySerializer(),
                VoidNamespace.INSTANCE,
                VoidNamespaceSerializer.INSTANCE);
        Future<byte[]> future = getKvStateWithRetries(
                client,
                jobId,
                queryableState.getQueryableStateName(),
                key,
                serializedKey,
                QUERY_RETRY_DELAY,
                true);
        Await.result(future, deadline.timeLeft());
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used: Deadline (scala.concurrent.duration.Deadline), QueryableStateClient (org.apache.flink.runtime.query.QueryableStateClient), KeySelector (org.apache.flink.api.java.functions.KeySelector), ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), AtomicLong (java.util.concurrent.atomic.AtomicLong), CancellationSuccess (org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
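
For contrast with the failing key-0 lookup, here is a sketch of how the successful path for key 1 could look. It assumes a getValueSerializer() accessor on QueryableStateStream symmetric to the getKeySerializer() used above, and a deserializeValue counterpart to serializeKeyAndNamespace on KvStateRequestSerializer:

// Hypothetical sketch: query key 1, to which every record is mapped, and decode the answer.
byte[] serializedKey1 = KvStateRequestSerializer.serializeKeyAndNamespace(
        1, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
byte[] serializedResult = Await.result(
        client.getKvState(jobId, queryableState.getQueryableStateName(), 1, serializedKey1),
        deadline.timeLeft());
Tuple2<Integer, Long> value = KvStateRequestSerializer.deserializeValue(
        serializedResult, queryableState.getValueSerializer());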

Aggregations

KeySelector (org.apache.flink.api.java.functions.KeySelector) 120
Test (org.junit.Test) 113
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 45
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 44
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 39
Watermark (org.apache.flink.streaming.api.watermark.Watermark) 30
List (java.util.List) 29
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord) 28
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException) 22
JobID (org.apache.flink.api.common.JobID) 22
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 22
IOException (java.io.IOException) 21
Arrays (java.util.Arrays) 21
AtomicLong (java.util.concurrent.atomic.AtomicLong) 21
Configuration (org.apache.flink.configuration.Configuration) 21
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) 21
ArrayList (java.util.ArrayList) 18
Map (java.util.Map) 18
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 18
ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor) 16