use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class SortPartitionITCase method testSortPartitionWithKeySelector1.
@Test
public void testSortPartitionWithKeySelector1() throws Exception {
/*
* Test sort partition on an extracted key
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);
DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
List<Tuple1<Boolean>> result = ds.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(// parallelize input
4).sortPartition(new KeySelector<Tuple3<Integer, Long, String>, Long>() {
@Override
public Long getKey(Tuple3<Integer, Long, String> value) throws Exception {
return value.f1;
}
}, Order.ASCENDING).mapPartition(new OrderCheckMapper<>(new Tuple3AscendingChecker())).distinct().collect();
String expected = "(true)\n";
compareResultAsText(result, expected);
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class CoGroupITCase method testCoGroupWithMultipleKeyFieldsWithInnerClassKeyExtractorWithoutClosureCleaner.
@Test
public void testCoGroupWithMultipleKeyFieldsWithInnerClassKeyExtractorWithoutClosureCleaner() throws Exception {
/*
* CoGroup with multiple key fields, test that disabling closure cleaner leads to an exception when using inner
* classes.
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().disableClosureCleaner();
DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
boolean correctExceptionTriggered = false;
try {
DataSet<Tuple3<Integer, Long, String>> coGrouped = ds1.coGroup(ds2).where(new KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>>() {
@Override
public Tuple2<Integer, Long> getKey(Tuple5<Integer, Long, Integer, String, Long> t) throws Exception {
return new Tuple2<Integer, Long>(t.f0, t.f4);
}
}).equalTo(new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
@Override
public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> t) {
return new Tuple2<Integer, Long>(t.f0, t.f1);
}
}).with(new CoGroupFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
@Override
public void coGroup(Iterable<Tuple5<Integer, Long, Integer, String, Long>> first, Iterable<Tuple3<Integer, Long, String>> second, Collector<Tuple3<Integer, Long, String>> out) {
List<String> strs = new ArrayList<String>();
for (Tuple5<Integer, Long, Integer, String, Long> t : first) {
strs.add(t.f3);
}
for (Tuple3<Integer, Long, String> t : second) {
for (String s : strs) {
out.collect(new Tuple3<Integer, Long, String>(t.f0, t.f1, s));
}
}
}
});
} catch (InvalidProgramException ex) {
correctExceptionTriggered = (ex.getCause() instanceof java.io.NotSerializableException);
}
Assert.assertTrue(correctExceptionTriggered);
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class FirstNITCase method testFaultyCast.
/**
* Test for FLINK-2135
*/
@Test
public void testFaultyCast() throws Exception {
ExecutionEnvironment ee = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> b = ee.fromElements("a", "b");
GroupReduceOperator<String, String> a = b.groupBy(new KeySelector<String, Long>() {
@Override
public Long getKey(String value) throws Exception {
return 1L;
}
}).sortGroup(new KeySelector<String, Double>() {
@Override
public Double getKey(String value) throws Exception {
return 1.0;
}
}, Order.DESCENDING).first(1);
List<String> result = b.collect();
String expected = "a\nb";
compareResultAsText(result, expected);
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class AbstractQueryableStateITCase method testQueryNonStartedJobState.
/**
* Similar tests as {@link #testValueState()} but before submitting the
* job, we already issue one request which fails.
*/
@Test
public void testQueryNonStartedJobState() throws Exception {
// Config
final Deadline deadline = TEST_TIMEOUT.fromNow();
final int numElements = 1024;
final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
JobID jobId = null;
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(NUM_SLOTS);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
// Value state
ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), null);
QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
@Override
public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
return value.f0;
}
}).asQueryableState("hakuna", valueState);
// Submit the job graph
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
jobId = jobGraph.getJobID();
// Now query
long expected = numElements;
// query once
client.getKvState(jobId, queryableState.getQueryableStateName(), 0, KvStateRequestSerializer.serializeKeyAndNamespace(0, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE));
cluster.submitJobDetached(jobGraph);
executeValueQuery(deadline, client, jobId, queryableState, expected);
} finally {
// Free cluster resources
if (jobId != null) {
Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
Await.ready(cancellation, deadline.timeLeft());
}
client.shutDown();
}
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class AbstractQueryableStateITCase method testValueStateDefault.
/**
* Tests simple value state queryable state instance with a default value
* set. Each source emits (subtaskIndex, 0)..(subtaskIndex, numElements)
* tuples, the key is mapped to 1 but key 0 is queried which should throw
* a {@link UnknownKeyOrNamespace} exception.
*
* @throws UnknownKeyOrNamespace thrown due querying a non-existent key
*/
@Test(expected = UnknownKeyOrNamespace.class)
public void testValueStateDefault() throws Exception, UnknownKeyOrNamespace {
// Config
final Deadline deadline = TEST_TIMEOUT.fromNow();
final int numElements = 1024;
final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
JobID jobId = null;
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(NUM_SLOTS);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
// Value state
ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), Tuple2.of(0, 1337l));
// only expose key "1"
QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
@Override
public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
return 1;
}
}).asQueryableState("hakuna", valueState);
// Submit the job graph
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
jobId = jobGraph.getJobID();
cluster.submitJobDetached(jobGraph);
// Now query
int key = 0;
final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
Future<byte[]> future = getKvStateWithRetries(client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, true);
Await.result(future, deadline.timeLeft());
} finally {
// Free cluster resources
if (jobId != null) {
Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
Await.ready(cancellation, deadline.timeLeft());
}
client.shutDown();
}
}
Aggregations