Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
In class CoGroupITCase, method testCoGroupWithMultipleKeyFieldsWithInnerClassKeyExtractorWithClosureCleaner:
@Test
public void testCoGroupWithMultipleKeyFieldsWithInnerClassKeyExtractorWithClosureCleaner() throws Exception {
    /*
     * CoGroup with multiple key fields; tests that the closure cleaner works for inner classes.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> coGrouped = ds1.coGroup(ds2)
            .where(new KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>>() {
                @Override
                public Tuple2<Integer, Long> getKey(Tuple5<Integer, Long, Integer, String, Long> t) throws Exception {
                    return new Tuple2<Integer, Long>(t.f0, t.f4);
                }
            })
            .equalTo(new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
                @Override
                public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> t) {
                    return new Tuple2<Integer, Long>(t.f0, t.f1);
                }
            })
            .with(new CoGroupFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
                @Override
                public void coGroup(Iterable<Tuple5<Integer, Long, Integer, String, Long>> first, Iterable<Tuple3<Integer, Long, String>> second, Collector<Tuple3<Integer, Long, String>> out) {
                    List<String> strs = new ArrayList<String>();
                    for (Tuple5<Integer, Long, Integer, String, Long> t : first) {
                        strs.add(t.f3);
                    }
                    for (Tuple3<Integer, Long, String> t : second) {
                        for (String s : strs) {
                            out.collect(new Tuple3<Integer, Long, String>(t.f0, t.f1, s));
                        }
                    }
                }
            });
    List<Tuple3<Integer, Long, String>> result = coGrouped.collect();
    String expected = "1,1,Hallo\n" + "2,2,Hallo Welt\n" + "3,2,Hallo Welt wie gehts?\n" + "3,2,ABC\n" + "5,3,HIJ\n" + "5,3,IJK\n";
    compareResultAsTuples(result, expected);
}
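The Tuple2-valued KeySelectors above build a composite key from two fields. For tuple inputs, the DataSet API also accepts field positions directly, so the same grouping can be declared without anonymous classes. A minimal sketch against the same ds1 and ds2, with where(0, 4) mirroring (t.f0, t.f4) and equalTo(0, 1) mirroring (t.f0, t.f1); the coGroup body here is illustrative, not the one from the test:

DataSet<Tuple2<Integer, Long>> coGroupedByPosition = ds1.coGroup(ds2)
        // field positions replace the KeySelector-based composite key above
        .where(0, 4)
        .equalTo(0, 1)
        .with(new CoGroupFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
            @Override
            public void coGroup(Iterable<Tuple5<Integer, Long, Integer, String, Long>> first,
                                Iterable<Tuple3<Integer, Long, String>> second,
                                Collector<Tuple2<Integer, Long>> out) {
                // emit one record per element of the second side (illustrative only)
                for (Tuple3<Integer, Long, String> t : second) {
                    out.collect(new Tuple2<Integer, Long>(t.f0, t.f1));
                }
            }
        });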
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
In class JoinITCase, method testDefaultJoinOnTwoCustomTypeInputsWithInnerClassKeyExtractorsClosureCleaner:
@Test
public void testDefaultJoinOnTwoCustomTypeInputsWithInnerClassKeyExtractorsClosureCleaner() throws Exception {
    /*
     * (Default) Join on two custom type inputs with key extractors, implemented as inner classes
     * to test closure cleaning.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds1 = CollectionDataSets.getCustomTypeDataSet(env);
    DataSet<CustomType> ds2 = CollectionDataSets.getSmallCustomTypeDataSet(env);
    DataSet<Tuple2<CustomType, CustomType>> joinDs = ds1.join(ds2)
            .where(new KeySelector<CustomType, Integer>() {
                @Override
                public Integer getKey(CustomType value) {
                    return value.myInt;
                }
            })
            .equalTo(new KeySelector<CustomType, Integer>() {
                @Override
                public Integer getKey(CustomType value) throws Exception {
                    return value.myInt;
                }
            });
    List<Tuple2<CustomType, CustomType>> result = joinDs.collect();
    String expected = "1,0,Hi,1,0,Hi\n" + "2,1,Hello,2,1,Hello\n" + "2,1,Hello,2,2,Hello world\n" + "2,2,Hello world,2,1,Hello\n" + "2,2,Hello world,2,2,Hello world\n";
    compareResultAsTuples(result, expected);
}
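Because no JoinFunction is supplied, the default join above emits Tuple2 pairs of the matched elements. A sketch of the same join with an explicit JoinFunction producing a flat String result; it assumes CustomType's myString field (used elsewhere in CollectionDataSets) and the org.apache.flink.api.common.functions.JoinFunction import:

DataSet<String> joinedStrings = ds1.join(ds2)
        .where(new KeySelector<CustomType, Integer>() {
            @Override
            public Integer getKey(CustomType value) {
                return value.myInt;
            }
        })
        .equalTo(new KeySelector<CustomType, Integer>() {
            @Override
            public Integer getKey(CustomType value) {
                return value.myInt;
            }
        })
        .with(new JoinFunction<CustomType, CustomType, String>() {
            @Override
            public String join(CustomType first, CustomType second) {
                // combine each matched pair into a single output record
                return first.myString + "|" + second.myString;
            }
        });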
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
In class WindowTranslationTest, method testSessionWithFoldFails:
// ------------------------------------------------------------------------
//  Merging Windows Support
// ------------------------------------------------------------------------
@Test
public void testSessionWithFoldFails() throws Exception {
    // verify that fold does not work with merging windows
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    WindowedStream<String, String, TimeWindow> windowedStream = env.fromElements("Hello", "Ciao")
            .keyBy(new KeySelector<String, String>() {
                @Override
                public String getKey(String value) throws Exception {
                    return value;
                }
            })
            .window(EventTimeSessionWindows.withGap(Time.seconds(5)));
    try {
        windowedStream.fold("", new FoldFunction<String, String>() {
            private static final long serialVersionUID = -4567902917104921706L;

            @Override
            public String fold(String accumulator, String value) throws Exception {
                return accumulator;
            }
        });
    } catch (UnsupportedOperationException e) {
        // use a catch to ensure that the exception is thrown by the fold
        return;
    }
    fail("The fold call should fail.");
}
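fold() is rejected on merging (session) windows because a fold accumulator cannot be merged when two session windows collapse into one; a reduce, whose result type equals its input type, can be. A minimal sketch on the same windowedStream that is expected to translate without the exception:

windowedStream.reduce(new ReduceFunction<String>() {
    @Override
    public String reduce(String value1, String value2) throws Exception {
        // any associative combination works; concatenation is just illustrative
        return value1 + value2;
    }
});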
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
In class CEPMigration11to13Test, method testKeyedCEPOperatorMigratation:
@Test
public void testKeyedCEPOperatorMigratation() throws Exception {
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {
        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
    final Event startEvent = new Event(42, "start", 1.0);
    final SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    final Event endEvent = new Event(42, "end", 1.0);
    // uncomment these lines for regenerating the snapshot on Flink 1.1
    /*
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = new OneInputStreamOperatorTestHarness<>(
            new KeyedCEPPatternOperator<>(
                    Event.createTypeSerializer(),
                    false,
                    keySelector,
                    IntSerializer.INSTANCE,
                    new NFAFactory()));
    harness.configureForKeyedStream(keySelector, BasicTypeInfo.INT_TYPE_INFO);
    harness.open();
    harness.processElement(new StreamRecord<Event>(startEvent, 1));
    harness.processElement(new StreamRecord<Event>(new Event(42, "foobar", 1.0), 2));
    harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3));
    harness.processWatermark(new Watermark(2));
    // simulate snapshot/restore with empty element queue but NFA state
    StreamTaskState snapshot = harness.snapshot(1, 1);
    FileOutputStream out = new FileOutputStream(
            "src/test/resources/cep-keyed-snapshot-1.1");
    ObjectOutputStream oos = new ObjectOutputStream(out);
    oos.writeObject(snapshot);
    out.close();
    harness.close();
    */
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = new KeyedOneInputStreamOperatorTestHarness<>(
            new KeyedCEPPatternOperator<>(
                    Event.createTypeSerializer(),
                    false,
                    keySelector,
                    IntSerializer.INSTANCE,
                    new NFAFactory(),
                    true),
            keySelector,
            BasicTypeInfo.INT_TYPE_INFO);
    harness.setup();
    harness.initializeStateFromLegacyCheckpoint(getResourceFilename("cep-keyed-snapshot-1.1"));
    harness.open();
    harness.processElement(new StreamRecord<Event>(middleEvent, 3));
    harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4));
    harness.processElement(new StreamRecord<>(endEvent, 5));
    harness.processWatermark(new Watermark(20));
    ConcurrentLinkedQueue<Object> result = harness.getOutput();
    // watermark and the result
    assertEquals(2, result.size());
    Object resultObject = result.poll();
    assertTrue(resultObject instanceof StreamRecord);
    StreamRecord<?> resultRecord = (StreamRecord<?>) resultObject;
    assertTrue(resultRecord.getValue() instanceof Map);
    @SuppressWarnings("unchecked")
    Map<String, Event> patternMap = (Map<String, Event>) resultRecord.getValue();
    assertEquals(startEvent, patternMap.get("start"));
    assertEquals(middleEvent, patternMap.get("middle"));
    assertEquals(endEvent, patternMap.get("end"));
    harness.close();
}
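The NFAFactory used above is defined elsewhere in the test class and encodes a start -> middle -> end sequence over Event/SubEvent, which is why the result map is queried with those three keys. A hypothetical sketch of a pattern of that shape in the Flink 1.3 CEP API; the conditions shown are illustrative, not the exact ones from the test:

Pattern<Event, ?> pattern = Pattern.<Event>begin("start")
        .where(new SimpleCondition<Event>() {
            @Override
            public boolean filter(Event value) {
                return value.getName().equals("start");
            }
        })
        .followedBy("middle").subtype(SubEvent.class)
        .where(new SimpleCondition<SubEvent>() {
            @Override
            public boolean filter(SubEvent value) {
                // only SubEvents above some volume threshold qualify as "middle"
                return value.getVolume() > 5.0;
            }
        })
        .followedBy("end")
        .where(new SimpleCondition<Event>() {
            @Override
            public boolean filter(Event value) {
                return value.getName().equals("end");
            }
        });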
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
In class AbstractQueryableStateITCase, method testFoldingState:
/**
 * Tests a simple queryable state instance backed by folding state. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The folding state sums these up and maps them to Strings. The
 * test succeeds once each subtask index has been queried and returned
 * n*(n+1)/2 (as a String).
 */
@Test
public void testFoldingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because the cluster is shared between tests and we
        // don't explicitly check that all slots are available before submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Folding state
        FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState = new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);
        QueryableStateStream<Integer, String> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("pumba", foldingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        String expected = Integer.toString(numElements * (numElements + 1) / 2);
        for (int key = 0; key < NUM_SLOTS; key++) {
            final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
                    key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                Future<byte[]> future = getKvStateWithRetries(client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, false);
                byte[] serializedValue = Await.result(future, deadline.timeLeft());
                String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
                if (expected.equals(value)) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(50);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
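SumFold and TestAscendingValueSource are helpers defined elsewhere in the test class. Given the descriptor's types (input Tuple2<Integer, Long>, accumulator String with initial value "0") and the expected n*(n+1)/2 result, a folding function of this shape plausibly looks like the following hypothetical sketch:

private static class SumFold implements FoldFunction<Tuple2<Integer, Long>, String> {
    private static final long serialVersionUID = 1L;

    @Override
    public String fold(String accumulator, Tuple2<Integer, Long> value) throws Exception {
        // keep the running sum as a String, matching the descriptor's String state type
        long sum = Long.parseLong(accumulator) + value.f1;
        return Long.toString(sum);
    }
}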