Use of org.apache.flink.api.java.tuple.Tuple3 in project flink by apache.
The class CopyOnWriteStateTableTest, method testRandomModificationsAndCopyOnWriteIsolation.
/**
 * This test applies random modifications to a state table and to a reference (a hash map), then draws
 * snapshots, performs further modifications, and checks snapshot integrity.
 */
@Test
public void testRandomModificationsAndCopyOnWriteIsolation() throws Exception {
    final RegisteredBackendStateMetaInfo<Integer, ArrayList<Integer>> metaInfo =
        new RegisteredBackendStateMetaInfo<>(
            StateDescriptor.Type.UNKNOWN,
            "test",
            IntSerializer.INSTANCE,
            new ArrayListSerializer<>(IntSerializer.INSTANCE)); // we use mutable state objects.

    final MockInternalKeyContext<Integer> keyContext = new MockInternalKeyContext<>(IntSerializer.INSTANCE);
    final CopyOnWriteStateTable<Integer, Integer, ArrayList<Integer>> stateTable =
        new CopyOnWriteStateTable<>(keyContext, metaInfo);
    final HashMap<Tuple2<Integer, Integer>, ArrayList<Integer>> referenceMap = new HashMap<>();
    final Random random = new Random(42);

    // holds a snapshot from the table under test
    CopyOnWriteStateTable.StateTableEntry<Integer, Integer, ArrayList<Integer>>[] snapshot = null;
    int snapshotSize = 0;

    // holds a snapshot from our reference map that we compare against
    Tuple3<Integer, Integer, ArrayList<Integer>>[] reference = null;

    int val = 0;
    int snapshotCounter = 0;
    int referencedSnapshotId = 0;

    final StateTransformationFunction<ArrayList<Integer>, Integer> transformationFunction =
        new StateTransformationFunction<ArrayList<Integer>, Integer>() {

            @Override
            public ArrayList<Integer> apply(ArrayList<Integer> previousState, Integer value) throws Exception {
                if (previousState == null) {
                    previousState = new ArrayList<>();
                }
                previousState.add(value);
                // we give back the original object, attempting to spot errors in copy-on-write
                return previousState;
            }
        };

    // the main loop of modifications
    for (int i = 0; i < 10_000_000; ++i) {

        int key = random.nextInt(20);
        int namespace = random.nextInt(4);
        Tuple2<Integer, Integer> compositeKey = new Tuple2<>(key, namespace);

        int op = random.nextInt(7);

        ArrayList<Integer> state = null;
        ArrayList<Integer> referenceState = null;

        switch (op) {
            case 0:
            case 1: {
                state = stateTable.get(key, namespace);
                referenceState = referenceMap.get(compositeKey);
                if (null == state) {
                    state = new ArrayList<>();
                    stateTable.put(key, namespace, state);
                    referenceState = new ArrayList<>();
                    referenceMap.put(compositeKey, referenceState);
                }
                break;
            }
            case 2: {
                stateTable.put(key, namespace, new ArrayList<Integer>());
                referenceMap.put(compositeKey, new ArrayList<Integer>());
                break;
            }
            case 3: {
                state = stateTable.putAndGetOld(key, namespace, new ArrayList<Integer>());
                referenceState = referenceMap.put(compositeKey, new ArrayList<Integer>());
                break;
            }
            case 4: {
                stateTable.remove(key, namespace);
                referenceMap.remove(compositeKey);
                break;
            }
            case 5: {
                state = stateTable.removeAndGetOld(key, namespace);
                referenceState = referenceMap.remove(compositeKey);
                break;
            }
            case 6: {
                final int updateValue = random.nextInt(1000);
                stateTable.transform(key, namespace, updateValue, transformationFunction);
                referenceMap.put(compositeKey,
                    transformationFunction.apply(referenceMap.remove(compositeKey), updateValue));
                break;
            }
            default: {
                Assert.fail("Unknown op-code " + op);
            }
        }

        Assert.assertEquals(referenceMap.size(), stateTable.size());

        if (state != null) {
            // mutate the states a bit...
            if (random.nextBoolean() && !state.isEmpty()) {
                state.remove(state.size() - 1);
                referenceState.remove(referenceState.size() - 1);
            } else {
                state.add(val);
                referenceState.add(val);
                ++val;
            }
        }

        Assert.assertEquals(referenceState, state);

        // snapshot triggering / comparison / release
        if (i > 0 && i % 500 == 0) {

            if (snapshot != null) {
                // check our referenced snapshot
                deepCheck(reference, convert(snapshot, snapshotSize));

                if (i % 1_000 == 0) {
                    // draw and release another snapshot while holding on to the old one
                    ++snapshotCounter;
                    stateTable.snapshotTableArrays();
                    stateTable.releaseSnapshot(snapshotCounter);
                }

                // release the snapshot after some time
                if (i % 5_000 == 0) {
                    snapshot = null;
                    reference = null;
                    snapshotSize = 0;
                    stateTable.releaseSnapshot(referencedSnapshotId);
                }
            } else {
                // if there is no referenced snapshot anymore, we create one
                ++snapshotCounter;
                referencedSnapshotId = snapshotCounter;
                snapshot = stateTable.snapshotTableArrays();
                snapshotSize = stateTable.size();
                reference = manualDeepDump(referenceMap);
            }
        }
    }
}
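The helpers deepCheck, convert, and manualDeepDump belong to the surrounding test class and are not part of this excerpt. As a rough sketch of the last one: manualDeepDump plausibly deep-copies the reference map into a Tuple3 array, so that later mutations of the (mutable) state lists cannot leak into the reference snapshot. This is an assumption for illustration, not the actual Flink test code (it also assumes java.util.Map is imported):

// Hypothetical sketch of the manualDeepDump helper referenced above; the actual
// implementation in CopyOnWriteStateTableTest may differ.
@SuppressWarnings("unchecked")
private static Tuple3<Integer, Integer, ArrayList<Integer>>[] manualDeepDump(
        HashMap<Tuple2<Integer, Integer>, ArrayList<Integer>> map) {

    Tuple3<Integer, Integer, ArrayList<Integer>>[] result = new Tuple3[map.size()];
    int pos = 0;
    for (Map.Entry<Tuple2<Integer, Integer>, ArrayList<Integer>> entry : map.entrySet()) {
        Integer key = entry.getKey().f0;
        Integer namespace = entry.getKey().f1;
        // deep copy of the mutable list, so the snapshot is isolated from future mutations
        result[pos++] = new Tuple3<>(key, namespace, new ArrayList<>(entry.getValue()));
    }
    return result;
}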
Use of org.apache.flink.api.java.tuple.Tuple3 in project flink by apache.
The class WindowFoldITCase, method testFoldProcessWindow.
@Test
public void testFoldProcessWindow() throws Exception {

    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> source1 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("a", 2));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("b", 4));
            ctx.collect(Tuple2.of("b", 5));
            ctx.collect(Tuple2.of("a", 6));
            ctx.collect(Tuple2.of("a", 7));
            ctx.collect(Tuple2.of("a", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    source1
        .keyBy(0)
        .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
        .fold(Tuple2.of(0, "R:"), new FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>>() {
            @Override
            public Tuple2<Integer, String> fold(Tuple2<Integer, String> accumulator, Tuple2<String, Integer> value) throws Exception {
                accumulator.f1 += value.f0;
                accumulator.f0 += value.f1;
                return accumulator;
            }
        }, new ProcessWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, Tuple, TimeWindow>() {
            @Override
            public void process(Tuple tuple, Context context, Iterable<Tuple2<Integer, String>> elements, Collector<Tuple3<String, Integer, Integer>> out) throws Exception {
                int i = 0;
                for (Tuple2<Integer, String> in : elements) {
                    out.collect(new Tuple3<>(in.f1, in.f0, i++));
                }
            }
        })
        .addSink(new SinkFunction<Tuple3<String, Integer, Integer>>() {
            @Override
            public void invoke(Tuple3<String, Integer, Integer> value) throws Exception {
                testResults.add(value.toString());
            }
        });

    env.execute("Fold Process Window Test");

    List<String> expectedResult = Arrays.asList("(R:aaa,3,0)", "(R:aaa,21,0)", "(R:bbb,12,0)");

    Collections.sort(expectedResult);
    Collections.sort(testResults);

    Assert.assertEquals(expectedResult, testResults);
}
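Tuple2TimestampExtractor is defined elsewhere in WindowFoldITCase and is not shown here. A plausible sketch, assuming the Integer field of each record doubles as its event timestamp and watermarks trail one millisecond behind the last element; the actual extractor may differ:

// Hypothetical sketch of the timestamp/watermark assigner used by the test above.
private static class Tuple2TimestampExtractor implements AssignerWithPunctuatedWatermarks<Tuple2<String, Integer>> {

    @Override
    public long extractTimestamp(Tuple2<String, Integer> element, long previousTimestamp) {
        // the Integer field is interpreted as the event timestamp in milliseconds
        return element.f1;
    }

    @Override
    public Watermark checkAndGetNextWatermark(Tuple2<String, Integer> lastElement, long extractedTimestamp) {
        // emit a watermark just behind every element
        return new Watermark(lastElement.f1 - 1);
    }
}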
Use of org.apache.flink.api.java.tuple.Tuple3 in project flink by apache.
The class WindowFoldITCase, method testFoldProcessAllWindow.
@Test
public void testFoldProcessAllWindow() throws Exception {

    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> source1 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("a", 2));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("b", 4));
            ctx.collect(Tuple2.of("b", 5));
            ctx.collect(Tuple2.of("a", 6));
            ctx.collect(Tuple2.of("a", 7));
            ctx.collect(Tuple2.of("a", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    source1
        .windowAll(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
        .fold(Tuple2.of(0, "R:"), new FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>>() {
            @Override
            public Tuple2<Integer, String> fold(Tuple2<Integer, String> accumulator, Tuple2<String, Integer> value) throws Exception {
                accumulator.f1 += value.f0;
                accumulator.f0 += value.f1;
                return accumulator;
            }
        }, new ProcessAllWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, TimeWindow>() {
            @Override
            public void process(Context context, Iterable<Tuple2<Integer, String>> elements, Collector<Tuple3<String, Integer, Integer>> out) throws Exception {
                int i = 0;
                for (Tuple2<Integer, String> in : elements) {
                    out.collect(new Tuple3<>(in.f1, in.f0, i++));
                }
            }
        })
        .addSink(new SinkFunction<Tuple3<String, Integer, Integer>>() {
            @Override
            public void invoke(Tuple3<String, Integer, Integer> value) throws Exception {
                testResults.add(value.toString());
            }
        });

    env.execute("Fold Process Window Test");

    List<String> expectedResult = Arrays.asList("(R:aaa,3,0)", "(R:aaa,21,0)", "(R:bbb,12,0)");

    Collections.sort(expectedResult);
    Collections.sort(testResults);

    Assert.assertEquals(expectedResult, testResults);
}
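To see why the expected strings come out as they do: with a zero offset, TumblingEventTimeWindows assigns a record with timestamp t to the window starting at t - (t % size). The snippet below is purely illustrative and walks the nine timestamps through 3 ms windows:

// Illustrative only: window assignment for the timestamps emitted by the source above.
long size = 3; // window size in milliseconds
for (long ts : new long[] {0, 1, 2, 3, 4, 5, 6, 7, 8}) {
    long start = ts - (ts % size); // zero-offset tumbling window start
    System.out.println("timestamp " + ts + " -> window [" + start + ", " + (start + size) + ")");
}
// Window [0, 3) holds a0, a1, a2  -> fold result (3,  "R:aaa")
// Window [3, 6) holds b3, b4, b5  -> fold result (12, "R:bbb")
// Window [6, 9) holds a6, a7, a8  -> fold result (21, "R:aaa")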
Use of org.apache.flink.api.java.tuple.Tuple3 in project flink by apache.
The class CoGroupJoinITCase, method testSelfJoin.
@Test
public void testSelfJoin() throws Exception {

    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple3.of("a", "x", 0));
            ctx.collect(Tuple3.of("a", "y", 1));
            ctx.collect(Tuple3.of("a", "z", 2));
            ctx.collect(Tuple3.of("b", "u", 3));
            ctx.collect(Tuple3.of("b", "w", 5));
            ctx.collect(Tuple3.of("a", "i", 6));
            ctx.collect(Tuple3.of("a", "j", 7));
            ctx.collect(Tuple3.of("a", "k", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

    source1.join(source1)
        .where(new Tuple3KeyExtractor())
        .equalTo(new Tuple3KeyExtractor())
        .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
        .apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {
            @Override
            public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
                return first + ":" + second;
            }
        })
        .addSink(new SinkFunction<String>() {
            @Override
            public void invoke(String value) throws Exception {
                testResults.add(value);
            }
        });

    env.execute("Self-Join Test");

    List<String> expectedResult = Arrays.asList(
        "(a,x,0):(a,x,0)", "(a,x,0):(a,y,1)", "(a,x,0):(a,z,2)",
        "(a,y,1):(a,x,0)", "(a,y,1):(a,y,1)", "(a,y,1):(a,z,2)",
        "(a,z,2):(a,x,0)", "(a,z,2):(a,y,1)", "(a,z,2):(a,z,2)",
        "(b,u,3):(b,u,3)", "(b,u,3):(b,w,5)",
        "(b,w,5):(b,u,3)", "(b,w,5):(b,w,5)",
        "(a,i,6):(a,i,6)", "(a,i,6):(a,j,7)", "(a,i,6):(a,k,8)",
        "(a,j,7):(a,i,6)", "(a,j,7):(a,j,7)", "(a,j,7):(a,k,8)",
        "(a,k,8):(a,i,6)", "(a,k,8):(a,j,7)", "(a,k,8):(a,k,8)");

    Collections.sort(expectedResult);
    Collections.sort(testResults);

    Assert.assertEquals(expectedResult, testResults);
}
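Both extractor classes are defined elsewhere in CoGroupJoinITCase. Plausible sketches, assuming the key is the first tuple field (consistent with the grouping visible in the expected results) and the third field serves as the event timestamp; the actual classes may differ:

// Hypothetical sketches of the helpers referenced by the self-join test above.
private static class Tuple3TimestampExtractor implements AssignerWithPunctuatedWatermarks<Tuple3<String, String, Integer>> {

    @Override
    public long extractTimestamp(Tuple3<String, String, Integer> element, long previousTimestamp) {
        // the third field is interpreted as the event timestamp in milliseconds
        return element.f2;
    }

    @Override
    public Watermark checkAndGetNextWatermark(Tuple3<String, String, Integer> lastElement, long extractedTimestamp) {
        return new Watermark(lastElement.f2 - 1);
    }
}

private static class Tuple3KeyExtractor implements KeySelector<Tuple3<String, String, Integer>, String> {

    @Override
    public String getKey(Tuple3<String, String, Integer> value) throws Exception {
        // join key is the first field ("a" or "b" in the source data)
        return value.f0;
    }
}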
Use of org.apache.flink.api.java.tuple.Tuple3 in project flink by apache.
The class AggregationFunctionTest, method minMaxByTest.
@Test
public void minMaxByTest() throws Exception {
    // Tuples are grouped on field 0 and aggregated on field 1.

    // preparing expected outputs
    List<Tuple3<Integer, Integer, Integer>> maxByFirstExpected = ImmutableList.of(
        Tuple3.of(0, 0, 0), Tuple3.of(0, 1, 1), Tuple3.of(0, 2, 2),
        Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2),
        Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2));

    List<Tuple3<Integer, Integer, Integer>> maxByLastExpected = ImmutableList.of(
        Tuple3.of(0, 0, 0), Tuple3.of(0, 1, 1), Tuple3.of(0, 2, 2),
        Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 5),
        Tuple3.of(0, 2, 5), Tuple3.of(0, 2, 5), Tuple3.of(0, 2, 8));

    List<Tuple3<Integer, Integer, Integer>> minByFirstExpected = ImmutableList.of(
        Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
        Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
        Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0));

    List<Tuple3<Integer, Integer, Integer>> minByLastExpected = ImmutableList.of(
        Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
        Tuple3.of(0, 0, 3), Tuple3.of(0, 0, 3), Tuple3.of(0, 0, 3),
        Tuple3.of(0, 0, 6), Tuple3.of(0, 0, 6), Tuple3.of(0, 0, 6));

    // some necessary boilerplate
    TypeInformation<Tuple3<Integer, Integer, Integer>> typeInfo = TypeExtractor.getForObject(Tuple3.of(0, 0, 0));
    ExecutionConfig config = new ExecutionConfig();

    KeySelector<Tuple3<Integer, Integer, Integer>, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(
        new Keys.ExpressionKeys<>(new int[] { 0 }, typeInfo), typeInfo, config);
    TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);

    // aggregations under test
    ReduceFunction<Tuple3<Integer, Integer, Integer>> maxByFunctionFirst =
        new ComparableAggregator<>(1, typeInfo, AggregationType.MAXBY, true, config);
    ReduceFunction<Tuple3<Integer, Integer, Integer>> maxByFunctionLast =
        new ComparableAggregator<>(1, typeInfo, AggregationType.MAXBY, false, config);
    ReduceFunction<Tuple3<Integer, Integer, Integer>> minByFunctionFirst =
        new ComparableAggregator<>(1, typeInfo, AggregationType.MINBY, true, config);
    ReduceFunction<Tuple3<Integer, Integer, Integer>> minByFunctionLast =
        new ComparableAggregator<>(1, typeInfo, AggregationType.MINBY, false, config);

    assertEquals(maxByFirstExpected, MockContext.createAndExecuteForKeyedStream(
        new StreamGroupedReduce<>(maxByFunctionFirst, typeInfo.createSerializer(config)),
        getInputByList(), keySelector, keyType));
    assertEquals(maxByLastExpected, MockContext.createAndExecuteForKeyedStream(
        new StreamGroupedReduce<>(maxByFunctionLast, typeInfo.createSerializer(config)),
        getInputByList(), keySelector, keyType));
    assertEquals(minByLastExpected, MockContext.createAndExecuteForKeyedStream(
        new StreamGroupedReduce<>(minByFunctionLast, typeInfo.createSerializer(config)),
        getInputByList(), keySelector, keyType));
    assertEquals(minByFirstExpected, MockContext.createAndExecuteForKeyedStream(
        new StreamGroupedReduce<>(minByFunctionFirst, typeInfo.createSerializer(config)),
        getInputByList(), keySelector, keyType));
}
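getInputByList() is a helper of AggregationFunctionTest and is not shown in this excerpt. Working backwards from the four expected lists, the input is consistent with nine tuples whose key field is constant, whose aggregated field cycles 0, 1, 2, and whose last field is a unique running index. A hypothetical reconstruction, inferred rather than copied from the actual test:

// Hypothetical reconstruction of getInputByList(), inferred from the expected outputs
// above; the actual helper in AggregationFunctionTest may differ.
private List<Tuple3<Integer, Integer, Integer>> getInputByList() {
    List<Tuple3<Integer, Integer, Integer>> input = new ArrayList<>();
    for (int i = 0; i < 9; i++) {
        // field 0 is the constant grouping key, field 1 cycles 0, 1, 2 and is the
        // aggregated field, field 2 makes each tuple unique so first/last can differ
        input.add(Tuple3.of(0, i % 3, i));
    }
    return input;
}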