Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
The class ValueCollectionDataSets, method getGroupSortedNestedTupleDataSet.
public static DataSet<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> getGroupSortedNestedTupleDataSet(ExecutionEnvironment env) {
    List<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> data = new ArrayList<>();
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(1), new IntValue(3)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(1), new IntValue(2)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(2), new IntValue(1)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(2), new IntValue(2)), new StringValue("b")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(3), new IntValue(3)), new StringValue("c")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(3), new IntValue(6)), new StringValue("c")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(4), new IntValue(9)), new StringValue("c")));
    TupleTypeInfo<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> type = new TupleTypeInfo<>(
            new TupleTypeInfo<Tuple2<IntValue, IntValue>>(ValueTypeInfo.INT_VALUE_TYPE_INFO, ValueTypeInfo.INT_VALUE_TYPE_INFO),
            ValueTypeInfo.STRING_VALUE_TYPE_INFO);
    return env.fromCollection(data, type);
}
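The explicit TupleTypeInfo pins the Value-based element type instead of relying on type extraction from the collection. As a usage sketch (the grouping keys and the print sink are illustrative assumptions, not taken from the Flink test suite), the nested fields can be addressed with field expressions such as "f0.f0":

import org.apache.flink.api.common.operators.Order;

// Illustrative sketch: group on the inner tuple's first field and
// sort each group descending on the inner tuple's second field.
ValueCollectionDataSets.getGroupSortedNestedTupleDataSet(env)
        .groupBy("f0.f0")
        .sortGroup("f0.f1", Order.DESCENDING)
        .first(2) // keep the first two elements of each sorted group
        .print();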
Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
The class AbstractQueryableStateITCase, method testFoldingState.
/**
 * Tests a simple queryable state instance backed by folding state. Each source
 * emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The folding state sums the emitted values and maps the sum to a
 * String. The test succeeds once the query for each subtask index returns
 * n*(n+1)/2 (as a String).
 */
@Test
public void testFoldingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Folding state
        FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState =
                new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);
        QueryableStateStream<Integer, String> queryableState = source
                .keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                    @Override
                    public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                        return value.f0;
                    }
                })
                .asQueryableState("pumba", foldingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        String expected = Integer.toString(numElements * (numElements + 1) / 2);
        for (int key = 0; key < NUM_SLOTS; key++) {
            final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
                    key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                Future<byte[]> future = getKvStateWithRetries(
                        client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, false);
                byte[] serializedValue = Await.result(future, deadline.timeLeft());
                String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
                if (expected.equals(value)) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(50);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
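SumFold is a helper of the test class that this snippet does not show. Its behavior is implied by the descriptor: starting from the initial accumulator "0", it adds each tuple's Long field to the running sum and keeps the result as a String. A minimal sketch under that assumption (not the verbatim Flink helper):

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Assumed shape of SumFold: folds each (subtaskIndex, value) tuple into a
// String accumulator holding the running sum of the Long fields.
public static class SumFold implements FoldFunction<Tuple2<Integer, Long>, String> {

    @Override
    public String fold(String accumulator, Tuple2<Integer, Long> value) {
        return Long.toString(Long.parseLong(accumulator) + value.f1);
    }
}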
Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
The class AbstractQueryableStateITCase, method testReducingState.
/**
 * Tests a simple queryable state instance backed by reducing state. Each source
 * emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The reducing state instance sums the emitted values. The test
 * succeeds once the query for each subtask index returns n*(n+1)/2.
 */
@Test
public void testReducingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState =
                new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source
                .keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                    @Override
                    public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                        return value.f0;
                    }
                })
                .asQueryableState("jungle", reducingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Wait until the job is running, then query
        long expected = numElements * (numElements + 1) / 2;
        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
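SumReduce is likewise not shown; given the expected result of n*(n+1)/2 per key, it must add the Long fields of two tuples while preserving the key. A minimal sketch under that assumption:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Assumed shape of SumReduce: keeps the key and sums the Long fields.
public static class SumReduce implements ReduceFunction<Tuple2<Integer, Long>> {

    @Override
    public Tuple2<Integer, Long> reduce(Tuple2<Integer, Long> a, Tuple2<Integer, Long> b) {
        return new Tuple2<>(a.f0, a.f1 + b.f1);
    }
}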
Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
The class PartitionITCase, method testRangePartitionerWithKeySelectorOnSequenceNestedDataWithOrders.
@Test
public void testRangePartitionerWithKeySelectorOnSequenceNestedDataWithOrders() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple2<ComparablePojo, Long>> dataSet = env.generateSequence(0, 10000)
            .map(new MapFunction<Long, Tuple2<ComparablePojo, Long>>() {
                @Override
                public Tuple2<ComparablePojo, Long> map(Long value) throws Exception {
                    return new Tuple2<>(new ComparablePojo(value / 5000, value % 5000), value);
                }
            });
    final List<Tuple2<ComparablePojo, ComparablePojo>> collected = dataSet
            .partitionByRange(new KeySelector<Tuple2<ComparablePojo, Long>, ComparablePojo>() {
                @Override
                public ComparablePojo getKey(Tuple2<ComparablePojo, Long> value) throws Exception {
                    return value.f0;
                }
            })
            .withOrders(Order.ASCENDING)
            .mapPartition(new MinMaxSelector<>(new ComparablePojoComparator()))
            .mapPartition(new ExtractComparablePojo())
            .collect();
    final Comparator<Tuple2<ComparablePojo, ComparablePojo>> pojoComparator = new Comparator<Tuple2<ComparablePojo, ComparablePojo>>() {
        @Override
        public int compare(Tuple2<ComparablePojo, ComparablePojo> o1, Tuple2<ComparablePojo, ComparablePojo> o2) {
            return o1.f0.compareTo(o2.f1);
        }
    };
    Collections.sort(collected, pojoComparator);
    ComparablePojo previousMax = null;
    for (Tuple2<ComparablePojo, ComparablePojo> element : collected) {
        assertTrue("Min element in each partition should be smaller than max.", element.f0.compareTo(element.f1) <= 0);
        if (previousMax == null) {
            previousMax = element.f1;
        } else {
            assertTrue("Partitions overlap. Previous max should be smaller than current min.", previousMax.compareTo(element.f0) < 0);
            if (previousMax.first.equals(element.f0.first)) {
                assertEquals("Ordering on the second field should be continuous.", previousMax.second - 1, element.f0.second.longValue());
            }
            previousMax = element.f1;
        }
    }
}
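MinMaxSelector and ExtractComparablePojo are test utilities that this snippet does not include. Judging from the pipeline, MinMaxSelector scans each partition once and emits a single (min, max) pair for it; a sketch under that assumption (the comparator must be Serializable to ship to the cluster):

import java.util.Comparator;

import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Assumed shape of MinMaxSelector: one pass over the partition,
// emitting its smallest and largest element as a Tuple2.
public static class MinMaxSelector<T> implements MapPartitionFunction<T, Tuple2<T, T>> {

    private final Comparator<T> comparator;

    public MinMaxSelector(Comparator<T> comparator) {
        this.comparator = comparator;
    }

    @Override
    public void mapPartition(Iterable<T> values, Collector<Tuple2<T, T>> out) {
        T min = null;
        T max = null;
        for (T value : values) {
            if (min == null || comparator.compare(value, min) < 0) {
                min = value;
            }
            if (max == null || comparator.compare(value, max) > 0) {
                max = value;
            }
        }
        if (min != null) { // empty partitions emit nothing
            out.collect(new Tuple2<>(min, max));
        }
    }
}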
Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
The class PartitionITCase, method testRangePartitionInIteration.
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {
    // does not apply for collection execution
    if (super.mode == TestExecutionMode.COLLECTION) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> source = env.generateSequence(0, 10000);
    DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {
        @Override
        public Tuple2<Long, String> map(Long v) throws Exception {
            return new Tuple2<>(v, Long.toString(v));
        }
    });
    DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);
    // Verify that range partitioning is not allowed in iterations
    DataSet<Tuple2<Long, String>> body = it.getWorkset()
            .partitionByRange(1)
            .join(it.getSolutionSet())
            .where(0)
            .equalTo(0)
            .projectFirst(0)
            .projectSecond(1);
    DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);
    // should fail
    result.collect();
}
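Range partitioning injects a sampling pre-pass to compute partition boundaries, and such a pre-pass cannot be embedded in an iteration's dataflow; that is why the plan is rejected with InvalidProgramException. As a hedged sketch of an accepted alternative (an assumption about the usual DataSet semantics, not part of the original test), a hash partition of the workset needs no pre-pass:

// Sketch: hash partitioning inside the delta iteration is accepted
// because it requires no sampling pass over the workset.
DataSet<Tuple2<Long, String>> body = it.getWorkset()
        .partitionByHash(1)
        .join(it.getSolutionSet())
        .where(0)
        .equalTo(0)
        .projectFirst(0)
        .projectSecond(1);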