use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class ReduceOnEdgesMethodsITCase method testAllInNeighbors.
@Test
public void testAllInNeighbors() throws Exception {
    // Collect, for each vertex, all of its in-neighbors.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    Graph<Long, Long, Long> inputGraph = Graph.fromDataSet(
            TestGraphUtils.getLongLongVertexData(env),
            TestGraphUtils.getLongLongEdgeData(env),
            env);

    // Only incoming edges are handed to the edges function.
    DataSet<Tuple2<Long, Long>> inNeighborPairs =
            inputGraph.groupReduceOnEdges(new SelectInNeighbors(), EdgeDirection.IN);
    List<Tuple2<Long, Long>> collected = inNeighborPairs.collect();

    // Expected output, one tuple per line.
    expectedResult = "1,5\n"
            + "2,1\n"
            + "3,1\n"
            + "3,2\n"
            + "4,3\n"
            + "5,3\n"
            + "5,4";

    compareResultAsTuples(collected, expectedResult);
}
use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class ReduceOnEdgesMethodsITCase method testAllNeighbors.
@Test
public void testAllNeighbors() throws Exception {
    // Collect, for each vertex, all of its neighbors.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    Graph<Long, Long, Long> inputGraph = Graph.fromDataSet(
            TestGraphUtils.getLongLongVertexData(env),
            TestGraphUtils.getLongLongEdgeData(env),
            env);

    // Edges of both directions are handed to the edges function.
    DataSet<Tuple2<Long, Long>> neighborPairs =
            inputGraph.groupReduceOnEdges(new SelectNeighbors(), EdgeDirection.ALL);
    List<Tuple2<Long, Long>> collected = neighborPairs.collect();

    // Expected output, one tuple per line.
    expectedResult = "1,2\n"
            + "1,3\n"
            + "1,5\n"
            + "2,1\n"
            + "2,3\n"
            + "3,1\n"
            + "3,2\n"
            + "3,4\n"
            + "3,5\n"
            + "4,3\n"
            + "4,5\n"
            + "5,1\n"
            + "5,3\n"
            + "5,4";

    compareResultAsTuples(collected, expectedResult);
}
use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class ConnectedComponentsTest method getConnectedComponentsPlan.
/**
 * Builds the program plan of a delta-iteration job whose operators are named after a
 * connected-components computation.
 *
 * <p>The vertices come from {@code generateSequence(0, 1000)}, each mapped to a
 * {@code (value, value)} tuple; the edges data set holds the single tuple {@code (0, 0)}.
 * Inside the iteration the workset is joined with the edges, reduced to a minimum per group,
 * and joined with the solution set. The result is written to a discarding output format.
 *
 * @param parallelism the parallelism set on the execution environment
 * @param iterations passed as the second argument of {@code iterateDelta}
 *        (presumably the maximum number of iterations; confirm against the DataSet API)
 * @param solutionSetFirst if {@code true}, the solution set is the first input of the update
 *        join and the candidates the second; if {@code false}, the inputs are swapped
 * @return the plan returned by {@code env.createProgramPlan()}
 */
private static Plan getConnectedComponentsPlan(int parallelism, int iterations, boolean solutionSetFirst) {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
// Every generated number becomes a (value, value) tuple.
DataSet<Tuple2<Long, Long>> verticesWithId = env.generateSequence(0, 1000).name("Vertices").map(new MapFunction<Long, Tuple2<Long, Long>>() {
@Override
public Tuple2<Long, Long> map(Long value) {
return new Tuple2<Long, Long>(value, value);
}
}).name("Assign Vertex Ids");
// The vertices are both the receiver and the first argument of iterateDelta; the trailing 0 is
// presumably the key position (confirm against the DataSet API).
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId.iterateDelta(verticesWithId, iterations, 0).name("Connected Components Iteration");
// The edges data set contains only the single tuple (0, 0).
@SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(0L, 0L)).name("Edges");
// Join workset and edges on field 0 of each side, project field 1 of the second input (edges)
// followed by field 1 of the first input (workset), then group by field 0 and apply min on field 1.
DataSet<Tuple2<Long, Long>> minCandidateId = iteration.getWorkset().join(edges).where(0).equalTo(0).projectSecond(1).<Tuple2<Long, Long>>projectFirst(1).name("Join Candidate Id With Neighbor").groupBy(0).min(1).name("Find Minimum Candidate Id");
DataSet<Tuple2<Long, Long>> updateComponentId;
// Both branches apply the same rule and differ only in which data set is the first join input.
if (solutionSetFirst) {
updateComponentId = iteration.getSolutionSet().join(minCandidateId).where(0).equalTo(0).with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
@Override
public void join(Tuple2<Long, Long> current, Tuple2<Long, Long> candidate, Collector<Tuple2<Long, Long>> out) {
// Emit the candidate only when its field 1 is strictly smaller than the current entry's.
if (candidate.f1 < current.f1) {
out.collect(candidate);
}
}
}).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0").name("Update Component Id");
} else {
updateComponentId = minCandidateId.join(iteration.getSolutionSet()).where(0).equalTo(0).with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
@Override
public void join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> current, Collector<Tuple2<Long, Long>> out) {
// Emit the candidate only when its field 1 is strictly smaller than the current entry's.
if (candidate.f1 < current.f1) {
out.collect(candidate);
}
}
}).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0").name("Update Component Id");
}
// The same data set is passed as both arguments of closeWith; the iteration result is discarded.
iteration.closeWith(updateComponentId, updateComponentId).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("Result");
return env.createProgramPlan();
}
use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class SerializedCheckpointData method toDeque.
// ------------------------------------------------------------------------
// De-Serialize from Checkpoint
// ------------------------------------------------------------------------
/**
 * De-serializes an array of SerializedCheckpointData back into an ArrayDeque of element checkpoints.
 *
 * <p>Each entry of {@code data} yields one tuple holding its checkpoint ID and the list of
 * elements read from its serialized bytes. Tuples are appended in array order.
 *
 * @param data The data to be deserialized.
 * @param serializer The serializer used to deserialize the data.
 * @param <T> The type of the elements.
 * @return An ArrayDeque of element checkpoints.
 *
 * @throws IOException Thrown, if the deserialization fails.
 */
public static <T> ArrayDeque<Tuple2<Long, List<T>>> toDeque(SerializedCheckpointData[] data, TypeSerializer<T> serializer) throws IOException {
    ArrayDeque<Tuple2<Long, List<T>>> deque = new ArrayDeque<>(data.length);

    // One deserializer is created on the first checkpoint and re-pointed at each later buffer.
    DataInputDeserializer deser = null;

    for (SerializedCheckpointData checkpoint : data) {
        byte[] serializedData = checkpoint.getSerializedData();
        if (deser == null) {
            deser = new DataInputDeserializer(serializedData, 0, serializedData.length);
        } else {
            deser.setBuffer(serializedData, 0, serializedData.length);
        }

        // Read the element count once; it both pre-sizes the list and bounds the loop.
        final int numIds = checkpoint.getNumIds();
        final List<T> ids = new ArrayList<>(numIds);
        for (int i = 0; i < numIds; i++) {
            ids.add(serializer.deserialize(deser));
        }

        deque.addLast(new Tuple2<Long, List<T>>(checkpoint.checkpointId, ids));
    }

    return deque;
}
use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class AssignRangeIndex method mapPartition.
@Override
public void mapPartition(Iterable<IN> values, Collector<Tuple2<Integer, IN>> out) throws Exception {
    // The boundaries must arrive as exactly one element of the "RangeBoundaries" broadcast variable.
    final List<Object> broadcastInput = getRuntimeContext().getBroadcastVariable("RangeBoundaries");
    final boolean hasSingleBoundarySet = broadcastInput != null && broadcastInput.size() == 1;
    if (!hasSingleBoundarySet) {
        throw new RuntimeException("AssignRangePartition require a single RangeBoundaries as broadcast input.");
    }

    final Object[][] boundaries = (Object[][]) broadcastInput.get(0);
    final RangeBoundaries rangeBoundaries =
            new CommonRangeBoundaries(typeComparator.createComparator(), boundaries);

    // A single tuple instance is refilled and emitted for every record.
    final Tuple2<Integer, IN> reusedTuple = new Tuple2<>();
    for (IN element : values) {
        final int rangeIndex = rangeBoundaries.getRangeIndex(element);
        reusedTuple.f0 = rangeIndex;
        reusedTuple.f1 = element;
        out.collect(reusedTuple);
    }
}
Aggregations