Usage example of org.apache.flink.graph.Edge in the Apache Flink project: the main method of the MusicProfiles class.
/**
 * Entry point for the MusicProfiles example.
 *
 * <p>Reads user-song-play triplets, filters out songs listed in the mismatches
 * dataset, builds a user-song weighted bipartite graph, emits each user's most
 * listened track, then derives a user-user similarity graph (users connected
 * through common songs) and detects communities via label propagation.
 *
 * @param args command-line arguments consumed by {@code parseParameters}
 * @throws Exception if the Flink job fails to execute
 */
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Read the user-song-play triplets.
    DataSet<Tuple3<String, String, Integer>> triplets = getUserSongTripletsData(env);

    // Read the mismatches dataset and extract the song IDs.
    DataSet<Tuple1<String>> mismatches = getMismatchesData(env).map(new ExtractMismatchSongIds());

    // Filter out the mismatches from the triplets dataset.
    DataSet<Tuple3<String, String, Integer>> validTriplets =
            triplets.coGroup(mismatches).where(1).equalTo(0).with(new FilterOutMismatches());

    // Create a user -> song weighted bipartite graph where the edge weights
    // correspond to play counts.
    Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(validTriplets, env);

    // Get the top track (most listened) for each user.
    DataSet<Tuple2<String, String>> usersWithTopTrack = userSongGraph
            .groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
            .filter(new FilterSongNodes());

    if (fileOutput) {
        usersWithTopTrack.writeAsCsv(topTracksOutputPath, "\n", "\t");
    } else {
        usersWithTopTrack.print();
    }

    // Create a user-user similarity graph based on common songs: two users
    // that listen to the same song are connected. For each song, an edge is
    // created between each pair of its in-neighbors.
    DataSet<Edge<String, NullValue>> similarUsers = userSongGraph.getEdges()
            // Keep only edges whose play count exceeds the threshold.
            .filter(new FilterFunction<Edge<String, Integer>>() {
                @Override
                public boolean filter(Edge<String, Integer> edge) {
                    return (edge.getValue() > playcountThreshold);
                }
            })
            .groupBy(1)
            .reduceGroup(new CreateSimilarUserEdges())
            .distinct();

    Graph<String, Long, NullValue> similarUsersGraph = Graph.fromDataSet(similarUsers,
            new MapFunction<String, Long>() {
                @Override
                public Long map(String value) {
                    // Placeholder vertex value; replaced below by the unique labels.
                    // Use 'L', not lowercase 'l', for long literals (readability).
                    return 1L;
                }
            }, env).getUndirected();

    // Detect user communities using the label propagation library method:
    // initialize each vertex with a unique numeric label, then run the
    // label propagation algorithm.
    DataSet<Tuple2<String, Long>> idsWithInitialLabels = DataSetUtils
            .zipWithUniqueId(similarUsersGraph.getVertexIds())
            .map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {
                @Override
                public Tuple2<String, Long> map(Tuple2<Long, String> tuple2) throws Exception {
                    // Swap (id, vertex) -> (vertex, id) so the vertex key comes first.
                    return new Tuple2<String, Long>(tuple2.f1, tuple2.f0);
                }
            });

    DataSet<Vertex<String, Long>> verticesWithCommunity = similarUsersGraph
            .joinWithVertices(idsWithInitialLabels, new VertexJoinFunction<Long, Long>() {
                @Override
                public Long vertexJoin(Long vertexValue, Long inputValue) {
                    // Replace the placeholder value with the unique initial label.
                    return inputValue;
                }
            })
            .run(new LabelPropagation<String, Long, NullValue>(maxIterations));

    if (fileOutput) {
        verticesWithCommunity.writeAsCsv(communitiesOutputPath, "\n", "\t");
        // Since file sinks are lazy, we trigger the execution explicitly.
        env.execute();
    } else {
        verticesWithCommunity.print();
    }
}
Usage example of org.apache.flink.graph.Edge in the Apache Flink project: the main method of the SingleSourceShortestPaths class.
/**
 * Entry point for the single-source shortest paths example.
 *
 * <p>Builds a graph from the edge input, runs a scatter-gather iteration to
 * compute the shortest distance from the source vertex to every other vertex,
 * and emits the resulting vertex/distance pairs.
 *
 * @param args command-line arguments consumed by {@code parseParameters}
 * @throws Exception if the Flink job fails to execute
 */
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // Build the input graph; vertex values are initialized relative to the source vertex.
    DataSet<Edge<Long, Double>> inputEdges = getEdgesDataSet(environment);
    Graph<Long, Double, Double> inputGraph =
            Graph.fromDataSet(inputEdges, new InitVertices(srcVertexId), environment);

    // Run the scatter-gather iteration and extract the vertices as the result.
    DataSet<Vertex<Long, Double>> singleSourceShortestPaths = inputGraph
            .runScatterGatherIteration(
                    new MinDistanceMessenger(), new VertexDistanceUpdater(), maxIterations)
            .getVertices();

    // Emit the result.
    if (fileOutput) {
        singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");
        // Since file sinks are lazy, we trigger the execution explicitly.
        environment.execute("Single Source Shortest Paths Example");
    } else {
        singleSourceShortestPaths.print();
    }
}
Aggregations