Use of org.apache.flink.types.NullValue in project flink by apache.
In the class GraphCreationITCase, the method testFromTuple2:
@Test
public void testFromTuple2() throws Exception {
    /*
     * Test graph creation with fromTuple2DataSet
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> edges = TestGraphUtils.getLongLongTuple2Data(env);

    Graph<Long, NullValue, NullValue> graph = Graph.fromTuple2DataSet(edges, env);

    List<Vertex<Long, NullValue>> result = graph.getVertices().collect();

    // expectedResult is a String field of the enclosing test class
    expectedResult = "1,(null)\n" + "2,(null)\n" + "3,(null)\n" + "4,(null)\n" +
        "6,(null)\n" + "10,(null)\n" + "20,(null)\n" + "30,(null)\n" +
        "40,(null)\n" + "60,(null)\n";

    compareResultAsTuples(result, expectedResult);
}
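For context, fromTuple2DataSet treats each Tuple2 as a (source, target) edge, assigns NullValue to both vertex and edge values, and derives the vertex set from the distinct edge endpoints, which is why the expected output above lists each endpoint ID once with a (null) value. Below is a minimal self-contained sketch of the same pattern; the class name and edge data are illustrative, not taken from TestGraphUtils:

import java.util.Arrays;
import java.util.List;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.types.NullValue;

public class FromTuple2Sketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Each Tuple2 is an edge (source ID, target ID).
        DataSet<Tuple2<Long, Long>> edges = env.fromCollection(Arrays.asList(
            new Tuple2<>(1L, 2L),
            new Tuple2<>(2L, 3L)));

        Graph<Long, NullValue, NullValue> graph = Graph.fromTuple2DataSet(edges, env);

        // Vertices are created from the distinct edge endpoints: 1, 2, 3.
        List<Vertex<Long, NullValue>> vertices = graph.getVertices().collect();
        for (Vertex<Long, NullValue> v : vertices) {
            System.out.println(v);
        }
    }
}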
Use of org.apache.flink.types.NullValue in project flink by apache.
In the class VertexMetricsTest, the method testWithEmptyGraph:
@Test
public void testWithEmptyGraph() throws Exception {
    Result expectedResult;

    // Excluding zero-degree vertices leaves nothing to measure: all counts
    // are zero and average degree and density are NaN.
    expectedResult = new Result(0, 0, 0, 0, 0, 0, 0, 0);

    Result withoutZeroDegreeVertices = new VertexMetrics<LongValue, NullValue, NullValue>()
        .setIncludeZeroDegreeVertices(false)
        .run(emptyGraph) // emptyGraph is a field of the test base class
        .execute();

    assertEquals(expectedResult, withoutZeroDegreeVertices);
    assertEquals(Float.NaN, withoutZeroDegreeVertices.getAverageDegree(), ACCURACY);
    assertEquals(Float.NaN, withoutZeroDegreeVertices.getDensity(), ACCURACY);

    // Including zero-degree vertices counts the three isolated vertices.
    expectedResult = new Result(3, 0, 0, 0, 0, 0, 0, 0);

    Result withZeroDegreeVertices = new VertexMetrics<LongValue, NullValue, NullValue>()
        .setIncludeZeroDegreeVertices(true)
        .run(emptyGraph)
        .execute();

    assertEquals(expectedResult, withZeroDegreeVertices);
    assertEquals(0.0f, withZeroDegreeVertices.getAverageDegree(), ACCURACY);
    assertEquals(0.0f, withZeroDegreeVertices.getDensity(), ACCURACY);
}
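The emptyGraph fixture is defined in the shared test base class and not shown here; the second expected Result(3, 0, ...) implies it contains three vertices and no edges. A hedged sketch of an equivalent fixture, assuming vertex IDs 0 through 2; the actual field may be built differently:

import java.util.Arrays;
import java.util.Collections;

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;

public class EmptyGraphSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Three isolated vertices, no edges.
        Graph<LongValue, NullValue, NullValue> emptyGraph = Graph.fromCollection(
            Arrays.asList(
                new Vertex<>(new LongValue(0), NullValue.getInstance()),
                new Vertex<>(new LongValue(1), NullValue.getInstance()),
                new Vertex<>(new LongValue(2), NullValue.getInstance())),
            Collections.<Edge<LongValue, NullValue>>emptyList(),
            env);

        System.out.println(emptyGraph.numberOfVertices()); // 3
        System.out.println(emptyGraph.numberOfEdges());    // 0
    }
}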
Use of org.apache.flink.types.NullValue in project flink by apache.
In the class JaccardIndexTest, the method testRMatGraph:
@Test
public void testRMatGraph() throws Exception {
    long vertexCount = 1 << 8;
    long edgeCount = 8 * vertexCount;

    RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

    // env is a shared field of the test base class
    Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount)
        .generate()
        .run(new Simplify<LongValue, NullValue, NullValue>(false));

    DataSet<Result<LongValue>> ji = graph.run(
        new JaccardIndex<LongValue, NullValue, NullValue>().setGroupSize(4));

    Checksum checksum = new ChecksumHashCode<Result<LongValue>>()
        .run(ji)
        .execute();

    assertEquals(13954, checksum.getCount());
    assertEquals(0x00001b1a1f7a9d0bL, checksum.getChecksum());
}
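The same pipeline can be run standalone outside the test harness. A sketch with an explicit environment and the imports resolved; the checksum assertions above only hold for the exact generator parameters used in the test:

import org.apache.commons.math3.random.JDKRandomGenerator;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.asm.dataset.ChecksumHashCode;
import org.apache.flink.graph.asm.dataset.ChecksumHashCode.Checksum;
import org.apache.flink.graph.asm.simple.undirected.Simplify;
import org.apache.flink.graph.generator.RMatGraph;
import org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory;
import org.apache.flink.graph.generator.random.RandomGenerableFactory;
import org.apache.flink.graph.library.similarity.JaccardIndex;
import org.apache.flink.graph.library.similarity.JaccardIndex.Result;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;

public class JaccardIndexSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        long vertexCount = 1 << 8;
        long edgeCount = 8 * vertexCount;

        RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

        // Generate an RMat graph and strip duplicate edges and self-loops.
        Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount)
            .generate()
            .run(new Simplify<LongValue, NullValue, NullValue>(false));

        DataSet<Result<LongValue>> ji = graph.run(
            new JaccardIndex<LongValue, NullValue, NullValue>().setGroupSize(4));

        Checksum checksum = new ChecksumHashCode<Result<LongValue>>().run(ji).execute();
        System.out.println(checksum);
    }
}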
Use of org.apache.flink.types.NullValue in project flink by apache.
In the class TriangleListing, the method main:
public static void main(String[] args) throws Exception {
    // Set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    ParameterTool parameters = ParameterTool.fromArgs(args);
    env.getConfig().setGlobalJobParameters(parameters);

    if (!parameters.has("directed")) {
        throw new ProgramParametrizationException(getUsage(
            "must declare execution mode as '--directed true' or '--directed false'"));
    }
    boolean directedAlgorithm = parameters.getBoolean("directed");

    int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);
    boolean triadic_census = parameters.getBoolean("triadic_census", DEFAULT_TRIADIC_CENSUS);

    GraphAnalytic tc = null;
    DataSet tl;

    switch (parameters.get("input", "")) {
        case "csv": {
            String lineDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
            String fieldDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

            GraphCsvReader reader = Graph
                .fromCsvReader(parameters.getRequired("input_filename"), env)
                .ignoreCommentsEdges("#")
                .lineDelimiterEdges(lineDelimiter)
                .fieldDelimiterEdges(fieldDelimiter);

            switch (parameters.get("type", "")) {
                case "integer": {
                    Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

                    if (directedAlgorithm) {
                        if (parameters.getBoolean("simplify", false)) {
                            graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>()
                                .setParallelism(little_parallelism));
                        }

                        if (triadic_census) {
                            tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>()
                                .setLittleParallelism(little_parallelism));
                        }
                        tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    } else {
                        if (parameters.getBoolean("simplify", false)) {
                            graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false)
                                .setParallelism(little_parallelism));
                        }

                        if (triadic_census) {
                            tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>()
                                .setLittleParallelism(little_parallelism));
                        }
                        tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                } break;

                case "string": {
                    Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

                    if (directedAlgorithm) {
                        if (parameters.getBoolean("simplify", false)) {
                            graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>()
                                .setParallelism(little_parallelism));
                        }

                        if (triadic_census) {
                            tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<StringValue, NullValue, NullValue>()
                                .setLittleParallelism(little_parallelism));
                        }
                        tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<StringValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    } else {
                        if (parameters.getBoolean("simplify", false)) {
                            graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false)
                                .setParallelism(little_parallelism));
                        }

                        if (triadic_census) {
                            tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<StringValue, NullValue, NullValue>()
                                .setLittleParallelism(little_parallelism));
                        }
                        tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<StringValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                } break;

                default:
                    throw new ProgramParametrizationException(getUsage("invalid CSV type"));
            }
        } break;

        case "rmat": {
            int scale = parameters.getInt("scale", DEFAULT_SCALE);
            int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

            RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

            long vertexCount = 1L << scale;
            long edgeCount = vertexCount * edgeFactor;

            Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount)
                .generate();

            if (directedAlgorithm) {
                if (scale > 32) {
                    Graph<LongValue, NullValue, NullValue> simpleGraph = graph
                        .run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>()
                            .setParallelism(little_parallelism));

                    if (triadic_census) {
                        tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                    tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                } else {
                    Graph<LongValue, NullValue, NullValue> simpleGraph = graph
                        .run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>()
                            .setParallelism(little_parallelism));

                    if (triadic_census) {
                        tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                    tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                }
            } else {
                boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

                if (scale > 32) {
                    Graph<LongValue, NullValue, NullValue> simpleGraph = graph
                        .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip)
                            .setParallelism(little_parallelism));

                    if (triadic_census) {
                        tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                    tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                } else {
                    Graph<IntValue, NullValue, NullValue> simpleGraph = graph
                        .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
                            .setParallelism(little_parallelism))
                        .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip)
                            .setParallelism(little_parallelism));

                    if (triadic_census) {
                        tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<IntValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                    tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<IntValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                }
            }
        } break;

        default:
            throw new ProgramParametrizationException(getUsage("invalid input type"));
    }

    switch (parameters.get("output", "")) {
        case "print":
            System.out.println();
            if (directedAlgorithm) {
                for (Object e : tl.collect()) {
                    org.apache.flink.graph.library.clustering.directed.TriangleListing.Result result =
                        (org.apache.flink.graph.library.clustering.directed.TriangleListing.Result) e;
                    System.out.println(result.toPrintableString());
                }
            } else {
                tl.print();
            }
            break;

        case "hash":
            System.out.println();
            System.out.println(DataSetUtils.checksumHashCode(tl));
            break;

        case "csv":
            String filename = parameters.getRequired("output_filename");

            String lineDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
            String fieldDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

            tl.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

            env.execute();
            break;

        default:
            throw new ProgramParametrizationException(getUsage("invalid output type"));
    }

    if (tc != null) {
        System.out.print("Triadic census:\n ");
        System.out.println(tc.getResult().toString().replace(";", "\n "));
    }

    JobExecutionResult result = env.getLastJobExecutionResult();

    NumberFormat nf = NumberFormat.getInstance();
    System.out.println();
    System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
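Stripped of the driver's parameter handling, the undirected core of the job above reduces to a short pipeline. A minimal sketch assuming a CSV edge list with long integer IDs; the input path is illustrative:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.asm.simple.undirected.Simplify;
import org.apache.flink.graph.library.clustering.undirected.TriangleListing;
import org.apache.flink.graph.library.clustering.undirected.TriangleListing.Result;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;

public class TriangleListingSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Read the edge list, then deduplicate edges and drop self-loops
        // before listing triangles, mirroring the '--simplify true' path above.
        Graph<LongValue, NullValue, NullValue> graph = Graph
            .fromCsvReader("/path/to/edges.csv", env) // illustrative path
            .ignoreCommentsEdges("#")
            .keyType(LongValue.class)
            .run(new Simplify<LongValue, NullValue, NullValue>(false));

        DataSet<Result<LongValue>> triangles = graph.run(
            new TriangleListing<LongValue, NullValue, NullValue>());

        triangles.print();
    }
}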
Use of org.apache.flink.types.NullValue in project flink by apache.
In the class MusicProfiles, the method main:
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    /**
     * Read the user-song-play triplets.
     */
    DataSet<Tuple3<String, String, Integer>> triplets = getUserSongTripletsData(env);

    /**
     * Read the mismatches dataset and extract the songIDs.
     */
    DataSet<Tuple1<String>> mismatches = getMismatchesData(env).map(new ExtractMismatchSongIds());

    /**
     * Filter out the mismatches from the triplets dataset.
     */
    DataSet<Tuple3<String, String, Integer>> validTriplets = triplets
        .coGroup(mismatches)
        .where(1)
        .equalTo(0)
        .with(new FilterOutMismatches());

    /**
     * Create a user -> song weighted bipartite graph where the edge weights
     * correspond to play counts.
     */
    Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(validTriplets, env);

    /**
     * Get the top track (most listened to) for each user.
     */
    DataSet<Tuple2<String, String>> usersWithTopTrack = userSongGraph
        .groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
        .filter(new FilterSongNodes());

    if (fileOutput) {
        usersWithTopTrack.writeAsCsv(topTracksOutputPath, "\n", "\t");
    } else {
        usersWithTopTrack.print();
    }

    /**
     * Create a user-user similarity graph based on common songs, i.e. two
     * users that listen to the same song are connected. For each song, we
     * create an edge between each pair of its in-neighbors.
     */
    DataSet<Edge<String, NullValue>> similarUsers = userSongGraph
        .getEdges()
        // keep only user-song edges above the play-count threshold
        .filter(new FilterFunction<Edge<String, Integer>>() {
            public boolean filter(Edge<String, Integer> edge) {
                return (edge.getValue() > playcountThreshold);
            }
        })
        .groupBy(1)
        .reduceGroup(new CreateSimilarUserEdges())
        .distinct();

    Graph<String, Long, NullValue> similarUsersGraph = Graph
        .fromDataSet(similarUsers, new MapFunction<String, Long>() {
            public Long map(String value) {
                return 1L;
            }
        }, env)
        .getUndirected();

    /**
     * Detect user communities using the label propagation library method.
     */
    // Initialize each vertex with a unique numeric label and run the label propagation algorithm
    DataSet<Tuple2<String, Long>> idsWithInitialLabels = DataSetUtils
        .zipWithUniqueId(similarUsersGraph.getVertexIds())
        .map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> map(Tuple2<Long, String> tuple2) throws Exception {
                return new Tuple2<String, Long>(tuple2.f1, tuple2.f0);
            }
        });

    DataSet<Vertex<String, Long>> verticesWithCommunity = similarUsersGraph
        .joinWithVertices(idsWithInitialLabels, new VertexJoinFunction<Long, Long>() {
            public Long vertexJoin(Long vertexValue, Long inputValue) {
                return inputValue;
            }
        })
        .run(new LabelPropagation<String, Long, NullValue>(maxIterations));

    if (fileOutput) {
        verticesWithCommunity.writeAsCsv(communitiesOutputPath, "\n", "\t");
        // since file sinks are lazy, we trigger the execution explicitly
        env.execute();
    } else {
        verticesWithCommunity.print();
    }
}
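The helper classes referenced above (ExtractMismatchSongIds, FilterOutMismatches, GetTopSongPerUser, FilterSongNodes, CreateSimilarUserEdges) are not shown in this excerpt. To illustrate the groupReduceOnEdges neighborhood API, here is a hypothetical sketch of a "top song per user" function; the actual GetTopSongPerUser in the Flink example may differ:

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.EdgesFunction;
import org.apache.flink.util.Collector;

// Receives, per user vertex, that vertex's ID paired with each of its
// out-edges (user -> song, weighted by play count) and emits the song
// with the highest play count.
public class TopSongPerUserSketch implements EdgesFunction<String, Integer, Tuple2<String, String>> {
    @Override
    public void iterateEdges(Iterable<Tuple2<String, Edge<String, Integer>>> edges,
            Collector<Tuple2<String, String>> out) throws Exception {
        String userId = null;
        String topSong = null;
        int maxPlayCount = Integer.MIN_VALUE;

        for (Tuple2<String, Edge<String, Integer>> edge : edges) {
            userId = edge.f0;
            if (edge.f1.getValue() > maxPlayCount) {
                maxPlayCount = edge.f1.getValue();
                topSong = edge.f1.getTarget();
            }
        }
        out.collect(new Tuple2<>(userId, topSong));
    }
}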