use of org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory in project flink by apache.
the class AsmTestBase method directedRMatGraph.
/**
* Generate a directed RMat graph. Tests are usually run on a graph with scale=10 and
* edgeFactor=16 but algorithms generating very large DataSets require smaller input graphs.
*
* <p>The examples program can write this graph as a CSV file for verifying algorithm results
* with external libraries:
*
* <pre>
* ./bin/flink run examples/flink-gelly-examples_*.jar --algorithm EdgeList \
* --input RMatGraph --type long --simplify directed --scale $SCALE --edge_factor $EDGE_FACTOR \
* --output csv --filename directedRMatGraph.csv
* </pre>
*
* @param scale vertices are generated in the range [0, 2<sup>scale</sup>)
* @param edgeFactor the edge count is {@code edgeFactor} * 2<sup>scale</sup>
* @return directed RMat graph
* @throws Exception on error
*/
protected Graph<LongValue, NullValue, NullValue> directedRMatGraph(int scale, int edgeFactor) throws Exception {
long vertexCount = 1L << scale;
long edgeCount = edgeFactor * vertexCount;
return new RMatGraph<>(env, new JDKRandomGeneratorFactory(), vertexCount, edgeCount).generate().run(new org.apache.flink.graph.asm.simple.directed.Simplify<>());
}
use of org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory in project flink by apache.
the class GraphMetrics method main.
public static void main(String[] args) throws Exception {
// Set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableObjectReuse();
ParameterTool parameters = ParameterTool.fromArgs(args);
env.getConfig().setGlobalJobParameters(parameters);
if (!parameters.has("directed")) {
throw new ProgramParametrizationException(getUsage("must declare execution mode as '--directed true' or '--directed false'"));
}
boolean directedAlgorithm = parameters.getBoolean("directed");
GraphAnalytic vm;
GraphAnalytic em;
switch(parameters.get("input", "")) {
case "csv":
{
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env).ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);
switch(parameters.get("type", "")) {
case "integer":
{
Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);
if (directedAlgorithm) {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>());
}
vm = graph.run(new org.apache.flink.graph.library.metric.directed.VertexMetrics<LongValue, NullValue, NullValue>());
em = graph.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<LongValue, NullValue, NullValue>());
} else {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false));
}
vm = graph.run(new org.apache.flink.graph.library.metric.undirected.VertexMetrics<LongValue, NullValue, NullValue>());
em = graph.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<LongValue, NullValue, NullValue>());
}
}
break;
case "string":
{
Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);
if (directedAlgorithm) {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>());
}
vm = graph.run(new org.apache.flink.graph.library.metric.directed.VertexMetrics<StringValue, NullValue, NullValue>());
em = graph.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<StringValue, NullValue, NullValue>());
} else {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false));
}
vm = graph.run(new org.apache.flink.graph.library.metric.undirected.VertexMetrics<StringValue, NullValue, NullValue>());
em = graph.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<StringValue, NullValue, NullValue>());
}
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid CSV type"));
}
}
break;
case "rmat":
{
int scale = parameters.getInt("scale", DEFAULT_SCALE);
int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);
RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();
long vertexCount = 1L << scale;
long edgeCount = vertexCount * edgeFactor;
Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();
if (directedAlgorithm) {
if (scale > 32) {
Graph<LongValue, NullValue, NullValue> newGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>());
vm = newGraph.run(new org.apache.flink.graph.library.metric.directed.VertexMetrics<LongValue, NullValue, NullValue>());
em = newGraph.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<LongValue, NullValue, NullValue>());
} else {
Graph<IntValue, NullValue, NullValue> newGraph = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())).run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>());
vm = newGraph.run(new org.apache.flink.graph.library.metric.directed.VertexMetrics<IntValue, NullValue, NullValue>());
em = newGraph.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<IntValue, NullValue, NullValue>());
}
} else {
boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);
if (scale > 32) {
Graph<LongValue, NullValue, NullValue> newGraph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip));
vm = newGraph.run(new org.apache.flink.graph.library.metric.undirected.VertexMetrics<LongValue, NullValue, NullValue>());
em = newGraph.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<LongValue, NullValue, NullValue>());
} else {
Graph<IntValue, NullValue, NullValue> newGraph = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())).run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip));
vm = newGraph.run(new org.apache.flink.graph.library.metric.undirected.VertexMetrics<IntValue, NullValue, NullValue>());
em = newGraph.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<IntValue, NullValue, NullValue>());
}
}
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid input type"));
}
env.execute("Graph Metrics");
System.out.println();
System.out.print("Vertex metrics:\n ");
System.out.println(vm.getResult().toString().replace(";", "\n "));
System.out.println();
System.out.print("Edge metrics:\n ");
System.out.println(em.getResult().toString().replace(";", "\n "));
JobExecutionResult result = env.getLastJobExecutionResult();
NumberFormat nf = NumberFormat.getInstance();
System.out.println();
System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
use of org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory in project flink by apache.
the class TriangleListing method main.
public static void main(String[] args) throws Exception {
// Set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableObjectReuse();
ParameterTool parameters = ParameterTool.fromArgs(args);
env.getConfig().setGlobalJobParameters(parameters);
if (!parameters.has("directed")) {
throw new ProgramParametrizationException(getUsage("must declare execution mode as '--directed true' or '--directed false'"));
}
boolean directedAlgorithm = parameters.getBoolean("directed");
int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);
boolean triadic_census = parameters.getBoolean("triadic_census", DEFAULT_TRIADIC_CENSUS);
GraphAnalytic tc = null;
DataSet tl;
switch(parameters.get("input", "")) {
case "csv":
{
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env).ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);
switch(parameters.get("type", "")) {
case "integer":
{
Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);
if (directedAlgorithm) {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
}
if (triadic_census) {
tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
} else {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
}
if (triadic_census) {
tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
}
break;
case "string":
{
Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);
if (directedAlgorithm) {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>().setParallelism(little_parallelism));
}
if (triadic_census) {
tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
} else {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
}
if (triadic_census) {
tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid CSV type"));
}
}
break;
case "rmat":
{
int scale = parameters.getInt("scale", DEFAULT_SCALE);
int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);
RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();
long vertexCount = 1L << scale;
long edgeCount = vertexCount * edgeFactor;
Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();
if (directedAlgorithm) {
if (scale > 32) {
Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
if (triadic_census) {
tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
} else {
Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
if (triadic_census) {
tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
} else {
boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);
if (scale > 32) {
Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));
if (triadic_census) {
tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
} else {
Graph<IntValue, NullValue, NullValue> simpleGraph = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()).setParallelism(little_parallelism)).run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));
if (triadic_census) {
tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
}
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid input type"));
}
switch(parameters.get("output", "")) {
case "print":
System.out.println();
if (directedAlgorithm) {
for (Object e : tl.collect()) {
org.apache.flink.graph.library.clustering.directed.TriangleListing.Result result = (org.apache.flink.graph.library.clustering.directed.TriangleListing.Result) e;
System.out.println(result.toPrintableString());
}
} else {
tl.print();
}
break;
case "hash":
System.out.println();
System.out.println(DataSetUtils.checksumHashCode(tl));
break;
case "csv":
String filename = parameters.getRequired("output_filename");
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
tl.writeAsCsv(filename, lineDelimiter, fieldDelimiter);
env.execute();
break;
default:
throw new ProgramParametrizationException(getUsage("invalid output type"));
}
if (tc != null) {
System.out.print("Triadic census:\n ");
System.out.println(tc.getResult().toString().replace(";", "\n "));
}
JobExecutionResult result = env.getLastJobExecutionResult();
NumberFormat nf = NumberFormat.getInstance();
System.out.println();
System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
use of org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory in project flink by apache.
the class AsmTestBase method undirectedRMatGraph.
/**
* Generate an undirected RMat graph. Tests are usually run on a graph with scale=10 and
* edgeFactor=16 but algorithms generating very large DataSets require smaller input graphs.
*
* <p>The examples program can write this graph as a CSV file for verifying algorithm results
* with external libraries:
*
* <pre>
* ./bin/flink run examples/flink-gelly-examples_*.jar --algorithm EdgeList \
* --input RMatGraph --type long --simplify undirected --scale $SCALE --edge_factor $EDGE_FACTOR \
* --output csv --filename undirectedRMatGraph.csv
* </pre>
*
* @param scale vertices are generated in the range [0, 2<sup>scale</sup>)
* @param edgeFactor the edge count is {@code edgeFactor} * 2<sup>scale</sup>
* @return undirected RMat graph
* @throws Exception on error
*/
protected Graph<LongValue, NullValue, NullValue> undirectedRMatGraph(int scale, int edgeFactor) throws Exception {
long vertexCount = 1L << scale;
long edgeCount = edgeFactor * vertexCount;
return new RMatGraph<>(env, new JDKRandomGeneratorFactory(), vertexCount, edgeCount).generate().run(new org.apache.flink.graph.asm.simple.undirected.Simplify<>(false));
}
Aggregations