
Example 21 with NullValue

Use of org.apache.flink.types.NullValue in the Apache Flink project.

Class GraphCreationITCase, method testFromTuple2.

@Test
public void testFromTuple2() throws Exception {
    // Test graph creation with fromTuple2DataSet.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> edges = TestGraphUtils.getLongLongTuple2Data(env);
    Graph<Long, NullValue, NullValue> graph = Graph.fromTuple2DataSet(edges, env);
    List<Vertex<Long, NullValue>> result = graph.getVertices().collect();
    expectedResult = "1,(null)\n" + "2,(null)\n" + "3,(null)\n" + "4,(null)\n" + "6,(null)\n" + "10,(null)\n" + "20,(null)\n" + "30,(null)\n" + "40,(null)\n" + "60,(null)\n";
    compareResultAsTuples(result, expectedResult);
}
Also used : Vertex(org.apache.flink.graph.Vertex) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) NullValue(org.apache.flink.types.NullValue) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)
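
As a quick illustration of the same API outside the test harness, here is a minimal sketch (not part of the quoted test) that builds the Tuple2 edge list inline instead of using TestGraphUtils; in addition to the classes listed above it assumes org.apache.flink.api.java.DataSet and org.apache.flink.graph.Graph.

// Sketch only: inline edge list instead of TestGraphUtils.
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

DataSet<Tuple2<Long, Long>> edges = env.fromElements(
        new Tuple2<>(1L, 2L),
        new Tuple2<>(2L, 3L),
        new Tuple2<>(3L, 1L));

// fromTuple2DataSet assigns NullValue to every vertex value and edge value.
Graph<Long, NullValue, NullValue> graph = Graph.fromTuple2DataSet(edges, env);

// Each vertex prints with the "(null)" value seen in expectedResult above.
graph.getVertices().print();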

Example 22 with NullValue

Use of org.apache.flink.types.NullValue in the Apache Flink project.

Class VertexMetricsTest, method testWithEmptyGraph.

@Test
public void testWithEmptyGraph() throws Exception {
    Result expectedResult;
    expectedResult = new Result(0, 0, 0, 0, 0, 0, 0, 0);
    Result withoutZeroDegreeVertices = new VertexMetrics<LongValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false).run(emptyGraph).execute();
    assertEquals(expectedResult, withoutZeroDegreeVertices);
    assertEquals(Float.NaN, withoutZeroDegreeVertices.getAverageDegree(), ACCURACY);
    assertEquals(Float.NaN, withoutZeroDegreeVertices.getDensity(), ACCURACY);
    expectedResult = new Result(3, 0, 0, 0, 0, 0, 0, 0);
    Result withZeroDegreeVertices = new VertexMetrics<LongValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(true).run(emptyGraph).execute();
    assertEquals(expectedResult, withZeroDegreeVertices);
    assertEquals(0.0f, withZeroDegreeVertices.getAverageDegree(), ACCURACY);
    assertEquals(0.0f, withZeroDegreeVertices.getDensity(), ACCURACY);
}
Also used : NullValue(org.apache.flink.types.NullValue) LongValue(org.apache.flink.types.LongValue) Result(org.apache.flink.graph.library.metric.directed.VertexMetrics.Result) Test(org.junit.Test)
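
The emptyGraph field comes from the shared test base class and is not shown in this excerpt. As a rough, self-contained sketch (an assumption about the setup, not the actual test code), an edge-free graph of the same shape can be produced with the EmptyGraph generator from org.apache.flink.graph.generator and fed to VertexMetrics the same way:

// Sketch only: three vertices and zero edges, matching the vertex count the test expects.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

Graph<LongValue, NullValue, NullValue> emptyGraph = new EmptyGraph(env, 3).generate();

// Same analytic pattern as the test: run() wires the analytic, execute() runs the job.
Result withZeroDegreeVertices = new VertexMetrics<LongValue, NullValue, NullValue>()
        .setIncludeZeroDegreeVertices(true)
        .run(emptyGraph)
        .execute();

System.out.println(withZeroDegreeVertices);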

Example 23 with NullValue

Use of org.apache.flink.types.NullValue in the Apache Flink project.

Class JaccardIndexTest, method testRMatGraph.

@Test
public void testRMatGraph() throws Exception {
    long vertexCount = 1 << 8;
    long edgeCount = 8 * vertexCount;
    RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();
    Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate().run(new Simplify<LongValue, NullValue, NullValue>(false));
    DataSet<Result<LongValue>> ji = graph.run(new JaccardIndex<LongValue, NullValue, NullValue>().setGroupSize(4));
    Checksum checksum = new ChecksumHashCode<Result<LongValue>>().run(ji).execute();
    assertEquals(13954, checksum.getCount());
    assertEquals(0x00001b1a1f7a9d0bL, checksum.getChecksum());
}
Also used : JDKRandomGeneratorFactory(org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory) ChecksumHashCode(org.apache.flink.graph.asm.dataset.ChecksumHashCode) Result(org.apache.flink.graph.library.similarity.JaccardIndex.Result) RMatGraph(org.apache.flink.graph.generator.RMatGraph) NullValue(org.apache.flink.types.NullValue) Checksum(org.apache.flink.graph.asm.dataset.ChecksumHashCode.Checksum) LongValue(org.apache.flink.types.LongValue) JDKRandomGenerator(org.apache.commons.math3.random.JDKRandomGenerator) Test(org.junit.Test)
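
For a quick look at the actual similarity scores rather than a checksum, a small hand-built graph can stand in for the RMat generator. This is an illustrative sketch, not part of the test; besides the classes listed above it assumes org.apache.flink.graph.Edge.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// A tiny fixed edge list stands in for the generated RMat graph.
DataSet<Edge<LongValue, NullValue>> edges = env.fromElements(
        new Edge<>(new LongValue(0), new LongValue(1), NullValue.getInstance()),
        new Edge<>(new LongValue(0), new LongValue(2), NullValue.getInstance()),
        new Edge<>(new LongValue(1), new LongValue(2), NullValue.getInstance()),
        new Edge<>(new LongValue(2), new LongValue(3), NullValue.getInstance()));

// getUndirected() adds the reverse of each edge, giving a simple undirected graph.
Graph<LongValue, NullValue, NullValue> graph = Graph.fromDataSet(edges, env).getUndirected();

// One Result per vertex pair that shares at least one neighbor.
DataSet<Result<LongValue>> ji = graph.run(new JaccardIndex<LongValue, NullValue, NullValue>());
ji.print();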

Example 24 with NullValue

Use of org.apache.flink.types.NullValue in the Apache Flink project.

Class TriangleListing, method main.

public static void main(String[] args) throws Exception {
    // Set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    ParameterTool parameters = ParameterTool.fromArgs(args);
    env.getConfig().setGlobalJobParameters(parameters);
    if (!parameters.has("directed")) {
        throw new ProgramParametrizationException(getUsage("must declare execution mode as '--directed true' or '--directed false'"));
    }
    boolean directedAlgorithm = parameters.getBoolean("directed");
    int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);
    boolean triadic_census = parameters.getBoolean("triadic_census", DEFAULT_TRIADIC_CENSUS);
    GraphAnalytic tc = null;
    DataSet tl;
    switch(parameters.get("input", "")) {
        case "csv":
            {
                String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
                String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
                GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env).ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);
                switch(parameters.get("type", "")) {
                    case "integer":
                        {
                            Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);
                            if (directedAlgorithm) {
                                if (parameters.getBoolean("simplify", false)) {
                                    graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
                                }
                                if (triadic_census) {
                                    tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                                }
                                tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                            } else {
                                if (parameters.getBoolean("simplify", false)) {
                                    graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
                                }
                                if (triadic_census) {
                                    tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                                }
                                tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                            }
                        }
                        break;
                    case "string":
                        {
                            Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);
                            if (directedAlgorithm) {
                                if (parameters.getBoolean("simplify", false)) {
                                    graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>().setParallelism(little_parallelism));
                                }
                                if (triadic_census) {
                                    tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                                }
                                tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                            } else {
                                if (parameters.getBoolean("simplify", false)) {
                                    graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
                                }
                                if (triadic_census) {
                                    tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                                }
                                tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                            }
                        }
                        break;
                    default:
                        throw new ProgramParametrizationException(getUsage("invalid CSV type"));
                }
            }
            break;
        case "rmat":
            {
                int scale = parameters.getInt("scale", DEFAULT_SCALE);
                int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);
                RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();
                long vertexCount = 1L << scale;
                long edgeCount = vertexCount * edgeFactor;
                Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();
                if (directedAlgorithm) {
                    if (scale > 32) {
                        Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
                        if (triadic_census) {
                            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                        }
                        tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                    } else {
                        Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
                        if (triadic_census) {
                            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                        }
                        tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                    }
                } else {
                    boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);
                    if (scale > 32) {
                        Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));
                        if (triadic_census) {
                            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                        }
                        tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                    } else {
                        Graph<IntValue, NullValue, NullValue> simpleGraph = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()).setParallelism(little_parallelism)).run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));
                        if (triadic_census) {
                            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                        }
                        tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                    }
                }
            }
            break;
        default:
            throw new ProgramParametrizationException(getUsage("invalid input type"));
    }
    switch(parameters.get("output", "")) {
        case "print":
            System.out.println();
            if (directedAlgorithm) {
                for (Object e : tl.collect()) {
                    org.apache.flink.graph.library.clustering.directed.TriangleListing.Result result = (org.apache.flink.graph.library.clustering.directed.TriangleListing.Result) e;
                    System.out.println(result.toPrintableString());
                }
            } else {
                tl.print();
            }
            break;
        case "hash":
            System.out.println();
            System.out.println(DataSetUtils.checksumHashCode(tl));
            break;
        case "csv":
            String filename = parameters.getRequired("output_filename");
            String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
            String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
            tl.writeAsCsv(filename, lineDelimiter, fieldDelimiter);
            env.execute();
            break;
        default:
            throw new ProgramParametrizationException(getUsage("invalid output type"));
    }
    if (tc != null) {
        System.out.print("Triadic census:\n  ");
        System.out.println(tc.getResult().toString().replace(";", "\n "));
    }
    JobExecutionResult result = env.getLastJobExecutionResult();
    NumberFormat nf = NumberFormat.getInstance();
    System.out.println();
    System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
Also used : RandomGenerableFactory(org.apache.flink.graph.generator.random.RandomGenerableFactory) DataSet(org.apache.flink.api.java.DataSet) GraphAnalytic(org.apache.flink.graph.GraphAnalytic) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) NullValue(org.apache.flink.types.NullValue) StringValue(org.apache.flink.types.StringValue) LongValueToUnsignedIntValue(org.apache.flink.graph.asm.translate.translators.LongValueToUnsignedIntValue) IntValue(org.apache.flink.types.IntValue) GraphCsvReader(org.apache.flink.graph.GraphCsvReader) RMatGraph(org.apache.flink.graph.generator.RMatGraph) Graph(org.apache.flink.graph.Graph) LongValue(org.apache.flink.types.LongValue) ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) JDKRandomGeneratorFactory(org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory) TranslateGraphIds(org.apache.flink.graph.asm.translate.TranslateGraphIds) ProgramParametrizationException(org.apache.flink.client.program.ProgramParametrizationException) NumberFormat(java.text.NumberFormat)
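
Stripped of the CLI plumbing above, the core library call is a one-liner. The following is a minimal sketch (assumed setup, not part of the driver): it runs the undirected TriangleListing on a generated complete graph, using the CompleteGraph generator from org.apache.flink.graph.generator, where K4 contains exactly four triangles.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// K4: every pair of the four vertices is connected, so there are C(4,3) = 4 triangles.
Graph<LongValue, NullValue, NullValue> graph = new CompleteGraph(env, 4).generate();

DataSet<org.apache.flink.graph.library.clustering.undirected.TriangleListing.Result<LongValue>> triangles =
        graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>());

// Prints one row per triangle as a triple of vertex IDs.
triangles.print();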

Example 25 with NullValue

Use of org.apache.flink.types.NullValue in the Apache Flink project.

Class MusicProfiles, method main.

public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // Read the user-song-play triplets.
    DataSet<Tuple3<String, String, Integer>> triplets = getUserSongTripletsData(env);
    // Read the mismatches dataset and extract the song IDs.
    DataSet<Tuple1<String>> mismatches = getMismatchesData(env).map(new ExtractMismatchSongIds());
    // Filter out the mismatches from the triplets dataset.
    DataSet<Tuple3<String, String, Integer>> validTriplets = triplets.coGroup(mismatches).where(1).equalTo(0).with(new FilterOutMismatches());
    // Create a user -> song weighted bipartite graph where the edge weights correspond to play counts.
    Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(validTriplets, env);
    // Get the top track (most listened to) for each user.
    DataSet<Tuple2<String, String>> usersWithTopTrack = userSongGraph.groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT).filter(new FilterSongNodes());
    if (fileOutput) {
        usersWithTopTrack.writeAsCsv(topTracksOutputPath, "\n", "\t");
    } else {
        usersWithTopTrack.print();
    }
    // Create a user-user similarity graph based on common songs: two users that listen to the
    // same song are connected. For each song, we create an edge between each pair of its in-neighbors.
    DataSet<Edge<String, NullValue>> similarUsers = userSongGraph.getEdges().filter(new FilterFunction<Edge<String, Integer>>() {

        public boolean filter(Edge<String, Integer> edge) {
            return (edge.getValue() > playcountThreshold);
        }
    }).groupBy(1).reduceGroup(new CreateSimilarUserEdges()).distinct();
    Graph<String, Long, NullValue> similarUsersGraph = Graph.fromDataSet(similarUsers, new MapFunction<String, Long>() {

        public Long map(String value) {
            return 1L;
        }
    }, env).getUndirected();
    // Detect user communities using the label propagation library method.
    // Initialize each vertex with a unique numeric label and run the label propagation algorithm
    DataSet<Tuple2<String, Long>> idsWithInitialLabels = DataSetUtils.zipWithUniqueId(similarUsersGraph.getVertexIds()).map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {

        @Override
        public Tuple2<String, Long> map(Tuple2<Long, String> tuple2) throws Exception {
            return new Tuple2<String, Long>(tuple2.f1, tuple2.f0);
        }
    });
    DataSet<Vertex<String, Long>> verticesWithCommunity = similarUsersGraph.joinWithVertices(idsWithInitialLabels, new VertexJoinFunction<Long, Long>() {

        public Long vertexJoin(Long vertexValue, Long inputValue) {
            return inputValue;
        }
    }).run(new LabelPropagation<String, Long, NullValue>(maxIterations));
    if (fileOutput) {
        verticesWithCommunity.writeAsCsv(communitiesOutputPath, "\n", "\t");
        // since file sinks are lazy, we trigger the execution explicitly
        env.execute();
    } else {
        verticesWithCommunity.print();
    }
}
Also used : VertexJoinFunction(org.apache.flink.graph.VertexJoinFunction) Vertex(org.apache.flink.graph.Vertex) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) NullValue(org.apache.flink.types.NullValue) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Edge(org.apache.flink.graph.Edge)
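
The key NullValue usage here is Graph.fromTupleDataSet: the play-count triplets become edge values while every vertex, user and song alike, gets a NullValue. A minimal sketch of just that step, with hypothetical inline data standing in for getUserSongTripletsData and the same imports as the method above:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// Hypothetical (user, song, playCount) triplets standing in for the real dataset.
DataSet<Tuple3<String, String, Integer>> triplets = env.fromElements(
        new Tuple3<>("user_1", "song_a", 12),
        new Tuple3<>("user_1", "song_b", 3),
        new Tuple3<>("user_2", "song_a", 7));

// Edge values carry the play counts; both user and song vertices carry NullValue.
Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(triplets, env);

userSongGraph.getEdges().print();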

Aggregations

NullValue (org.apache.flink.types.NullValue): 49
Test (org.junit.Test): 39
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 33
LongValue (org.apache.flink.types.LongValue): 23
Edge (org.apache.flink.graph.Edge): 18
Vertex (org.apache.flink.graph.Vertex): 18
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 15
Checksum (org.apache.flink.graph.asm.dataset.ChecksumHashCode.Checksum): 13
Graph (org.apache.flink.graph.Graph): 12
DataSet (org.apache.flink.api.java.DataSet): 11
ChecksumHashCode (org.apache.flink.graph.asm.dataset.ChecksumHashCode): 11
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 7
JDKRandomGeneratorFactory (org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory): 7
NumberFormat (java.text.NumberFormat): 6
JobExecutionResult (org.apache.flink.api.common.JobExecutionResult): 6
Plan (org.apache.flink.api.common.Plan): 6
MapFunction (org.apache.flink.api.common.functions.MapFunction): 6
FieldList (org.apache.flink.api.common.operators.util.FieldList): 6
ParameterTool (org.apache.flink.api.java.utils.ParameterTool): 6
ProgramParametrizationException (org.apache.flink.client.program.ProgramParametrizationException): 6