
Example 11 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project grakn by graknlabs.

the class TinkerComputeQueryRunner method run.

public TinkerComputeJob<List<List<Concept>>> run(PathsQuery query) {
    return runCompute(query, tinkerComputeQuery -> {
        ConceptId sourceId = query.from();
        ConceptId destinationId = query.to();
        if (!tinkerComputeQuery.verticesExistInSubgraph(sourceId, destinationId)) {
            throw GraqlQueryException.instanceDoesNotExist();
        }
        if (sourceId.equals(destinationId)) {
            return Collections.singletonList(Collections.singletonList(tx.getConcept(sourceId)));
        }
        ComputerResult result;
        Set<LabelId> subLabelIds = convertLabelsToIds(tinkerComputeQuery.subLabels());
        try {
            result = tinkerComputeQuery.compute(new ShortestPathVertexProgram(sourceId, destinationId), null, subLabelIds);
        } catch (NoResultException e) {
            return Collections.emptyList();
        }
        Multimap<Concept, Concept> predecessorMapFromSource = tinkerComputeQuery.getPredecessorMap(result);
        List<List<Concept>> allPaths = tinkerComputeQuery.getAllPaths(predecessorMapFromSource, sourceId);
        if (tinkerComputeQuery.isAttributeIncluded()) {
            // materialising attribute-extended paths can be slow
            return tinkerComputeQuery.getExtendedPaths(allPaths);
        }
        LOG.info("Number of paths: " + allPaths.size());
        return allPaths;
    });
}
Also used : Concept(ai.grakn.concept.Concept) SchemaConcept(ai.grakn.concept.SchemaConcept) ShortestPathVertexProgram(ai.grakn.graql.internal.analytics.ShortestPathVertexProgram) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) List(java.util.List) LabelId(ai.grakn.concept.LabelId) NoResultException(ai.grakn.graql.internal.analytics.NoResultException) ConceptId(ai.grakn.concept.ConceptId)
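
A minimal usage sketch for the runner above. The names queryRunner and pathsQuery are hypothetical, and it assumes TinkerComputeJob exposes a blocking get(); neither appears in the snippet itself:

// hypothetical consumer of TinkerComputeQueryRunner#run(PathsQuery)
void printAllPaths(TinkerComputeQueryRunner queryRunner, PathsQuery pathsQuery) {
    // run() returns a TinkerComputeJob; get() is assumed to block until the compute job finishes
    List<List<Concept>> paths = queryRunner.run(pathsQuery).get();
    for (List<Concept> path : paths) {
        System.out.println("path of length " + path.size());
    }
}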

Example 12 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project grakn by graknlabs.

the class GraknSparkComputer method submitWithExecutor.

@SuppressWarnings("PMD.UnusedFormalParameter")
private Future<ComputerResult> submitWithExecutor(Executor exec) {
    jobGroupId = Integer.toString(ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE));
    String jobDescription = this.vertexProgram == null ? this.mapReducers.toString() : this.vertexProgram + "+" + this.mapReducers;
    // use a per-job output location (suffixed with the jobGroupId) so concurrent jobs do not collide
    this.sparkConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, this.sparkConfiguration.getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + jobGroupId);
    updateConfigKeys(sparkConfiguration);
    final Future<ComputerResult> result = computerService.submit(() -> {
        final long startTime = System.currentTimeMillis();
        // apache and hadoop configurations that are used throughout the graph computer computation
        final org.apache.commons.configuration.Configuration graphComputerConfiguration = new HadoopConfiguration(this.sparkConfiguration);
        if (!graphComputerConfiguration.containsKey(Constants.SPARK_SERIALIZER)) {
            graphComputerConfiguration.setProperty(Constants.SPARK_SERIALIZER, GryoSerializer.class.getCanonicalName());
        }
        graphComputerConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES, this.persist.equals(GraphComputer.Persist.EDGES));
        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(graphComputerConfiguration);
        final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration);
        final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean skipPartitioner = graphComputerConfiguration.getBoolean(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, false);
        final boolean skipPersist = graphComputerConfiguration.getBoolean(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, false);
        if (inputFromHDFS) {
            String inputLocation = Constants.getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION), fileSystemStorage).orElse(null);
            if (null != inputLocation) {
                try {
                    graphComputerConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath().toString());
                    hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath().toString());
                } catch (final IOException e) {
                    throw new IllegalStateException(e.getMessage(), e);
                }
            }
        }
        final InputRDD inputRDD;
        final OutputRDD outputRDD;
        final boolean filtered;
        try {
            inputRDD = InputRDD.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class)) ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputRDD.class, InputRDD.class).newInstance() : InputFormatRDD.class.newInstance();
            outputRDD = OutputRDD.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class)) ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, OutputRDD.class, OutputRDD.class).newInstance() : OutputFormatRDD.class.newInstance();
            // if the input class can filter on load, then set the filters
            if (inputRDD instanceof InputFormatRDD && GraphFilterAware.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class))) {
                GraphFilterAware.storeGraphFilter(graphComputerConfiguration, hadoopConfiguration, this.graphFilter);
                filtered = false;
            } else if (inputRDD instanceof GraphFilterAware) {
                ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter);
                filtered = false;
            } else
                filtered = this.graphFilter.hasFilter();
        } catch (final InstantiationException | IllegalAccessException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
        // create the spark context from the graph computer configuration
        final JavaSparkContext sparkContext = new JavaSparkContext(Spark.create(hadoopConfiguration));
        final Storage sparkContextStorage = SparkContextStorage.open();
        sparkContext.setJobGroup(jobGroupId, jobDescription);
        GraknSparkMemory memory = null;
        // delete output location
        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
        if (null != outputLocation) {
            if (outputToHDFS && fileSystemStorage.exists(outputLocation)) {
                fileSystemStorage.rm(outputLocation);
            }
            if (outputToSpark && sparkContextStorage.exists(outputLocation)) {
                sparkContextStorage.rm(outputLocation);
            }
        }
        // the Spark application name will always be set by SparkContextStorage,
        // so log the name to make it easier to debug
        logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + (null == this.vertexProgram ? "No VertexProgram" : this.vertexProgram) + "[" + this.mapReducers + "]");
        // add the project jars to the cluster
        this.loadJars(hadoopConfiguration, sparkContext);
        updateLocalConfiguration(sparkContext, hadoopConfiguration);
        // create a message-passing friendly rdd from the input rdd
        boolean partitioned = false;
        JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(graphComputerConfiguration, sparkContext);
        // if there are vertex or edge filters, filter the loaded graph rdd prior to partitioning and persisting
        if (filtered) {
            this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter);
            loadedGraphRDD = GraknSparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter);
        }
        // if the loaded graphRDD already has a partitioner, reuse it; else partition it with HashPartitioner
        if (loadedGraphRDD.partitioner().isPresent()) {
            this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: " + loadedGraphRDD.partitioner().get());
        } else {
            if (!skipPartitioner) {
                final Partitioner partitioner = new HashPartitioner(this.workersSet ? this.workers : loadedGraphRDD.partitions().size());
                this.logger.debug("Partitioning the loaded graphRDD: " + partitioner);
                loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner);
                partitioned = true;
                assert loadedGraphRDD.partitioner().isPresent();
            } else {
                // no easy way to test this with a test case
                assert skipPartitioner == !loadedGraphRDD.partitioner().isPresent();
                this.logger.debug("Partitioning has been skipped for the loaded graphRDD via " + Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            }
        }
        // if the loaded graphRDD's partition count already matches the worker count, this coalesce/repartition will not take place
        if (this.workersSet) {
            // ensures that the loaded graphRDD does not have more partitions than workers
            if (loadedGraphRDD.partitions().size() > this.workers) {
                loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
            } else {
                // ensures that the loaded graphRDD does not have fewer partitions than workers
                if (loadedGraphRDD.partitions().size() < this.workers) {
                    loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
                }
            }
        }
        // persist the loaded graphRDD at the configured storage level, or else use the default cache(), which is MEMORY_ONLY
        if (!skipPersist && (!inputFromSpark || partitioned || filtered)) {
            loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString(hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
        }
        // final graph with view
        // (for persisting and/or mapReducing -- may be null and thus, possible to save space/time)
        JavaPairRDD<Object, VertexWritable> computedGraphRDD = null;
        try {
            // //////////////////////////////
            if (null != this.vertexProgram) {
                memory = new GraknSparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                // if there is a registered VertexProgramInterceptor, use it to bypass the GraphComputer semantics
                if (graphComputerConfiguration.containsKey(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)) {
                    try {
                        final GraknSparkVertexProgramInterceptor<VertexProgram> interceptor = (GraknSparkVertexProgramInterceptor) Class.forName(graphComputerConfiguration.getString(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)).newInstance();
                        computedGraphRDD = interceptor.apply(this.vertexProgram, loadedGraphRDD, memory);
                    } catch (final ClassNotFoundException | IllegalAccessException | InstantiationException e) {
                        throw new IllegalStateException(e.getMessage(), e);
                    }
                } else {
                    // standard GraphComputer semantics
                    // get a configuration that will be propagated to all workers
                    final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration();
                    this.vertexProgram.storeState(vertexProgramConfiguration);
                    // set up the vertex program and wire up configurations
                    this.vertexProgram.setup(memory);
                    JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null;
                    memory.broadcastMemory(sparkContext);
                    // execute the vertex program
                    while (true) {
                        if (Thread.interrupted()) {
                            sparkContext.cancelAllJobs();
                            throw new TraversalInterruptedException();
                        }
                        memory.setInExecute(true);
                        viewIncomingRDD = GraknSparkExecutor.executeVertexProgramIteration(loadedGraphRDD, viewIncomingRDD, memory, graphComputerConfiguration, vertexProgramConfiguration);
                        memory.setInExecute(false);
                        if (this.vertexProgram.terminate(memory)) {
                            break;
                        } else {
                            memory.incrIteration();
                            memory.broadcastMemory(sparkContext);
                        }
                    }
                    // if the computed graph will be used afterwards (persisted or mapReduced), then generate a view+graph
                    if ((null != outputRDD && !this.persist.equals(Persist.NOTHING)) || !this.mapReducers.isEmpty()) {
                        computedGraphRDD = GraknSparkExecutor.prepareFinalGraphRDD(loadedGraphRDD, viewIncomingRDD, this.vertexProgram.getVertexComputeKeys());
                        assert null != computedGraphRDD && computedGraphRDD != loadedGraphRDD;
                    } else {
                        // ensure that the computedGraphRDD was not created
                        assert null == computedGraphRDD;
                    }
                }
                // ///////////////
                // drop all transient memory keys
                memory.complete();
                // write the computed graph to the respective output (rdd or output format)
                if (null != outputRDD && !this.persist.equals(Persist.NOTHING)) {
                    // the logic holds that a computedGraphRDD must be created at this point
                    assert null != computedGraphRDD;
                    outputRDD.writeGraphRDD(graphComputerConfiguration, computedGraphRDD);
                }
            }
            final boolean computedGraphCreated = computedGraphRDD != null && computedGraphRDD != loadedGraphRDD;
            if (!computedGraphCreated) {
                computedGraphRDD = loadedGraphRDD;
            }
            final Memory.Admin finalMemory = null == memory ? new MapMemory() : new MapMemory(memory);
            // ////////////////////////////
            if (!this.mapReducers.isEmpty()) {
                // create a mapReduceRDD for executing the map reduce jobs on
                JavaPairRDD<Object, VertexWritable> mapReduceRDD = computedGraphRDD;
                if (computedGraphCreated && !outputToSpark) {
                    // drop all the edges of the graph as they are not used in mapReduce processing
                    mapReduceRDD = computedGraphRDD.mapValues(vertexWritable -> {
                        vertexWritable.get().dropEdges(Direction.BOTH);
                        return vertexWritable;
                    });
                    // persist only when more than one MapReduce will run; with a single MapReduce, caching wastes clock cycles
                    if (this.mapReducers.size() > 1) {
                        mapReduceRDD = mapReduceRDD.persist(StorageLevel.fromString(hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
                    }
                }
                for (final MapReduce mapReduce : this.mapReducers) {
                    // execute the map reduce job
                    final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration(graphComputerConfiguration);
                    mapReduce.storeState(newApacheConfiguration);
                    // map
                    final JavaPairRDD mapRDD = GraknSparkExecutor.executeMap(mapReduceRDD, mapReduce, newApacheConfiguration);
                    // combine
                    final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE) ? GraknSparkExecutor.executeCombine(mapRDD, newApacheConfiguration) : mapRDD;
                    // reduce
                    final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE) ? GraknSparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration) : combineRDD;
                    // write the map reduce output back to disk and computer result memory
                    if (null != outputRDD) {
                        mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD(graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD));
                    }
                }
                // if the mapReduceRDD is not simply the computed graph, unpersist the mapReduceRDD
                if (computedGraphCreated && !outputToSpark) {
                    assert loadedGraphRDD != computedGraphRDD;
                    assert mapReduceRDD != computedGraphRDD;
                    mapReduceRDD.unpersist();
                } else {
                    assert mapReduceRDD == computedGraphRDD;
                }
            }
            // if the graphRDD was loaded from Spark, but then partitioned or filtered, it's a different RDD
            if (!inputFromSpark || partitioned || filtered) {
                loadedGraphRDD.unpersist();
            }
            // unless the computed graph is being kept in Spark for downstream use, unpersist the computedGraphRDD
            if ((!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING)) && computedGraphCreated) {
                computedGraphRDD.unpersist();
            }
            // delete any file system or rdd data if persist nothing
            if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) {
                if (outputToHDFS) {
                    fileSystemStorage.rm(outputLocation);
                }
                if (outputToSpark) {
                    sparkContextStorage.rm(outputLocation);
                }
            }
            // update runtime and return the newly computed graph
            finalMemory.setRuntime(System.currentTimeMillis() - startTime);
            // clear properties that should not be propagated in an OLAP chain
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_GRAPH_FILTER);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            return new DefaultComputerResult(InputOutputHelper.getOutputGraph(graphComputerConfiguration, this.resultGraph, this.persist), finalMemory.asImmutable());
        } catch (Exception e) {
            // rethrow as RuntimeException so the same exception type surfaces as in the TinkerPop computer
            throw new RuntimeException(e);
        }
    });
    computerService.shutdown();
    return result;
}
Also used : InputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.InputRDD) PersistedInputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.PersistedInputRDD) TraversalInterruptedException(org.apache.tinkerpop.gremlin.process.traversal.util.TraversalInterruptedException) GryoSerializer(org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer) FileSystem(org.apache.hadoop.fs.FileSystem) GraphFilterAware(org.apache.tinkerpop.gremlin.hadoop.structure.io.GraphFilterAware) GraphComputer(org.apache.tinkerpop.gremlin.process.computer.GraphComputer) LoggerFactory(org.slf4j.LoggerFactory) SparkContextStorage(org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorage) Future(java.util.concurrent.Future) Partitioner(org.apache.spark.Partitioner) StorageLevel(org.apache.spark.storage.StorageLevel) Constants(org.apache.tinkerpop.gremlin.hadoop.Constants) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) ThreadFactory(java.util.concurrent.ThreadFactory) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) HadoopConfiguration(org.apache.tinkerpop.gremlin.hadoop.structure.HadoopConfiguration) OutputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.OutputRDD) HashPartitioner(org.apache.spark.HashPartitioner) Set(java.util.Set) BasicThreadFactory(org.apache.commons.lang3.concurrent.BasicThreadFactory) Executors(java.util.concurrent.Executors) SparkSingleIterationStrategy(org.apache.tinkerpop.gremlin.spark.process.computer.traversal.strategy.optimization.SparkSingleIterationStrategy) Memory(org.apache.tinkerpop.gremlin.process.computer.Memory) OutputFormatRDD(org.apache.tinkerpop.gremlin.spark.structure.io.OutputFormatRDD) InputFormatRDD(org.apache.tinkerpop.gremlin.spark.structure.io.InputFormatRDD) MapMemory(org.apache.tinkerpop.gremlin.process.computer.util.MapMemory) FileConfiguration(org.apache.commons.configuration.FileConfiguration) TraversalStrategies(org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategies) ConfigurationUtils(org.apache.commons.configuration.ConfigurationUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) ComputerSubmissionHelper(org.apache.tinkerpop.gremlin.hadoop.process.computer.util.ComputerSubmissionHelper) VertexProgram(org.apache.tinkerpop.gremlin.process.computer.VertexProgram) HashSet(java.util.HashSet) VertexWritable(org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) AbstractHadoopGraphComputer(org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer) FileInputFormat(org.apache.hadoop.mapreduce.lib.input.FileInputFormat) ExecutorService(java.util.concurrent.ExecutorService) FileSystemStorage(org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage) ConfUtil(org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil) ViewIncomingPayload(org.apache.tinkerpop.gremlin.spark.process.computer.payload.ViewIncomingPayload) Logger(org.slf4j.Logger) SparkLauncher(org.apache.spark.launcher.SparkLauncher) InputFormat(org.apache.hadoop.mapreduce.InputFormat) InputOutputHelper(org.apache.tinkerpop.gremlin.spark.structure.io.InputOutputHelper) Spark(org.apache.tinkerpop.gremlin.spark.structure.Spark) IOException(java.io.IOException) SparkInterceptorStrategy(org.apache.tinkerpop.gremlin.spark.process.computer.traversal.strategy.optimization.SparkInterceptorStrategy) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) File(java.io.File) PersistedOutputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) Direction(org.apache.tinkerpop.gremlin.structure.Direction) HadoopGraph(org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph) Storage(org.apache.tinkerpop.gremlin.structure.io.Storage) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce)
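
Since submitWithExecutor() hands back a Future, a caller blocks on it and then reads the two halves of the TinkerPop ComputerResult, graph() and memory(). A minimal consumption sketch; the computer variable is assumed to be a configured GraknSparkComputer whose submit() delegates to the method above (the usual ComputerSubmissionHelper arrangement):

Future<ComputerResult> pending = computer.submit();
ComputerResult result = pending.get();       // blocks until the Spark job group completes
Graph resultGraph = result.graph();          // the output graph selected via resultGraph/persist
Memory memory = result.memory();             // runtime, iteration count, and MapReduce results
System.out.println("runtime (ms): " + memory.getRuntime());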

Example 13 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project janusgraph by JanusGraph.

the class OLAPTest method degreeCounting.

@Test
public void degreeCounting() throws Exception {
    int numV = 200;
    int numE = generateRandomGraph(numV);
    clopen();
    final JanusGraphComputer computer = graph.compute();
    computer.resultMode(JanusGraphComputer.ResultMode.NONE);
    computer.workers(4);
    computer.program(new DegreeCounter());
    computer.mapReduce(new DegreeMapper());
    ComputerResult result = computer.submit().get();
    System.out.println("Execution time (ms) [" + numV + "|" + numE + "]: " + result.memory().getRuntime());
    assertTrue(result.memory().exists(DegreeMapper.DEGREE_RESULT));
    Map<Long, Integer> degrees = result.memory().get(DegreeMapper.DEGREE_RESULT);
    assertNotNull(degrees);
    assertEquals(numV, degrees.size());
    int totalCount = 0;
    for (Map.Entry<Long, Integer> entry : degrees.entrySet()) {
        int degree = entry.getValue();
        final JanusGraphVertex v = getV(tx, entry.getKey());
        int count = v.value("uid");
        assertEquals(count, degree);
        totalCount += degree;
    }
    assertEquals(numV * (numV + 1) / 2, totalCount);
    assertEquals(1, result.memory().getIteration());
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) JanusGraphVertex(org.janusgraph.core.JanusGraphVertex) Map(java.util.Map) HashMap(java.util.HashMap) JanusGraphComputer(org.janusgraph.core.JanusGraphComputer) Test(org.junit.jupiter.api.Test) JanusGraphBaseTest(org.janusgraph.graphdb.JanusGraphBaseTest)
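
For comparison, the same submission through the plain TinkerPop GraphComputer fluent API looks like this. A sketch only, against some Graph instance named graph; mapping resultMode(NONE) to persist(Persist.NOTHING) is our interpretation, not a documented equivalence:

ComputerResult result = graph.compute()
        .workers(4)
        .persist(GraphComputer.Persist.NOTHING)   // assumed analogue of JanusGraphComputer.ResultMode.NONE
        .program(new DegreeCounter())             // DegreeCounter/DegreeMapper come from the test fixture
        .mapReduce(new DegreeMapper())
        .submit()                                 // returns Future<ComputerResult>
        .get();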

Example 14 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project janusgraph by JanusGraph.

the class OLAPTest method degreeCountingDistance.

@Test
public void degreeCountingDistance() throws Exception {
    int numV = 100;
    int numE = generateRandomGraph(numV);
    clopen();
    // TODO does this iteration over JanusGraphComputer.ResultMode values imply that DegreeVariation's ResultGraph/Persist should also change?
    for (JanusGraphComputer.ResultMode mode : JanusGraphComputer.ResultMode.values()) {
        final JanusGraphComputer computer = graph.compute();
        computer.resultMode(mode);
        computer.workers(1);
        computer.program(new DegreeCounter(2));
        ComputerResult result = computer.submit().get();
        System.out.println("Execution time (ms) [" + numV + "|" + numE + "]: " + result.memory().getRuntime());
        assertEquals(2, result.memory().getIteration());
        Transaction gview = null;
        switch(mode) {
            case LOCALTX:
                gview = (Transaction) result.graph();
                break;
            case PERSIST:
                newTx();
                gview = tx;
                break;
            case NONE:
                break;
            default:
                throw new AssertionError(mode);
        }
        if (gview == null)
            continue;
        for (JanusGraphVertex v : gview.query().vertices()) {
            long degree2 = ((Integer) v.value(DegreeCounter.DEGREE)).longValue();
            long actualDegree2 = 0;
            for (JanusGraphVertex w : v.query().direction(Direction.OUT).vertices()) {
                actualDegree2 += Iterables.size(w.query().direction(Direction.OUT).vertices());
            }
            assertEquals(actualDegree2, degree2);
        }
        if (mode == JanusGraphComputer.ResultMode.LOCALTX) {
            assertTrue(gview instanceof JanusGraphTransaction);
            ((JanusGraphTransaction) gview).rollback();
        }
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) JanusGraphTransaction(org.janusgraph.core.JanusGraphTransaction) Transaction(org.janusgraph.core.Transaction) JanusGraphTransaction(org.janusgraph.core.JanusGraphTransaction) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) JanusGraphVertex(org.janusgraph.core.JanusGraphVertex) JanusGraphComputer(org.janusgraph.core.JanusGraphComputer) Test(org.junit.jupiter.api.Test) JanusGraphBaseTest(org.janusgraph.graphdb.JanusGraphBaseTest)
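
The switch on ResultMode reduces to a small helper. A sketch of our own refactoring, not project code; it leans on the test fixture's newTx() and tx, and returns the graph view the assertions should run against:

// hypothetical helper extracted from the test above
private Transaction viewFor(JanusGraphComputer.ResultMode mode, ComputerResult result) {
    switch (mode) {
        case LOCALTX:
            return (Transaction) result.graph();  // the computation wrote into a local transaction view
        case PERSIST:
            newTx();                              // results were persisted; read them through a fresh tx
            return tx;
        case NONE:
            return null;                          // nothing was written, so there is nothing to inspect
        default:
            throw new AssertionError(mode);
    }
}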

Example 15 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project janusgraph by JanusGraph.

the class OLAPTest method testShortestDistance.

@Test
public void testShortestDistance() throws Exception {
    PropertyKey distance = mgmt.makePropertyKey("distance").dataType(Integer.class).cardinality(Cardinality.SINGLE).make();
    mgmt.makeEdgeLabel("connect").signature(distance).multiplicity(Multiplicity.MULTI).make();
    finishSchema();
    int maxDepth = 16;
    int maxBranch = 5;
    JanusGraphVertex vertex = tx.addVertex();
    // Grow a star-shaped graph around vertex which will be the single-source for this shortest path computation
    final int numV = growVertex(vertex, 0, maxDepth, maxBranch);
    final int numE = numV - 1;
    assertCount(numV, tx.query().vertices());
    assertCount(numE, tx.query().edges());
    log.debug("seed inE count: {}", vertex.query().direction(Direction.IN).edgeCount());
    log.debug("seed outE count: {}", vertex.query().direction(Direction.OUT).edgeCount());
    clopen();
    final JanusGraphComputer computer = graph.compute();
    computer.resultMode(JanusGraphComputer.ResultMode.NONE);
    computer.workers(4);
    computer.program(ShortestDistanceVertexProgram.build().seed((long) vertex.id()).maxDepth(maxDepth + 4).create(graph));
    computer.mapReduce(ShortestDistanceMapReduce.build().create());
    ComputerResult result = computer.submit().get();
    Iterator<KeyValue<Long, Long>> distances = result.memory().get(ShortestDistanceMapReduce.DEFAULT_MEMORY_KEY);
    int vertexCount = 0;
    while (distances.hasNext()) {
        final KeyValue<Long, Long> kv = distances.next();
        final long dist = kv.getValue();
        assertTrue(dist >= 0 && dist < Integer.MAX_VALUE, "Invalid distance: " + dist);
        JanusGraphVertex v = getV(tx, kv.getKey());
        assertEquals(v.<Integer>value("distance").intValue(), dist);
        vertexCount++;
    }
    assertEquals(numV, vertexCount);
    assertTrue(0 < vertexCount);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) KeyValue(org.apache.tinkerpop.gremlin.process.computer.KeyValue) JanusGraphVertex(org.janusgraph.core.JanusGraphVertex) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) PropertyKey(org.janusgraph.core.PropertyKey) JanusGraphComputer(org.janusgraph.core.JanusGraphComputer) Test(org.junit.jupiter.api.Test) JanusGraphBaseTest(org.janusgraph.graphdb.JanusGraphBaseTest)
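
One detail the test glosses over: MapReduce output lands in the ComputerResult memory under the reducer's memory key, so a defensive consumer checks exists() before get(). A minimal sketch using the TinkerPop Memory and KeyValue APIs, with result as produced above:

Memory memory = result.memory();
if (memory.exists(ShortestDistanceMapReduce.DEFAULT_MEMORY_KEY)) {
    Iterator<KeyValue<Long, Long>> distances = memory.get(ShortestDistanceMapReduce.DEFAULT_MEMORY_KEY);
    distances.forEachRemaining(kv ->
            System.out.println("vertex " + kv.getKey() + " is at distance " + kv.getValue()));
}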

Aggregations

ComputerResult (org.apache.tinkerpop.gremlin.process.computer.ComputerResult)15 LabelId (ai.grakn.concept.LabelId)7 Map (java.util.Map)5 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)5 JanusGraphComputer (org.janusgraph.core.JanusGraphComputer)5 AttributeType (ai.grakn.concept.AttributeType)4 List (java.util.List)4 MapReduce (org.apache.tinkerpop.gremlin.process.computer.MapReduce)4 Concept (ai.grakn.concept.Concept)3 ConceptId (ai.grakn.concept.ConceptId)3 SchemaConcept (ai.grakn.concept.SchemaConcept)3 ClusterMemberMapReduce (ai.grakn.graql.internal.analytics.ClusterMemberMapReduce)3 NoResultException (ai.grakn.graql.internal.analytics.NoResultException)3 JanusGraphVertex (org.janusgraph.core.JanusGraphVertex)3 JanusGraphBaseTest (org.janusgraph.graphdb.JanusGraphBaseTest)3 Test (org.junit.jupiter.api.Test)3 ComputeJob (ai.grakn.ComputeJob)2 GraknComputer (ai.grakn.GraknComputer)2 Label (ai.grakn.concept.Label)2 Type (ai.grakn.concept.Type)2