Search in sources :

Example 1 with VertexComputeKey

use of org.apache.tinkerpop.gremlin.process.computer.VertexComputeKey in project janusgraph by JanusGraph.

the class FulgoraGraphComputer method submit.

@Override
public Future<ComputerResult> submit() {
    if (executed)
        throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
    else
        executed = true;
    // it is not possible execute a computer if it has no vertex program nor map-reducers
    if (null == vertexProgram && mapReduces.isEmpty())
        throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
    // it is possible to run map-reducers without a vertex program
    if (null != vertexProgram) {
        GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
        this.mapReduces.addAll(this.vertexProgram.getMapReducers());
    }
    // if the user didn't set desired persistence/resultgraph, then get from vertex program or else, no persistence
    this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persistMode));
    this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraphMode));
    // determine the legality persistence and result graph options
    if (!this.features().supportsResultGraphPersistCombination(this.resultGraphMode, this.persistMode))
        throw GraphComputer.Exceptions.resultGraphPersistCombinationNotSupported(this.resultGraphMode, this.persistMode);
    // ensure requested workers are not larger than supported workers
    if (this.numThreads > this.features().getMaxWorkers())
        throw GraphComputer.Exceptions.computerRequiresMoreWorkersThanSupported(this.numThreads, this.features().getMaxWorkers());
    memory = new FulgoraMemory(vertexProgram, mapReduces);
    return CompletableFuture.supplyAsync(() -> {
        final long time = System.currentTimeMillis();
        if (null != vertexProgram) {
            // ##### Execute vertex program
            vertexMemory = new FulgoraVertexMemory(expectedNumVertices, graph.getIDManager(), vertexProgram);
            // execute the vertex program
            vertexProgram.setup(memory);
            try (VertexProgramScanJob.Executor job = VertexProgramScanJob.getVertexProgramScanJob(graph, memory, vertexMemory, vertexProgram)) {
                for (int iteration = 1; ; iteration++) {
                    memory.completeSubRound();
                    vertexMemory.nextIteration(vertexProgram.getMessageScopes(memory));
                    jobId = name + "#" + iteration;
                    StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
                    scanBuilder.setJobId(jobId);
                    scanBuilder.setNumProcessingThreads(numThreads);
                    scanBuilder.setWorkBlockSize(readBatchSize);
                    scanBuilder.setJob(job);
                    PartitionedVertexProgramExecutor programExecutor = new PartitionedVertexProgramExecutor(graph, memory, vertexMemory, vertexProgram);
                    try {
                        // Iterates over all vertices and computes the vertex program on all non-partitioned vertices. For partitioned ones, the data is aggregated
                        ScanMetrics jobResult = scanBuilder.execute().get();
                        long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                        if (failures > 0) {
                            throw new JanusGraphException("Failed to process [" + failures + "] vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                        }
                        // Runs the vertex program on all aggregated, partitioned vertices.
                        programExecutor.run(numThreads, jobResult);
                        failures = jobResult.getCustom(PartitionedVertexProgramExecutor.PARTITION_VERTEX_POSTFAIL);
                        if (failures > 0) {
                            throw new JanusGraphException("Failed to process [" + failures + "] partitioned vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                        }
                    } catch (Exception e) {
                        throw new JanusGraphException(e);
                    }
                    vertexMemory.completeIteration();
                    memory.completeSubRound();
                    try {
                        if (this.vertexProgram.terminate(this.memory)) {
                            break;
                        }
                    } finally {
                        memory.incrIteration();
                    }
                }
            }
        }
        // ##### Execute map-reduce jobs
        // Collect map jobs
        Map<MapReduce, FulgoraMapEmitter> mapJobs = new HashMap<>(mapReduces.size());
        for (MapReduce mapReduce : mapReduces) {
            if (mapReduce.doStage(MapReduce.Stage.MAP)) {
                FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(mapReduce.doStage(MapReduce.Stage.REDUCE));
                mapJobs.put(mapReduce, mapEmitter);
            }
        }
        // Execute map jobs
        jobId = name + "#map";
        try (VertexMapJob.Executor job = VertexMapJob.getVertexMapJob(graph, vertexMemory, mapJobs)) {
            StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
            scanBuilder.setJobId(jobId);
            scanBuilder.setNumProcessingThreads(numThreads);
            scanBuilder.setWorkBlockSize(readBatchSize);
            scanBuilder.setJob(job);
            try {
                ScanMetrics jobResult = scanBuilder.execute().get();
                long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                if (failures > 0) {
                    throw new JanusGraphException("Failed to process [" + failures + "] vertices in map phase. Computer is aborting.");
                }
                failures = jobResult.getCustom(VertexMapJob.MAP_JOB_FAILURE);
                if (failures > 0) {
                    throw new JanusGraphException("Failed to process [" + failures + "] individual map jobs. Computer is aborting.");
                }
            } catch (Exception e) {
                throw new JanusGraphException(e);
            }
            // Execute reduce phase and add to memory
            for (Map.Entry<MapReduce, FulgoraMapEmitter> mapJob : mapJobs.entrySet()) {
                FulgoraMapEmitter<?, ?> mapEmitter = mapJob.getValue();
                MapReduce mapReduce = mapJob.getKey();
                // sort results if a map output sort is defined
                mapEmitter.complete(mapReduce);
                if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
                    final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>();
                    try (WorkerPool workers = new WorkerPool(numThreads)) {
                        workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE));
                        for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) {
                            if (null == queueEntry)
                                break;
                            workers.submit(() -> mapReduce.reduce(queueEntry.getKey(), ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter));
                        }
                        workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE));
                    } catch (Exception e) {
                        throw new JanusGraphException("Exception while executing reduce phase", e);
                    }
                    // mapEmitter.reduceMap.entrySet().parallelStream().forEach(entry -> mapReduce.reduce(entry.getKey(), entry.getValue().iterator(), reduceEmitter));
                    // sort results if a reduce output sort is defined
                    reduceEmitter.complete(mapReduce);
                    mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator());
                } else {
                    mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());
                }
            }
        }
        memory.attachReferenceElements(graph);
        // #### Write mutated properties back into graph
        Graph resultgraph = graph;
        if (persistMode == Persist.NOTHING && resultGraphMode == ResultGraph.NEW) {
            resultgraph = EmptyGraph.instance();
        } else if (persistMode != Persist.NOTHING && vertexProgram != null && !vertexProgram.getVertexComputeKeys().isEmpty()) {
            // First, create property keys in graph if they don't already exist
            JanusGraphManagement management = graph.openManagement();
            try {
                for (VertexComputeKey key : vertexProgram.getVertexComputeKeys()) {
                    if (!management.containsPropertyKey(key.getKey()))
                        log.warn("Property key [{}] is not part of the schema and will be created. It is advised to initialize all keys.", key.getKey());
                    management.getOrCreatePropertyKey(key.getKey());
                }
                management.commit();
            } finally {
                if (management != null && management.isOpen())
                    management.rollback();
            }
            // TODO: Filter based on VertexProgram
            Map<Long, Map<String, Object>> mutatedProperties = Maps.transformValues(vertexMemory.getMutableVertexProperties(), new Function<Map<String, Object>, Map<String, Object>>() {

                @Nullable
                @Override
                public Map<String, Object> apply(final Map<String, Object> o) {
                    return Maps.filterKeys(o, s -> !VertexProgramHelper.isTransientVertexComputeKey(s, vertexProgram.getVertexComputeKeys()));
                }
            });
            if (resultGraphMode == ResultGraph.ORIGINAL) {
                AtomicInteger failures = new AtomicInteger(0);
                try (WorkerPool workers = new WorkerPool(numThreads)) {
                    List<Map.Entry<Long, Map<String, Object>>> subset = new ArrayList<>(writeBatchSize / vertexProgram.getVertexComputeKeys().size());
                    int currentSize = 0;
                    for (Map.Entry<Long, Map<String, Object>> entry : mutatedProperties.entrySet()) {
                        subset.add(entry);
                        currentSize += entry.getValue().size();
                        if (currentSize >= writeBatchSize) {
                            workers.submit(new VertexPropertyWriter(subset, failures));
                            subset = new ArrayList<>(subset.size());
                            currentSize = 0;
                        }
                    }
                    if (!subset.isEmpty())
                        workers.submit(new VertexPropertyWriter(subset, failures));
                } catch (Exception e) {
                    throw new JanusGraphException("Exception while attempting to persist result into graph", e);
                }
                if (failures.get() > 0)
                    throw new JanusGraphException("Could not persist program results to graph. Check log for details.");
            } else if (resultGraphMode == ResultGraph.NEW) {
                resultgraph = graph.newTransaction();
                for (Map.Entry<Long, Map<String, Object>> vertexProperty : mutatedProperties.entrySet()) {
                    Vertex v = resultgraph.vertices(vertexProperty.getKey()).next();
                    for (Map.Entry<String, Object> prop : vertexProperty.getValue().entrySet()) {
                        if (prop.getValue() instanceof List) {
                            ((List) prop.getValue()).forEach(value -> v.property(VertexProperty.Cardinality.list, prop.getKey(), value));
                        } else {
                            v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
                        }
                    }
                }
            }
        }
        // update runtime and return the newly computed graph
        this.memory.setRuntime(System.currentTimeMillis() - time);
        this.memory.complete();
        return new DefaultComputerResult(resultgraph, this.memory);
    });
}
Also used : JanusGraphManagement(org.janusgraph.core.schema.JanusGraphManagement) Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) HashMap(java.util.HashMap) JanusGraphException(org.janusgraph.core.JanusGraphException) ArrayList(java.util.ArrayList) ScanMetrics(org.janusgraph.diskstorage.keycolumnvalue.scan.ScanMetrics) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce) Function(com.google.common.base.Function) ArrayList(java.util.ArrayList) List(java.util.List) JanusGraphException(org.janusgraph.core.JanusGraphException) WorkerPool(org.janusgraph.graphdb.util.WorkerPool) Graph(org.apache.tinkerpop.gremlin.structure.Graph) StandardJanusGraph(org.janusgraph.graphdb.database.StandardJanusGraph) EmptyGraph(org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph) StandardScanner(org.janusgraph.diskstorage.keycolumnvalue.scan.StandardScanner) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) VertexComputeKey(org.apache.tinkerpop.gremlin.process.computer.VertexComputeKey) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

Function (com.google.common.base.Function)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 MapReduce (org.apache.tinkerpop.gremlin.process.computer.MapReduce)1 VertexComputeKey (org.apache.tinkerpop.gremlin.process.computer.VertexComputeKey)1 DefaultComputerResult (org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult)1 Graph (org.apache.tinkerpop.gremlin.structure.Graph)1 Vertex (org.apache.tinkerpop.gremlin.structure.Vertex)1 EmptyGraph (org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph)1 JanusGraphException (org.janusgraph.core.JanusGraphException)1 JanusGraphManagement (org.janusgraph.core.schema.JanusGraphManagement)1 ScanMetrics (org.janusgraph.diskstorage.keycolumnvalue.scan.ScanMetrics)1 StandardScanner (org.janusgraph.diskstorage.keycolumnvalue.scan.StandardScanner)1 StandardJanusGraph (org.janusgraph.graphdb.database.StandardJanusGraph)1 WorkerPool (org.janusgraph.graphdb.util.WorkerPool)1