Search in sources :

Example 1 with MapReduce

use of org.apache.tinkerpop.gremlin.process.computer.MapReduce in project titan by thinkaurelius.

the class FulgoraGraphComputer method submit.

/**
 * Validates this computer's configuration and starts the computation asynchronously.
 *
 * <p>Done synchronously on the calling thread: reject a second submission, require a
 * vertex program or at least one map-reduce job, add the vertex program's map-reducers
 * to {@code mapReduces}, resolve the persist and result-graph modes, check that the
 * combination is supported, and create the {@link FulgoraMemory}.
 *
 * <p>Done inside {@code CompletableFuture.supplyAsync} (no explicit executor is passed):
 * <ol>
 *   <li>run the vertex program (if any) in iterations until {@code terminate} returns true,</li>
 *   <li>run the map stage of all map-reduce jobs in a single scan job,</li>
 *   <li>run the reduce stage per job and add results to memory,</li>
 *   <li>write computed vertex properties back, depending on persist/result-graph mode.</li>
 * </ol>
 * Failures inside the asynchronous part are thrown as {@link TitanException} from the supplier.
 *
 * @return a future that yields a {@link DefaultComputerResult} holding the result graph and the memory
 */
@Override
public Future<ComputerResult> submit() {
    // a computer instance may only be submitted once; the flag is set on the first call
    if (executed)
        throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
    else
        executed = true;
    // it is not possible to execute a computer if it has no vertex program nor mapreducers
    if (null == vertexProgram && mapReduces.isEmpty())
        throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
    // it is possible to run mapreducers without a vertex program
    if (null != vertexProgram) {
        GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
        // the vertex program's own map-reducers are run together with the explicitly registered ones
        this.mapReduces.addAll(this.vertexProgram.getMapReducers());
    }
    // if the user didn't set desired persistence/resultgraph, then get from vertex program or else, no persistence
    this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persistMode));
    this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraphMode));
    // determine the legality of the persistence and result graph options
    if (!this.features().supportsResultGraphPersistCombination(this.resultGraphMode, this.persistMode))
        throw GraphComputer.Exceptions.resultGraphPersistCombinationNotSupported(this.resultGraphMode, this.persistMode);
    memory = new FulgoraMemory(vertexProgram, mapReduces);
    return CompletableFuture.<ComputerResult>supplyAsync(() -> {
        // start of the wall-clock measurement reported through memory.setRuntime(...) at the end
        final long time = System.currentTimeMillis();
        if (null != vertexProgram) {
            // ##### Execute vertex program
            vertexMemory = new FulgoraVertexMemory(expectedNumVertices, graph.getIDManager(), vertexProgram);
            // execute the vertex program
            vertexProgram.setup(memory);
            memory.completeSubRound();
            // no upper bound on iterations: the loop only ends via the break below or an exception
            for (int iteration = 1; ; iteration++) {
                vertexMemory.nextIteration(vertexProgram.getMessageScopes(memory));
                jobId = name + "#" + iteration;
                VertexProgramScanJob.Executor job = VertexProgramScanJob.getVertexProgramScanJob(graph, memory, vertexMemory, vertexProgram);
                StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
                scanBuilder.setJobId(jobId);
                scanBuilder.setNumProcessingThreads(numThreads);
                scanBuilder.setWorkBlockSize(readBatchSize);
                scanBuilder.setJob(job);
                PartitionedVertexProgramExecutor pvpe = new PartitionedVertexProgramExecutor(graph, memory, vertexMemory, vertexProgram);
                try {
                    //Iterates over all vertices and computes the vertex program on all non-partitioned vertices. For partitioned ones, the data is aggregated
                    // blocks until the scan job for this iteration has finished
                    ScanMetrics jobResult = scanBuilder.execute().get();
                    long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                    if (failures > 0) {
                        throw new TitanException("Failed to process [" + failures + "] vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                    }
                    //Runs the vertex program on all aggregated, partitioned vertices.
                    pvpe.run(numThreads, jobResult);
                    failures = jobResult.getCustom(PartitionedVertexProgramExecutor.PARTITION_VERTEX_POSTFAIL);
                    if (failures > 0) {
                        throw new TitanException("Failed to process [" + failures + "] partitioned vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                    }
                } catch (Exception e) {
                    // NOTE(review): this also catches the TitanExceptions thrown inside the try above,
                    // so those end up wrapped in a second TitanException - confirm this is intended
                    throw new TitanException(e);
                }
                vertexMemory.completeIteration();
                memory.completeSubRound();
                try {
                    if (this.vertexProgram.terminate(this.memory)) {
                        break;
                    }
                } finally {
                    // runs on every pass, including the terminating one (and if terminate() throws)
                    memory.incrIteration();
                    memory.completeSubRound();
                }
            }
        }
        // ##### Execute mapreduce jobs
        // Collect map jobs
        Map<MapReduce, FulgoraMapEmitter> mapJobs = new HashMap<>(mapReduces.size());
        for (MapReduce mapReduce : mapReduces) {
            // only jobs with a MAP stage take part; the emitter is told whether a REDUCE stage follows
            if (mapReduce.doStage(MapReduce.Stage.MAP)) {
                FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(mapReduce.doStage(MapReduce.Stage.REDUCE));
                mapJobs.put(mapReduce, mapEmitter);
            }
        }
        // Execute map jobs
        jobId = name + "#map";
        // within this method vertexMemory is only assigned when a vertex program is present
        VertexMapJob.Executor job = VertexMapJob.getVertexMapJob(graph, vertexMemory, mapJobs);
        StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
        scanBuilder.setJobId(jobId);
        scanBuilder.setNumProcessingThreads(numThreads);
        scanBuilder.setWorkBlockSize(readBatchSize);
        scanBuilder.setJob(job);
        try {
            ScanMetrics jobResult = scanBuilder.execute().get();
            // two failure counters are checked: whole-vertex failures and individual map job failures
            long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
            if (failures > 0) {
                throw new TitanException("Failed to process [" + failures + "] vertices in map phase. Computer is aborting.");
            }
            failures = jobResult.getCustom(VertexMapJob.MAP_JOB_FAILURE);
            if (failures > 0) {
                throw new TitanException("Failed to process [" + failures + "] individual map jobs. Computer is aborting.");
            }
        } catch (Exception e) {
            // NOTE(review): as above, TitanExceptions thrown inside the try are wrapped again here
            throw new TitanException(e);
        }
        // Execute reduce phase and add to memory
        for (Map.Entry<MapReduce, FulgoraMapEmitter> mapJob : mapJobs.entrySet()) {
            FulgoraMapEmitter<?, ?> mapEmitter = mapJob.getValue();
            MapReduce mapReduce = mapJob.getKey();
            // sort results if a map output sort is defined
            mapEmitter.complete(mapReduce);
            if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
                final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>();
                // NOTE(review): workerStart, each reduce call and workerEnd are submitted as separate
                // tasks; their relative ordering and the wait for completion depend on WorkerPool
                // (presumably close() waits for submitted tasks) - confirm against WorkerPool
                try (WorkerPool workers = new WorkerPool(numThreads)) {
                    workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE));
                    for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) {
                        workers.submit(() -> mapReduce.reduce(queueEntry.getKey(), ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter));
                    }
                    workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE));
                } catch (Exception e) {
                    throw new TitanException("Exception while executing reduce phase", e);
                }
                //                    mapEmitter.reduceMap.entrySet().parallelStream().forEach(entry -> mapReduce.reduce(entry.getKey(), entry.getValue().iterator(), reduceEmitter));
                // sort results if a reduce output sort is defined
                reduceEmitter.complete(mapReduce);
                mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator());
            } else {
                // map-only job: the map output is the result
                mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());
            }
        }
        // #### Write mutated properties back into graph
        // default result graph is the original graph; replaced below for ResultGraph.NEW
        Graph resultgraph = graph;
        if (persistMode == Persist.NOTHING && resultGraphMode == ResultGraph.NEW) {
            resultgraph = EmptyGraph.instance();
        } else if (persistMode != Persist.NOTHING && vertexProgram != null && !vertexProgram.getElementComputeKeys().isEmpty()) {
            // the non-empty compute key check above also keeps the division used for batch sizing below from dividing by zero
            //First, create property keys in graph if they don't already exist
            TitanManagement mgmt = graph.openManagement();
            try {
                for (String key : vertexProgram.getElementComputeKeys()) {
                    if (!mgmt.containsPropertyKey(key))
                        log.warn("Property key [{}] is not part of the schema and will be created. It is advised to initialize all keys.", key);
                    mgmt.getOrCreatePropertyKey(key);
                }
                mgmt.commit();
            } finally {
                // roll back only if the management transaction is still open, e.g. when commit() was not reached
                if (mgmt != null && mgmt.isOpen())
                    mgmt.rollback();
            }
            //TODO: Filter based on VertexProgram
            // strips keys listed in NON_PERSISTING_KEYS from each vertex's property map;
            // Guava's transformValues/filterKeys return views, so the filtering is applied on access
            Map<Long, Map<String, Object>> mutatedProperties = Maps.transformValues(vertexMemory.getMutableVertexProperties(), new Function<Map<String, Object>, Map<String, Object>>() {

                @Nullable
                @Override
                public Map<String, Object> apply(@Nullable Map<String, Object> o) {
                    return Maps.filterKeys(o, s -> !NON_PERSISTING_KEYS.contains(s));
                }
            });
            if (resultGraphMode == ResultGraph.ORIGINAL) {
                // shared failure counter handed to every VertexPropertyWriter and checked after the pool is closed
                AtomicInteger failures = new AtomicInteger(0);
                try (WorkerPool workers = new WorkerPool(numThreads)) {
                    List<Map.Entry<Long, Map<String, Object>>> subset = new ArrayList<>(writeBatchSize / vertexProgram.getElementComputeKeys().size());
                    // batches are sized by number of properties (not vertices) accumulated so far
                    int currentSize = 0;
                    for (Map.Entry<Long, Map<String, Object>> entry : mutatedProperties.entrySet()) {
                        subset.add(entry);
                        currentSize += entry.getValue().size();
                        if (currentSize >= writeBatchSize) {
                            workers.submit(new VertexPropertyWriter(subset, failures));
                            // start a fresh list, presized to the length of the batch just submitted
                            subset = new ArrayList<>(subset.size());
                            currentSize = 0;
                        }
                    }
                    // flush the final, partially filled batch
                    if (!subset.isEmpty())
                        workers.submit(new VertexPropertyWriter(subset, failures));
                } catch (Exception e) {
                    throw new TitanException("Exception while attempting to persist result into graph", e);
                }
                if (failures.get() > 0)
                    throw new TitanException("Could not persist program results to graph. Check log for details.");
            } else if (resultGraphMode == ResultGraph.NEW) {
                // the result graph is a new transaction on the original graph with the computed
                // properties set on its vertices; this method does not commit that transaction
                resultgraph = graph.newTransaction();
                for (Map.Entry<Long, Map<String, Object>> vprop : mutatedProperties.entrySet()) {
                    Vertex v = resultgraph.vertices(vprop.getKey()).next();
                    for (Map.Entry<String, Object> prop : vprop.getValue().entrySet()) {
                        v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
                    }
                }
            }
        }
        // update runtime and return the newly computed graph
        this.memory.setRuntime(System.currentTimeMillis() - time);
        this.memory.complete();
        return new DefaultComputerResult(resultgraph, this.memory);
    });
}
Also used : Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ScanMetrics(com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanMetrics) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce) Function(com.google.common.base.Function) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) ArrayList(java.util.ArrayList) List(java.util.List) TitanException(com.thinkaurelius.titan.core.TitanException) WorkerPool(com.thinkaurelius.titan.graphdb.util.WorkerPool) Graph(org.apache.tinkerpop.gremlin.structure.Graph) EmptyGraph(org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph) StandardTitanGraph(com.thinkaurelius.titan.graphdb.database.StandardTitanGraph) StandardScanner(com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.StandardScanner) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) TitanException(com.thinkaurelius.titan.core.TitanException) HashMap(java.util.HashMap) Map(java.util.Map) TitanManagement(com.thinkaurelius.titan.core.schema.TitanManagement) Nullable(javax.annotation.Nullable)

Example 2 with MapReduce

use of org.apache.tinkerpop.gremlin.process.computer.MapReduce in project titan by thinkaurelius.

the class VertexMapJob method process.

/**
 * Runs the map stage of every registered map-reduce job on one scanned vertex.
 *
 * <p>The vertex is first prepared (optional vertex-memory property handler, map-reduce
 * access check). Partitioned vertices that are not the canonical representative are
 * then skipped. A failure in one map job is logged and counted under
 * {@code MAP_JOB_FAILURE}; it does not stop the remaining jobs.
 *
 * @param vertex  the scanned vertex; cast to {@link PreloadedVertex}
 * @param metrics receives one {@code MAP_JOB_SUCCESS} or {@code MAP_JOB_FAILURE} increment per job
 */
@Override
public void process(TitanVertex vertex, ScanMetrics metrics) {
    final PreloadedVertex preloaded = (PreloadedVertex) vertex;
    // Expose vertex-program state through the vertex only when vertex memory is present.
    if (null != vertexMemory) {
        preloaded.setPropertyMixing(new VertexMemoryHandler(vertexMemory, preloaded));
    }
    preloaded.setAccessCheck(MAPREDUCE_CHECK);

    // Only consider the canonical partition vertex representative
    final boolean nonCanonicalPartition = idManager.isPartitionedVertex(preloaded.longId())
            && !idManager.isCanonicalVertexId(preloaded.longId());
    if (nonCanonicalPartition) {
        return;
    }

    for (Map.Entry<MapReduce, FulgoraMapEmitter> entry : mapJobs.entrySet()) {
        final MapReduce mapReduce = entry.getKey();
        try {
            mapReduce.map(preloaded, entry.getValue());
            metrics.incrementCustom(MAP_JOB_SUCCESS);
        } catch (Throwable failure) {
            // Best effort: record the failure and continue with the next job.
            log.error("Encountered exception executing map job [" + mapReduce + "] on vertex [" + vertex + "]:", failure);
            metrics.incrementCustom(MAP_JOB_FAILURE);
        }
    }
}
Also used : PreloadedVertex(com.thinkaurelius.titan.graphdb.vertices.PreloadedVertex) ImmutableMap(com.google.common.collect.ImmutableMap) Map(java.util.Map) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce)

Aggregations

Map (java.util.Map)2 MapReduce (org.apache.tinkerpop.gremlin.process.computer.MapReduce)2 Function (com.google.common.base.Function)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 TitanException (com.thinkaurelius.titan.core.TitanException)1 TitanManagement (com.thinkaurelius.titan.core.schema.TitanManagement)1 ScanMetrics (com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanMetrics)1 StandardScanner (com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.StandardScanner)1 StandardTitanGraph (com.thinkaurelius.titan.graphdb.database.StandardTitanGraph)1 WorkerPool (com.thinkaurelius.titan.graphdb.util.WorkerPool)1 PreloadedVertex (com.thinkaurelius.titan.graphdb.vertices.PreloadedVertex)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 Nullable (javax.annotation.Nullable)1 ComputerResult (org.apache.tinkerpop.gremlin.process.computer.ComputerResult)1 DefaultComputerResult (org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult)1 Graph (org.apache.tinkerpop.gremlin.structure.Graph)1 Vertex (org.apache.tinkerpop.gremlin.structure.Vertex)1