Examples with DefaultComputerResult - org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult

Example 1 with DefaultComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult in project titan by thinkaurelius.

the class FulgoraGraphComputer method submit.

@Override
public Future<ComputerResult> submit() {
    if (executed)
        throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
    else
        executed = true;
    // it is not possible execute a computer if it has no vertex program nor mapreducers
    if (null == vertexProgram && mapReduces.isEmpty())
        throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
    // it is possible to run mapreducers without a vertex program
    if (null != vertexProgram) {
        GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
        this.mapReduces.addAll(this.vertexProgram.getMapReducers());
    }
    // if the user didn't set desired persistence/resultgraph, then get from vertex program or else, no persistence
    this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persistMode));
    this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraphMode));
    // determine the legality persistence and result graph options
    if (!this.features().supportsResultGraphPersistCombination(this.resultGraphMode, this.persistMode))
        throw GraphComputer.Exceptions.resultGraphPersistCombinationNotSupported(this.resultGraphMode, this.persistMode);
    memory = new FulgoraMemory(vertexProgram, mapReduces);
    return CompletableFuture.<ComputerResult>supplyAsync(() -> {
        final long time = System.currentTimeMillis();
        if (null != vertexProgram) {
            // ##### Execute vertex program
            vertexMemory = new FulgoraVertexMemory(expectedNumVertices, graph.getIDManager(), vertexProgram);
            // execute the vertex program
            vertexProgram.setup(memory);
            memory.completeSubRound();
            for (int iteration = 1; ; iteration++) {
                vertexMemory.nextIteration(vertexProgram.getMessageScopes(memory));
                jobId = name + "#" + iteration;
                VertexProgramScanJob.Executor job = VertexProgramScanJob.getVertexProgramScanJob(graph, memory, vertexMemory, vertexProgram);
                StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
                scanBuilder.setJobId(jobId);
                scanBuilder.setNumProcessingThreads(numThreads);
                scanBuilder.setWorkBlockSize(readBatchSize);
                scanBuilder.setJob(job);
                PartitionedVertexProgramExecutor pvpe = new PartitionedVertexProgramExecutor(graph, memory, vertexMemory, vertexProgram);
                try {
                    //Iterates over all vertices and computes the vertex program on all non-partitioned vertices. For partitioned ones, the data is aggregated
                    ScanMetrics jobResult = scanBuilder.execute().get();
                    long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                    if (failures > 0) {
                        throw new TitanException("Failed to process [" + failures + "] vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                    }
                    //Runs the vertex program on all aggregated, partitioned vertices.
                    pvpe.run(numThreads, jobResult);
                    failures = jobResult.getCustom(PartitionedVertexProgramExecutor.PARTITION_VERTEX_POSTFAIL);
                    if (failures > 0) {
                        throw new TitanException("Failed to process [" + failures + "] partitioned vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                    }
                } catch (Exception e) {
                    throw new TitanException(e);
                }
                vertexMemory.completeIteration();
                memory.completeSubRound();
                try {
                    if (this.vertexProgram.terminate(this.memory)) {
                        break;
                    }
                } finally {
                    memory.incrIteration();
                    memory.completeSubRound();
                }
            }
        }
        // ##### Execute mapreduce jobs
        // Collect map jobs
        Map<MapReduce, FulgoraMapEmitter> mapJobs = new HashMap<>(mapReduces.size());
        for (MapReduce mapReduce : mapReduces) {
            if (mapReduce.doStage(MapReduce.Stage.MAP)) {
                FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(mapReduce.doStage(MapReduce.Stage.REDUCE));
                mapJobs.put(mapReduce, mapEmitter);
            }
        }
        // Execute map jobs
        jobId = name + "#map";
        VertexMapJob.Executor job = VertexMapJob.getVertexMapJob(graph, vertexMemory, mapJobs);
        StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
        scanBuilder.setJobId(jobId);
        scanBuilder.setNumProcessingThreads(numThreads);
        scanBuilder.setWorkBlockSize(readBatchSize);
        scanBuilder.setJob(job);
        try {
            ScanMetrics jobResult = scanBuilder.execute().get();
            long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
            if (failures > 0) {
                throw new TitanException("Failed to process [" + failures + "] vertices in map phase. Computer is aborting.");
            }
            failures = jobResult.getCustom(VertexMapJob.MAP_JOB_FAILURE);
            if (failures > 0) {
                throw new TitanException("Failed to process [" + failures + "] individual map jobs. Computer is aborting.");
            }
        } catch (Exception e) {
            throw new TitanException(e);
        }
        // Execute reduce phase and add to memory
        for (Map.Entry<MapReduce, FulgoraMapEmitter> mapJob : mapJobs.entrySet()) {
            FulgoraMapEmitter<?, ?> mapEmitter = mapJob.getValue();
            MapReduce mapReduce = mapJob.getKey();
            // sort results if a map output sort is defined
            mapEmitter.complete(mapReduce);
            if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
                final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>();
                try (WorkerPool workers = new WorkerPool(numThreads)) {
                    workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE));
                    for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) {
                        workers.submit(() -> mapReduce.reduce(queueEntry.getKey(), ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter));
                    }
                    workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE));
                } catch (Exception e) {
                    throw new TitanException("Exception while executing reduce phase", e);
                }
                //                    mapEmitter.reduceMap.entrySet().parallelStream().forEach(entry -> mapReduce.reduce(entry.getKey(), entry.getValue().iterator(), reduceEmitter));
                // sort results if a reduce output sort is defined
                reduceEmitter.complete(mapReduce);
                mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator());
            } else {
                mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());
            }
        }
        // #### Write mutated properties back into graph
        Graph resultgraph = graph;
        if (persistMode == Persist.NOTHING && resultGraphMode == ResultGraph.NEW) {
            resultgraph = EmptyGraph.instance();
        } else if (persistMode != Persist.NOTHING && vertexProgram != null && !vertexProgram.getElementComputeKeys().isEmpty()) {
            //First, create property keys in graph if they don't already exist
            TitanManagement mgmt = graph.openManagement();
            try {
                for (String key : vertexProgram.getElementComputeKeys()) {
                    if (!mgmt.containsPropertyKey(key))
                        log.warn("Property key [{}] is not part of the schema and will be created. It is advised to initialize all keys.", key);
                    mgmt.getOrCreatePropertyKey(key);
                }
                mgmt.commit();
            } finally {
                if (mgmt != null && mgmt.isOpen())
                    mgmt.rollback();
            }
            //TODO: Filter based on VertexProgram
            Map<Long, Map<String, Object>> mutatedProperties = Maps.transformValues(vertexMemory.getMutableVertexProperties(), new Function<Map<String, Object>, Map<String, Object>>() {

                @Nullable
                @Override
                public Map<String, Object> apply(@Nullable Map<String, Object> o) {
                    return Maps.filterKeys(o, s -> !NON_PERSISTING_KEYS.contains(s));
                }
            });
            if (resultGraphMode == ResultGraph.ORIGINAL) {
                AtomicInteger failures = new AtomicInteger(0);
                try (WorkerPool workers = new WorkerPool(numThreads)) {
                    List<Map.Entry<Long, Map<String, Object>>> subset = new ArrayList<>(writeBatchSize / vertexProgram.getElementComputeKeys().size());
                    int currentSize = 0;
                    for (Map.Entry<Long, Map<String, Object>> entry : mutatedProperties.entrySet()) {
                        subset.add(entry);
                        currentSize += entry.getValue().size();
                        if (currentSize >= writeBatchSize) {
                            workers.submit(new VertexPropertyWriter(subset, failures));
                            subset = new ArrayList<>(subset.size());
                            currentSize = 0;
                        }
                    }
                    if (!subset.isEmpty())
                        workers.submit(new VertexPropertyWriter(subset, failures));
                } catch (Exception e) {
                    throw new TitanException("Exception while attempting to persist result into graph", e);
                }
                if (failures.get() > 0)
                    throw new TitanException("Could not persist program results to graph. Check log for details.");
            } else if (resultGraphMode == ResultGraph.NEW) {
                resultgraph = graph.newTransaction();
                for (Map.Entry<Long, Map<String, Object>> vprop : mutatedProperties.entrySet()) {
                    Vertex v = resultgraph.vertices(vprop.getKey()).next();
                    for (Map.Entry<String, Object> prop : vprop.getValue().entrySet()) {
                        v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
                    }
                }
            }
        }
        // update runtime and return the newly computed graph
        this.memory.setRuntime(System.currentTimeMillis() - time);
        this.memory.complete();
        return new DefaultComputerResult(resultgraph, this.memory);
    });
}

Also used : Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ScanMetrics(com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanMetrics) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce) Function(com.google.common.base.Function) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) ArrayList(java.util.ArrayList) List(java.util.List) TitanException(com.thinkaurelius.titan.core.TitanException) WorkerPool(com.thinkaurelius.titan.graphdb.util.WorkerPool) Graph(org.apache.tinkerpop.gremlin.structure.Graph) EmptyGraph(org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph) StandardTitanGraph(com.thinkaurelius.titan.graphdb.database.StandardTitanGraph) StandardScanner(com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.StandardScanner) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) TitanException(com.thinkaurelius.titan.core.TitanException) HashMap(java.util.HashMap) Map(java.util.Map) TitanManagement(com.thinkaurelius.titan.core.schema.TitanManagement) Nullable(javax.annotation.Nullable)

Example 2 with DefaultComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult in project janusgraph by JanusGraph.

the class FulgoraGraphComputer method submit.

@Override
public Future<ComputerResult> submit() {
    if (executed)
        throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
    else
        executed = true;
    // it is not possible execute a computer if it has no vertex program nor map-reducers
    if (null == vertexProgram && mapReduces.isEmpty())
        throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
    // it is possible to run map-reducers without a vertex program
    if (null != vertexProgram) {
        GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
        this.mapReduces.addAll(this.vertexProgram.getMapReducers());
    }
    // if the user didn't set desired persistence/resultgraph, then get from vertex program or else, no persistence
    this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persistMode));
    this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraphMode));
    // determine the legality persistence and result graph options
    if (!this.features().supportsResultGraphPersistCombination(this.resultGraphMode, this.persistMode))
        throw GraphComputer.Exceptions.resultGraphPersistCombinationNotSupported(this.resultGraphMode, this.persistMode);
    // ensure requested workers are not larger than supported workers
    if (this.numThreads > this.features().getMaxWorkers())
        throw GraphComputer.Exceptions.computerRequiresMoreWorkersThanSupported(this.numThreads, this.features().getMaxWorkers());
    memory = new FulgoraMemory(vertexProgram, mapReduces);
    return CompletableFuture.supplyAsync(() -> {
        final long time = System.currentTimeMillis();
        if (null != vertexProgram) {
            // ##### Execute vertex program
            vertexMemory = new FulgoraVertexMemory(expectedNumVertices, graph.getIDManager(), vertexProgram);
            // execute the vertex program
            vertexProgram.setup(memory);
            try (VertexProgramScanJob.Executor job = VertexProgramScanJob.getVertexProgramScanJob(graph, memory, vertexMemory, vertexProgram)) {
                for (int iteration = 1; ; iteration++) {
                    memory.completeSubRound();
                    vertexMemory.nextIteration(vertexProgram.getMessageScopes(memory));
                    jobId = name + "#" + iteration;
                    StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
                    scanBuilder.setJobId(jobId);
                    scanBuilder.setNumProcessingThreads(numThreads);
                    scanBuilder.setWorkBlockSize(readBatchSize);
                    scanBuilder.setJob(job);
                    PartitionedVertexProgramExecutor programExecutor = new PartitionedVertexProgramExecutor(graph, memory, vertexMemory, vertexProgram);
                    try {
                        // Iterates over all vertices and computes the vertex program on all non-partitioned vertices. For partitioned ones, the data is aggregated
                        ScanMetrics jobResult = scanBuilder.execute().get();
                        long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                        if (failures > 0) {
                            throw new JanusGraphException("Failed to process [" + failures + "] vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                        }
                        // Runs the vertex program on all aggregated, partitioned vertices.
                        programExecutor.run(numThreads, jobResult);
                        failures = jobResult.getCustom(PartitionedVertexProgramExecutor.PARTITION_VERTEX_POSTFAIL);
                        if (failures > 0) {
                            throw new JanusGraphException("Failed to process [" + failures + "] partitioned vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                        }
                    } catch (Exception e) {
                        throw new JanusGraphException(e);
                    }
                    vertexMemory.completeIteration();
                    memory.completeSubRound();
                    try {
                        if (this.vertexProgram.terminate(this.memory)) {
                            break;
                        }
                    } finally {
                        memory.incrIteration();
                    }
                }
            }
        }
        // ##### Execute map-reduce jobs
        // Collect map jobs
        Map<MapReduce, FulgoraMapEmitter> mapJobs = new HashMap<>(mapReduces.size());
        for (MapReduce mapReduce : mapReduces) {
            if (mapReduce.doStage(MapReduce.Stage.MAP)) {
                FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(mapReduce.doStage(MapReduce.Stage.REDUCE));
                mapJobs.put(mapReduce, mapEmitter);
            }
        }
        // Execute map jobs
        jobId = name + "#map";
        try (VertexMapJob.Executor job = VertexMapJob.getVertexMapJob(graph, vertexMemory, mapJobs)) {
            StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
            scanBuilder.setJobId(jobId);
            scanBuilder.setNumProcessingThreads(numThreads);
            scanBuilder.setWorkBlockSize(readBatchSize);
            scanBuilder.setJob(job);
            try {
                ScanMetrics jobResult = scanBuilder.execute().get();
                long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                if (failures > 0) {
                    throw new JanusGraphException("Failed to process [" + failures + "] vertices in map phase. Computer is aborting.");
                }
                failures = jobResult.getCustom(VertexMapJob.MAP_JOB_FAILURE);
                if (failures > 0) {
                    throw new JanusGraphException("Failed to process [" + failures + "] individual map jobs. Computer is aborting.");
                }
            } catch (Exception e) {
                throw new JanusGraphException(e);
            }
            // Execute reduce phase and add to memory
            for (Map.Entry<MapReduce, FulgoraMapEmitter> mapJob : mapJobs.entrySet()) {
                FulgoraMapEmitter<?, ?> mapEmitter = mapJob.getValue();
                MapReduce mapReduce = mapJob.getKey();
                // sort results if a map output sort is defined
                mapEmitter.complete(mapReduce);
                if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
                    final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>();
                    try (WorkerPool workers = new WorkerPool(numThreads)) {
                        workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE));
                        for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) {
                            if (null == queueEntry)
                                break;
                            workers.submit(() -> mapReduce.reduce(queueEntry.getKey(), ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter));
                        }
                        workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE));
                    } catch (Exception e) {
                        throw new JanusGraphException("Exception while executing reduce phase", e);
                    }
                    // mapEmitter.reduceMap.entrySet().parallelStream().forEach(entry -> mapReduce.reduce(entry.getKey(), entry.getValue().iterator(), reduceEmitter));
                    // sort results if a reduce output sort is defined
                    reduceEmitter.complete(mapReduce);
                    mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator());
                } else {
                    mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());
                }
            }
        }
        memory.attachReferenceElements(graph);
        // #### Write mutated properties back into graph
        Graph resultgraph = graph;
        if (persistMode == Persist.NOTHING && resultGraphMode == ResultGraph.NEW) {
            resultgraph = EmptyGraph.instance();
        } else if (persistMode != Persist.NOTHING && vertexProgram != null && !vertexProgram.getVertexComputeKeys().isEmpty()) {
            // First, create property keys in graph if they don't already exist
            JanusGraphManagement management = graph.openManagement();
            try {
                for (VertexComputeKey key : vertexProgram.getVertexComputeKeys()) {
                    if (!management.containsPropertyKey(key.getKey()))
                        log.warn("Property key [{}] is not part of the schema and will be created. It is advised to initialize all keys.", key.getKey());
                    management.getOrCreatePropertyKey(key.getKey());
                }
                management.commit();
            } finally {
                if (management != null && management.isOpen())
                    management.rollback();
            }
            // TODO: Filter based on VertexProgram
            Map<Long, Map<String, Object>> mutatedProperties = Maps.transformValues(vertexMemory.getMutableVertexProperties(), new Function<Map<String, Object>, Map<String, Object>>() {

                @Nullable
                @Override
                public Map<String, Object> apply(final Map<String, Object> o) {
                    return Maps.filterKeys(o, s -> !VertexProgramHelper.isTransientVertexComputeKey(s, vertexProgram.getVertexComputeKeys()));
                }
            });
            if (resultGraphMode == ResultGraph.ORIGINAL) {
                AtomicInteger failures = new AtomicInteger(0);
                try (WorkerPool workers = new WorkerPool(numThreads)) {
                    List<Map.Entry<Long, Map<String, Object>>> subset = new ArrayList<>(writeBatchSize / vertexProgram.getVertexComputeKeys().size());
                    int currentSize = 0;
                    for (Map.Entry<Long, Map<String, Object>> entry : mutatedProperties.entrySet()) {
                        subset.add(entry);
                        currentSize += entry.getValue().size();
                        if (currentSize >= writeBatchSize) {
                            workers.submit(new VertexPropertyWriter(subset, failures));
                            subset = new ArrayList<>(subset.size());
                            currentSize = 0;
                        }
                    }
                    if (!subset.isEmpty())
                        workers.submit(new VertexPropertyWriter(subset, failures));
                } catch (Exception e) {
                    throw new JanusGraphException("Exception while attempting to persist result into graph", e);
                }
                if (failures.get() > 0)
                    throw new JanusGraphException("Could not persist program results to graph. Check log for details.");
            } else if (resultGraphMode == ResultGraph.NEW) {
                resultgraph = graph.newTransaction();
                for (Map.Entry<Long, Map<String, Object>> vertexProperty : mutatedProperties.entrySet()) {
                    Vertex v = resultgraph.vertices(vertexProperty.getKey()).next();
                    for (Map.Entry<String, Object> prop : vertexProperty.getValue().entrySet()) {
                        if (prop.getValue() instanceof List) {
                            ((List) prop.getValue()).forEach(value -> v.property(VertexProperty.Cardinality.list, prop.getKey(), value));
                        } else {
                            v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
                        }
                    }
                }
            }
        }
        // update runtime and return the newly computed graph
        this.memory.setRuntime(System.currentTimeMillis() - time);
        this.memory.complete();
        return new DefaultComputerResult(resultgraph, this.memory);
    });
}

Also used : JanusGraphManagement(org.janusgraph.core.schema.JanusGraphManagement) Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) HashMap(java.util.HashMap) JanusGraphException(org.janusgraph.core.JanusGraphException) ArrayList(java.util.ArrayList) ScanMetrics(org.janusgraph.diskstorage.keycolumnvalue.scan.ScanMetrics) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce) Function(com.google.common.base.Function) ArrayList(java.util.ArrayList) List(java.util.List) JanusGraphException(org.janusgraph.core.JanusGraphException) WorkerPool(org.janusgraph.graphdb.util.WorkerPool) Graph(org.apache.tinkerpop.gremlin.structure.Graph) StandardJanusGraph(org.janusgraph.graphdb.database.StandardJanusGraph) EmptyGraph(org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph) StandardScanner(org.janusgraph.diskstorage.keycolumnvalue.scan.StandardScanner) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) VertexComputeKey(org.apache.tinkerpop.gremlin.process.computer.VertexComputeKey) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with DefaultComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult in project janusgraph by JanusGraph.

the class FulgoraGraphComputer method submitAsync.

private ComputerResult submitAsync() {
    final long time = System.currentTimeMillis();
    executeVertexProgram();
    Map<MapReduce, FulgoraMapEmitter> mapJobs = collectMapJobs();
    executeMapJobs(mapJobs);
    Graph resultgraph = writeMutatedPropertiesBackIntoGraph();
    // update runtime and return the newly computed graph
    this.memory.setRuntime(System.currentTimeMillis() - time);
    this.memory.complete();
    return new DefaultComputerResult(resultgraph, this.memory);
}

Also used : Graph(org.apache.tinkerpop.gremlin.structure.Graph) StandardJanusGraph(org.janusgraph.graphdb.database.StandardJanusGraph) EmptyGraph(org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce)

Example 4 with DefaultComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult in project grakn by graknlabs.

the class GraknSparkComputer method submitWithExecutor.

@SuppressWarnings("PMD.UnusedFormalParameter")
private Future<ComputerResult> submitWithExecutor() {
    jobGroupId = Integer.toString(ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE));
    String jobDescription = this.vertexProgram == null ? this.mapReducers.toString() : this.vertexProgram + "+" + this.mapReducers;
    // Use different output locations
    this.sparkConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, this.sparkConfiguration.getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + jobGroupId);
    updateConfigKeys(sparkConfiguration);
    final Future<ComputerResult> result = computerService.submit(() -> {
        final long startTime = System.currentTimeMillis();
        // apache and hadoop configurations that are used throughout the graph computer computation
        final org.apache.commons.configuration.Configuration graphComputerConfiguration = new HadoopConfiguration(this.sparkConfiguration);
        if (!graphComputerConfiguration.containsKey(Constants.SPARK_SERIALIZER)) {
            graphComputerConfiguration.setProperty(Constants.SPARK_SERIALIZER, GryoSerializer.class.getCanonicalName());
        }
        graphComputerConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES, this.persist.equals(GraphComputer.Persist.EDGES));
        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(graphComputerConfiguration);
        final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration);
        final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean skipPartitioner = graphComputerConfiguration.getBoolean(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, false);
        final boolean skipPersist = graphComputerConfiguration.getBoolean(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, false);
        if (inputFromHDFS) {
            String inputLocation = Constants.getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION), fileSystemStorage).orElse(null);
            if (null != inputLocation) {
                try {
                    graphComputerConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath().toString());
                    hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath().toString());
                } catch (final IOException e) {
                    throw new IllegalStateException(e.getMessage(), e);
                }
            }
        }
        final InputRDD inputRDD;
        final OutputRDD outputRDD;
        final boolean filtered;
        try {
            inputRDD = InputRDD.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class)) ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputRDD.class, InputRDD.class).newInstance() : InputFormatRDD.class.newInstance();
            outputRDD = OutputRDD.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class)) ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, OutputRDD.class, OutputRDD.class).newInstance() : OutputFormatRDD.class.newInstance();
            // if the input class can filter on load, then set the filters
            if (inputRDD instanceof InputFormatRDD && GraphFilterAware.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class))) {
                GraphFilterAware.storeGraphFilter(graphComputerConfiguration, hadoopConfiguration, this.graphFilter);
                filtered = false;
            } else if (inputRDD instanceof GraphFilterAware) {
                ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter);
                filtered = false;
            } else
                filtered = this.graphFilter.hasFilter();
        } catch (final InstantiationException | IllegalAccessException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
        // create the spark context from the graph computer configuration
        final JavaSparkContext sparkContext = new JavaSparkContext(Spark.create(hadoopConfiguration));
        final Storage sparkContextStorage = SparkContextStorage.open();
        sparkContext.setJobGroup(jobGroupId, jobDescription);
        GraknSparkMemory memory = null;
        // delete output location
        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
        if (null != outputLocation) {
            if (outputToHDFS && fileSystemStorage.exists(outputLocation)) {
                fileSystemStorage.rm(outputLocation);
            }
            if (outputToSpark && sparkContextStorage.exists(outputLocation)) {
                sparkContextStorage.rm(outputLocation);
            }
        }
        // the Spark application name will always be set by SparkContextStorage,
        // thus, INFO the name to make it easier to debug
        logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + (null == this.vertexProgram ? "No VertexProgram" : this.vertexProgram) + "[" + this.mapReducers + "]");
        // add the project jars to the cluster
        this.loadJars(hadoopConfiguration, sparkContext);
        updateLocalConfiguration(sparkContext, hadoopConfiguration);
        // create a message-passing friendly rdd from the input rdd
        boolean partitioned = false;
        JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(graphComputerConfiguration, sparkContext);
        // if there are vertex or edge filters, filter the loaded graph rdd prior to partitioning and persisting
        if (filtered) {
            this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter);
            loadedGraphRDD = GraknSparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter);
        }
        // else partition it with HashPartitioner
        if (loadedGraphRDD.partitioner().isPresent()) {
            this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: " + loadedGraphRDD.partitioner().get());
        } else {
            if (!skipPartitioner) {
                final Partitioner partitioner = new HashPartitioner(this.workersSet ? this.workers : loadedGraphRDD.partitions().size());
                this.logger.debug("Partitioning the loaded graphRDD: " + partitioner);
                loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner);
                partitioned = true;
                assert loadedGraphRDD.partitioner().isPresent();
            } else {
                // no easy way to test this with a test case
                assert skipPartitioner == !loadedGraphRDD.partitioner().isPresent();
                this.logger.debug("Partitioning has been skipped for the loaded graphRDD via " + Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            }
        }
        // then this coalesce/repartition will not take place
        if (this.workersSet) {
            // ensures that the loaded graphRDD does not have more partitions than workers
            if (loadedGraphRDD.partitions().size() > this.workers) {
                loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
            } else {
                // ensures that the loaded graphRDD does not have less partitions than workers
                if (loadedGraphRDD.partitions().size() < this.workers) {
                    loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
                }
            }
        }
        // or else use default cache() which is MEMORY_ONLY
        if (!skipPersist && (!inputFromSpark || partitioned || filtered)) {
            loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString(hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
        }
        // final graph with view
        // (for persisting and/or mapReducing -- may be null and thus, possible to save space/time)
        JavaPairRDD<Object, VertexWritable> computedGraphRDD = null;
        try {
            // //////////////////////////////
            if (null != this.vertexProgram) {
                memory = new GraknSparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                // if there is a registered VertexProgramInterceptor, use it to bypass the GraphComputer semantics
                if (graphComputerConfiguration.containsKey(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)) {
                    try {
                        final GraknSparkVertexProgramInterceptor<VertexProgram> interceptor = (GraknSparkVertexProgramInterceptor) Class.forName(graphComputerConfiguration.getString(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)).newInstance();
                        computedGraphRDD = interceptor.apply(this.vertexProgram, loadedGraphRDD, memory);
                    } catch (final ClassNotFoundException | IllegalAccessException | InstantiationException e) {
                        throw new IllegalStateException(e.getMessage());
                    }
                } else {
                    // standard GraphComputer semantics
                    // get a configuration that will be propagated to all workers
                    final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration();
                    this.vertexProgram.storeState(vertexProgramConfiguration);
                    // set up the vertex program and wire up configurations
                    this.vertexProgram.setup(memory);
                    JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null;
                    memory.broadcastMemory(sparkContext);
                    // execute the vertex program
                    while (true) {
                        if (Thread.interrupted()) {
                            sparkContext.cancelAllJobs();
                            throw new TraversalInterruptedException();
                        }
                        memory.setInExecute(true);
                        viewIncomingRDD = GraknSparkExecutor.executeVertexProgramIteration(loadedGraphRDD, viewIncomingRDD, memory, graphComputerConfiguration, vertexProgramConfiguration);
                        memory.setInExecute(false);
                        if (this.vertexProgram.terminate(memory)) {
                            break;
                        } else {
                            memory.incrIteration();
                            memory.broadcastMemory(sparkContext);
                        }
                    }
                    // then generate a view+graph
                    if ((null != outputRDD && !this.persist.equals(Persist.NOTHING)) || !this.mapReducers.isEmpty()) {
                        computedGraphRDD = GraknSparkExecutor.prepareFinalGraphRDD(loadedGraphRDD, viewIncomingRDD, this.vertexProgram.getVertexComputeKeys());
                        assert null != computedGraphRDD && computedGraphRDD != loadedGraphRDD;
                    } else {
                        // ensure that the computedGraphRDD was not created
                        assert null == computedGraphRDD;
                    }
                }
                // ///////////////
                // drop all transient memory keys
                memory.complete();
                // write the computed graph to the respective output (rdd or output format)
                if (null != outputRDD && !this.persist.equals(Persist.NOTHING)) {
                    // the logic holds that a computeGraphRDD must be created at this point
                    assert null != computedGraphRDD;
                    outputRDD.writeGraphRDD(graphComputerConfiguration, computedGraphRDD);
                }
            }
            final boolean computedGraphCreated = computedGraphRDD != null && computedGraphRDD != loadedGraphRDD;
            if (!computedGraphCreated) {
                computedGraphRDD = loadedGraphRDD;
            }
            final Memory.Admin finalMemory = null == memory ? new MapMemory() : new MapMemory(memory);
            // ////////////////////////////
            if (!this.mapReducers.isEmpty()) {
                // create a mapReduceRDD for executing the map reduce jobs on
                JavaPairRDD<Object, VertexWritable> mapReduceRDD = computedGraphRDD;
                if (computedGraphCreated && !outputToSpark) {
                    // drop all the edges of the graph as they are not used in mapReduce processing
                    mapReduceRDD = computedGraphRDD.mapValues(vertexWritable -> {
                        vertexWritable.get().dropEdges(Direction.BOTH);
                        return vertexWritable;
                    });
                    // if there is only one MapReduce to execute, don't bother wasting the clock cycles.
                    if (this.mapReducers.size() > 1) {
                        mapReduceRDD = mapReduceRDD.persist(StorageLevel.fromString(hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
                    }
                }
                for (final MapReduce mapReduce : this.mapReducers) {
                    // execute the map reduce job
                    final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration(graphComputerConfiguration);
                    mapReduce.storeState(newApacheConfiguration);
                    // map
                    final JavaPairRDD mapRDD = GraknSparkExecutor.executeMap(mapReduceRDD, mapReduce, newApacheConfiguration);
                    // combine
                    final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE) ? GraknSparkExecutor.executeCombine(mapRDD, newApacheConfiguration) : mapRDD;
                    // reduce
                    final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE) ? GraknSparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration) : combineRDD;
                    // write the map reduce output back to disk and computer result memory
                    if (null != outputRDD) {
                        mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD(graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD));
                    }
                }
                // if the mapReduceRDD is not simply the computed graph, unpersist the mapReduceRDD
                if (computedGraphCreated && !outputToSpark) {
                    assert loadedGraphRDD != computedGraphRDD;
                    assert mapReduceRDD != computedGraphRDD;
                    mapReduceRDD.unpersist();
                } else {
                    assert mapReduceRDD == computedGraphRDD;
                }
            }
            // if the graphRDD was loaded from Spark, but then partitioned or filtered, its a different RDD
            if (!inputFromSpark || partitioned || filtered) {
                loadedGraphRDD.unpersist();
            }
            // then don't unpersist the computedGraphRDD/loadedGraphRDD
            if ((!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING)) && computedGraphCreated) {
                computedGraphRDD.unpersist();
            }
            // delete any file system or rdd data if persist nothing
            if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) {
                if (outputToHDFS) {
                    fileSystemStorage.rm(outputLocation);
                }
                if (outputToSpark) {
                    sparkContextStorage.rm(outputLocation);
                }
            }
            // update runtime and return the newly computed graph
            finalMemory.setRuntime(System.currentTimeMillis() - startTime);
            // clear properties that should not be propagated in an OLAP chain
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_GRAPH_FILTER);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            return new DefaultComputerResult(InputOutputHelper.getOutputGraph(graphComputerConfiguration, this.resultGraph, this.persist), finalMemory.asImmutable());
        } catch (Exception e) {
            // So it throws the same exception as tinker does
            throw new RuntimeException(e);
        }
    });
    computerService.shutdown();
    return result;
}

Also used : InputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.InputRDD) PersistedInputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.PersistedInputRDD) TraversalInterruptedException(org.apache.tinkerpop.gremlin.process.traversal.util.TraversalInterruptedException) GryoSerializer(org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer) FileSystem(org.apache.hadoop.fs.FileSystem) GraphFilterAware(org.apache.tinkerpop.gremlin.hadoop.structure.io.GraphFilterAware) GraphComputer(org.apache.tinkerpop.gremlin.process.computer.GraphComputer) LoggerFactory(org.slf4j.LoggerFactory) SparkContextStorage(org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorage) Future(java.util.concurrent.Future) Partitioner(org.apache.spark.Partitioner) StorageLevel(org.apache.spark.storage.StorageLevel) Constants(org.apache.tinkerpop.gremlin.hadoop.Constants) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) ThreadFactory(java.util.concurrent.ThreadFactory) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) InputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.InputRDD) HadoopConfiguration(org.apache.tinkerpop.gremlin.hadoop.structure.HadoopConfiguration) OutputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.OutputRDD) HashPartitioner(org.apache.spark.HashPartitioner) Set(java.util.Set) BasicThreadFactory(org.apache.commons.lang3.concurrent.BasicThreadFactory) Executors(java.util.concurrent.Executors) SparkSingleIterationStrategy(org.apache.tinkerpop.gremlin.spark.process.computer.traversal.strategy.optimization.SparkSingleIterationStrategy) Memory(org.apache.tinkerpop.gremlin.process.computer.Memory) OutputFormatRDD(org.apache.tinkerpop.gremlin.spark.structure.io.OutputFormatRDD) InputFormatRDD(org.apache.tinkerpop.gremlin.spark.structure.io.InputFormatRDD) MapMemory(org.apache.tinkerpop.gremlin.process.computer.util.MapMemory) FileConfiguration(org.apache.commons.configuration.FileConfiguration) TraversalStrategies(org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategies) TraversalInterruptedException(org.apache.tinkerpop.gremlin.process.traversal.util.TraversalInterruptedException) ConfigurationUtils(org.apache.commons.configuration.ConfigurationUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) ComputerSubmissionHelper(org.apache.tinkerpop.gremlin.hadoop.process.computer.util.ComputerSubmissionHelper) VertexProgram(org.apache.tinkerpop.gremlin.process.computer.VertexProgram) HashSet(java.util.HashSet) VertexWritable(org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) AbstractHadoopGraphComputer(org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer) FileInputFormat(org.apache.hadoop.mapreduce.lib.input.FileInputFormat) ExecutorService(java.util.concurrent.ExecutorService) FileSystemStorage(org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage) ConfUtil(org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil) ViewIncomingPayload(org.apache.tinkerpop.gremlin.spark.process.computer.payload.ViewIncomingPayload) Logger(org.slf4j.Logger) SparkLauncher(org.apache.spark.launcher.SparkLauncher) InputFormat(org.apache.hadoop.mapreduce.InputFormat) InputOutputHelper(org.apache.tinkerpop.gremlin.spark.structure.io.InputOutputHelper) Spark(org.apache.tinkerpop.gremlin.spark.structure.Spark) IOException(java.io.IOException) SparkInterceptorStrategy(org.apache.tinkerpop.gremlin.spark.process.computer.traversal.strategy.optimization.SparkInterceptorStrategy) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) File(java.io.File) PersistedOutputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) Direction(org.apache.tinkerpop.gremlin.structure.Direction) HadoopGraph(org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph) PersistedInputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.PersistedInputRDD) Storage(org.apache.tinkerpop.gremlin.structure.io.Storage) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce) VertexWritable(org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable) Configuration(org.apache.hadoop.conf.Configuration) HadoopConfiguration(org.apache.tinkerpop.gremlin.hadoop.structure.HadoopConfiguration) FileConfiguration(org.apache.commons.configuration.FileConfiguration) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) Memory(org.apache.tinkerpop.gremlin.process.computer.Memory) MapMemory(org.apache.tinkerpop.gremlin.process.computer.util.MapMemory) ViewIncomingPayload(org.apache.tinkerpop.gremlin.spark.process.computer.payload.ViewIncomingPayload) InputFormatRDD(org.apache.tinkerpop.gremlin.spark.structure.io.InputFormatRDD) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce) GraphFilterAware(org.apache.tinkerpop.gremlin.hadoop.structure.io.GraphFilterAware) MapMemory(org.apache.tinkerpop.gremlin.process.computer.util.MapMemory) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Partitioner(org.apache.spark.Partitioner) HashPartitioner(org.apache.spark.HashPartitioner) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) VertexProgram(org.apache.tinkerpop.gremlin.process.computer.VertexProgram) TraversalInterruptedException(org.apache.tinkerpop.gremlin.process.traversal.util.TraversalInterruptedException) IOException(java.io.IOException) OutputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.OutputRDD) PersistedOutputRDD(org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD) SparkContextStorage(org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorage) FileSystemStorage(org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage) Storage(org.apache.tinkerpop.gremlin.structure.io.Storage) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) HashPartitioner(org.apache.spark.HashPartitioner) GryoSerializer(org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer) HadoopConfiguration(org.apache.tinkerpop.gremlin.hadoop.structure.HadoopConfiguration)

Aggregations

MapReduce (org.apache.tinkerpop.gremlin.process.computer.MapReduce)4 DefaultComputerResult (org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult)4 Graph (org.apache.tinkerpop.gremlin.structure.Graph)3 EmptyGraph (org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph)3 Function (com.google.common.base.Function)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 ComputerResult (org.apache.tinkerpop.gremlin.process.computer.ComputerResult)2 Vertex (org.apache.tinkerpop.gremlin.structure.Vertex)2 StandardJanusGraph (org.janusgraph.graphdb.database.StandardJanusGraph)2 TitanException (com.thinkaurelius.titan.core.TitanException)1 TitanManagement (com.thinkaurelius.titan.core.schema.TitanManagement)1 ScanMetrics (com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanMetrics)1 StandardScanner (com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.StandardScanner)1 StandardTitanGraph (com.thinkaurelius.titan.graphdb.database.StandardTitanGraph)1 WorkerPool (com.thinkaurelius.titan.graphdb.util.WorkerPool)1 File (java.io.File)1