Search in sources :

Example 1 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project titan by thinkaurelius.

the class FulgoraGraphComputer method submit.

/**
 * Validates the configured job and then runs it asynchronously.
 *
 * <p>The synchronous part rejects a second submission, rejects a computer that has neither a
 * vertex program nor any map/reduce job, resolves the persist and result-graph modes, and
 * creates the {@code FulgoraMemory}. The asynchronous part (executed via
 * {@link CompletableFuture#supplyAsync}) runs, in order:
 * <ol>
 *   <li>the vertex program, iteration by iteration, until {@code terminate} returns true;</li>
 *   <li>the map phase of all map/reduce jobs, followed by their reduce phases;</li>
 *   <li>the optional write-back of computed vertex properties, depending on
 *       {@code persistMode} and {@code resultGraphMode}.</li>
 * </ol>
 *
 * <p>Failures inside the asynchronous part are raised as {@code TitanException} from within
 * the supplied task; configuration errors are thrown directly from this method.
 *
 * @return a future yielding a {@code DefaultComputerResult} built from the result graph and
 *         this computer's memory
 */
@Override
public Future<ComputerResult> submit() {
    // a computer instance may only be submitted once
    if (executed)
        throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
    else
        executed = true;
    // it is not possible to execute a computer if it has neither a vertex program nor mapreducers
    if (null == vertexProgram && mapReduces.isEmpty())
        throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
    // it is possible to run mapreducers without a vertex program
    if (null != vertexProgram) {
        GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
        // the vertex program's own map reducers are executed together with the user-supplied ones
        this.mapReduces.addAll(this.vertexProgram.getMapReducers());
    }
    // if the user didn't set desired persistence/resultgraph, then get from vertex program or else, no persistence
    this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persistMode));
    this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraphMode));
    // determine the legality of the persistence and result graph options
    if (!this.features().supportsResultGraphPersistCombination(this.resultGraphMode, this.persistMode))
        throw GraphComputer.Exceptions.resultGraphPersistCombinationNotSupported(this.resultGraphMode, this.persistMode);
    memory = new FulgoraMemory(vertexProgram, mapReduces);
    // everything below runs asynchronously in the supplied task
    return CompletableFuture.<ComputerResult>supplyAsync(() -> {
        // start timestamp, used at the end to record the total runtime in memory
        final long time = System.currentTimeMillis();
        if (null != vertexProgram) {
            // ##### Execute vertex program
            vertexMemory = new FulgoraVertexMemory(expectedNumVertices, graph.getIDManager(), vertexProgram);
            // execute the vertex program
            vertexProgram.setup(memory);
            memory.completeSubRound();
            // unbounded loop: the only exit is the break once terminate(...) returns true (or an exception)
            for (int iteration = 1; ; iteration++) {
                vertexMemory.nextIteration(vertexProgram.getMessageScopes(memory));
                // one scan job per iteration, identified by "<name>#<iteration>"
                jobId = name + "#" + iteration;
                VertexProgramScanJob.Executor job = VertexProgramScanJob.getVertexProgramScanJob(graph, memory, vertexMemory, vertexProgram);
                StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
                scanBuilder.setJobId(jobId);
                scanBuilder.setNumProcessingThreads(numThreads);
                scanBuilder.setWorkBlockSize(readBatchSize);
                scanBuilder.setJob(job);
                PartitionedVertexProgramExecutor pvpe = new PartitionedVertexProgramExecutor(graph, memory, vertexMemory, vertexProgram);
                try {
                    //Iterates over all vertices and computes the vertex program on all non-partitioned vertices. For partitioned ones, the data is aggregated
                    ScanMetrics jobResult = scanBuilder.execute().get();
                    long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
                    if (failures > 0) {
                        throw new TitanException("Failed to process [" + failures + "] vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                    }
                    //Runs the vertex program on all aggregated, partitioned vertices.
                    pvpe.run(numThreads, jobResult);
                    failures = jobResult.getCustom(PartitionedVertexProgramExecutor.PARTITION_VERTEX_POSTFAIL);
                    if (failures > 0) {
                        throw new TitanException("Failed to process [" + failures + "] partitioned vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
                    }
                } catch (Exception e) {
                    // NOTE(review): this also catches the TitanExceptions thrown just above and wraps them
                    // in another TitanException; an interruption of get() is wrapped the same way - confirm intended
                    throw new TitanException(e);
                }
                vertexMemory.completeIteration();
                memory.completeSubRound();
                try {
                    if (this.vertexProgram.terminate(this.memory)) {
                        break;
                    }
                } finally {
                    // runs on both paths, i.e. the iteration counter is also incremented when terminating
                    memory.incrIteration();
                    memory.completeSubRound();
                }
            }
        }
        // ##### Execute mapreduce jobs
        // Collect map jobs
        Map<MapReduce, FulgoraMapEmitter> mapJobs = new HashMap<>(mapReduces.size());
        for (MapReduce mapReduce : mapReduces) {
            if (mapReduce.doStage(MapReduce.Stage.MAP)) {
                // the emitter is told whether a reduce stage will follow
                FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(mapReduce.doStage(MapReduce.Stage.REDUCE));
                mapJobs.put(mapReduce, mapEmitter);
            }
        }
        // Execute map jobs
        // NOTE(review): the map scan below runs even when mapJobs is empty, and vertexMemory is only
        // assigned in the vertex-program branch above - confirm VertexMapJob handles both cases
        jobId = name + "#map";
        VertexMapJob.Executor job = VertexMapJob.getVertexMapJob(graph, vertexMemory, mapJobs);
        StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
        scanBuilder.setJobId(jobId);
        scanBuilder.setNumProcessingThreads(numThreads);
        scanBuilder.setWorkBlockSize(readBatchSize);
        scanBuilder.setJob(job);
        try {
            ScanMetrics jobResult = scanBuilder.execute().get();
            // two failure counters are checked: per-vertex scan failures and individual map job failures
            long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
            if (failures > 0) {
                throw new TitanException("Failed to process [" + failures + "] vertices in map phase. Computer is aborting.");
            }
            failures = jobResult.getCustom(VertexMapJob.MAP_JOB_FAILURE);
            if (failures > 0) {
                throw new TitanException("Failed to process [" + failures + "] individual map jobs. Computer is aborting.");
            }
        } catch (Exception e) {
            // NOTE(review): as above, TitanExceptions thrown inside the try are re-wrapped here
            throw new TitanException(e);
        }
        // Execute reduce phase and add to memory
        for (Map.Entry<MapReduce, FulgoraMapEmitter> mapJob : mapJobs.entrySet()) {
            FulgoraMapEmitter<?, ?> mapEmitter = mapJob.getValue();
            MapReduce mapReduce = mapJob.getKey();
            // sort results if a map output sort is defined
            mapEmitter.complete(mapReduce);
            if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
                final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>();
                // presumably closing the pool waits for the submitted tasks to finish - TODO confirm in WorkerPool
                try (WorkerPool workers = new WorkerPool(numThreads)) {
                    workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE));
                    // one reduce task per key of the map output
                    for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) {
                        workers.submit(() -> mapReduce.reduce(queueEntry.getKey(), ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter));
                    }
                    workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE));
                } catch (Exception e) {
                    throw new TitanException("Exception while executing reduce phase", e);
                }
                //                    mapEmitter.reduceMap.entrySet().parallelStream().forEach(entry -> mapReduce.reduce(entry.getKey(), entry.getValue().iterator(), reduceEmitter));
                // sort results if a reduce output sort is defined
                reduceEmitter.complete(mapReduce);
                mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator());
            } else {
                // map-only job: the map output itself is the result
                mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());
            }
        }
        // #### Write mutated properties back into graph
        // default result graph is the original graph; replaced below for ResultGraph.NEW
        Graph resultgraph = graph;
        if (persistMode == Persist.NOTHING && resultGraphMode == ResultGraph.NEW) {
            resultgraph = EmptyGraph.instance();
        } else if (persistMode != Persist.NOTHING && vertexProgram != null && !vertexProgram.getElementComputeKeys().isEmpty()) {
            //First, create property keys in graph if they don't already exist
            TitanManagement mgmt = graph.openManagement();
            try {
                for (String key : vertexProgram.getElementComputeKeys()) {
                    // only the warning is conditional; getOrCreatePropertyKey is called for every key
                    if (!mgmt.containsPropertyKey(key))
                        log.warn("Property key [{}] is not part of the schema and will be created. It is advised to initialize all keys.", key);
                    mgmt.getOrCreatePropertyKey(key);
                }
                mgmt.commit();
            } finally {
                // roll back if the management transaction is still open, e.g. because commit was not reached
                if (mgmt != null && mgmt.isOpen())
                    mgmt.rollback();
            }
            //TODO: Filter based on VertexProgram
            // strip the keys listed in NON_PERSISTING_KEYS from every vertex's property map
            Map<Long, Map<String, Object>> mutatedProperties = Maps.transformValues(vertexMemory.getMutableVertexProperties(), new Function<Map<String, Object>, Map<String, Object>>() {

                @Nullable
                @Override
                public Map<String, Object> apply(@Nullable Map<String, Object> o) {
                    return Maps.filterKeys(o, s -> !NON_PERSISTING_KEYS.contains(s));
                }
            });
            if (resultGraphMode == ResultGraph.ORIGINAL) {
                // shared failure counter handed to every VertexPropertyWriter
                AtomicInteger failures = new AtomicInteger(0);
                try (WorkerPool workers = new WorkerPool(numThreads)) {
                    // the divisor is non-zero: this branch requires a non-empty set of element compute keys
                    List<Map.Entry<Long, Map<String, Object>>> subset = new ArrayList<>(writeBatchSize / vertexProgram.getElementComputeKeys().size());
                    // number of properties (not vertices) collected in the current batch
                    int currentSize = 0;
                    for (Map.Entry<Long, Map<String, Object>> entry : mutatedProperties.entrySet()) {
                        subset.add(entry);
                        currentSize += entry.getValue().size();
                        if (currentSize >= writeBatchSize) {
                            workers.submit(new VertexPropertyWriter(subset, failures));
                            // start a fresh list so the batch handed to the writer is not modified afterwards
                            subset = new ArrayList<>(subset.size());
                            currentSize = 0;
                        }
                    }
                    // flush the last, partially filled batch
                    if (!subset.isEmpty())
                        workers.submit(new VertexPropertyWriter(subset, failures));
                } catch (Exception e) {
                    throw new TitanException("Exception while attempting to persist result into graph", e);
                }
                if (failures.get() > 0)
                    throw new TitanException("Could not persist program results to graph. Check log for details.");
            } else if (resultGraphMode == ResultGraph.NEW) {
                // write the properties inside a new transaction, which is returned as the result graph;
                // the transaction is not committed here
                resultgraph = graph.newTransaction();
                for (Map.Entry<Long, Map<String, Object>> vprop : mutatedProperties.entrySet()) {
                    // NOTE(review): next() is called without hasNext() - presumably every id in vertex memory
                    // still resolves to a vertex; verify
                    Vertex v = resultgraph.vertices(vprop.getKey()).next();
                    for (Map.Entry<String, Object> prop : vprop.getValue().entrySet()) {
                        v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
                    }
                }
            }
        }
        // update runtime and return the newly computed graph
        this.memory.setRuntime(System.currentTimeMillis() - time);
        this.memory.complete();
        return new DefaultComputerResult(resultgraph, this.memory);
    });
}
Also used : Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ScanMetrics(com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanMetrics) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce) Function(com.google.common.base.Function) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) ArrayList(java.util.ArrayList) List(java.util.List) TitanException(com.thinkaurelius.titan.core.TitanException) WorkerPool(com.thinkaurelius.titan.graphdb.util.WorkerPool) Graph(org.apache.tinkerpop.gremlin.structure.Graph) EmptyGraph(org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph) StandardTitanGraph(com.thinkaurelius.titan.graphdb.database.StandardTitanGraph) StandardScanner(com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.StandardScanner) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DefaultComputerResult(org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult) TitanException(com.thinkaurelius.titan.core.TitanException) HashMap(java.util.HashMap) Map(java.util.Map) TitanManagement(com.thinkaurelius.titan.core.schema.TitanManagement) Nullable(javax.annotation.Nullable)

Example 2 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project grakn by graknlabs.

the class TinkerComputeQueryRunner method run.

/**
 * Runs a count query: counts the instances of the selected types using
 * {@code CountVertexProgram} and {@code CountMapReduceWithAttribute}.
 *
 * @param query the count query to execute
 * @return the compute job producing the total count
 */
public ComputeJob<Long> run(CountQuery query) {
    return runCompute(query, tinkerComputeQuery -> {
        // Short-circuit: without any instance of the selected types the count is zero.
        if (!tinkerComputeQuery.selectedTypesHaveInstance()) {
            LOG.debug("Count = 0");
            return 0L;
        }
        final Set<LabelId> countedTypeIds = convertLabelsToIds(tinkerComputeQuery.subLabels());
        // The computation runs over the role-player labels extended by the counted types.
        final Set<LabelId> scopeLabelIds = tinkerComputeQuery.getRolePlayerLabelIds();
        scopeLabelIds.addAll(countedTypeIds);
        final ComputerResult outcome = tinkerComputeQuery.compute(new CountVertexProgram(), new CountMapReduceWithAttribute(), scopeLabelIds, false);
        final Map<Integer, Long> countsById = outcome.memory().get(CountMapReduceWithAttribute.class.getName());
        // Sum only the entries whose id belongs to one of the counted types.
        long total = 0L;
        for (Integer id : countsById.keySet()) {
            if (countedTypeIds.contains(LabelId.of(id))) {
                total += countsById.get(id);
            }
        }
        // The entry stored under the reserved key, if present, is added on top.
        if (countsById.containsKey(GraknMapReduce.RESERVED_TYPE_LABEL_KEY)) {
            total += countsById.get(GraknMapReduce.RESERVED_TYPE_LABEL_KEY);
        }
        LOG.debug("Count = " + total);
        return total;
    });
}
Also used : CountVertexProgram(ai.grakn.graql.internal.analytics.CountVertexProgram) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) LabelId(ai.grakn.concept.LabelId) CountMapReduceWithAttribute(ai.grakn.graql.internal.analytics.CountMapReduceWithAttribute)

Example 3 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project grakn by graknlabs.

the class TinkerComputeQueryRunner method run.

/**
 * Runs a median query over the selected resource types using {@code MedianVertexProgram}.
 *
 * @param query the median query to execute
 * @return the compute job producing the median, or an empty optional when the selected
 *         resource types have no instance
 */
public ComputeJob<Optional<Number>> run(MedianQuery query) {
    return runStatistics(query, tinkerComputeQuery -> {
        // The data type is resolved before the instance check, exactly as before.
        final AttributeType.DataType<?> resourceDataType = tinkerComputeQuery.getDataTypeOfSelectedResourceTypes();
        if (!tinkerComputeQuery.selectedResourceTypesHaveInstance()) {
            return Optional.empty();
        }
        final Set<LabelId> scopeLabelIds = convertLabelsToIds(tinkerComputeQuery.getCombinedSubTypes());
        final Set<LabelId> resourceLabelIds = convertLabelsToIds(tinkerComputeQuery.statisticsResourceLabels());
        final MedianVertexProgram medianProgram = new MedianVertexProgram(resourceLabelIds, resourceDataType);
        // No map/reduce job is needed; the result is read from memory under MedianVertexProgram.MEDIAN.
        final ComputerResult outcome = tinkerComputeQuery.compute(medianProgram, null, scopeLabelIds);
        final Number median = outcome.memory().get(MedianVertexProgram.MEDIAN);
        LOG.debug("Median = " + median);
        return Optional.of(median);
    });
}
Also used : AttributeType(ai.grakn.concept.AttributeType) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) LabelId(ai.grakn.concept.LabelId) MedianVertexProgram(ai.grakn.graql.internal.analytics.MedianVertexProgram)

Example 4 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project grakn by graknlabs.

the class TinkerComputeQueryRunner method run.

/**
 * Runs a degree query using {@code DegreeVertexProgram} and
 * {@code DegreeDistributionMapReduce}.
 *
 * @param query the degree query to execute
 * @return the compute job producing the degree distribution read from memory, or an empty map
 *         when the selected types have no instance
 */
public ComputeJob<Map<Long, Set<String>>> run(DegreeQuery query) {
    return runCompute(query, tinkerComputeQuery -> {
        // Target labels are resolved (and validated) before the empty-result shortcut below,
        // so an unknown label is still reported.
        final Set<Label> targetLabels;
        if (!query.targetLabels().isEmpty()) {
            targetLabels = query.targetLabels().stream().flatMap(label -> {
                Type concept = tx.getSchemaConcept(label);
                if (concept == null) {
                    throw GraqlQueryException.labelNotFound(label);
                }
                return concept.subs();
            }).map(SchemaConcept::getLabel).collect(Collectors.toSet());
        } else {
            // No explicit targets: fall back to the query's sub labels.
            targetLabels = tinkerComputeQuery.subLabels();
        }
        final Set<Label> scopeLabels = Sets.union(tinkerComputeQuery.subLabels(), targetLabels);
        if (!tinkerComputeQuery.selectedTypesHaveInstance()) {
            return Collections.emptyMap();
        }
        final Set<LabelId> scopeLabelIds = convertLabelsToIds(scopeLabels);
        final Set<LabelId> targetLabelIds = convertLabelsToIds(targetLabels);
        final ComputerResult outcome = tinkerComputeQuery.compute(new DegreeVertexProgram(targetLabelIds), new DegreeDistributionMapReduce(targetLabelIds, DegreeVertexProgram.DEGREE), scopeLabelIds);
        return outcome.memory().get(DegreeDistributionMapReduce.class.getName());
    });
}
Also used : MaxMapReduce(ai.grakn.graql.internal.analytics.MaxMapReduce) LoggerFactory(org.slf4j.LoggerFactory) ConnectedComponentQuery(ai.grakn.graql.analytics.ConnectedComponentQuery) Type(ai.grakn.concept.Type) KCoreVertexProgram(ai.grakn.graql.internal.analytics.KCoreVertexProgram) CountMapReduceWithAttribute(ai.grakn.graql.internal.analytics.CountMapReduceWithAttribute) Label(ai.grakn.concept.Label) LabelId(ai.grakn.concept.LabelId) Map(java.util.Map) CorenessVertexProgram(ai.grakn.graql.internal.analytics.CorenessVertexProgram) ComputeQuery(ai.grakn.graql.ComputeQuery) ConceptId(ai.grakn.concept.ConceptId) MinQuery(ai.grakn.graql.analytics.MinQuery) CorenessQuery(ai.grakn.graql.analytics.CorenessQuery) MaxQuery(ai.grakn.graql.analytics.MaxQuery) ConnectedComponentVertexProgram(ai.grakn.graql.internal.analytics.ConnectedComponentVertexProgram) Set(java.util.Set) DegreeStatisticsVertexProgram(ai.grakn.graql.internal.analytics.DegreeStatisticsVertexProgram) GraknMapReduce(ai.grakn.graql.internal.analytics.GraknMapReduce) MeanMapReduce(ai.grakn.graql.internal.analytics.MeanMapReduce) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) SumMapReduce(ai.grakn.graql.internal.analytics.SumMapReduce) Serializable(java.io.Serializable) List(java.util.List) Memory(org.apache.tinkerpop.gremlin.process.computer.Memory) MedianVertexProgram(ai.grakn.graql.internal.analytics.MedianVertexProgram) Optional(java.util.Optional) DegreeDistributionMapReduce(ai.grakn.graql.internal.analytics.DegreeDistributionMapReduce) DegreeQuery(ai.grakn.graql.analytics.DegreeQuery) ComputeJob(ai.grakn.ComputeJob) StdQuery(ai.grakn.graql.analytics.StdQuery) Concept(ai.grakn.concept.Concept) SchemaConcept(ai.grakn.concept.SchemaConcept) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StatisticsQuery(ai.grakn.graql.StatisticsQuery) ConnectedComponentsVertexProgram(ai.grakn.graql.internal.analytics.ConnectedComponentsVertexProgram) 
ClusterSizeMapReduce(ai.grakn.graql.internal.analytics.ClusterSizeMapReduce) PathQuery(ai.grakn.graql.analytics.PathQuery) AttributeType(ai.grakn.concept.AttributeType) NoResultException(ai.grakn.graql.internal.analytics.NoResultException) GraknComputer(ai.grakn.GraknComputer) MeanQuery(ai.grakn.graql.analytics.MeanQuery) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) GraknVertexProgram(ai.grakn.graql.internal.analytics.GraknVertexProgram) SumQuery(ai.grakn.graql.analytics.SumQuery) GraqlQueryException(ai.grakn.exception.GraqlQueryException) DegreeVertexProgram(ai.grakn.graql.internal.analytics.DegreeVertexProgram) Logger(org.slf4j.Logger) ShortestPathVertexProgram(ai.grakn.graql.internal.analytics.ShortestPathVertexProgram) MedianQuery(ai.grakn.graql.analytics.MedianQuery) MinMapReduce(ai.grakn.graql.internal.analytics.MinMapReduce) CountVertexProgram(ai.grakn.graql.internal.analytics.CountVertexProgram) ClusterMemberMapReduce(ai.grakn.graql.internal.analytics.ClusterMemberMapReduce) EmbeddedGraknTx(ai.grakn.kb.internal.EmbeddedGraknTx) CountQuery(ai.grakn.graql.analytics.CountQuery) PathsQuery(ai.grakn.graql.analytics.PathsQuery) KCoreQuery(ai.grakn.graql.analytics.KCoreQuery) MapReduce(org.apache.tinkerpop.gremlin.process.computer.MapReduce) StdMapReduce(ai.grakn.graql.internal.analytics.StdMapReduce) Collections(java.util.Collections) Type(ai.grakn.concept.Type) AttributeType(ai.grakn.concept.AttributeType) DegreeDistributionMapReduce(ai.grakn.graql.internal.analytics.DegreeDistributionMapReduce) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult) Label(ai.grakn.concept.Label) DegreeVertexProgram(ai.grakn.graql.internal.analytics.DegreeVertexProgram) SchemaConcept(ai.grakn.concept.SchemaConcept) LabelId(ai.grakn.concept.LabelId)

Example 5 with ComputerResult

use of org.apache.tinkerpop.gremlin.process.computer.ComputerResult in project janusgraph by JanusGraph.

the class JanusGraphPartitionGraphTest method testVertexPartitionOlap.

/**
 * Checks that an OLAP job (degree counting via {@code FulgoraGraphComputer}) yields the
 * expected degrees for both partitioned and non-partitioned vertices.
 *
 * @param commitMode the commit mode passed through to {@code setupGroupClusters}
 * @throws Exception if graph setup or the computer job fails
 */
private void testVertexPartitionOlap(CommitMode commitMode) throws Exception {
    final Object[] settings = { option(GraphDatabaseConfiguration.IDS_FLUSH), false };
    clopen(settings);
    // Degrees of the group clusters to create (an earlier variant used {10,20,30}).
    final int[] groupDegrees = { 2 };
    final int numVertices = setupGroupClusters(groupDegrees, commitMode);
    // Expected degree per group vertex id, looked up through the "groupid" property.
    final Map<Long, Integer> expectedDegrees = new HashMap<>(groupDegrees.length);
    int groupIndex = 0;
    for (int groupDegree : groupDegrees) {
        expectedDegrees.put(getOnlyVertex(tx.query().has("groupid", "group" + groupIndex)).longId(), groupDegree);
        groupIndex++;
    }
    clopen(settings);
    // Test OLAP works with partitioned vertices
    final JanusGraphComputer olapComputer = graph.compute(FulgoraGraphComputer.class);
    olapComputer.resultMode(JanusGraphComputer.ResultMode.NONE);
    olapComputer.workers(1);
    olapComputer.program(new OLAPTest.DegreeCounter());
    olapComputer.mapReduce(new OLAPTest.DegreeMapper());
    final ComputerResult outcome = olapComputer.submit().get();
    assertTrue(outcome.memory().exists(OLAPTest.DegreeMapper.DEGREE_RESULT));
    final Map<Long, Integer> actualDegrees = outcome.memory().get(OLAPTest.DegreeMapper.DEGREE_RESULT);
    assertNotNull(actualDegrees);
    assertEquals(numVertices, actualDegrees.size());
    final IDManager idManager = graph.getIDManager();
    for (Map.Entry<Long, Integer> degreeEntry : actualDegrees.entrySet()) {
        final long vertexId = degreeEntry.getKey();
        final Integer actualDegree = degreeEntry.getValue();
        // Non-partitioned vertices are expected to have degree 1.
        if (!idManager.isPartitionedVertex(vertexId)) {
            assertEquals(1, (long) actualDegree);
            continue;
        }
        // Partitioned vertices must match the degree recorded for their group.
        assertEquals(expectedDegrees.get(vertexId), actualDegree);
    }
}
Also used : IDManager(org.janusgraph.graphdb.idmanagement.IDManager) OLAPTest(org.janusgraph.olap.OLAPTest) ComputerResult(org.apache.tinkerpop.gremlin.process.computer.ComputerResult)

Aggregations

ComputerResult (org.apache.tinkerpop.gremlin.process.computer.ComputerResult)11 LabelId (ai.grakn.concept.LabelId)7 AttributeType (ai.grakn.concept.AttributeType)4 List (java.util.List)4 MapReduce (org.apache.tinkerpop.gremlin.process.computer.MapReduce)4 Concept (ai.grakn.concept.Concept)3 ConceptId (ai.grakn.concept.ConceptId)3 SchemaConcept (ai.grakn.concept.SchemaConcept)3 ClusterMemberMapReduce (ai.grakn.graql.internal.analytics.ClusterMemberMapReduce)3 DegreeStatisticsVertexProgram (ai.grakn.graql.internal.analytics.DegreeStatisticsVertexProgram)3 NoResultException (ai.grakn.graql.internal.analytics.NoResultException)3 ComputeJob (ai.grakn.ComputeJob)2 GraknComputer (ai.grakn.GraknComputer)2 Label (ai.grakn.concept.Label)2 Type (ai.grakn.concept.Type)2 GraqlQueryException (ai.grakn.exception.GraqlQueryException)2 ComputeQuery (ai.grakn.graql.ComputeQuery)2 StatisticsQuery (ai.grakn.graql.StatisticsQuery)2 ConnectedComponentQuery (ai.grakn.graql.analytics.ConnectedComponentQuery)2 CorenessQuery (ai.grakn.graql.analytics.CorenessQuery)2