Search in sources :

Example 21 with GraphSchema

use of com.alibaba.maxgraph.compiler.api.schema.GraphSchema in project GraphScope by alibaba.

the class RemoteProxy method getInEdges.

/**
 * Requests the incoming edges of every vertex in {@code v} from the store service.
 *
 * <p>One streaming request is issued per vertex when no label is given, otherwise one
 * request per (vertex, label) pair. All requests use the snapshot id obtained from a
 * single schema/snapshot lookup at the start of the call.
 *
 * @param v     vertices whose incoming edges are requested
 * @param label optional edge labels; when empty, edges are requested without a type filter
 * @return an iterator that chains all store responses and maps them through
 *         {@link EdgeResponseFunction}
 */
public Iterator<Edge> getInEdges(Set<Vertex> v, String... label) {
    List<Iterator<StoreApi.GraphEdgeReponse>> responses = Lists.newArrayList();
    Pair<GraphSchema, Long> snapshot = schemaFetcher.getSchemaSnapshotPair();
    GraphSchema schema = snapshot.getLeft();
    long snapshotId = snapshot.getRight();
    for (Vertex target : v) {
        if (label.length == 0) {
            // No label filter: a single request covering every edge type of this vertex.
            StoreApi.GetInEdgesRequest.Builder unfiltered = StoreApi.GetInEdgesRequest.newBuilder()
                    .setSnapshotId(snapshotId)
                    .setDstId(target.id.id());
            responses.add(stub.withDeadlineAfter(timeout, TimeUnit.SECONDS).getInEdges(unfiltered.build()));
            continue;
        }
        for (String edgeLabel : label) {
            try {
                int typeId = schema.getElement(edgeLabel).getLabelId();
                StoreApi.GetInEdgesRequest.Builder filtered = StoreApi.GetInEdgesRequest.newBuilder()
                        .setSnapshotId(snapshotId)
                        .setDstId(target.id.id())
                        .setTypeId(typeId);
                responses.add(stub.withDeadlineAfter(timeout, TimeUnit.SECONDS).getInEdges(filtered.build()));
            } catch (Exception ignored) {
                // Any failure while resolving or requesting this label is ignored;
                // the label then simply contributes no response iterator.
            }
        }
    }
    return new IteratorList<>(responses, new EdgeResponseFunction(schema, this.graph));
}
Also used : Vertex(com.alibaba.maxgraph.structure.Vertex) EdgeResponseFunction(com.alibaba.maxgraph.iterator.function.EdgeResponseFunction) StoreApi(com.alibaba.maxgraph.proto.StoreApi) GraphSchema(com.alibaba.maxgraph.compiler.api.schema.GraphSchema) GraphElement(com.alibaba.maxgraph.compiler.api.schema.GraphElement) IteratorList(com.alibaba.maxgraph.iterator.IteratorList)

Example 22 with GraphSchema

use of com.alibaba.maxgraph.compiler.api.schema.GraphSchema in project GraphScope by alibaba.

the class OfflineBuild method main.

/**
 * Entry point of the offline graph build job.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>load the job properties from the file named by {@code args[0]};</li>
 *   <li>ask the graph service to prepare the data load and obtain the schema;</li>
 *   <li>configure and run the MapReduce job that builds the data files;</li>
 *   <li>write a {@code META} file (endpoint, schema, mappings, data path) into the output directory;</li>
 *   <li>optionally ingest the data and commit the load when {@code LOAD_AFTER_BUILD} is true.</li>
 * </ol>
 *
 * <p>The JVM exits with status 1 when the MapReduce job does not complete successfully.
 *
 * @param args {@code args[0]} is the path of the properties file
 * @throws IllegalArgumentException if no properties file path is supplied
 * @throws IOException              on failure reading the properties or writing the META file
 * @throws ClassNotFoundException   propagated from the MapReduce job submission
 * @throws InterruptedException     if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length < 1) {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        throw new IllegalArgumentException("usage: OfflineBuild <properties-file>");
    }
    String propertiesFile = args[0];
    Properties properties = new Properties();
    try (InputStream is = new FileInputStream(propertiesFile)) {
        properties.load(is);
    }
    String inputPath = properties.getProperty(INPUT_PATH);
    String outputPath = properties.getProperty(OUTPUT_PATH);
    String columnMappingConfigStr = properties.getProperty(COLUMN_MAPPING_CONFIG);
    String graphEndpoint = properties.getProperty(GRAPH_ENDPOINT);
    MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    ObjectMapper objectMapper = new ObjectMapper();
    Map<String, FileColumnMapping> columnMappingConfig = objectMapper.readValue(columnMappingConfigStr, new TypeReference<Map<String, FileColumnMapping>>() {
    });

    // Announce every label that is about to be loaded; the service answers with the schema to build against.
    List<DataLoadTarget> targets = new ArrayList<>();
    for (FileColumnMapping fileColumnMapping : columnMappingConfig.values()) {
        targets.add(DataLoadTarget.newBuilder().setLabel(fileColumnMapping.getLabel()).setSrcLabel(fileColumnMapping.getSrcLabel()).setDstLabel(fileColumnMapping.getDstLabel()).build());
    }
    GraphSchema schema = client.prepareDataLoad(targets);
    String schemaJson = GraphSchemaMapper.parseFromSchema(schema).toJsonString();
    int partitionNum = client.getPartitionNum();

    // Resolve the per-file column mappings against the schema returned by the service.
    Map<String, ColumnMappingInfo> columnMappingInfos = new HashMap<>();
    for (Map.Entry<String, FileColumnMapping> entry : columnMappingConfig.entrySet()) {
        columnMappingInfos.put(entry.getKey(), entry.getValue().toColumnMappingInfo(schema));
    }

    String ldbcCustomize = properties.getProperty(LDBC_CUSTOMIZE, "true");
    // SPLIT_SIZE is given in MiB; convert to bytes.
    long splitSize = Long.parseLong(properties.getProperty(SPLIT_SIZE, "256")) * 1024 * 1024;
    boolean loadAfterBuild = properties.getProperty(LOAD_AFTER_BUILD, "false").equalsIgnoreCase("true");
    boolean skipHeader = properties.getProperty(SKIP_HEADER, "true").equalsIgnoreCase("true");

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, splitSize);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, splitSize);
    conf.setStrings(SCHEMA_JSON, schemaJson);
    String mappings = objectMapper.writeValueAsString(columnMappingInfos);
    conf.setStrings(COLUMN_MAPPINGS, mappings);
    conf.setBoolean(LDBC_CUSTOMIZE, ldbcCustomize.equalsIgnoreCase("true"));
    conf.set(SEPARATOR, properties.getProperty(SEPARATOR, "\\|"));
    conf.setBoolean(SKIP_HEADER, skipHeader);

    Job job = Job.getInstance(conf, "build graph data");
    job.setJarByClass(OfflineBuild.class);
    job.setMapperClass(DataBuildMapper.class);
    job.setPartitionerClass(DataBuildPartitioner.class);
    job.setReducerClass(DataBuildReducer.class);
    // One reduce task per graph partition reported by the service.
    job.setNumReduceTasks(partitionNum);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(CombineTextInputFormat.class);
    CombineTextInputFormat.setMaxInputSplitSize(job, splitSize);
    LazyOutputFormat.setOutputFormatClass(job, SstOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    Path outputDir = new Path(outputPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }

    FileSystem fs = outputDir.getFileSystem(job.getConfiguration());
    String dataPath = fs.makeQualified(outputDir).toString();
    Map<String, String> outputMeta = new HashMap<>();
    outputMeta.put("endpoint", graphEndpoint);
    outputMeta.put("schema", schemaJson);
    outputMeta.put("mappings", mappings);
    outputMeta.put("datapath", dataPath);
    // try-with-resources guarantees the META stream is closed even if the write fails.
    try (FSDataOutputStream os = fs.create(new Path(outputDir, "META"))) {
        os.writeUTF(objectMapper.writeValueAsString(outputMeta));
        os.flush();
    }

    if (loadAfterBuild) {
        logger.info("start ingesting data");
        client.ingestData(dataPath);
        logger.info("commit bulk load");
        Map<Long, DataLoadTarget> tableToTarget = new HashMap<>();
        for (ColumnMappingInfo columnMappingInfo : columnMappingInfos.values()) {
            long tableId = columnMappingInfo.getTableId();
            int labelId = columnMappingInfo.getLabelId();
            GraphElement graphElement = schema.getElement(labelId);
            String label = graphElement.getLabel();
            DataLoadTarget.Builder builder = DataLoadTarget.newBuilder();
            builder.setLabel(label);
            // Source/destination labels are only set for edge elements.
            if (graphElement instanceof GraphEdge) {
                builder.setSrcLabel(schema.getElement(columnMappingInfo.getSrcLabelId()).getLabel());
                builder.setDstLabel(schema.getElement(columnMappingInfo.getDstLabelId()).getLabel());
            }
            tableToTarget.put(tableId, builder.build());
        }
        client.commitDataLoad(tableToTarget);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) GraphSchema(com.alibaba.maxgraph.compiler.api.schema.GraphSchema) DataLoadTarget(com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget) FileSystem(org.apache.hadoop.fs.FileSystem) GraphElement(com.alibaba.maxgraph.compiler.api.schema.GraphElement) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Job(org.apache.hadoop.mapreduce.Job) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Path(org.apache.hadoop.fs.Path) MaxGraphClient(com.alibaba.graphscope.groot.sdk.MaxGraphClient) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileInputStream(java.io.FileInputStream) GraphEdge(com.alibaba.maxgraph.compiler.api.schema.GraphEdge)

Example 23 with GraphSchema

use of com.alibaba.maxgraph.compiler.api.schema.GraphSchema in project GraphScope by alibaba.

the class MixedOpProcessor method processQueryTraversal.

/**
 * Builds the query plan for {@code traversal} against the current schema snapshot, executes it
 * through {@code timelyExecutor} and returns the number of results reported by
 * {@code nettyResultProcessor}.
 *
 * <p>The query status obtained from {@code queryCallbackManager.beforeExecution} is always handed
 * back to {@code afterExecution}, including when plan construction or result-processor setup
 * fails before the query is ever submitted.
 *
 * @param script               the query script text, attached to the query flow and to the log entry
 * @param traversal            a {@link GraphTraversal}; anything else is cast to {@link DfsTraversal}
 * @param timeout              timeout passed to the executor
 * @param queryId              identifier of this query
 * @param timer                stopwatch used for the plan-generated log entry
 * @param remoteRpcConnector   connector handed to the result transform
 * @param remoteRpcProcessor   processor handed to the result transform
 * @param nettyResultProcessor receives the results; configured by this method before execution
 * @return the total result count reported by {@code nettyResultProcessor}
 * @throws RetryGremlinException declared by this method; not thrown directly by the visible code
 * @throws RuntimeException      wrapping any exception raised while executing the query
 */
private Long processQueryTraversal(String script, Object traversal, long timeout, String queryId, Stopwatch timer, RemoteRpcConnector remoteRpcConnector, RemoteRpcProcessor remoteRpcProcessor, AbstractResultProcessor nettyResultProcessor) throws RetryGremlinException {
    Pair<GraphSchema, Long> snapshotSchema;
    QueryStatus queryStatus;
    // Make sure query_status tracking starts at the very moment the snapshotId is observed.
    synchronized (queryCallbackManager) {
        snapshotSchema = this.schemaFetcher.getSchemaSnapshotPair();
        queryStatus = queryCallbackManager.beforeExecution(snapshotSchema.getRight());
    }
    // Everything after beforeExecution runs under this finally so the query status is
    // released even when planning or setup throws (previously only execution was covered).
    try {
        GraphSchema schema = snapshotSchema.getLeft();
        LogicalPlanOptimizer logicalPlanOptimizer = new LogicalPlanOptimizer(new OptimizeConfig(), this.globalPullGraphFlag, schema, snapshotSchema.getRight(), this.lambdaEnableFlag);
        QueryFlowManager queryFlowManager = (traversal instanceof GraphTraversal) ? logicalPlanOptimizer.build(GraphTraversal.class.cast(traversal)) : logicalPlanOptimizer.build(DfsTraversal.class.cast(traversal));
        boolean isLambdaExisted = TraversalHelper.anyStepRecursively(s -> s instanceof LambdaHolder, (Traversal.Admin<?, ?>) traversal);
        queryFlowManager.getQueryFlow().setScript(script).setFrontId(serverId);
        if (this.lambdaEnableFlag && isLambdaExisted) {
            queryFlowManager.getQueryFlow().setLambdaExisted(isLambdaExisted);
        }
        nettyResultProcessor.setResultTransform(new GremlinResultTransform(remoteRpcConnector, remoteRpcProcessor, this.graph, queryFlowManager.getResultValueType(), vertexCacheFlag));
        nettyResultProcessor.setLabelIndexNameList(queryFlowManager.getTreeNodeLabelManager().getUserIndexLabelList());
        nettyResultProcessor.setSchema(schema);
        // Only execution failures are wrapped; planning/setup exceptions above keep their original type.
        try {
            TimelyQuery timelyQuery = new TimelyQuery(queryFlowManager, nettyResultProcessor, this.graph);
            Logging.query(this.graphName, com.alibaba.maxgraph.proto.RoleType.FRONTEND, this.serverId, queryId, QueryType.EXECUTE, QueryEvent.PLAN_GENERATED, timer.elapsed(TimeUnit.NANOSECONDS), null, null, script);
            timelyExecutor.execute(timelyQuery, schema, timeout, queryId);
            Long totalResultNum = nettyResultProcessor.total();
            return totalResultNum;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } finally {
        queryCallbackManager.afterExecution(queryStatus);
    }
}
Also used : TimelyQuery(com.alibaba.maxgraph.compiler.query.TimelyQuery) DfsTraversal(com.alibaba.maxgraph.compiler.dfs.DfsTraversal) LambdaHolder(org.apache.tinkerpop.gremlin.process.traversal.step.LambdaHolder) DfsTraversal(com.alibaba.maxgraph.compiler.dfs.DfsTraversal) PreparedTraversal(com.alibaba.maxgraph.compiler.prepare.PreparedTraversal) GraphTraversal(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal) Traversal(org.apache.tinkerpop.gremlin.process.traversal.Traversal) QueryFlowManager(com.alibaba.maxgraph.compiler.optimizer.QueryFlowManager) QueryStatus(com.alibaba.maxgraph.api.query.QueryStatus) GraphSchema(com.alibaba.maxgraph.compiler.api.schema.GraphSchema) RetryGremlinException(com.alibaba.maxgraph.compiler.exception.RetryGremlinException) OptimizeConfig(com.alibaba.maxgraph.compiler.optimizer.OptimizeConfig) LogicalPlanOptimizer(com.alibaba.maxgraph.compiler.optimizer.LogicalPlanOptimizer) GraphTraversal(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal)

Example 24 with GraphSchema

use of com.alibaba.maxgraph.compiler.api.schema.GraphSchema in project GraphScope by alibaba.

the class MixedOpProcessor method doProcessGraphTraversal.

/**
 * Dispatches an evaluated script result according to its runtime type.
 *
 * <p>{@link GraphTraversal} and {@link DfsTraversal} objects are executed as queries through
 * {@code processQueryTraversal}. Every other supported type is an administrative request or a
 * plain value whose response is written straight to {@code context}.
 *
 * @param script    the script text of the request
 * @param context   request context the response is written to
 * @param traversal the evaluated object to dispatch on; {@code null} is accepted and ignored
 * @param timeout   timeout forwarded to query execution and estimation
 * @param queryId   identifier of this query
 * @param timer     stopwatch forwarded to query execution
 * @return the result count for executed traversals, {@code 0} for every other request type
 * @throws UnsupportedOperationException for {@code PreparedTraversal} and {@code PreparedExecuteParam}
 * @throws IllegalArgumentException      for any other non-null object of an unhandled type
 * @throws Exception                     propagated from the invoked handlers
 */
private Long doProcessGraphTraversal(String script, Context context, Object traversal, long timeout, String queryId, Stopwatch timer) throws Exception {
    if (traversal instanceof GraphTraversal || traversal instanceof DfsTraversal) {
        // A request-level batch size, when present, overrides the configured default.
        final int batchSize = (Integer) context.getRequestMessage().optionalArgs(Tokens.ARGS_BATCH_SIZE).orElse(this.resultIterationBatchSize);
        NettyVertexRpcProcessor vertexRpcProcessor = new NettyVertexRpcProcessor(context, batchSize, false);
        AbstractResultProcessor resultProcessor = new NettyResultProcessor(queryId, script, context, executeConfig.getBatchQuerySize(), batchSize, false);
        return processQueryTraversal(script, traversal, timeout, queryId, timer, this.remoteRpcConnector, vertexRpcProcessor, resultProcessor);
    }

    GraphSchema schema = this.schemaFetcher.getSchemaSnapshotPair().getLeft();
    if (traversal instanceof PreparedTraversal || traversal instanceof PreparedExecuteParam) {
        throw new UnsupportedOperationException();
    } else if (traversal instanceof ShowProcessListQuery) {
        TimelyResultProcessor processListSink = newNettyResultProcessor(queryId, script, context, graph, schema);
        timelyExecutor.showProcessList(processListSink);
    } else if (traversal instanceof CancelDataflow) {
        NettyResultProcessor cancelSink = newNettyResultProcessor(queryId, script, context, graph, schema);
        CancelDataflow cancelRequest = (CancelDataflow) traversal;
        timelyExecutor.cancelDataflow(cancelSink, cancelRequest.queryId);
    } else if (traversal instanceof RecordManager) {
        Object recordResult = processRecordManager((RecordManager) traversal);
        writeResultList(context, Lists.newArrayList(recordResult), ResponseStatusCode.SUCCESS);
    } else if (traversal instanceof EstimateRequest) {
        Object estimateResult = processEstimateManager((EstimateRequest) traversal, timeout);
        writeResultList(context, Lists.newArrayList(estimateResult), ResponseStatusCode.SUCCESS);
    } else if (traversal instanceof StatisticsRequest) {
        String statisticsJson = CostDataStatistics.getInstance().formatJson();
        writeResultList(context, Lists.newArrayList(statisticsJson), ResponseStatusCode.SUCCESS);
    } else if (traversal instanceof ShowPlanPathListRequest) {
        ShowPlanPathListRequest planPathRequest = (ShowPlanPathListRequest) traversal;
        writeResultList(context, Lists.newArrayList(buildCostPathList(planPathRequest.getTraversal())), ResponseStatusCode.SUCCESS);
    } else if (traversal instanceof Element) {
        writeResultList(context, Lists.newArrayList((Element) traversal), ResponseStatusCode.SUCCESS);
    } else if (traversal instanceof List) {
        writeResultList(context, (List) traversal, ResponseStatusCode.SUCCESS);
    } else if (traversal instanceof GraphSchema) {
        writeResultList(context, Lists.newArrayList(((GraphSchema) traversal).formatJson()), ResponseStatusCode.SUCCESS);
    } else if (traversal instanceof String) {
        writeResultList(context, Lists.newArrayList(traversal), ResponseStatusCode.SUCCESS);
    } else if (traversal != null) {
        // Strings were handled by the previous branch, so any remaining non-null value is unsupported.
        throw new IllegalArgumentException(traversal.toString());
    }
    return 0L;
}
Also used : PreparedTraversal(com.alibaba.maxgraph.compiler.prepare.PreparedTraversal) CancelDataflow(com.alibaba.maxgraph.sdkcommon.graph.CancelDataflow) Element(org.apache.tinkerpop.gremlin.structure.Element) GraphElement(com.alibaba.maxgraph.compiler.api.schema.GraphElement) DfsTraversal(com.alibaba.maxgraph.compiler.dfs.DfsTraversal) TimelyResultProcessor(com.alibaba.maxgraph.rpc.TimelyResultProcessor) GraphSchema(com.alibaba.maxgraph.compiler.api.schema.GraphSchema) CostDataStatistics(com.alibaba.maxgraph.compiler.cost.statistics.CostDataStatistics) GraphTraversal(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal) ShowProcessListQuery(com.alibaba.maxgraph.sdkcommon.graph.ShowProcessListQuery) List(java.util.List) ShowPlanPathListRequest(com.alibaba.maxgraph.sdkcommon.graph.ShowPlanPathListRequest) EstimateRequest(com.alibaba.maxgraph.sdkcommon.graph.EstimateRequest) StatisticsRequest(com.alibaba.maxgraph.sdkcommon.graph.StatisticsRequest) RecordManager(com.alibaba.maxgraph.structure.manager.record.RecordManager) PreparedExecuteParam(com.alibaba.maxgraph.compiler.prepare.PreparedExecuteParam)

Example 25 with GraphSchema

use of com.alibaba.maxgraph.compiler.api.schema.GraphSchema in project GraphScope by alibaba.

the class MixedOpProcessor method processEstimateManager.

/**
 * Runs one count-estimation query per vertex label and per edge label of the current schema
 * and stores the resulting counts in the shared {@link CostDataStatistics} instance.
 *
 * <p>The statistics are only updated after every estimation query has completed, so a failing
 * query leaves them untouched.
 *
 * @param request the estimate request (not inspected by this method)
 * @param timeout timeout forwarded to every estimation query
 * @return a fixed success message
 * @throws RetryGremlinException propagated from query processing
 */
private Object processEstimateManager(EstimateRequest request, long timeout) throws RetryGremlinException {
    Stopwatch timer = Stopwatch.createStarted();
    CostDataStatistics statistics = CostDataStatistics.getInstance();
    // A graph created with null arguments, used here only to build the estimation traversals.
    TinkerMaxGraph emptyGraph = new TinkerMaxGraph(null, null, null);
    MaxGraphTraversalSource g = (MaxGraphTraversalSource) emptyGraph.traversal();
    GraphSchema graphSchema = schemaFetcher.getSchemaSnapshotPair().getLeft();

    Map<String, Double> vertexCounts = Maps.newHashMap();
    for (GraphElement vertexType : graphSchema.getVertexList()) {
        GraphTraversal countQuery = g.estimateVCount(vertexType.getLabel());
        vertexCounts.put(vertexType.getLabel(), runCountEstimate(countQuery, timeout, timer));
    }

    Map<String, Double> edgeCounts = Maps.newHashMap();
    for (GraphElement edgeType : graphSchema.getEdgeList()) {
        GraphTraversal countQuery = g.estimateECount(edgeType.getLabel());
        edgeCounts.put(edgeType.getLabel(), runCountEstimate(countQuery, timeout, timer));
    }

    // Publish only once all queries have succeeded.
    vertexCounts.forEach((labelName, count) -> statistics.addVertexCount(labelName, count));
    edgeCounts.forEach((labelName, count) -> statistics.addEdgeCount(labelName, count));
    return "Estimate vertex/edge count success";
}

/**
 * Executes a single count-estimation traversal under a fresh random query id and parses the
 * first collected result as a double.
 */
private double runCountEstimate(GraphTraversal countQuery, long timeout, Stopwatch timer) throws RetryGremlinException {
    String queryId = String.valueOf(ThreadLocalRandom.current().nextLong());
    RemoteRpcProcessor rpcProcessor = new DefaultVertexRpcProcessor();
    MemoryResultProcessor collector = new MemoryResultProcessor(executeConfig.getBatchQuerySize(), resultIterationBatchSize, queryId);
    processQueryTraversal(countQuery.toString(), countQuery, timeout, queryId, timer, this.httpRpcConnector, rpcProcessor, collector);
    return Double.parseDouble(collector.getResultList().get(0).toString());
}
Also used : Stopwatch(com.google.common.base.Stopwatch) MaxGraphTraversalSource(com.alibaba.maxgraph.tinkerpop.traversal.MaxGraphTraversalSource) GraphSchema(com.alibaba.maxgraph.compiler.api.schema.GraphSchema) TinkerMaxGraph(com.alibaba.maxgraph.structure.graph.TinkerMaxGraph) CostDataStatistics(com.alibaba.maxgraph.compiler.cost.statistics.CostDataStatistics) GraphTraversal(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal) GraphElement(com.alibaba.maxgraph.compiler.api.schema.GraphElement) Map(java.util.Map)

Aggregations

- GraphSchema (com.alibaba.maxgraph.compiler.api.schema.GraphSchema): 26
- GraphElement (com.alibaba.maxgraph.compiler.api.schema.GraphElement): 12
- List (java.util.List): 9
- Map (java.util.Map): 8
- Lists (com.google.common.collect.Lists): 7
- Sets (com.google.common.collect.Sets): 7
- Set (java.util.Set): 7
- TreeNode (com.alibaba.maxgraph.compiler.tree.TreeNode): 6
- Maps (com.google.common.collect.Maps): 6
- Collectors (java.util.stream.Collectors): 6
- EdgeRelation (com.alibaba.maxgraph.compiler.api.schema.EdgeRelation): 5
- GraphEdge (com.alibaba.maxgraph.compiler.api.schema.GraphEdge): 5
- EdgeOtherVertexTreeNode (com.alibaba.maxgraph.compiler.tree.EdgeOtherVertexTreeNode): 5
- EdgeTreeNode (com.alibaba.maxgraph.compiler.tree.EdgeTreeNode): 5
- EdgeVertexTreeNode (com.alibaba.maxgraph.compiler.tree.EdgeVertexTreeNode): 5
- VertexTreeNode (com.alibaba.maxgraph.compiler.tree.VertexTreeNode): 5
- SourceTreeNode (com.alibaba.maxgraph.compiler.tree.source.SourceTreeNode): 5
- Direction (org.apache.tinkerpop.gremlin.structure.Direction): 5
- JSONObject (com.alibaba.fastjson.JSONObject): 4
- SchemaFetcher (com.alibaba.maxgraph.compiler.api.schema.SchemaFetcher): 4