Search in sources :

Example 6 with DataLoadTarget

use of com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget in project GraphScope by alibaba.

In class CommitDataCommand, method run:

/**
 * Commits a data load for every configured column mapping.
 *
 * <p>For each mapping, the label id is resolved to a schema element and a
 * {@link DataLoadTarget} carrying its label is built; when the element is an edge, the
 * source and destination vertex labels are resolved and attached as well. The targets,
 * keyed by table id, are then sent to the graph service in one {@code commitDataLoad} call.
 */
public void run() {
    MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    Map<Long, DataLoadTarget> targetsByTableId = new HashMap<>();
    for (ColumnMappingInfo mapping : columnMappingInfos.values()) {
        long tableId = mapping.getTableId();
        GraphElement element = schema.getElement(mapping.getLabelId());
        DataLoadTarget.Builder target = DataLoadTarget.newBuilder().setLabel(element.getLabel());
        if (element instanceof GraphEdge) {
            // Only edges carry endpoint labels; vertices are identified by their own label alone.
            String sourceLabel = schema.getElement(mapping.getSrcLabelId()).getLabel();
            String destinationLabel = schema.getElement(mapping.getDstLabelId()).getLabel();
            target.setSrcLabel(sourceLabel);
            target.setDstLabel(destinationLabel);
        }
        targetsByTableId.put(tableId, target.build());
    }
    client.commitDataLoad(targetsByTableId);
}
Also used : MaxGraphClient(com.alibaba.graphscope.groot.sdk.MaxGraphClient) DataLoadTarget(com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget) ColumnMappingInfo(com.alibaba.maxgraph.dataload.databuild.ColumnMappingInfo) HashMap(java.util.HashMap) GraphElement(com.alibaba.maxgraph.compiler.api.schema.GraphElement) GraphEdge(com.alibaba.maxgraph.compiler.api.schema.GraphEdge)

Example 7 with DataLoadTarget

use of com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget in project GraphScope by alibaba.

In class OfflineBuild, method main:

/**
 * Entry point of the offline data build.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Load job settings from the properties file named by {@code args[0]}.</li>
 *   <li>Parse the column-mapping JSON and call {@code prepareDataLoad} on the graph
 *       endpoint with one {@link DataLoadTarget} per mapping; the call returns the schema.</li>
 *   <li>Configure and run the Hadoop job ("build graph data") with one reduce task per
 *       graph partition; the JVM exits with status 1 if the job fails.</li>
 *   <li>Write a {@code META} file into the output directory holding the endpoint, schema
 *       JSON, column mappings and qualified data path.</li>
 *   <li>If {@code LOAD_AFTER_BUILD} is {@code true}, ingest the built data and commit the
 *       data load.</li>
 * </ol>
 *
 * @param args {@code args[0]} is the path of the properties file
 * @throws IOException if reading the properties, running the job or writing META fails
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException propagated from {@code Job.waitForCompletion}
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    String propertiesFile = args[0];
    Properties properties = new Properties();
    try (InputStream is = new FileInputStream(propertiesFile)) {
        properties.load(is);
    }
    String inputPath = properties.getProperty(INPUT_PATH);
    String outputPath = properties.getProperty(OUTPUT_PATH);
    String columnMappingConfigStr = properties.getProperty(COLUMN_MAPPING_CONFIG);
    String graphEndpoint = properties.getProperty(GRAPH_ENDPOINT);
    MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    ObjectMapper objectMapper = new ObjectMapper();
    Map<String, FileColumnMapping> columnMappingConfig = objectMapper.readValue(columnMappingConfigStr, new TypeReference<Map<String, FileColumnMapping>>() {
    });
    List<DataLoadTarget> targets = new ArrayList<>();
    for (FileColumnMapping fileColumnMapping : columnMappingConfig.values()) {
        targets.add(DataLoadTarget.newBuilder().setLabel(fileColumnMapping.getLabel()).setSrcLabel(fileColumnMapping.getSrcLabel()).setDstLabel(fileColumnMapping.getDstLabel()).build());
    }
    GraphSchema schema = client.prepareDataLoad(targets);
    String schemaJson = GraphSchemaMapper.parseFromSchema(schema).toJsonString();
    int partitionNum = client.getPartitionNum();
    Map<String, ColumnMappingInfo> columnMappingInfos = new HashMap<>();
    columnMappingConfig.forEach((fileName, fileColumnMapping) -> {
        columnMappingInfos.put(fileName, fileColumnMapping.toColumnMappingInfo(schema));
    });
    String ldbcCustomize = properties.getProperty(LDBC_CUSTOMIZE, "true");
    // SPLIT_SIZE is given in MiB; parseLong avoids the needless boxing of Long.valueOf.
    long splitSize = Long.parseLong(properties.getProperty(SPLIT_SIZE, "256")) * 1024 * 1024;
    boolean loadAfterBuild = properties.getProperty(LOAD_AFTER_BUILD, "false").equalsIgnoreCase("true");
    boolean skipHeader = properties.getProperty(SKIP_HEADER, "true").equalsIgnoreCase("true");
    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, splitSize);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, splitSize);
    conf.setStrings(SCHEMA_JSON, schemaJson);
    String mappings = objectMapper.writeValueAsString(columnMappingInfos);
    conf.setStrings(COLUMN_MAPPINGS, mappings);
    conf.setBoolean(LDBC_CUSTOMIZE, ldbcCustomize.equalsIgnoreCase("true"));
    conf.set(SEPARATOR, properties.getProperty(SEPARATOR, "\\|"));
    conf.setBoolean(SKIP_HEADER, skipHeader);
    Job job = Job.getInstance(conf, "build graph data");
    job.setJarByClass(OfflineBuild.class);
    job.setMapperClass(DataBuildMapper.class);
    job.setPartitionerClass(DataBuildPartitioner.class);
    job.setReducerClass(DataBuildReducer.class);
    job.setNumReduceTasks(partitionNum);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(CombineTextInputFormat.class);
    CombineTextInputFormat.setMaxInputSplitSize(job, splitSize);
    LazyOutputFormat.setOutputFormatClass(job, SstOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    Path outputDir = new Path(outputPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }
    FileSystem fs = outputDir.getFileSystem(job.getConfiguration());
    String dataPath = fs.makeQualified(outputDir).toString();
    Map<String, String> outputMeta = new HashMap<>();
    outputMeta.put("endpoint", graphEndpoint);
    outputMeta.put("schema", schemaJson);
    outputMeta.put("mappings", mappings);
    outputMeta.put("datapath", dataPath);
    // try-with-resources guarantees the META stream is closed even if the write fails.
    try (FSDataOutputStream os = fs.create(new Path(outputDir, "META"))) {
        os.writeUTF(objectMapper.writeValueAsString(outputMeta));
        os.flush();
    }
    if (loadAfterBuild) {
        logger.info("start ingesting data");
        client.ingestData(dataPath);
        logger.info("commit bulk load");
        client.commitDataLoad(toCommitTargets(schema, columnMappingInfos));
    }
}

/**
 * Builds the table-id to {@link DataLoadTarget} map passed to {@code commitDataLoad}.
 *
 * <p>Each target carries the label of the mapping's schema element; for edge elements the
 * source and destination vertex labels are resolved from the schema and attached too.
 *
 * @param schema schema used to resolve label ids to elements
 * @param columnMappingInfos column mappings keyed by input file name
 * @return commit targets keyed by table id
 */
private static Map<Long, DataLoadTarget> toCommitTargets(GraphSchema schema, Map<String, ColumnMappingInfo> columnMappingInfos) {
    Map<Long, DataLoadTarget> tableToTarget = new HashMap<>();
    for (ColumnMappingInfo columnMappingInfo : columnMappingInfos.values()) {
        long tableId = columnMappingInfo.getTableId();
        int labelId = columnMappingInfo.getLabelId();
        GraphElement graphElement = schema.getElement(labelId);
        String label = graphElement.getLabel();
        DataLoadTarget.Builder builder = DataLoadTarget.newBuilder();
        builder.setLabel(label);
        if (graphElement instanceof GraphEdge) {
            builder.setSrcLabel(schema.getElement(columnMappingInfo.getSrcLabelId()).getLabel());
            builder.setDstLabel(schema.getElement(columnMappingInfo.getDstLabelId()).getLabel());
        }
        tableToTarget.put(tableId, builder.build());
    }
    return tableToTarget;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) GraphSchema(com.alibaba.maxgraph.compiler.api.schema.GraphSchema) DataLoadTarget(com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget) FileSystem(org.apache.hadoop.fs.FileSystem) GraphElement(com.alibaba.maxgraph.compiler.api.schema.GraphElement) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Job(org.apache.hadoop.mapreduce.Job) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Path(org.apache.hadoop.fs.Path) MaxGraphClient(com.alibaba.graphscope.groot.sdk.MaxGraphClient) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) GraphEdge(com.alibaba.maxgraph.compiler.api.schema.GraphEdge)

Example 8 with DataLoadTarget

use of com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget in project GraphScope by alibaba.

In class CommitDataLoadExecutor, method execute:

/**
 * Validates a commit-data-load request against the current graph definition and produces
 * the new graph definition plus one commit operation per partition.
 *
 * <p>The target is treated as a vertex type when its source label is null or empty, and as
 * an edge kind otherwise. The label (and, for edges, both endpoint labels and the resulting
 * edge kind) must exist in {@code graphDef}. The resolved label ids are written into the
 * target that is attached to every emitted operation, and the version is incremented by one.
 *
 * @param ddlBlob serialized {@code CommitDataLoadPb}
 * @param graphDef current graph definition
 * @param partitionCount number of partitions; one operation is emitted for each
 * @return the graph definition with the bumped version and the per-partition operations
 * @throws InvalidProtocolBufferException if {@code ddlBlob} cannot be parsed
 * @throws DdlException if the label, an endpoint label or the edge kind does not exist, or
 *     if the label's type does not match the kind of target requested
 */
@Override
public DdlResult execute(ByteString ddlBlob, GraphDef graphDef, int partitionCount) throws InvalidProtocolBufferException {
    CommitDataLoadPb commitDataLoadPb = CommitDataLoadPb.parseFrom(ddlBlob);
    DataLoadTargetPb dataLoadTargetPb = commitDataLoadPb.getTarget();
    DataLoadTarget dataLoadTarget = DataLoadTarget.parseProto(dataLoadTargetPb);
    String label = dataLoadTarget.getLabel();
    String srcLabel = dataLoadTarget.getSrcLabel();
    String dstLabel = dataLoadTarget.getDstLabel();
    long version = graphDef.getSchemaVersion();
    if (!graphDef.hasLabel(label)) {
        throw new DdlException("label [" + label + "] not exists, schema version [" + version + "]");
    }
    GraphDef.Builder graphDefBuilder = GraphDef.newBuilder(graphDef);
    TypeDef typeDef = graphDef.getTypeDef(label);
    DataLoadTarget.Builder targetBuilder = DataLoadTarget.newBuilder(dataLoadTarget);
    if (srcLabel == null || srcLabel.isEmpty()) {
        // Vertex type
        if (typeDef.getTypeEnum() != TypeEnum.VERTEX) {
            throw new DdlException("invalid data load target [" + dataLoadTarget + "], label is not a vertex");
        }
        targetBuilder.setLabelId(typeDef.getLabelId());
    } else {
        // Edge kind
        if (typeDef.getTypeEnum() != TypeEnum.EDGE) {
            throw new DdlException("invalid data load target [" + dataLoadTarget + "], label is not an edge");
        }
        EdgeKind.Builder edgeKindBuilder = EdgeKind.newBuilder();
        LabelId edgeLabelId = graphDef.getLabelId(label);
        if (edgeLabelId == null) {
            throw new DdlException("invalid edgeLabel [" + label + "], schema version [" + version + "]");
        }
        edgeKindBuilder.setEdgeLabelId(edgeLabelId);
        targetBuilder.setLabelId(edgeLabelId.getId());
        LabelId srcVertexLabelId = graphDef.getLabelId(srcLabel);
        if (srcVertexLabelId == null) {
            throw new DdlException("invalid srcVertexLabel [" + srcLabel + "], schema version [" + version + "]");
        }
        edgeKindBuilder.setSrcVertexLabelId(srcVertexLabelId);
        targetBuilder.setSrcLabelId(srcVertexLabelId.getId());
        LabelId dstVertexLabelId = graphDef.getLabelId(dstLabel);
        if (dstVertexLabelId == null) {
            throw new DdlException("invalid dstVertexLabel [" + dstLabel + "], schema version [" + version + "]");
        }
        edgeKindBuilder.setDstVertexLabelId(dstVertexLabelId);
        targetBuilder.setDstLabelId(dstVertexLabelId.getId());
        EdgeKind edgeKind = edgeKindBuilder.build();
        if (!graphDef.hasEdgeKind(edgeKind)) {
            throw new DdlException("invalid data load target [" + dataLoadTarget + "], edgeKind not exists");
        }
    }
    version++;
    graphDefBuilder.setVersion(version);
    GraphDef newGraphDef = graphDefBuilder.build();
    List<Operation> operations = new ArrayList<>(partitionCount);
    // The payload does not depend on the partition, so it is built once and shared by all
    // operations instead of being rebuilt on every iteration.
    CommitDataLoadPb resolvedCommitPb = CommitDataLoadPb.newBuilder().setTableIdx(commitDataLoadPb.getTableIdx()).setTarget(targetBuilder.build().toProto()).build();
    for (int i = 0; i < partitionCount; i++) {
        operations.add(new CommitDataLoadOperation(i, version, resolvedCommitPb));
    }
    return new DdlResult(newGraphDef, operations);
}
Also used : DdlException(com.alibaba.graphscope.groot.schema.request.DdlException) EdgeKind(com.alibaba.maxgraph.sdkcommon.schema.EdgeKind) CommitDataLoadOperation(com.alibaba.graphscope.groot.operation.ddl.CommitDataLoadOperation) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) Operation(com.alibaba.graphscope.groot.operation.Operation) CommitDataLoadPb(com.alibaba.maxgraph.proto.CommitDataLoadPb) DataLoadTargetPb(com.alibaba.maxgraph.proto.DataLoadTargetPb) GraphDef(com.alibaba.maxgraph.sdkcommon.schema.GraphDef) DataLoadTarget(com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget) TypeDef(com.alibaba.maxgraph.sdkcommon.schema.TypeDef) LabelId(com.alibaba.maxgraph.sdkcommon.schema.LabelId)

Example 9 with DataLoadTarget

use of com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget in project GraphScope by alibaba.

In class Client, method prepareDataLoad:

/**
 * Sends a prepare-data-load request containing the given targets and returns the graph
 * definition parsed from the response.
 *
 * @param targets data load targets to include in the request, in iteration order
 * @return the graph definition carried by the service response
 */
public GraphSchema prepareDataLoad(List<DataLoadTarget> targets) {
    PrepareDataLoadRequest.Builder request = PrepareDataLoadRequest.newBuilder();
    targets.forEach(target -> request.addDataLoadTargets(target.toProto()));
    PrepareDataLoadRequest builtRequest = request.build();
    PrepareDataLoadResponse response = this.stub.prepareDataLoad(builtRequest);
    return GraphDef.parseProto(response.getGraphDef());
}
Also used : DataLoadTarget(com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget)

Aggregations

DataLoadTarget (com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget)9 DataLoadTargetPb (com.alibaba.maxgraph.proto.DataLoadTargetPb)4 Operation (com.alibaba.graphscope.groot.operation.Operation)2 com.alibaba.graphscope.groot.schema.request (com.alibaba.graphscope.groot.schema.request)2 DdlException (com.alibaba.graphscope.groot.schema.request.DdlException)2 MaxGraphClient (com.alibaba.graphscope.groot.sdk.MaxGraphClient)2 GraphEdge (com.alibaba.maxgraph.compiler.api.schema.GraphEdge)2 GraphElement (com.alibaba.maxgraph.compiler.api.schema.GraphElement)2 com.alibaba.maxgraph.proto.groot (com.alibaba.maxgraph.proto.groot)2 EdgeKind (com.alibaba.maxgraph.sdkcommon.schema.EdgeKind)2 GraphDef (com.alibaba.maxgraph.sdkcommon.schema.GraphDef)2 LabelId (com.alibaba.maxgraph.sdkcommon.schema.LabelId)2 TypeDef (com.alibaba.maxgraph.sdkcommon.schema.TypeDef)2 ByteString (com.google.protobuf.ByteString)2 ArrayList (java.util.ArrayList)2 CommitDataLoadOperation (com.alibaba.graphscope.groot.operation.ddl.CommitDataLoadOperation)1 PrepareDataLoadOperation (com.alibaba.graphscope.groot.operation.ddl.PrepareDataLoadOperation)1 GraphSchema (com.alibaba.maxgraph.compiler.api.schema.GraphSchema)1 ColumnMappingInfo (com.alibaba.maxgraph.dataload.databuild.ColumnMappingInfo)1 CommitDataLoadPb (com.alibaba.maxgraph.proto.CommitDataLoadPb)1