Search in sources:

Example 16 with GraphEdge

use of com.alibaba.maxgraph.compiler.api.schema.GraphEdge in project GraphScope by alibaba.

the class CommitDataCommand method run.

/**
 * Commits a bulk data load: for every column mapping, resolves the label of the
 * mapped schema element (plus source/destination labels when the element is an
 * edge), keys the resulting load target by table id, and submits the whole map
 * to the graph service in a single call.
 */
public void run() {
    MaxGraphClient graphClient = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    Map<Long, DataLoadTarget> targetsByTableId = new HashMap<>();
    for (ColumnMappingInfo mapping : columnMappingInfos.values()) {
        long tableId = mapping.getTableId();
        GraphElement element = schema.getElement(mapping.getLabelId());
        DataLoadTarget.Builder target = DataLoadTarget.newBuilder().setLabel(element.getLabel());
        // Only edges carry endpoint labels; for any other element they stay unset.
        if (element instanceof GraphEdge) {
            String srcLabel = schema.getElement(mapping.getSrcLabelId()).getLabel();
            target.setSrcLabel(srcLabel);
            String dstLabel = schema.getElement(mapping.getDstLabelId()).getLabel();
            target.setDstLabel(dstLabel);
        }
        targetsByTableId.put(tableId, target.build());
    }
    graphClient.commitDataLoad(targetsByTableId);
}
Also used : MaxGraphClient(com.alibaba.graphscope.groot.sdk.MaxGraphClient) DataLoadTarget(com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget) ColumnMappingInfo(com.alibaba.maxgraph.dataload.databuild.ColumnMappingInfo) HashMap(java.util.HashMap) GraphElement(com.alibaba.maxgraph.compiler.api.schema.GraphElement) GraphEdge(com.alibaba.maxgraph.compiler.api.schema.GraphEdge)

Example 17 with GraphEdge

use of com.alibaba.maxgraph.compiler.api.schema.GraphEdge in project GraphScope by alibaba.

the class OfflineBuild method main.

/**
 * Runs the offline graph-data build as a MapReduce job.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>load the job properties from the file named by {@code args[0]};</li>
 *   <li>parse the column-mapping config and call {@code prepareDataLoad} on the graph
 *       service to obtain the schema;</li>
 *   <li>configure and run the build job, exiting the JVM with status 1 if it fails;</li>
 *   <li>write a {@code META} file (endpoint, schema, mappings, data path) into the
 *       output directory;</li>
 *   <li>if the load-after-build property is true, ingest the built data and commit the load.</li>
 * </ol>
 *
 * @param args {@code args[0]} is the path of the properties file
 * @throws IOException if reading the properties, running the job, or writing META fails
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException propagated from {@code Job.waitForCompletion}
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    String propertiesFile = args[0];
    Properties properties = new Properties();
    try (InputStream is = new FileInputStream(propertiesFile)) {
        properties.load(is);
    }
    String inputPath = properties.getProperty(INPUT_PATH);
    String outputPath = properties.getProperty(OUTPUT_PATH);
    String columnMappingConfigStr = properties.getProperty(COLUMN_MAPPING_CONFIG);
    String graphEndpoint = properties.getProperty(GRAPH_ENDPOINT);
    MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    ObjectMapper objectMapper = new ObjectMapper();
    Map<String, FileColumnMapping> columnMappingConfig = objectMapper.readValue(columnMappingConfigStr, new TypeReference<Map<String, FileColumnMapping>>() {
    });

    // Announce every target to the graph service; it answers with the schema to build against.
    List<DataLoadTarget> targets = new ArrayList<>();
    for (FileColumnMapping fileColumnMapping : columnMappingConfig.values()) {
        targets.add(DataLoadTarget.newBuilder().setLabel(fileColumnMapping.getLabel()).setSrcLabel(fileColumnMapping.getSrcLabel()).setDstLabel(fileColumnMapping.getDstLabel()).build());
    }
    GraphSchema schema = client.prepareDataLoad(targets);
    String schemaJson = GraphSchemaMapper.parseFromSchema(schema).toJsonString();
    int partitionNum = client.getPartitionNum();
    Map<String, ColumnMappingInfo> columnMappingInfos = new HashMap<>();
    columnMappingConfig.forEach((fileName, fileColumnMapping) -> {
        columnMappingInfos.put(fileName, fileColumnMapping.toColumnMappingInfo(schema));
    });

    // Optional settings with their defaults; all flags are parsed to booleans up front.
    boolean ldbcCustomize = properties.getProperty(LDBC_CUSTOMIZE, "true").equalsIgnoreCase("true");
    // SPLIT_SIZE is given in MiB; parseLong avoids the needless boxing of Long.valueOf.
    long splitSize = Long.parseLong(properties.getProperty(SPLIT_SIZE, "256")) * 1024 * 1024;
    boolean loadAfterBuild = properties.getProperty(LOAD_AFTER_BUILD, "false").equalsIgnoreCase("true");
    boolean skipHeader = properties.getProperty(SKIP_HEADER, "true").equalsIgnoreCase("true");

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, splitSize);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, splitSize);
    conf.setStrings(SCHEMA_JSON, schemaJson);
    String mappings = objectMapper.writeValueAsString(columnMappingInfos);
    conf.setStrings(COLUMN_MAPPINGS, mappings);
    conf.setBoolean(LDBC_CUSTOMIZE, ldbcCustomize);
    conf.set(SEPARATOR, properties.getProperty(SEPARATOR, "\\|"));
    conf.setBoolean(SKIP_HEADER, skipHeader);

    Job job = Job.getInstance(conf, "build graph data");
    job.setJarByClass(OfflineBuild.class);
    job.setMapperClass(DataBuildMapper.class);
    job.setPartitionerClass(DataBuildPartitioner.class);
    job.setReducerClass(DataBuildReducer.class);
    // One reduce task per graph partition reported by the service.
    job.setNumReduceTasks(partitionNum);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(CombineTextInputFormat.class);
    CombineTextInputFormat.setMaxInputSplitSize(job, splitSize);
    LazyOutputFormat.setOutputFormatClass(job, SstOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    Path outputDir = new Path(outputPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }

    // Record what was built, and against which endpoint/schema, next to the data itself.
    FileSystem fs = outputDir.getFileSystem(job.getConfiguration());
    String dataPath = fs.makeQualified(outputDir).toString();
    Map<String, String> outputMeta = new HashMap<>();
    outputMeta.put("endpoint", graphEndpoint);
    outputMeta.put("schema", schemaJson);
    outputMeta.put("mappings", mappings);
    outputMeta.put("datapath", dataPath);
    // try-with-resources closes the stream even when writeUTF/flush throws.
    // NOTE(review): writeUTF rejects strings whose encoded form exceeds 65535 bytes;
    // confirm the schema + mappings JSON stays below that before relying on this format.
    try (FSDataOutputStream os = fs.create(new Path(outputDir, "META"))) {
        os.writeUTF(objectMapper.writeValueAsString(outputMeta));
        os.flush();
    }

    if (loadAfterBuild) {
        logger.info("start ingesting data");
        client.ingestData(dataPath);
        logger.info("commit bulk load");
        Map<Long, DataLoadTarget> tableToTarget = new HashMap<>();
        for (ColumnMappingInfo columnMappingInfo : columnMappingInfos.values()) {
            long tableId = columnMappingInfo.getTableId();
            int labelId = columnMappingInfo.getLabelId();
            GraphElement graphElement = schema.getElement(labelId);
            String label = graphElement.getLabel();
            DataLoadTarget.Builder builder = DataLoadTarget.newBuilder();
            builder.setLabel(label);
            // Only edges carry source/destination labels.
            if (graphElement instanceof GraphEdge) {
                builder.setSrcLabel(schema.getElement(columnMappingInfo.getSrcLabelId()).getLabel());
                builder.setDstLabel(schema.getElement(columnMappingInfo.getDstLabelId()).getLabel());
            }
            tableToTarget.put(tableId, builder.build());
        }
        client.commitDataLoad(tableToTarget);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) GraphSchema(com.alibaba.maxgraph.compiler.api.schema.GraphSchema) DataLoadTarget(com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget) FileSystem(org.apache.hadoop.fs.FileSystem) GraphElement(com.alibaba.maxgraph.compiler.api.schema.GraphElement) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Job(org.apache.hadoop.mapreduce.Job) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Path(org.apache.hadoop.fs.Path) MaxGraphClient(com.alibaba.graphscope.groot.sdk.MaxGraphClient) InputStream(java.io.InputStream) FileInputStream(java.io.FileInputStream) GraphEdge(com.alibaba.maxgraph.compiler.api.schema.GraphEdge)

Aggregations

GraphEdge (com.alibaba.maxgraph.compiler.api.schema.GraphEdge): 17, EdgeRelation (com.alibaba.maxgraph.compiler.api.schema.EdgeRelation): 6, GraphElement (com.alibaba.maxgraph.compiler.api.schema.GraphElement): 6, JSONObject (com.alibaba.fastjson.JSONObject): 5, GraphSchema (com.alibaba.maxgraph.compiler.api.schema.GraphSchema): 5, GraphVertex (com.alibaba.maxgraph.compiler.api.schema.GraphVertex): 5, Step (org.apache.tinkerpop.gremlin.process.traversal.Step): 5, HasStep (org.apache.tinkerpop.gremlin.process.traversal.step.filter.HasStep): 5, VertexStep (org.apache.tinkerpop.gremlin.process.traversal.step.map.VertexStep): 5, HasContainer (org.apache.tinkerpop.gremlin.process.traversal.step.util.HasContainer): 5, SchemaFetcher (com.alibaba.maxgraph.compiler.api.schema.SchemaFetcher): 4, EdgeOtherVertexTreeNode (com.alibaba.maxgraph.compiler.tree.EdgeOtherVertexTreeNode): 4, EdgeTreeNode (com.alibaba.maxgraph.compiler.tree.EdgeTreeNode): 4, EdgeVertexTreeNode (com.alibaba.maxgraph.compiler.tree.EdgeVertexTreeNode): 4, NodeType (com.alibaba.maxgraph.compiler.tree.NodeType): 4, TreeNode (com.alibaba.maxgraph.compiler.tree.TreeNode): 4, VertexTreeNode (com.alibaba.maxgraph.compiler.tree.VertexTreeNode): 4, SourceTreeNode (com.alibaba.maxgraph.compiler.tree.source.SourceTreeNode): 4, Lists (com.google.common.collect.Lists): 4, Maps (com.google.common.collect.Maps): 4