
Example 6 with MaxGraphClient

Use of com.alibaba.graphscope.groot.sdk.MaxGraphClient in the GraphScope project by alibaba.

The main method of the OfflineBuild class. It reads a properties file of job parameters, asks the graph store (via MaxGraphClient.prepareDataLoad) for the schema of the load targets, runs a Hadoop MapReduce job that builds the store files, writes a META descriptor next to the output, and can optionally ingest and commit the data right after the build:

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // args[0] is the path to a properties file that carries all job parameters
    String propertiesFile = args[0];
    Properties properties = new Properties();
    try (InputStream is = new FileInputStream(propertiesFile)) {
        properties.load(is);
    }
    String inputPath = properties.getProperty(INPUT_PATH);
    String outputPath = properties.getProperty(OUTPUT_PATH);
    String columnMappingConfigStr = properties.getProperty(COLUMN_MAPPING_CONFIG);
    String graphEndpoint = properties.getProperty(GRAPH_ENDPOINT);
    // Connect to the graph store endpoint configured in the properties file
    MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    ObjectMapper objectMapper = new ObjectMapper();
    // The column mapping config is a JSON map from input file name to that file's column mapping
    Map<String, FileColumnMapping> columnMappingConfig = objectMapper.readValue(columnMappingConfigStr, new TypeReference<Map<String, FileColumnMapping>>() {
    });
    List<DataLoadTarget> targets = new ArrayList<>();
    for (FileColumnMapping fileColumnMapping : columnMappingConfig.values()) {
        targets.add(DataLoadTarget.newBuilder().setLabel(fileColumnMapping.getLabel()).setSrcLabel(fileColumnMapping.getSrcLabel()).setDstLabel(fileColumnMapping.getDstLabel()).build());
    }
    // Ask the store to prepare a data-load target per mapped file and return the graph schema
    GraphSchema schema = client.prepareDataLoad(targets);
    String schemaJson = GraphSchemaMapper.parseFromSchema(schema).toJsonString();
    int partitionNum = client.getPartitionNum();
    Map<String, ColumnMappingInfo> columnMappingInfos = new HashMap<>();
    columnMappingConfig.forEach((fileName, fileColumnMapping) -> {
        columnMappingInfos.put(fileName, fileColumnMapping.toColumnMappingInfo(schema));
    });
    String ldbcCustomize = properties.getProperty(LDBC_CUSTOMIZE, "true");
    // Split size is given in MB (default 256) and converted to bytes
    long splitSize = Long.valueOf(properties.getProperty(SPLIT_SIZE, "256")) * 1024 * 1024;
    boolean loadAfterBuild = properties.getProperty(LOAD_AFTER_BUILD, "false").equalsIgnoreCase("true");
    boolean skipHeader = properties.getProperty(SKIP_HEADER, "true").equalsIgnoreCase("true");
    // Configure the MapReduce job that turns the raw input files into store (SST) files
    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, splitSize);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, splitSize);
    conf.setStrings(SCHEMA_JSON, schemaJson);
    String mappings = objectMapper.writeValueAsString(columnMappingInfos);
    conf.setStrings(COLUMN_MAPPINGS, mappings);
    conf.setBoolean(LDBC_CUSTOMIZE, ldbcCustomize.equalsIgnoreCase("true"));
    conf.set(SEPARATOR, properties.getProperty(SEPARATOR, "\\|"));
    conf.setBoolean(SKIP_HEADER, skipHeader);
    Job job = Job.getInstance(conf, "build graph data");
    job.setJarByClass(OfflineBuild.class);
    job.setMapperClass(DataBuildMapper.class);
    job.setPartitionerClass(DataBuildPartitioner.class);
    job.setReducerClass(DataBuildReducer.class);
    job.setNumReduceTasks(partitionNum);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(CombineTextInputFormat.class);
    CombineTextInputFormat.setMaxInputSplitSize(job, splitSize);
    LazyOutputFormat.setOutputFormatClass(job, SstOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    Path outputDir = new Path(outputPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    // Run the build job and abort if it fails
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }
    FileSystem fs = outputDir.getFileSystem(job.getConfiguration());
    String dataPath = fs.makeQualified(outputDir).toString();
    // Record the endpoint, schema, mappings and data path in a META file under the output directory
    Map<String, String> outputMeta = new HashMap<>();
    outputMeta.put("endpoint", graphEndpoint);
    outputMeta.put("schema", schemaJson);
    outputMeta.put("mappings", mappings);
    outputMeta.put("datapath", dataPath);
    FSDataOutputStream os = fs.create(new Path(outputDir, "META"));
    os.writeUTF(objectMapper.writeValueAsString(outputMeta));
    os.flush();
    os.close();
    // Optionally ingest the generated files into the store and commit the bulk load
    if (loadAfterBuild) {
        logger.info("start ingesting data");
        client.ingestData(dataPath);
        logger.info("commit bulk load");
        Map<Long, DataLoadTarget> tableToTarget = new HashMap<>();
        for (ColumnMappingInfo columnMappingInfo : columnMappingInfos.values()) {
            long tableId = columnMappingInfo.getTableId();
            int labelId = columnMappingInfo.getLabelId();
            GraphElement graphElement = schema.getElement(labelId);
            String label = graphElement.getLabel();
            DataLoadTarget.Builder builder = DataLoadTarget.newBuilder();
            builder.setLabel(label);
            if (graphElement instanceof GraphEdge) {
                builder.setSrcLabel(schema.getElement(columnMappingInfo.getSrcLabelId()).getLabel());
                builder.setDstLabel(schema.getElement(columnMappingInfo.getDstLabelId()).getLabel());
            }
            tableToTarget.put(tableId, builder.build());
        }
        client.commitDataLoad(tableToTarget);
    }
}
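
When LOAD_AFTER_BUILD is left at its default of false, the generated files can be ingested in a separate step driven by the META descriptor the job writes. The sketch below is a minimal, hypothetical follow-up loader under that assumption: the class name IngestFromMeta is illustrative, and it relies only on what the example above shows (a META file holding a JSON map with "endpoint" and "datapath" entries, and the MaxGraphClient ingestData call). Committing the load would additionally need the tableId-to-DataLoadTarget map rebuilt from the "mappings" entry, which is omitted here.

import java.util.Map;

import com.alibaba.graphscope.groot.sdk.MaxGraphClient;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical follow-up step: read the META file written by OfflineBuild and ingest the data.
public class IngestFromMeta {

    public static void main(String[] args) throws Exception {
        // args[0] is the output directory that was passed to the build job
        Path metaPath = new Path(args[0], "META");
        FileSystem fs = metaPath.getFileSystem(new Configuration());
        Map<String, String> meta;
        try (FSDataInputStream is = fs.open(metaPath)) {
            // META was written as writeUTF(objectMapper.writeValueAsString(outputMeta))
            meta = new ObjectMapper().readValue(is.readUTF(), new TypeReference<Map<String, String>>() {});
        }
        // Reuse the endpoint and data path recorded by the build job
        MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(meta.get("endpoint")).build();
        client.ingestData(meta.get("datapath"));
    }
}

Running it amounts to pointing the class at the same output path the build job used; the commit step would then mirror the loadAfterBuild branch above.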
Also used:
Configuration (org.apache.hadoop.conf.Configuration)
GraphSchema (com.alibaba.maxgraph.compiler.api.schema.GraphSchema)
DataLoadTarget (com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget)
FileSystem (org.apache.hadoop.fs.FileSystem)
GraphElement (com.alibaba.maxgraph.compiler.api.schema.GraphElement)
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)
Job (org.apache.hadoop.mapreduce.Job)
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)
Path (org.apache.hadoop.fs.Path)
MaxGraphClient (com.alibaba.graphscope.groot.sdk.MaxGraphClient)
InputStream (java.io.InputStream)
FileInputStream (java.io.FileInputStream)
GraphEdge (com.alibaba.maxgraph.compiler.api.schema.GraphEdge)

Aggregations

MaxGraphClient (com.alibaba.graphscope.groot.sdk.MaxGraphClient): 6
GraphEdge (com.alibaba.maxgraph.compiler.api.schema.GraphEdge): 2
GraphElement (com.alibaba.maxgraph.compiler.api.schema.GraphElement): 2
DataLoadTarget (com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget): 2
Path (java.nio.file.Path): 2
HashMap (java.util.HashMap): 2
GraphSchema (com.alibaba.maxgraph.compiler.api.schema.GraphSchema): 1
ColumnMappingInfo (com.alibaba.maxgraph.dataload.databuild.ColumnMappingInfo): 1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
BufferedReader (java.io.BufferedReader): 1
File (java.io.File): 1
FileInputStream (java.io.FileInputStream): 1
FileReader (java.io.FileReader): 1
InputStream (java.io.InputStream): 1
ArrayList (java.util.ArrayList): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
Path (org.apache.hadoop.fs.Path): 1
Job (org.apache.hadoop.mapreduce.Job): 1