Use of com.alibaba.graphscope.groot.sdk.MaxGraphClient in the GraphScope project by Alibaba.
Shown below: the main method of the class OfflineBuild.
/**
 * Entry point of the offline build job.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Load the properties file named by {@code args[0]}.</li>
 *   <li>Turn the column-mapping JSON into {@link DataLoadTarget}s and call
 *       {@code prepareDataLoad} on the graph client to obtain the schema.</li>
 *   <li>Configure and run a MapReduce job (mapper, partitioner, reducer, one reduce task
 *       per graph partition) whose output format is {@code SstOutputFormat}.</li>
 *   <li>Write a {@code META} file (endpoint, schema, mappings, data path) into the
 *       output directory.</li>
 *   <li>If {@code LOAD_AFTER_BUILD} is true, ingest the output and commit the data load.</li>
 * </ol>
 *
 * <p>The JVM exits with status 1 if the MapReduce job does not complete successfully.
 *
 * @param args command-line arguments; {@code args[0]} must be the path of the properties file
 * @throws IllegalArgumentException if the properties-file argument or a required property
 *     (input path, output path, column mapping config, graph endpoint) is missing
 * @throws IOException if reading the properties, running the job, or writing META fails
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException propagated from {@code Job.waitForCompletion}
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args == null || args.length < 1) {
        throw new IllegalArgumentException("usage: OfflineBuild <properties-file>");
    }
    String propertiesFile = args[0];
    Properties properties = new Properties();
    try (InputStream is = new FileInputStream(propertiesFile)) {
        properties.load(is);
    }

    // Validate everything that has no default before touching the graph service,
    // so a configuration typo fails fast and does not leave a prepared data load behind.
    String inputPath = requireProperty(properties, INPUT_PATH);
    String outputPath = requireProperty(properties, OUTPUT_PATH);
    String columnMappingConfigStr = requireProperty(properties, COLUMN_MAPPING_CONFIG);
    String graphEndpoint = requireProperty(properties, GRAPH_ENDPOINT);

    boolean ldbcCustomize = Boolean.parseBoolean(properties.getProperty(LDBC_CUSTOMIZE, "true"));
    // SPLIT_SIZE is given in megabytes; convert to bytes.
    long splitSize = Long.parseLong(properties.getProperty(SPLIT_SIZE, "256")) * 1024 * 1024;
    boolean loadAfterBuild = Boolean.parseBoolean(properties.getProperty(LOAD_AFTER_BUILD, "false"));
    boolean skipHeader = Boolean.parseBoolean(properties.getProperty(SKIP_HEADER, "true"));

    MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    ObjectMapper objectMapper = new ObjectMapper();
    Map<String, FileColumnMapping> columnMappingConfig =
            objectMapper.readValue(
                    columnMappingConfigStr,
                    new TypeReference<Map<String, FileColumnMapping>>() {});

    // One load target per configured file mapping.
    List<DataLoadTarget> targets = new ArrayList<>();
    for (FileColumnMapping fileColumnMapping : columnMappingConfig.values()) {
        targets.add(
                DataLoadTarget.newBuilder()
                        .setLabel(fileColumnMapping.getLabel())
                        .setSrcLabel(fileColumnMapping.getSrcLabel())
                        .setDstLabel(fileColumnMapping.getDstLabel())
                        .build());
    }
    GraphSchema schema = client.prepareDataLoad(targets);
    String schemaJson = GraphSchemaMapper.parseFromSchema(schema).toJsonString();
    int partitionNum = client.getPartitionNum();

    // Resolve each file's column mapping against the schema returned by prepareDataLoad.
    Map<String, ColumnMappingInfo> columnMappingInfos = new HashMap<>();
    for (Map.Entry<String, FileColumnMapping> entry : columnMappingConfig.entrySet()) {
        columnMappingInfos.put(entry.getKey(), entry.getValue().toColumnMappingInfo(schema));
    }
    String mappings = objectMapper.writeValueAsString(columnMappingInfos);

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, splitSize);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, splitSize);
    conf.setStrings(SCHEMA_JSON, schemaJson);
    conf.setStrings(COLUMN_MAPPINGS, mappings);
    conf.setBoolean(LDBC_CUSTOMIZE, ldbcCustomize);
    conf.set(SEPARATOR, properties.getProperty(SEPARATOR, "\\|"));
    conf.setBoolean(SKIP_HEADER, skipHeader);

    Job job = Job.getInstance(conf, "build graph data");
    job.setJarByClass(OfflineBuild.class);
    job.setMapperClass(DataBuildMapper.class);
    job.setPartitionerClass(DataBuildPartitioner.class);
    job.setReducerClass(DataBuildReducer.class);
    job.setNumReduceTasks(partitionNum);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(CombineTextInputFormat.class);
    CombineTextInputFormat.setMaxInputSplitSize(job, splitSize);
    LazyOutputFormat.setOutputFormatClass(job, SstOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    Path outputDir = new Path(outputPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }

    FileSystem fs = outputDir.getFileSystem(job.getConfiguration());
    String dataPath = fs.makeQualified(outputDir).toString();

    Map<String, String> outputMeta = new HashMap<>();
    outputMeta.put("endpoint", graphEndpoint);
    outputMeta.put("schema", schemaJson);
    outputMeta.put("mappings", mappings);
    outputMeta.put("datapath", dataPath);
    // try-with-resources so the stream is closed even if serialisation or the write fails.
    try (FSDataOutputStream os = fs.create(new Path(outputDir, "META"))) {
        os.writeUTF(objectMapper.writeValueAsString(outputMeta));
        os.flush();
    }

    if (loadAfterBuild) {
        logger.info("start ingesting data");
        client.ingestData(dataPath);
        logger.info("commit bulk load");
        Map<Long, DataLoadTarget> tableToTarget = new HashMap<>();
        for (ColumnMappingInfo columnMappingInfo : columnMappingInfos.values()) {
            long tableId = columnMappingInfo.getTableId();
            int labelId = columnMappingInfo.getLabelId();
            GraphElement graphElement = schema.getElement(labelId);
            String label = graphElement.getLabel();
            DataLoadTarget.Builder builder = DataLoadTarget.newBuilder();
            builder.setLabel(label);
            // Only edges carry source/destination labels.
            if (graphElement instanceof GraphEdge) {
                builder.setSrcLabel(schema.getElement(columnMappingInfo.getSrcLabelId()).getLabel());
                builder.setDstLabel(schema.getElement(columnMappingInfo.getDstLabelId()).getLabel());
            }
            tableToTarget.put(tableId, builder.build());
        }
        client.commitDataLoad(tableToTarget);
    }
}

/** Returns the value of {@code key}, or throws if the property is absent or empty. */
private static String requireProperty(Properties properties, String key) {
    String value = properties.getProperty(key);
    if (value == null || value.isEmpty()) {
        throw new IllegalArgumentException("missing required property: " + key);
    }
    return value;
}
Aggregations