use of com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget in project GraphScope by alibaba.
the class CommitDataCommand method run.
public void run() {
    MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    Map<Long, DataLoadTarget> tableToTarget = new HashMap<>();
    for (ColumnMappingInfo columnMappingInfo : columnMappingInfos.values()) {
        long tableId = columnMappingInfo.getTableId();
        int labelId = columnMappingInfo.getLabelId();
        GraphElement graphElement = schema.getElement(labelId);
        String label = graphElement.getLabel();
        DataLoadTarget.Builder builder = DataLoadTarget.newBuilder();
        builder.setLabel(label);
        if (graphElement instanceof GraphEdge) {
            builder.setSrcLabel(schema.getElement(columnMappingInfo.getSrcLabelId()).getLabel());
            builder.setDstLabel(schema.getElement(columnMappingInfo.getDstLabelId()).getLabel());
        }
        tableToTarget.put(tableId, builder.build());
    }
    client.commitDataLoad(tableToTarget);
}
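For context, a minimal sketch of driving the same commitDataLoad call by hand; the endpoint, table id, and labels below are hypothetical placeholders, not values from the project.
// Sketch only: "localhost:55556", table 42 and the "knows"/"person" labels are assumptions.
MaxGraphClient client = MaxGraphClient.newBuilder().setHosts("localhost:55556").build();
Map<Long, DataLoadTarget> tableToTarget = new HashMap<>();
// Table 42 is assumed to hold the ingested data for a "knows" edge between "person" vertices.
tableToTarget.put(42L, DataLoadTarget.newBuilder()
        .setLabel("knows")
        .setSrcLabel("person")
        .setDstLabel("person")
        .build());
client.commitDataLoad(tableToTarget);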
use of com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget in project GraphScope by alibaba.
the class OfflineBuild method main.
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    String propertiesFile = args[0];
    Properties properties = new Properties();
    try (InputStream is = new FileInputStream(propertiesFile)) {
        properties.load(is);
    }
    String inputPath = properties.getProperty(INPUT_PATH);
    String outputPath = properties.getProperty(OUTPUT_PATH);
    String columnMappingConfigStr = properties.getProperty(COLUMN_MAPPING_CONFIG);
    String graphEndpoint = properties.getProperty(GRAPH_ENDPOINT);
    MaxGraphClient client = MaxGraphClient.newBuilder().setHosts(graphEndpoint).build();
    ObjectMapper objectMapper = new ObjectMapper();
    Map<String, FileColumnMapping> columnMappingConfig = objectMapper.readValue(columnMappingConfigStr, new TypeReference<Map<String, FileColumnMapping>>() {});
    // Declare every label to be loaded so the server can prepare table ids
    // and hand back the schema that the build will run against.
    List<DataLoadTarget> targets = new ArrayList<>();
    for (FileColumnMapping fileColumnMapping : columnMappingConfig.values()) {
        targets.add(DataLoadTarget.newBuilder()
                .setLabel(fileColumnMapping.getLabel())
                .setSrcLabel(fileColumnMapping.getSrcLabel())
                .setDstLabel(fileColumnMapping.getDstLabel())
                .build());
    }
    GraphSchema schema = client.prepareDataLoad(targets);
    String schemaJson = GraphSchemaMapper.parseFromSchema(schema).toJsonString();
    int partitionNum = client.getPartitionNum();
    Map<String, ColumnMappingInfo> columnMappingInfos = new HashMap<>();
    columnMappingConfig.forEach((fileName, fileColumnMapping) ->
            columnMappingInfos.put(fileName, fileColumnMapping.toColumnMappingInfo(schema)));
    String ldbcCustomize = properties.getProperty(LDBC_CUSTOMIZE, "true");
    long splitSize = Long.valueOf(properties.getProperty(SPLIT_SIZE, "256")) * 1024 * 1024;
    boolean loadAfterBuild = properties.getProperty(LOAD_AFTER_BUILD, "false").equalsIgnoreCase("true");
    boolean skipHeader = properties.getProperty(SKIP_HEADER, "true").equalsIgnoreCase("true");
    // Configure the MapReduce job that turns the raw input files into SST files,
    // with one reduce task (and one output partition) per graph partition.
    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERNODE, splitSize);
    conf.setLong(CombineTextInputFormat.SPLIT_MINSIZE_PERRACK, splitSize);
    conf.setStrings(SCHEMA_JSON, schemaJson);
    String mappings = objectMapper.writeValueAsString(columnMappingInfos);
    conf.setStrings(COLUMN_MAPPINGS, mappings);
    conf.setBoolean(LDBC_CUSTOMIZE, ldbcCustomize.equalsIgnoreCase("true"));
    conf.set(SEPARATOR, properties.getProperty(SEPARATOR, "\\|"));
    conf.setBoolean(SKIP_HEADER, skipHeader);
    Job job = Job.getInstance(conf, "build graph data");
    job.setJarByClass(OfflineBuild.class);
    job.setMapperClass(DataBuildMapper.class);
    job.setPartitionerClass(DataBuildPartitioner.class);
    job.setReducerClass(DataBuildReducer.class);
    job.setNumReduceTasks(partitionNum);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(CombineTextInputFormat.class);
    CombineTextInputFormat.setMaxInputSplitSize(job, splitSize);
    LazyOutputFormat.setOutputFormatClass(job, SstOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    Path outputDir = new Path(outputPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }
    // Record build metadata in a META file next to the generated SST files.
    FileSystem fs = outputDir.getFileSystem(job.getConfiguration());
    String dataPath = fs.makeQualified(outputDir).toString();
    Map<String, String> outputMeta = new HashMap<>();
    outputMeta.put("endpoint", graphEndpoint);
    outputMeta.put("schema", schemaJson);
    outputMeta.put("mappings", mappings);
    outputMeta.put("datapath", dataPath);
    try (FSDataOutputStream os = fs.create(new Path(outputDir, "META"))) {
        os.writeUTF(objectMapper.writeValueAsString(outputMeta));
        os.flush();
    }
    if (loadAfterBuild) {
        logger.info("start ingesting data");
        client.ingestData(dataPath);
        logger.info("commit bulk load");
        // Map each table id to its load target so the server can commit the
        // ingested data under the right vertex or edge type.
        Map<Long, DataLoadTarget> tableToTarget = new HashMap<>();
        for (ColumnMappingInfo columnMappingInfo : columnMappingInfos.values()) {
            long tableId = columnMappingInfo.getTableId();
            int labelId = columnMappingInfo.getLabelId();
            GraphElement graphElement = schema.getElement(labelId);
            String label = graphElement.getLabel();
            DataLoadTarget.Builder builder = DataLoadTarget.newBuilder();
            builder.setLabel(label);
            if (graphElement instanceof GraphEdge) {
                builder.setSrcLabel(schema.getElement(columnMappingInfo.getSrcLabelId()).getLabel());
                builder.setDstLabel(schema.getElement(columnMappingInfo.getDstLabelId()).getLabel());
            }
            tableToTarget.put(tableId, builder.build());
        }
        client.commitDataLoad(tableToTarget);
    }
}
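A hedged launcher sketch for this job follows; every property key is an assumed stand-in for the corresponding constant read above (INPUT_PATH, OUTPUT_PATH, COLUMN_MAPPING_CONFIG, GRAPH_ENDPOINT, LOAD_AFTER_BUILD) and should be checked against the actual constant values in OfflineBuild before use.
// Sketch only: the key strings below are assumptions, not the verified constant values.
String columnMappingJson = new String(
        Files.readAllBytes(Paths.get("column_mapping.json")), StandardCharsets.UTF_8);
// JSON map of input file name -> FileColumnMapping, prepared elsewhere.
Properties props = new Properties();
props.setProperty("input.path", "hdfs:///ldbc/raw");
props.setProperty("output.path", "hdfs:///ldbc/sst");
props.setProperty("column.mapping.config", columnMappingJson);
props.setProperty("graph.endpoint", "localhost:55556");
props.setProperty("load.after.build", "true");
try (OutputStream os = new FileOutputStream("build.properties")) {
    props.store(os, "offline build config");
}
OfflineBuild.main(new String[] { "build.properties" });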
use of com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget in project GraphScope by alibaba.
the class CommitDataLoadExecutor method execute.
@Override
public DdlResult execute(ByteString ddlBlob, GraphDef graphDef, int partitionCount) throws InvalidProtocolBufferException {
    CommitDataLoadPb commitDataLoadPb = CommitDataLoadPb.parseFrom(ddlBlob);
    DataLoadTargetPb dataLoadTargetPb = commitDataLoadPb.getTarget();
    DataLoadTarget dataLoadTarget = DataLoadTarget.parseProto(dataLoadTargetPb);
    String label = dataLoadTarget.getLabel();
    String srcLabel = dataLoadTarget.getSrcLabel();
    String dstLabel = dataLoadTarget.getDstLabel();
    long version = graphDef.getSchemaVersion();
    if (!graphDef.hasLabel(label)) {
        throw new DdlException("label [" + label + "] not exists, schema version [" + version + "]");
    }
    GraphDef.Builder graphDefBuilder = GraphDef.newBuilder(graphDef);
    TypeDef typeDef = graphDef.getTypeDef(label);
    DataLoadTarget.Builder targetBuilder = DataLoadTarget.newBuilder(dataLoadTarget);
    if (srcLabel == null || srcLabel.isEmpty()) {
        // Vertex type: only the label itself needs to be resolved.
        if (typeDef.getTypeEnum() != TypeEnum.VERTEX) {
            throw new DdlException("invalid data load target [" + dataLoadTarget + "], label is not a vertex");
        }
        targetBuilder.setLabelId(typeDef.getLabelId());
    } else {
        // Edge kind: resolve the edge label plus its source and destination
        // vertex labels, then verify that the resulting edge kind exists.
        if (typeDef.getTypeEnum() != TypeEnum.EDGE) {
            throw new DdlException("invalid data load target [" + dataLoadTarget + "], label is not an edge");
        }
        EdgeKind.Builder edgeKindBuilder = EdgeKind.newBuilder();
        LabelId edgeLabelId = graphDef.getLabelId(label);
        if (edgeLabelId == null) {
            throw new DdlException("invalid edgeLabel [" + label + "], schema version [" + version + "]");
        }
        edgeKindBuilder.setEdgeLabelId(edgeLabelId);
        targetBuilder.setLabelId(edgeLabelId.getId());
        LabelId srcVertexLabelId = graphDef.getLabelId(srcLabel);
        if (srcVertexLabelId == null) {
            throw new DdlException("invalid srcVertexLabel [" + srcLabel + "], schema version [" + version + "]");
        }
        edgeKindBuilder.setSrcVertexLabelId(srcVertexLabelId);
        targetBuilder.setSrcLabelId(srcVertexLabelId.getId());
        LabelId dstVertexLabelId = graphDef.getLabelId(dstLabel);
        if (dstVertexLabelId == null) {
            throw new DdlException("invalid dstVertexLabel [" + dstLabel + "], schema version [" + version + "]");
        }
        edgeKindBuilder.setDstVertexLabelId(dstVertexLabelId);
        targetBuilder.setDstLabelId(dstVertexLabelId.getId());
        EdgeKind edgeKind = edgeKindBuilder.build();
        if (!graphDef.hasEdgeKind(edgeKind)) {
            throw new DdlException("invalid data load target [" + dataLoadTarget + "], edgeKind not exists");
        }
    }
    // Bump the schema version and fan the commit out to every partition.
    version++;
    graphDefBuilder.setVersion(version);
    GraphDef newGraphDef = graphDefBuilder.build();
    List<Operation> operations = new ArrayList<>(partitionCount);
    for (int i = 0; i < partitionCount; i++) {
        operations.add(new CommitDataLoadOperation(i, version, CommitDataLoadPb.newBuilder().setTableIdx(commitDataLoadPb.getTableIdx()).setTarget(targetBuilder.build().toProto()).build()));
    }
    return new DdlResult(newGraphDef, operations);
}
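As a rough sketch of the round trip, the blob this executor consumes can be produced from a DataLoadTarget as follows; graphDef is assumed to already define a "knows" edge between "person" vertices, the table index is a hypothetical value, and a no-arg CommitDataLoadExecutor constructor is assumed.
// Sketch only: labels, table index, and the no-arg constructor are assumptions;
// graphDef and partitionCount come from the surrounding DDL machinery.
DataLoadTargetPb targetPb = DataLoadTarget.newBuilder()
        .setLabel("knows")
        .setSrcLabel("person")
        .setDstLabel("person")
        .build()
        .toProto();
ByteString ddlBlob = CommitDataLoadPb.newBuilder()
        .setTableIdx(42L)
        .setTarget(targetPb)
        .build()
        .toByteString();
DdlResult result = new CommitDataLoadExecutor().execute(ddlBlob, graphDef, partitionCount);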
use of com.alibaba.maxgraph.sdkcommon.common.DataLoadTarget in project GraphScope by alibaba.
the class Client method prepareDataLoad.
public GraphSchema prepareDataLoad(List<DataLoadTarget> targets) {
    PrepareDataLoadRequest.Builder builder = PrepareDataLoadRequest.newBuilder();
    for (DataLoadTarget target : targets) {
        builder.addDataLoadTargets(target.toProto());
    }
    PrepareDataLoadResponse response = this.stub.prepareDataLoad(builder.build());
    return GraphDef.parseProto(response.getGraphDef());
}
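A minimal caller-side sketch; the labels are hypothetical, and a vertex target simply omits setSrcLabel/setDstLabel.
// Sketch only: "person" and "knows" are assumed labels.
List<DataLoadTarget> targets = new ArrayList<>();
// Vertex target: no source/destination labels.
targets.add(DataLoadTarget.newBuilder().setLabel("person").build());
// Edge target: source and destination vertex labels are required.
targets.add(DataLoadTarget.newBuilder()
        .setLabel("knows")
        .setSrcLabel("person")
        .setDstLabel("person")
        .build());
GraphSchema schema = client.prepareDataLoad(targets);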