Usage of com.baidu.hugegraph.loader.mapping.ElementMapping in the project incubator-hugegraph-toolchain by apache.
Example from the class ElementBuilder, method retainField:
/**
* Retain only the key-value pairs needed by the current vertex or edge
*/
/**
 * Decide whether a parsed key-value pair should be kept for the current
 * vertex or edge mapping.
 *
 * A field survives only if it passes three filters in order:
 * selected-fields (whitelist), ignored-fields (blacklist), and the
 * nullable-key/null-value filter of the schema label.
 *
 * @param fieldName  name of the field as it appears in the input
 * @param fieldValue raw value parsed for that field
 * @return true to keep the pair, false to discard it
 */
protected boolean retainField(String fieldName, Object fieldValue) {
    ElementMapping mapping = this.mapping();
    Set<String> selected = mapping.selectedFields();
    Set<String> ignored = mapping.ignoredFields();
    // A non-empty whitelist keeps only the listed fields
    if (!selected.isEmpty() && !selected.contains(fieldName)) {
        return false;
    }
    // A non-empty blacklist drops the listed fields
    if (!ignored.isEmpty() && ignored.contains(fieldName)) {
        return false;
    }
    String mappedKey = mapping.mappingField(fieldName);
    Set<String> nullableKeys = this.schemaLabel().nullableKeys();
    Set<Object> nullValues = mapping.nullValues();
    // With no nullable keys or no null markers there is nothing left to filter
    if (nullableKeys.isEmpty() || nullValues.isEmpty()) {
        return true;
    }
    // Discard only when the mapped key is nullable AND the value is a null marker
    boolean isNullMarker = nullableKeys.contains(mappedKey) &&
                           nullValues.contains(fieldValue);
    return !isNullMarker;
}
Usage of com.baidu.hugegraph.loader.mapping.ElementMapping in the project incubator-hugegraph-toolchain by apache.
Example from the class MappingUtil, method parseV1:
/**
 * Convert a version-1 mapping file (GraphStructV1 JSON) into the
 * version-2 {@link LoadMapping} model.
 *
 * Element structs that read the same file/HDFS path are merged into a
 * single {@link InputStruct}; JDBC-backed structs are never merged and
 * each gets its own {@link InputStruct}. Every resulting struct is
 * assigned a sequential string id starting from "1", file/HDFS sources
 * first, then JDBC sources.
 *
 * @param json the raw v1 mapping file content
 * @return the equivalent v2 load mapping
 */
private static LoadMapping parseV1(String json) {
    GraphStructV1 graphStruct = JsonUtil.fromJson(json, GraphStructV1.class);
    // Insertion-ordered so generated ids follow the order in the v1 file
    Map<FileSourceKey, InputStruct> fileStructs = InsertionOrderUtil.newMap();
    List<InputStruct> jdbcStructs = new ArrayList<>();
    for (ElementStructV1 structV1 : graphStruct.structs()) {
        InputSource source = structV1.input();
        ElementMapping mappingV2 = convertV1ToV2(structV1);
        SourceType sourceType = source.type();
        if (sourceType == SourceType.FILE || sourceType == SourceType.HDFS) {
            FileSource fileSource = (FileSource) source;
            FileSourceKey key = new FileSourceKey(sourceType, fileSource.path());
            // Merge all mappings that share the same source path
            InputStruct merged = fileStructs.get(key);
            if (merged == null) {
                merged = new InputStruct(null, null);
                merged.input(fileSource);
                fileStructs.put(key, merged);
            }
            merged.add(mappingV2);
        } else {
            assert sourceType == SourceType.JDBC;
            InputStruct jdbcStruct = new InputStruct(null, null);
            jdbcStruct.input(source);
            jdbcStruct.add(mappingV2);
            jdbcStructs.add(jdbcStruct);
        }
    }
    // Generate id for every input mapping
    List<InputStruct> allStructs = new ArrayList<>();
    int nextId = 0;
    for (InputStruct struct : fileStructs.values()) {
        struct.id(String.valueOf(++nextId));
        allStructs.add(struct);
    }
    for (InputStruct struct : jdbcStructs) {
        struct.id(String.valueOf(++nextId));
        allStructs.add(struct);
    }
    return new LoadMapping(allStructs);
}
Usage of com.baidu.hugegraph.loader.mapping.ElementMapping in the project incubator-hugegraph-toolchain by apache.
Example from the class MappingUtil, method convertV1ToV2:
/**
 * Build the v2 mapping object corresponding to a single v1 element struct.
 *
 * Vertex structs become {@link VertexMapping}s (id field + unfold flag);
 * everything else is treated as an edge and becomes an {@link EdgeMapping}
 * (source/target fields plus their unfold flags).
 *
 * @param origin the v1 element struct to convert
 * @return the freshly created v2 mapping
 */
private static ElementMapping convertV1ToV2(ElementStructV1 origin) {
    final ElementMapping target;
    if (origin.type().isVertex()) {
        VertexStructV1 vertex = (VertexStructV1) origin;
        target = new VertexMapping(vertex.idField(), vertex.unfold());
    } else {
        EdgeStructV1 edge = (EdgeStructV1) origin;
        target = new EdgeMapping(edge.sourceFields(), edge.unfoldSource(),
                                 edge.targetFields(), edge.unfoldTarget());
    }
    // Copy the attributes shared by vertex and edge structs (fields, labels...)
    fill(origin, target);
    return target;
}
Usage of com.baidu.hugegraph.loader.mapping.ElementMapping in the project incubator-hugegraph-toolchain by apache.
Example from the class ParseTaskBuilder, method buildTask:
/**
 * Create a {@link ParseTask} that, when executed, parses the given lines
 * into graph elements and groups them into batches of at most
 * {@code batchSize} records.
 *
 * Lines that fail to parse (IllegalArgumentException from the builder)
 * are counted as parse failures and routed to handleParseFailure instead
 * of aborting the whole task.
 *
 * @param builder element builder for the current vertex/edge mapping
 * @param lines   raw input lines to parse
 * @return a task producing the list of record batches
 */
private ParseTask buildTask(ElementBuilder builder, List<Line> lines) {
    final LoadMetrics metrics = this.context.summary().metrics(this.struct);
    final int batchSize = this.context.options().batchSize;
    final ElementMapping mapping = builder.mapping();
    // Vertices with a primary-key id strategy must not carry an explicit id;
    // the server derives it from the primary-key properties
    final boolean needRemoveId = builder instanceof VertexBuilder &&
                                 ((VertexLabel) builder.schemaLabel())
                                 .idStrategy().isPrimaryKey();
    return new ParseTask(mapping, () -> {
        List<List<Record>> batches = new ArrayList<>();
        // Records accumulated for the batch currently being filled
        List<Record> currentBatch = new ArrayList<>(batchSize);
        int parsedCount = 0;
        for (Line line : lines) {
            try {
                // NOTE: don't remove entry in keyValues
                @SuppressWarnings("unchecked")
                List<GraphElement> elements =
                        builder.build(line.names(), line.values());
                E.checkState(elements.size() <= batchSize,
                             "The number of columns in a line cannot " +
                             "exceed the size of a batch, but got %s > %s",
                             elements.size(), batchSize);
                // Seal the current batch if these elements would overflow it
                if (currentBatch.size() + elements.size() > batchSize) {
                    LOG.debug("Create a new batch for {}", mapping);
                    batches.add(currentBatch);
                    currentBatch = new ArrayList<>(batchSize);
                }
                for (GraphElement element : elements) {
                    if (needRemoveId) {
                        ((Vertex) element).id(null);
                    }
                    currentBatch.add(new Record(line.rawLine(), element));
                    parsedCount++;
                }
            } catch (IllegalArgumentException e) {
                metrics.increaseParseFailure(mapping);
                this.handleParseFailure(mapping,
                                        new ParseException(line.rawLine(), e));
            }
        }
        // Flush the trailing, partially-filled batch
        if (!currentBatch.isEmpty()) {
            batches.add(currentBatch);
        }
        metrics.plusParseSuccess(mapping, parsedCount);
        return batches;
    });
}
Aggregations