Use of com.baidu.hugegraph.loader.metrics.LoadMetrics in the Apache project incubator-hugegraph-toolchain.
From the class HugeGraphLoader, method loadStruct.
/**
 * Reads lines from {@code reader} and, one batch at a time, builds parse
 * tasks for them and executes those tasks.
 *
 * A batch is flushed when it holds at least {@code batchSize} lines, when
 * the reader has no more lines, or when the max-read-lines limit has been
 * reached. The loop also ends, without flushing, once the context reports
 * that it has been stopped.
 *
 * TODO: Separate classes: ReadHandler -> ParseHandler -> InsertHandler
 * Let the load task work in pipeline mode
 *
 * @param struct the input struct being loaded
 * @param reader the source that lines are read from
 */
private void loadStruct(InputStruct struct, InputReader reader) {
    LOG.info("Start loading '{}'", struct);
    LoadMetrics metrics = this.context.summary().metrics(struct);
    metrics.startInFlight();

    ParseTaskBuilder parseTaskBuilder = new ParseTaskBuilder(this.context, struct);
    final int batchSize = this.context.options().batchSize;
    List<Line> pending = new ArrayList<>(batchSize);

    boolean done = false;
    // The stop flag is only consulted while there is still work to do
    while (!done && !this.context.stopped()) {
        try {
            // Pull the next line from the data source, if there is one
            if (reader.hasNext()) {
                pending.add(reader.next());
                metrics.increaseReadSuccess();
            } else {
                done = true;
            }
        } catch (ReadException e) {
            metrics.increaseReadFailure();
            this.handleReadFailure(struct, e);
        }

        // Hitting the max allowed number of read lines also ends the loop
        final boolean limitReached = this.reachedMaxReadLines();
        done = done || limitReached;

        // Keep accumulating until the batch is full or reading is over
        if (!done && pending.size() < batchSize) {
            continue;
        }

        for (ParseTask parseTask : parseTaskBuilder.build(pending)) {
            this.executeParseTask(struct, parseTask.mapping(), parseTask);
        }
        // Confirm offset to avoid lost records
        reader.confirmOffset();
        this.context.newProgress().markLoaded(struct, done);
        this.handleParseFailure();
        if (limitReached) {
            LOG.warn("Read lines exceed limit, stopped loading tasks");
            this.context.stopLoading();
        }
        // Start a fresh list instead of clearing: the previous one was
        // handed to the task builder
        pending = new ArrayList<>(batchSize);
    }

    // NOTE(review): not reached if an exception propagates out of the loop
    // above - confirm whether the in-flight flag should be reset in a finally
    metrics.stopInFlight();
    LOG.info("Finish loading '{}'", struct);
}
Use of com.baidu.hugegraph.loader.metrics.LoadMetrics in the Apache project incubator-hugegraph-toolchain.
From the class ParseTaskBuilder, method buildTask.
/**
 * Creates a {@link ParseTask} for {@code builder}'s mapping whose callback
 * turns {@code lines} into batches of records.
 *
 * The callback builds the graph elements of each line and packs them into
 * lists of at most {@code batchSize} records; all elements of one line end
 * up in the same batch. A line that raises an
 * {@link IllegalArgumentException} is counted as a parse failure and passed
 * to {@code handleParseFailure}; the remaining lines are still processed.
 *
 * @param builder the element builder used to parse each line
 * @param lines   the lines to parse when the callback runs
 * @return a parse task wrapping the batching callback
 */
private ParseTask buildTask(ElementBuilder builder, List<Line> lines) {
    final LoadMetrics metrics = this.context.summary().metrics(this.struct);
    final int batchSize = this.context.options().batchSize;
    final ElementMapping mapping = builder.mapping();
    // Decided once, up front, rather than per element inside the callback
    final boolean needRemoveId;
    if (builder instanceof VertexBuilder) {
        VertexLabel vertexLabel = (VertexLabel) builder.schemaLabel();
        needRemoveId = vertexLabel.idStrategy().isPrimaryKey();
    } else {
        needRemoveId = false;
    }

    return new ParseTask(mapping, () -> {
        List<List<Record>> allBatches = new ArrayList<>();
        // The batch currently being filled
        List<Record> current = new ArrayList<>(batchSize);
        int parsed = 0;
        for (Line line : lines) {
            try {
                // NOTE: don't remove entry in keyValues
                @SuppressWarnings("unchecked")
                List<GraphElement> elements = builder.build(line.names(),
                                                            line.values());
                E.checkState(elements.size() <= batchSize,
                             "The number of columns in a line cannot " +
                             "exceed the size of a batch, but got %s > %s",
                             elements.size(), batchSize);
                // Close the current batch if this line would overflow it
                boolean overflow = current.size() + elements.size() > batchSize;
                if (overflow) {
                    LOG.debug("Create a new batch for {}", mapping);
                    allBatches.add(current);
                    current = new ArrayList<>(batchSize);
                }
                for (GraphElement element : elements) {
                    if (needRemoveId) {
                        ((Vertex) element).id(null);
                    }
                    current.add(new Record(line.rawLine(), element));
                    parsed++;
                }
            } catch (IllegalArgumentException e) {
                metrics.increaseParseFailure(mapping);
                this.handleParseFailure(mapping,
                                        new ParseException(line.rawLine(), e));
            }
        }
        // Keep the trailing, partially filled batch
        if (!current.isEmpty()) {
            allBatches.add(current);
        }
        metrics.plusParseSuccess(mapping, parsed);
        return allBatches;
    });
}
Use of com.baidu.hugegraph.loader.metrics.LoadMetrics in the Apache project incubator-hugegraph-toolchain.
From the class InsertTask, method plusLoadSuccess.
/**
 * Records {@code count} successful inserts: first on the metrics of this
 * task's struct (under this task's mapping), then on the summary's loaded
 * counter for this task's type.
 *
 * @param count the number of elements to add to both counters
 */
protected void plusLoadSuccess(int count) {
    this.summary().metrics(this.struct)
                  .plusInsertSuccess(this.mapping, count);
    this.summary().plusLoaded(this.type(), count);
}
Aggregations