use of com.baidu.hugegraph.loader.exception.ReadException in project incubator-hugegraph-toolchain by apache.
the class HugeGraphLoader method loadStruct.
/**
 * Load one input struct: pull lines from the reader one at a time, collect
 * them into batches of the configured batch size, and run the parse tasks
 * built for each batch. The last (possibly partial or empty) batch is
 * flushed once the reader is exhausted or the read limit is reached.
 *
 * TODO: Separate classes: ReadHandler -> ParseHandler -> InsertHandler
 * Let the load task work in pipeline mode
 *
 * @param struct the input struct being loaded
 * @param reader the reader that supplies the lines of the struct
 */
private void loadStruct(InputStruct struct, InputReader reader) {
    LOG.info("Start loading '{}'", struct);
    LoadMetrics metrics = this.context.summary().metrics(struct);
    metrics.startInFlight();

    ParseTaskBuilder taskBuilder = new ParseTaskBuilder(this.context, struct);
    final int batchSize = this.context.options().batchSize;
    List<Line> batch = new ArrayList<>(batchSize);

    boolean finished = false;
    // A stopped context aborts the loop without flushing the pending batch
    while (!finished && !this.context.stopped()) {
        try {
            // Fetch the next line; an exhausted reader ends the loop
            if (!reader.hasNext()) {
                finished = true;
            } else {
                batch.add(reader.next());
                metrics.increaseReadSuccess();
            }
        } catch (ReadException e) {
            // A bad line is counted and reported, then reading goes on
            metrics.increaseReadFailure();
            this.handleReadFailure(struct, e);
        }

        // Hitting the max allowed read lines also ends the loop
        boolean hitReadLimit = this.reachedMaxReadLines();
        finished = finished || hitReadLimit;

        // Keep accumulating until the batch is full or reading is over
        if (!finished && batch.size() < batchSize) {
            continue;
        }

        for (ParseTask task : taskBuilder.build(batch)) {
            this.executeParseTask(struct, task.mapping(), task);
        }
        // Confirm offset to avoid lost records
        reader.confirmOffset();
        this.context.newProgress().markLoaded(struct, finished);
        this.handleParseFailure();
        if (hitReadLimit) {
            LOG.warn("Read lines exceed limit, stopped loading tasks");
            this.context.stopLoading();
        }
        // Start a fresh list instead of clearing: the built tasks were
        // handed the previous one (NOTE(review): they may still hold it)
        batch = new ArrayList<>(batchSize);
    }

    metrics.stopInFlight();
    LOG.info("Finish loading '{}'", struct);
}
use of com.baidu.hugegraph.loader.exception.ReadException in project incubator-hugegraph-toolchain by apache.
the class JsonLineParser method parse.
/**
 * Parse one raw JSON line into a {@link Line}.
 * The line is converted to a map; the column names and values of the
 * resulting line are both derived from that map. The {@code header}
 * argument is not consulted by this parser.
 *
 * @param header unused here
 * @param rawLine the raw JSON text of the line
 * @return the parsed line, carrying the raw text, names and values
 * @throws ReadException if the raw line can't be deserialized
 */
@Override
public Line parse(String[] header, String rawLine) {
    try {
        Map<String, Object> fields = JsonUtil.convertMap(rawLine,
                                                         String.class,
                                                         Object.class);
        String[] columnNames = names(fields);
        Object[] columnValues = values(fields, columnNames);
        return new Line(rawLine, columnNames, columnValues);
    } catch (SerializeException e) {
        // Keep the cause and the offending raw line in the read error
        throw new ReadException(rawLine, "Deserialize line '%s' error",
                                e, rawLine);
    }
}
use of com.baidu.hugegraph.loader.exception.ReadException in project incubator-hugegraph-toolchain by apache.
the class TextLineParser method parse.
/**
 * Parse one raw text line into a {@link Line} whose values line up with
 * the header.
 * <ul>
 * <li>Same number of columns as the header: columns are used as is.</li>
 * <li>Fewer columns: the missing tail is filled with empty strings.</li>
 * <li>More columns: accepted only if the extra tail columns pass
 * {@code tailColumnEmpty}; they are then dropped.</li>
 * </ul>
 *
 * @param header the column names of the source
 * @param rawLine the raw text of the line
 * @return the parsed line, with exactly {@code header.length} values
 * @throws ReadException if the line has extra columns that are not empty
 */
@Override
public Line parse(String[] header, String rawLine) throws ReadException {
    String[] columns = this.split(rawLine);
    int expected = header.length;
    int actual = columns.length;

    if (actual == expected) {
        return new Line(rawLine, header, columns);
    }

    if (actual < expected) {
        // Fill with an empty string
        String[] padded = Arrays.copyOf(columns, expected);
        Arrays.fill(padded, actual, expected, Constants.EMPTY_STR);
        return new Line(rawLine, header, padded);
    }

    // Ignore extra empty string at the tail of line
    int extra = actual - expected;
    if (!this.tailColumnEmpty(columns, extra)) {
        throw new ReadException(rawLine,
                                "The column length '%s' doesn't match with " +
                                "header length '%s' on: %s",
                                columns.length, header.length, rawLine);
    }
    String[] truncated = Arrays.copyOf(columns, expected);
    return new Line(rawLine, header, truncated);
}
Aggregations