Search in sources :

Example 1 with ReadException

use of com.baidu.hugegraph.loader.exception.ReadException in project incubator-hugegraph-toolchain by apache.

the class HugeGraphLoader method loadStruct.

/**
 * Reads lines of the given input struct from the reader, groups them into
 * batches of at most {@code batchSize} lines, and executes the parse tasks
 * built from each batch.
 *
 * The loop ends when the reader has no more lines, when the max read lines
 * limit is reached, or when the load context reports it has been stopped.
 *
 * TODO: Separate classes: ReadHandler -> ParseHandler -> InsertHandler,
 * so that a load task can work in pipeline mode
 *
 * @param struct the input struct being loaded
 * @param reader the reader that supplies the lines of the struct
 */
private void loadStruct(InputStruct struct, InputReader reader) {
    LOG.info("Start loading '{}'", struct);
    LoadMetrics metrics = this.context.summary().metrics(struct);
    metrics.startInFlight();
    ParseTaskBuilder taskBuilder = new ParseTaskBuilder(this.context, struct);
    final int batchSize = this.context.options().batchSize;
    List<Line> batch = new ArrayList<>(batchSize);
    boolean done = false;
    while (!done && !this.context.stopped()) {
        try {
            // Pull the next line from the data source, if there is one
            if (!reader.hasNext()) {
                done = true;
            } else {
                batch.add(reader.next());
                metrics.increaseReadSuccess();
            }
        } catch (ReadException e) {
            metrics.increaseReadFailure();
            this.handleReadFailure(struct, e);
        }
        // Once the max allowed number of lines has been read, stop reading
        boolean limitReached = this.reachedMaxReadLines();
        done = done || limitReached;
        // Keep accumulating until the batch is full or reading is over
        if (!done && batch.size() < batchSize) {
            continue;
        }
        for (ParseTask task : taskBuilder.build(batch)) {
            this.executeParseTask(struct, task.mapping(), task);
        }
        // Confirm offset to avoid lost records
        reader.confirmOffset();
        this.context.newProgress().markLoaded(struct, done);
        this.handleParseFailure();
        if (limitReached) {
            LOG.warn("Read lines exceed limit, stopped loading tasks");
            this.context.stopLoading();
        }
        // Start a fresh list rather than clearing: the built tasks were
        // handed the previous one (NOTE(review): confirm they may retain it)
        batch = new ArrayList<>(batchSize);
    }
    metrics.stopInFlight();
    LOG.info("Finish loading '{}'", struct);
}
Also used : Line(com.baidu.hugegraph.loader.reader.line.Line) ReadException(com.baidu.hugegraph.loader.exception.ReadException) ParseTaskBuilder(com.baidu.hugegraph.loader.task.ParseTaskBuilder) ParseTask(com.baidu.hugegraph.loader.task.ParseTaskBuilder.ParseTask) ArrayList(java.util.ArrayList) LoadMetrics(com.baidu.hugegraph.loader.metrics.LoadMetrics)

Example 2 with ReadException

use of com.baidu.hugegraph.loader.exception.ReadException in project incubator-hugegraph-toolchain by apache.

the class JsonLineParser method parse.

/**
 * Parses a raw line as a JSON object and wraps it into a {@link Line}.
 * The {@code header} argument is not used by this implementation; the
 * column names are obtained from the deserialized map instead.
 *
 * @param header  ignored here
 * @param rawLine the raw JSON text of one line
 * @return a line holding the raw text, the names and the matching values
 * @throws ReadException if a SerializeException is raised while
 *                       converting the line
 */
@Override
public Line parse(String[] header, String rawLine) {
    try {
        Map<String, Object> fields = JsonUtil.convertMap(rawLine, String.class, Object.class);
        String[] keys = names(fields);
        return new Line(rawLine, keys, values(fields, keys));
    } catch (SerializeException e) {
        // Keep the original exception as the cause and attach the raw line
        throw new ReadException(rawLine, "Deserialize line '%s' error", e, rawLine);
    }
}
Also used : Line(com.baidu.hugegraph.loader.reader.line.Line) ReadException(com.baidu.hugegraph.loader.exception.ReadException) SerializeException(com.baidu.hugegraph.rest.SerializeException)

Example 3 with ReadException

use of com.baidu.hugegraph.loader.exception.ReadException in project incubator-hugegraph-toolchain by apache.

the class TextLineParser method parse.

/**
 * Splits a raw text line into columns and aligns them with the header.
 *
 * If the line has as many columns as the header, they are used as is.
 * If it has fewer, the missing trailing columns are filled with
 * {@code Constants.EMPTY_STR}. If it has more, the surplus columns are
 * dropped provided {@code tailColumnEmpty} accepts them; otherwise the
 * line is rejected.
 *
 * @param header  the column names
 * @param rawLine the raw text of one line
 * @return a line holding the raw text, the header and the aligned columns
 * @throws ReadException if the line has surplus columns that
 *                       {@code tailColumnEmpty} does not accept
 */
@Override
public Line parse(String[] header, String rawLine) throws ReadException {
    String[] columns = this.split(rawLine);
    final int actual = columns.length;
    final int expected = header.length;
    if (actual == expected) {
        return new Line(rawLine, header, columns);
    }
    if (actual < expected) {
        // Pad the missing trailing columns with an empty string
        String[] padded = Arrays.copyOf(columns, expected);
        Arrays.fill(padded, actual, expected, Constants.EMPTY_STR);
        return new Line(rawLine, header, padded);
    }
    // More columns than header: only tolerated when the tail check passes
    if (!this.tailColumnEmpty(columns, actual - expected)) {
        throw new ReadException(rawLine, "The column length '%s' doesn't match with header length '%s' on: %s", columns.length, header.length, rawLine);
    }
    return new Line(rawLine, header, Arrays.copyOf(columns, expected));
}
Also used : Line(com.baidu.hugegraph.loader.reader.line.Line) ReadException(com.baidu.hugegraph.loader.exception.ReadException)

Aggregations

ReadException (com.baidu.hugegraph.loader.exception.ReadException)3 Line (com.baidu.hugegraph.loader.reader.line.Line)3 LoadMetrics (com.baidu.hugegraph.loader.metrics.LoadMetrics)1 ParseTaskBuilder (com.baidu.hugegraph.loader.task.ParseTaskBuilder)1 ParseTask (com.baidu.hugegraph.loader.task.ParseTaskBuilder.ParseTask)1 SerializeException (com.baidu.hugegraph.rest.SerializeException)1 ArrayList (java.util.ArrayList)1