Search in sources :

Example 1 with ParseTask

use of com.baidu.hugegraph.loader.task.ParseTaskBuilder.ParseTask in project incubator-hugegraph-toolchain by apache.

The method loadStruct of the class HugeGraphLoader.

/**
 * Reads lines from {@code reader} in batches of at most
 * {@code options().batchSize}, builds parse tasks for each batch and
 * executes them, then confirms the reader offset and marks progress.
 *
 * The loop ends when the reader has no more lines, when
 * {@code reachedMaxReadLines()} reports true, or when the load context
 * has been stopped. In the stopped case the loop is left immediately,
 * without flushing lines already collected in the current batch.
 *
 * TODO: Separate classes: ReadHandler -> ParseHandler -> InsertHandler
 * Let the load task work in pipeline mode
 *
 * @param struct the input struct being loaded
 * @param reader the reader that supplies lines for {@code struct}
 */
private void loadStruct(InputStruct struct, InputReader reader) {
    LOG.info("Start loading '{}'", struct);
    LoadMetrics metrics = this.context.summary().metrics(struct);
    metrics.startInFlight();
    ParseTaskBuilder taskBuilder = new ParseTaskBuilder(this.context, struct);
    final int batchSize = this.context.options().batchSize;
    List<Line> batch = new ArrayList<>(batchSize);
    boolean done = false;
    while (!done && !this.context.stopped()) {
        boolean exhausted = false;
        try {
            // Pull the next line from the data source, if there is one
            if (!reader.hasNext()) {
                exhausted = true;
            } else {
                batch.add(reader.next());
                metrics.increaseReadSuccess();
            }
        } catch (ReadException e) {
            // A failed read is counted and handed to the failure handler;
            // the iteration then continues with the checks below
            metrics.increaseReadFailure();
            this.handleReadFailure(struct, e);
        }
        // Reaching the max allowed read lines also ends the loading
        boolean limitHit = this.reachedMaxReadLines();
        done = exhausted || limitHit;
        // Keep collecting until the batch is full or this is the last round
        if (!done && batch.size() < batchSize) {
            continue;
        }
        for (ParseTask task : taskBuilder.build(batch)) {
            this.executeParseTask(struct, task.mapping(), task);
        }
        // Confirm offset to avoid lost records
        reader.confirmOffset();
        this.context.newProgress().markLoaded(struct, done);
        this.handleParseFailure();
        if (limitHit) {
            LOG.warn("Read lines exceed limit, stopped loading tasks");
            this.context.stopLoading();
        }
        // Start a fresh batch rather than clearing the one just handed over
        batch = new ArrayList<>(batchSize);
    }
    metrics.stopInFlight();
    LOG.info("Finish loading '{}'", struct);
}
Also used : Line(com.baidu.hugegraph.loader.reader.line.Line) ReadException(com.baidu.hugegraph.loader.exception.ReadException) ParseTaskBuilder(com.baidu.hugegraph.loader.task.ParseTaskBuilder) ParseTask(com.baidu.hugegraph.loader.task.ParseTaskBuilder.ParseTask) ArrayList(java.util.ArrayList) LoadMetrics(com.baidu.hugegraph.loader.metrics.LoadMetrics)

Aggregations

ReadException (com.baidu.hugegraph.loader.exception.ReadException): 1, LoadMetrics (com.baidu.hugegraph.loader.metrics.LoadMetrics): 1, Line (com.baidu.hugegraph.loader.reader.line.Line): 1, ParseTaskBuilder (com.baidu.hugegraph.loader.task.ParseTaskBuilder): 1, ParseTask (com.baidu.hugegraph.loader.task.ParseTaskBuilder.ParseTask): 1, ArrayList (java.util.ArrayList): 1