Search in sources:

Example 1 with Line

use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.

the class HugeGraphLoader method loadStruct.

/**
 * Reads lines from {@code reader}, groups them into batches of up to
 * {@code batchSize} lines and executes the parse tasks built for each batch.
 * The loop ends when the reader has no more lines, when the read limit is
 * reached, or when the load context reports that it has been stopped.
 *
 * TODO: Separate classes: ReadHandler -> ParseHandler -> InsertHandler
 * Let the load task work in pipeline mode
 *
 * @param struct the input struct being loaded
 * @param reader the reader that supplies lines for {@code struct}
 */
private void loadStruct(InputStruct struct, InputReader reader) {
    LOG.info("Start loading '{}'", struct);
    LoadMetrics metrics = this.context.summary().metrics(struct);
    metrics.startInFlight();
    ParseTaskBuilder taskBuilder = new ParseTaskBuilder(this.context, struct);
    final int batchSize = this.context.options().batchSize;
    List<Line> pending = new ArrayList<>(batchSize);
    boolean finished = false;
    while (!finished) {
        if (this.context.stopped()) {
            break;
        }
        try {
            // Pull the next line from the data source, if there is one
            if (!reader.hasNext()) {
                finished = true;
            } else {
                pending.add(reader.next());
                metrics.increaseReadSuccess();
            }
        } catch (ReadException e) {
            metrics.increaseReadFailure();
            this.handleReadFailure(struct, e);
        }
        // Once the maximum allowed number of lines has been read, stop loading
        boolean hitReadLimit = this.reachedMaxReadLines();
        if (hitReadLimit) {
            finished = true;
        }
        // Keep accumulating until the batch is full or this is the last round
        if (!finished && pending.size() < batchSize) {
            continue;
        }
        for (ParseTask task : taskBuilder.build(pending)) {
            this.executeParseTask(struct, task.mapping(), task);
        }
        // Confirm offset to avoid lost records
        reader.confirmOffset();
        this.context.newProgress().markLoaded(struct, finished);
        this.handleParseFailure();
        if (hitReadLimit) {
            LOG.warn("Read lines exceed limit, stopped loading tasks");
            this.context.stopLoading();
        }
        // Start a fresh list instead of clearing, so the list handed to the
        // task builder is never mutated afterwards
        pending = new ArrayList<>(batchSize);
    }
    metrics.stopInFlight();
    LOG.info("Finish loading '{}'", struct);
}
Also used : Line(com.baidu.hugegraph.loader.reader.line.Line) ReadException(com.baidu.hugegraph.loader.exception.ReadException) ParseTaskBuilder(com.baidu.hugegraph.loader.task.ParseTaskBuilder) ParseTask(com.baidu.hugegraph.loader.task.ParseTaskBuilder.ParseTask) ArrayList(java.util.ArrayList) LoadMetrics(com.baidu.hugegraph.loader.metrics.LoadMetrics)

Example 2 with Line

use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.

the class ParseTaskBuilder method buildTask.

/**
 * Creates a {@link ParseTask} that, when run, converts {@code lines} into
 * graph elements using {@code builder} and groups the resulting records into
 * batches of at most {@code batchSize} records.
 *
 * Lines whose parsing throws {@link IllegalArgumentException} are counted as
 * parse failures and passed to {@code handleParseFailure}; the remaining
 * lines are still processed.
 *
 * @param builder the element builder used to parse each line
 * @param lines   the lines to parse
 * @return a task bound to the builder's mapping that yields the batches
 */
private ParseTask buildTask(ElementBuilder builder, List<Line> lines) {
    final LoadMetrics metrics = this.context.summary().metrics(this.struct);
    final int batchSize = this.context.options().batchSize;
    final ElementMapping mapping = builder.mapping();
    // Ids are cleared only for vertices whose label uses the primary-key strategy
    final boolean needRemoveId;
    if (builder instanceof VertexBuilder) {
        VertexLabel vertexLabel = (VertexLabel) builder.schemaLabel();
        needRemoveId = vertexLabel.idStrategy().isPrimaryKey();
    } else {
        needRemoveId = false;
    }
    return new ParseTask(mapping, () -> {
        List<List<Record>> batches = new ArrayList<>();
        // The batch currently being filled
        List<Record> current = new ArrayList<>(batchSize);
        int parsed = 0;
        for (Line line : lines) {
            try {
                // NOTE: don't remove entry in keyValues
                @SuppressWarnings("unchecked")
                List<GraphElement> elements = builder.build(line.names(),
                                                            line.values());
                int produced = elements.size();
                E.checkState(produced <= batchSize,
                             "The number of columns in a line cannot " +
                             "exceed the size of a batch, but got %s > %s",
                             elements.size(), batchSize);
                // Roll over to a new batch rather than exceed the size limit
                if (current.size() + produced > batchSize) {
                    LOG.debug("Create a new batch for {}", mapping);
                    batches.add(current);
                    current = new ArrayList<>(batchSize);
                }
                for (GraphElement element : elements) {
                    if (needRemoveId) {
                        ((Vertex) element).id(null);
                    }
                    current.add(new Record(line.rawLine(), element));
                    parsed++;
                }
            } catch (IllegalArgumentException e) {
                metrics.increaseParseFailure(mapping);
                ParseException failure = new ParseException(line.rawLine(), e);
                this.handleParseFailure(mapping, failure);
            }
        }
        // Keep the trailing, partially filled batch
        if (!current.isEmpty()) {
            batches.add(current);
        }
        metrics.plusParseSuccess(mapping, parsed);
        return batches;
    });
}
Also used : ElementMapping(com.baidu.hugegraph.loader.mapping.ElementMapping) Vertex(com.baidu.hugegraph.structure.graph.Vertex) ArrayList(java.util.ArrayList) LoadMetrics(com.baidu.hugegraph.loader.metrics.LoadMetrics) Line(com.baidu.hugegraph.loader.reader.line.Line) VertexBuilder(com.baidu.hugegraph.loader.builder.VertexBuilder) VertexLabel(com.baidu.hugegraph.structure.schema.VertexLabel) GraphElement(com.baidu.hugegraph.structure.GraphElement) ArrayList(java.util.ArrayList) List(java.util.List) Record(com.baidu.hugegraph.loader.builder.Record) ParseException(com.baidu.hugegraph.loader.exception.ParseException)

Example 3 with Line

use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.

the class LineTest method testNameValues.

/**
 * Checks that a Line returns the names and values it was constructed with.
 */
@Test
public void testNameValues() {
    String[] inputNames = { "id", "name", "age" };
    Object[] inputValues = { 1, "marko", 27 };
    Line line = new Line("1,marko,27", inputNames, inputValues);

    // Expected arrays are built separately from the ones handed to the Line
    String[] expectedNames = new String[] { "id", "name", "age" };
    Object[] expectedValues = new Object[] { 1, "marko", 27 };
    Assert.assertArrayEquals(expectedNames, line.names());
    Assert.assertArrayEquals(expectedValues, line.values());
}
Also used : Line(com.baidu.hugegraph.loader.reader.line.Line) Test(org.junit.Test)

Example 4 with Line

use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.

the class LineTest method testRetainAll.

/**
 * Checks that retainAll keeps only the requested name and its value.
 */
@Test
public void testRetainAll() {
    String[] inputNames = { "id", "name", "age" };
    Object[] inputValues = { 1, "marko", 27 };
    Line line = new Line("1,marko,27", inputNames, inputValues);

    String[] retained = { "id" };
    line.retainAll(retained);

    // Expected arrays are built separately from the ones handed to the Line
    String[] expectedNames = new String[] { "id" };
    Object[] expectedValues = new Object[] { 1 };
    Assert.assertArrayEquals(expectedNames, line.names());
    Assert.assertArrayEquals(expectedValues, line.values());
}
Also used : Line(com.baidu.hugegraph.loader.reader.line.Line) Test(org.junit.Test)

Example 5 with Line

use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.

the class RowFetcher method nextBatch.

/**
 * Runs the next select statement and converts every returned row into a
 * {@link Line}.
 *
 * When custom SQL is configured, the header is read from the result and the
 * fetcher is marked as fully fetched after this single query. Otherwise, if
 * the result holds exactly {@code batchSize + 1} rows, the last row is
 * removed from the batch, reduced to the primary key columns and kept as the
 * start row of the next query; any other row count marks the fetcher as
 * fully fetched.
 *
 * @return the lines of this batch, or {@code null} if everything has already
 *         been fetched
 * @throws SQLException if the query fails; the fetcher is closed before the
 *         exception is rethrown
 */
public List<Line> nextBatch() throws SQLException {
    if (this.fullyFetched) {
        return null;
    }
    String select;
    if (this.source.existsCustomSQL()) {
        select = this.source.customSQL();
    } else {
        select = this.source.vendor().buildSelectSql(this.source,
                                                     this.nextStartRow);
    }
    LOG.debug("The sql for select is: {}", select);
    List<Line> batch = new ArrayList<>(this.source.batchSize() + 1);
    try (Statement stmt = this.conn.createStatement();
         ResultSet rows = stmt.executeQuery(select)) {
        if (this.source.existsCustomSQL()) {
            this.readHeader(rows);
        }
        while (rows.next()) {
            final int width = this.columns.length;
            Object[] values = new Object[width];
            for (int col = 0; col < width; col++) {
                // JDBC column indexes are 1-based
                Object cell = rows.getObject(col + 1);
                values[col] = cell != null ? cell : Constants.NULL_STR;
            }
            String rawLine = StringUtils.join(values, Constants.COMMA_STR);
            batch.add(new Line(rawLine, this.columns, values));
        }
    } catch (SQLException e) {
        this.close();
        throw e;
    }
    boolean hasExtraRow = !this.source.existsCustomSQL() &&
                          batch.size() == this.source.batchSize() + 1;
    if (hasExtraRow) {
        // Drop the extra row and keep its primary keys as the next start row
        Line boundary = batch.remove(batch.size() - 1);
        boundary.retainAll(this.primaryKeys);
        this.nextStartRow = boundary;
    } else {
        this.fullyFetched = true;
    }
    return batch;
}
Also used : Line(com.baidu.hugegraph.loader.reader.line.Line) SQLException(java.sql.SQLException) Statement(java.sql.Statement) ArrayList(java.util.ArrayList) ResultSet(java.sql.ResultSet)

Aggregations

Line (com.baidu.hugegraph.loader.reader.line.Line)9 ReadException (com.baidu.hugegraph.loader.exception.ReadException)3 ArrayList (java.util.ArrayList)3 LoadMetrics (com.baidu.hugegraph.loader.metrics.LoadMetrics)2 Test (org.junit.Test)2 Record (com.baidu.hugegraph.loader.builder.Record)1 VertexBuilder (com.baidu.hugegraph.loader.builder.VertexBuilder)1 ParseException (com.baidu.hugegraph.loader.exception.ParseException)1 ElementMapping (com.baidu.hugegraph.loader.mapping.ElementMapping)1 ParseTaskBuilder (com.baidu.hugegraph.loader.task.ParseTaskBuilder)1 ParseTask (com.baidu.hugegraph.loader.task.ParseTaskBuilder.ParseTask)1 SerializeException (com.baidu.hugegraph.rest.SerializeException)1 GraphElement (com.baidu.hugegraph.structure.GraphElement)1 Vertex (com.baidu.hugegraph.structure.graph.Vertex)1 VertexLabel (com.baidu.hugegraph.structure.schema.VertexLabel)1 ResultSet (java.sql.ResultSet)1 SQLException (java.sql.SQLException)1 Statement (java.sql.Statement)1 List (java.util.List)1 NoSuchElementException (java.util.NoSuchElementException)1