use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.
the class HugeGraphLoader method loadStruct.
/**
 * Loads one input struct: reads lines from the reader one at a time,
 * buffers them, and whenever the buffer holds a full batch (or reading is
 * finished) builds parse tasks from the buffer and executes them.
 * Reading ends when the reader has no more lines, when the max read lines
 * limit is reached, or when the load context has been stopped.
 *
 * TODO: Separate classes: ReadHandler -> ParseHandler -> InsertHandler,
 * so that a load task can work in pipeline mode
 */
private void loadStruct(InputStruct struct, InputReader reader) {
    LOG.info("Start loading '{}'", struct);
    LoadMetrics metrics = this.context.summary().metrics(struct);
    metrics.startInFlight();

    ParseTaskBuilder taskBuilder = new ParseTaskBuilder(this.context, struct);
    final int batchSize = this.context.options().batchSize;
    List<Line> buffer = new ArrayList<>(batchSize);

    boolean done = false;
    // A stopped context abandons whatever is still buffered
    while (!done && !this.context.stopped()) {
        try {
            // Pull the next line from the data source, if there is one
            if (!reader.hasNext()) {
                done = true;
            } else {
                buffer.add(reader.next());
                metrics.increaseReadSuccess();
            }
        } catch (ReadException e) {
            metrics.increaseReadFailure();
            this.handleReadFailure(struct, e);
        }

        // Once the max allowed number of lines has been read, stop loading
        final boolean limitReached = this.reachedMaxReadLines();
        done = done || limitReached;

        // Keep buffering until a full batch is available or reading is over
        if (!done && buffer.size() < batchSize) {
            continue;
        }

        for (ParseTask task : taskBuilder.build(buffer)) {
            this.executeParseTask(struct, task.mapping(), task);
        }
        // Confirm offset to avoid lost records
        reader.confirmOffset();
        this.context.newProgress().markLoaded(struct, done);
        this.handleParseFailure();
        if (limitReached) {
            LOG.warn("Read lines exceed limit, stopped loading tasks");
            this.context.stopLoading();
        }
        // Start a fresh buffer; the previous list was handed to the tasks
        buffer = new ArrayList<>(batchSize);
    }

    metrics.stopInFlight();
    LOG.info("Finish loading '{}'", struct);
}
use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.
the class ParseTaskBuilder method buildTask.
/**
 * Creates a parse task for the given builder. When the task runs, every
 * line is converted into graph elements by the builder and the resulting
 * records are grouped into batches of at most {@code batchSize} records.
 * The elements of a single line are never split across two batches.
 *
 * Lines whose conversion throws IllegalArgumentException are counted as
 * parse failures and passed to handleParseFailure; the remaining lines are
 * still processed.
 *
 * @param builder the element builder used to convert each line
 * @param lines   the lines to parse when the task is executed
 * @return a parse task bound to the builder's mapping
 */
private ParseTask buildTask(ElementBuilder builder, List<Line> lines) {
    final LoadMetrics metrics = this.context.summary().metrics(this.struct);
    final int batchSize = this.context.options().batchSize;
    final ElementMapping mapping = builder.mapping();

    // Ids are cleared only for vertices whose label uses the primary key id strategy
    final boolean needRemoveId;
    if (builder instanceof VertexBuilder) {
        VertexLabel vertexLabel = (VertexLabel) builder.schemaLabel();
        needRemoveId = vertexLabel.idStrategy().isPrimaryKey();
    } else {
        needRemoveId = false;
    }

    return new ParseTask(mapping, () -> {
        List<List<Record>> batches = new ArrayList<>();
        // The batch currently being filled
        List<Record> current = new ArrayList<>(batchSize);
        int parsed = 0;
        for (Line line : lines) {
            try {
                // NOTE: don't remove entry in keyValues
                @SuppressWarnings("unchecked")
                List<GraphElement> elements = builder.build(line.names(),
                                                            line.values());
                E.checkState(elements.size() <= batchSize,
                             "The number of columns in a line cannot " +
                             "exceed the size of a batch, but got %s > %s",
                             elements.size(), batchSize);
                // Close the current batch if this line would overflow it
                if (current.size() + elements.size() > batchSize) {
                    LOG.debug("Create a new batch for {}", mapping);
                    batches.add(current);
                    current = new ArrayList<>(batchSize);
                }
                for (GraphElement element : elements) {
                    if (needRemoveId) {
                        ((Vertex) element).id(null);
                    }
                    current.add(new Record(line.rawLine(), element));
                    parsed++;
                }
            } catch (IllegalArgumentException e) {
                metrics.increaseParseFailure(mapping);
                this.handleParseFailure(mapping,
                                        new ParseException(line.rawLine(), e));
            }
        }
        // Keep the trailing, partially filled batch
        if (!current.isEmpty()) {
            batches.add(current);
        }
        metrics.plusParseSuccess(mapping, parsed);
        return batches;
    });
}
use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.
the class LineTest method testNameValues.
/**
 * Checks that a Line returns the names and values it was constructed with.
 */
@Test
public void testNameValues() {
    String[] names = { "id", "name", "age" };
    Object[] values = { 1, "marko", 27 };
    Line line = new Line("1,marko,27", names, values);

    // Compare against fresh arrays rather than the instances passed in
    String[] expectedNames = new String[] { "id", "name", "age" };
    Object[] expectedValues = new Object[] { 1, "marko", 27 };
    Assert.assertArrayEquals(expectedNames, line.names());
    Assert.assertArrayEquals(expectedValues, line.values());
}
use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.
the class LineTest method testRetainAll.
/**
 * Checks that retainAll keeps only the requested column and its value.
 */
@Test
public void testRetainAll() {
    String[] names = { "id", "name", "age" };
    Object[] values = { 1, "marko", 27 };
    Line line = new Line("1,marko,27", names, values);

    String[] retained = { "id" };
    line.retainAll(retained);

    // Compare against fresh arrays rather than the instances passed in
    Assert.assertArrayEquals(new String[] { "id" }, line.names());
    Assert.assertArrayEquals(new Object[] { 1 }, line.values());
}
use of com.baidu.hugegraph.loader.reader.line.Line in project incubator-hugegraph-toolchain by apache.
the class RowFetcher method nextBatch.
/**
 * Runs the select statement and converts every returned row into a Line.
 *
 * With a custom SQL the whole result is returned in one call and the
 * fetcher is marked as fully fetched. Otherwise the list is sized for
 * {@code batchSize + 1} rows: if exactly that many rows come back, the
 * extra last row is removed from the batch, reduced via
 * {@code retainAll(this.primaryKeys)} and remembered as the start row for
 * the next call; any other row count marks the fetcher as fully fetched.
 *
 * @return the lines read by this call, or null if everything had already
 *         been fetched before the call
 * @throws SQLException if executing the query or reading the result
 *         fails; close() is invoked on this fetcher before rethrowing
 */
public List<Line> nextBatch() throws SQLException {
    if (this.fullyFetched) {
        return null;
    }

    final String select;
    if (this.source.existsCustomSQL()) {
        select = this.source.customSQL();
    } else {
        select = this.source.vendor().buildSelectSql(this.source,
                                                     this.nextStartRow);
    }
    LOG.debug("The sql for select is: {}", select);

    List<Line> batch = new ArrayList<>(this.source.batchSize() + 1);
    try (Statement stmt = this.conn.createStatement();
         ResultSet result = stmt.executeQuery(select)) {
        if (this.source.existsCustomSQL()) {
            this.readHeader(result);
        }
        while (result.next()) {
            final int width = this.columns.length;
            Object[] values = new Object[width];
            for (int col = 0; col < width; col++) {
                // JDBC column indexes are 1-based
                Object value = result.getObject(col + 1);
                values[col] = value != null ? value : Constants.NULL_STR;
            }
            String rawLine = StringUtils.join(values, Constants.COMMA_STR);
            batch.add(new Line(rawLine, this.columns, values));
        }
    } catch (SQLException e) {
        this.close();
        throw e;
    }

    final boolean hasMore = !this.source.existsCustomSQL() &&
                            batch.size() == this.source.batchSize() + 1;
    if (!hasMore) {
        this.fullyFetched = true;
        return batch;
    }

    // Remove the last one and keep it as the start row of the next batch
    Line lastLine = batch.remove(batch.size() - 1);
    lastLine.retainAll(this.primaryKeys);
    this.nextStartRow = lastLine;
    return batch;
}
Aggregations