Use of org.embulk.spi.PageBuilder in the project embulk, by embulk.
From the class ConfigInputPlugin, the method run.
/**
 * Emits the rows configured for the given task index as records on {@code output}.
 *
 * <p>Every configured row is a list of JSON nodes addressed by column index. A missing
 * ({@code null}) or JSON-null node is written as a null cell; any other node is converted
 * according to the type of the column being visited.
 *
 * @param taskSource serialized task configuration, loaded as {@code PluginTask}
 * @param schema     columns visited for every row
 * @param taskIndex  selects which list of rows from the task's values is emitted
 * @param output     destination handed to the {@code PageBuilder}
 * @return the report created by {@code Exec.newTaskReport()}
 * @throws DataException if a timestamp or JSON cell cannot be parsed
 */
@Override
public TaskReport run(TaskSource taskSource, Schema schema, int taskIndex, PageOutput output) {
    final PluginTask task = taskSource.loadTask(PluginTask.class);
    final List<List<JsonNode>> rows = task.getValues().get(taskIndex);
    final TimestampParser[] parsersByColumn =
            Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser jsonParser = new JsonParser();

    try (final PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        for (final List<JsonNode> row : rows) {
            schema.visitColumns(new ColumnVisitor() {
                @Override
                public void booleanColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setBoolean(column, cell.asBoolean());
                }

                @Override
                public void longColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setLong(column, cell.asLong());
                }

                @Override
                public void doubleColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setDouble(column, cell.asDouble());
                }

                @Override
                public void stringColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setString(column, cell.asText());
                }

                @Override
                public void timestampColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    try {
                        builder.setTimestamp(column, parsersByColumn[column.getIndex()].parse(cell.asText()));
                    } catch (TimestampParseException ex) {
                        throw new DataException(ex);
                    }
                }

                @Override
                public void jsonColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    try {
                        // The node is re-serialized to text and parsed again by jsonParser.
                        builder.setJson(column, jsonParser.parse(cell.toString()));
                    } catch (JsonParseException ex) {
                        throw new DataException(ex);
                    }
                }

                /** Returns the row's node for the column, or null when it is absent or JSON null. */
                private JsonNode cellOrNull(Column column) {
                    final JsonNode node = row.get(column.getIndex());
                    return (node == null || node.isNull()) ? null : node;
                }
            });
            builder.addRecord();
        }
        builder.finish();
    }
    return Exec.newTaskReport();
}
Use of org.embulk.spi.PageBuilder in the project embulk, by embulk.
From the class CsvParserPlugin, the method run.
/**
 * Parses CSV files read from {@code input} and emits one record per valid line to
 * {@code output}.
 *
 * <p>For every file the configured number of header lines is skipped first. Each record is
 * then read column by column in schema order; a {@code null} token is written as a null
 * cell. A line that fails tokenizing or value conversion is either reported as a
 * {@link DataException} (when {@code stopOnInvalidRecord} is set) or logged and skipped.
 *
 * @param taskSource serialized task configuration, loaded as {@code PluginTask}
 * @param schema     columns expected in every record, visited in order
 * @param input      file input wrapped by the line decoder and tokenizer
 * @param output     destination handed to the {@code PageBuilder}
 * @throws DataException if a record is invalid and {@code stopOnInvalidRecord} is enabled
 */
@Override
public void run(TaskSource taskSource, final Schema schema, FileInput input, PageOutput output) {
    PluginTask task = taskSource.loadTask(PluginTask.class);
    final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser jsonParser = new JsonParser();
    final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
    final boolean allowOptionalColumns = task.getAllowOptionalColumns();
    final boolean allowExtraColumns = task.getAllowExtraColumns();
    final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
    final int skipHeaderLines = task.getSkipHeaderLines();
    try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        while (tokenizer.nextFile()) {
            // skip the header lines for each file
            for (int skipHeaderLineNumber = skipHeaderLines; skipHeaderLineNumber > 0; skipHeaderLineNumber--) {
                if (!tokenizer.skipHeaderLine()) {
                    break;
                }
            }
            if (!tokenizer.nextRecord()) {
                // empty file
                continue;
            }
            while (true) {
                boolean hasNextRecord;
                try {
                    schema.visitColumns(new ColumnVisitor() {
                        @Override
                        public void booleanColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                pageBuilder.setBoolean(column, TRUE_STRINGS.contains(v));
                            }
                        }

                        @Override
                        public void longColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setLong(column, Long.parseLong(v));
                                } catch (NumberFormatException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        @Override
                        public void doubleColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setDouble(column, Double.parseDouble(v));
                                } catch (NumberFormatException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        @Override
                        public void stringColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                pageBuilder.setString(column, v);
                            }
                        }

                        @Override
                        public void timestampColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(v));
                                } catch (TimestampParseException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        @Override
                        public void jsonColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setJson(column, jsonParser.parse(v));
                                } catch (JsonParseException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        /**
                         * Returns the next token of the current record, or null when optional
                         * columns are allowed and the record has no more columns.
                         */
                        private String nextColumn() {
                            if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
                                // TODO warning
                                return null;
                            }
                            return tokenizer.nextColumnOrNull();
                        }
                    });
                    // Advance to the next record before committing the current one, so that a
                    // line with too many columns is detected before addRecord() is called.
                    try {
                        hasNextRecord = tokenizer.nextRecord();
                    } catch (CsvTokenizer.TooManyColumnsException ex) {
                        if (allowExtraColumns) {
                            // Drop the remainder of the line; the returned text is not needed here.
                            tokenizer.skipCurrentLine();
                            // TODO warning
                            hasNextRecord = tokenizer.nextRecord();
                        } else {
                            // this line will be skipped at the following catch section
                            throw ex;
                        }
                    }
                    pageBuilder.addRecord();
                } catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException e) {
                    // skipCurrentLine() must run before getCurrentLineNumber(): the order of the
                    // two tokenizer calls is kept exactly as it was.
                    String skippedLine = tokenizer.skipCurrentLine();
                    long lineNumber = tokenizer.getCurrentLineNumber();
                    if (stopOnInvalidRecord) {
                        throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), e);
                    }
                    log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), skippedLine));
                    // exec.notice().skippedLine(skippedLine);
                    hasNextRecord = tokenizer.nextRecord();
                }
                if (!hasNextRecord) {
                    break;
                }
            }
        }
        pageBuilder.finish();
    }
}
Use of org.embulk.spi.PageBuilder in the project embulk, by embulk.
From the class JsonParserPlugin, the method run.
/**
 * Reads JSON values from every file in {@code input} and emits each map value as a
 * record whose first column holds that value.
 *
 * <p>A value that is not a map is an invalid record: it raises a {@link DataException}
 * when {@code stopOnInvalidRecord} is set, otherwise it is logged and skipped. An
 * {@code IOException} or {@code JsonParseException} while reading a file is rethrown as a
 * {@link DataException}, except in preview mode once at least one record of that file has
 * been emitted; in that case reading stops and the records built so far are finished.
 *
 * @param taskSource serialized task configuration, loaded as {@code PluginTask}
 * @param schema     output schema; only column 0 is written
 * @param input      file input wrapped in a {@code FileInputInputStream}
 * @param output     destination handed to the page builder
 * @throws DataException on an invalid record (when configured to stop) or on a read/parse failure
 */
@Override
public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutput output) {
    PluginTask task = taskSource.loadTask(PluginTask.class);
    final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
    // record column
    final Column column = schema.getColumn(0);
    try (PageBuilder pageBuilder = newPageBuilder(schema, output);
            FileInputInputStream in = new FileInputInputStream(input)) {
        while (in.nextFile()) {
            boolean evenOneJsonParsed = false;
            try (JsonParser.Stream stream = newJsonStream(in, task)) {
                Value value;
                while ((value = stream.next()) != null) {
                    try {
                        if (!value.isMapValue()) {
                            // Only map values are accepted as records; the message now states
                            // that requirement instead of its opposite.
                            throw new JsonRecordValidateException(String.format("A Json record must represent map value but it's %s", value.getValueType().name()));
                        }
                        pageBuilder.setJson(column, value);
                        pageBuilder.addRecord();
                        evenOneJsonParsed = true;
                    } catch (JsonRecordValidateException e) {
                        if (stopOnInvalidRecord) {
                            throw new DataException(String.format("Invalid record: %s", value.toJson()), e);
                        }
                        log.warn(String.format("Skipped record (%s): %s", e.getMessage(), value.toJson()));
                    }
                }
            } catch (IOException | JsonParseException e) {
                if (Exec.isPreview() && evenOneJsonParsed) {
                    // ignore in preview if at least one JSON is already parsed.
                    // The break leaves the file loop, so remaining files are not read.
                    break;
                }
                throw new DataException(e);
            }
        }
        pageBuilder.finish();
    }
}
Use of org.embulk.spi.PageBuilder in the project embulk, by embulk.
From the class RemoveColumnsFilterPlugin, the method open.
/**
 * Opens the filter by wiring a reader for the input schema and a builder for the output
 * schema into a {@code PageConverter}.
 *
 * @param taskSource   serialized task configuration, loaded as {@code PluginTask}
 * @param inputSchema  schema used to construct the {@code PageReader}
 * @param outputSchema schema used to construct the {@code PageBuilder}
 * @param output       downstream output the {@code PageBuilder} is constructed with
 * @return a {@code PageConverter} built from the reader, the builder and the task's index mapping
 */
@Override
public PageOutput open(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) {
    final PluginTask pluginTask = taskSource.loadTask(PluginTask.class);
    // Arguments are evaluated left to right: reader, then builder, then the index mapping.
    return new PageConverter(
            new PageReader(inputSchema),
            new PageBuilder(getBufferAllocator(), outputSchema, output),
            pluginTask.getIndexMapping());
}
Aggregations