use of org.embulk.spi.time.TimestampParser in project embulk by embulk.
the class ConfigInputPlugin method run.
/**
 * Writes the rows configured for {@code taskIndex} to {@code output} as records.
 *
 * <p>The rows are read from {@code task.getValues().get(taskIndex)}. For each row every
 * column of {@code schema} is visited; a cell that is missing ({@code null}) or a JSON
 * null is written as a null column value, otherwise it is converted according to the
 * column type.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema columns to populate for each record
 * @param taskIndex index selecting which list of rows of the task to emit
 * @param output destination of the built pages
 * @return the report returned by {@code Exec.newTaskReport()}
 * @throws DataException if a timestamp or JSON cell cannot be parsed
 */
@Override
public TaskReport run(TaskSource taskSource, Schema schema, int taskIndex, PageOutput output) {
    final PluginTask pluginTask = taskSource.loadTask(PluginTask.class);
    final List<List<JsonNode>> rows = pluginTask.getValues().get(taskIndex);
    final TimestampParser[] parsersByColumn =
            Timestamps.newTimestampColumnParsers(pluginTask, pluginTask.getSchemaConfig());
    final JsonParser jsonValueParser = new JsonParser();

    try (final PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        for (final List<JsonNode> row : rows) {
            schema.visitColumns(new ColumnVisitor() {
                public void booleanColumn(Column column) {
                    final JsonNode cell = presentCell(column);
                    if (cell != null) {
                        builder.setBoolean(column, cell.asBoolean());
                        return;
                    }
                    builder.setNull(column);
                }

                public void longColumn(Column column) {
                    final JsonNode cell = presentCell(column);
                    if (cell != null) {
                        builder.setLong(column, cell.asLong());
                        return;
                    }
                    builder.setNull(column);
                }

                public void doubleColumn(Column column) {
                    final JsonNode cell = presentCell(column);
                    if (cell != null) {
                        builder.setDouble(column, cell.asDouble());
                        return;
                    }
                    builder.setNull(column);
                }

                public void stringColumn(Column column) {
                    final JsonNode cell = presentCell(column);
                    if (cell != null) {
                        builder.setString(column, cell.asText());
                        return;
                    }
                    builder.setNull(column);
                }

                public void timestampColumn(Column column) {
                    final JsonNode cell = presentCell(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    try {
                        builder.setTimestamp(column, parsersByColumn[column.getIndex()].parse(cell.asText()));
                    } catch (TimestampParseException ex) {
                        throw new DataException(ex);
                    }
                }

                public void jsonColumn(Column column) {
                    final JsonNode cell = presentCell(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    try {
                        // The node is serialized back to text and re-parsed by jsonValueParser.
                        builder.setJson(column, jsonValueParser.parse(cell.toString()));
                    } catch (JsonParseException ex) {
                        throw new DataException(ex);
                    }
                }

                // Returns the row's cell for the column, or null when it is missing or a JSON null.
                private JsonNode presentCell(Column column) {
                    final JsonNode cell = row.get(column.getIndex());
                    if (cell == null || cell.isNull()) {
                        return null;
                    }
                    return cell;
                }
            });
            builder.addRecord();
        }
        builder.finish();
    }
    return Exec.newTaskReport();
}
use of org.embulk.spi.time.TimestampParser in project embulk by embulk.
the class CsvParserPlugin method run.
/**
 * Tokenizes {@code input} as CSV and writes each valid record to {@code output}.
 *
 * <p>For every file reported by the tokenizer, up to {@code task.getSkipHeaderLines()}
 * header lines are skipped and then records are read one by one. Each column of
 * {@code schema} consumes the next CSV column; a {@code null} column value is written
 * as a null.
 *
 * <p>A record that raises {@code CsvTokenizer.InvalidFormatException},
 * {@code CsvTokenizer.InvalidValueException} or {@code CsvRecordValidateException} is
 * dropped and logged as a warning, unless {@code task.getStopOnInvalidRecord()} is true,
 * in which case a {@code DataException} is thrown.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema columns to populate for each record
 * @param input file input handed to the line decoder
 * @param output destination of the built pages
 * @throws DataException if a record is invalid and stopping on invalid records is enabled
 */
@Override
public void run(TaskSource taskSource, final Schema schema, FileInput input, PageOutput output) {
    PluginTask task = taskSource.loadTask(PluginTask.class);
    final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser jsonParser = new JsonParser();
    final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
    final boolean allowOptionalColumns = task.getAllowOptionalColumns();
    final boolean allowExtraColumns = task.getAllowExtraColumns();
    final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
    final int skipHeaderLines = task.getSkipHeaderLines();
    try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        while (tokenizer.nextFile()) {
            // skip the header lines for each file
            for (int skipHeaderLineNumber = skipHeaderLines; skipHeaderLineNumber > 0; skipHeaderLineNumber--) {
                if (!tokenizer.skipHeaderLine()) {
                    break;
                }
            }
            if (!tokenizer.nextRecord()) {
                // empty file
                continue;
            }
            while (true) {
                boolean hasNextRecord;
                try {
                    schema.visitColumns(new ColumnVisitor() {
                        public void booleanColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                pageBuilder.setBoolean(column, TRUE_STRINGS.contains(v));
                            }
                        }

                        public void longColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setLong(column, Long.parseLong(v));
                                } catch (NumberFormatException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        public void doubleColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setDouble(column, Double.parseDouble(v));
                                } catch (NumberFormatException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        public void stringColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                pageBuilder.setString(column, v);
                            }
                        }

                        public void timestampColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(v));
                                } catch (TimestampParseException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        public void jsonColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setJson(column, jsonParser.parse(v));
                                } catch (JsonParseException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        // Reads the next CSV column; yields null instead when optional
                        // columns are allowed and the line has no more columns.
                        private String nextColumn() {
                            if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
                                // TODO warning
                                return null;
                            }
                            return tokenizer.nextColumnOrNull();
                        }
                    });
                    try {
                        hasNextRecord = tokenizer.nextRecord();
                    } catch (CsvTokenizer.TooManyColumnsException ex) {
                        if (allowExtraColumns) {
                            // Discard the rest of the line; the returned text is not needed
                            // until the warning below is implemented.
                            tokenizer.skipCurrentLine();
                            // TODO warning
                            hasNextRecord = tokenizer.nextRecord();
                        } else {
                            // this line will be skipped at the following catch section
                            throw ex;
                        }
                    }
                    // Only reached when the whole record was read without a validation error.
                    pageBuilder.addRecord();
                } catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException e) {
                    // The line must be skipped before the line number is read, as in the
                    // original statement order.
                    String skippedLine = tokenizer.skipCurrentLine();
                    long lineNumber = tokenizer.getCurrentLineNumber();
                    if (stopOnInvalidRecord) {
                        throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), e);
                    }
                    log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), skippedLine));
                    // exec.notice().skippedLine(skippedLine);
                    hasNextRecord = tokenizer.nextRecord();
                }
                if (!hasNextRecord) {
                    break;
                }
            }
        }
        pageBuilder.finish();
    }
}
use of org.embulk.spi.time.TimestampParser in project embulk by embulk.
the class DynamicColumnSetterFactory method newColumnSetter.
/**
 * Creates the {@code DynamicColumnSetter} that matches the type of {@code column}.
 *
 * <p>String and JSON setters receive a {@code TimestampFormatter}; timestamp setters
 * receive a {@code TimestampParser}. Both are built from the column's format and
 * time zone id as returned by this factory's helper methods.
 *
 * @param pageBuilder builder passed through to the created setter
 * @param column column whose type selects the setter implementation
 * @return a setter for the column's type
 * @throws ConfigException if the column type is none of the handled types
 */
public DynamicColumnSetter newColumnSetter(PageBuilder pageBuilder, Column column) {
    final Type columnType = column.getType();

    if (columnType instanceof BooleanType) {
        return new BooleanColumnSetter(pageBuilder, column, defaultValue);
    }
    if (columnType instanceof LongType) {
        return new LongColumnSetter(pageBuilder, column, defaultValue);
    }
    if (columnType instanceof DoubleType) {
        return new DoubleColumnSetter(pageBuilder, column, defaultValue);
    }
    if (columnType instanceof StringType) {
        return new StringColumnSetter(
                pageBuilder,
                column,
                defaultValue,
                TimestampFormatter.of(getTimestampFormatForFormatter(column), getTimeZoneId(column)));
    }
    if (columnType instanceof TimestampType) {
        // TODO use flexible time format like Ruby's Time.parse
        final TimestampParser timestampParser;
        if (!this.useColumnForTimestampMetadata) {
            timestampParser = TimestampParser.of(getTimestampFormatForParser(column), getTimeZoneId(column));
        } else {
            // Format is taken from the timestamp type itself.
            // https://github.com/embulk/embulk/issues/935
            final TimestampType timestampColumnType = (TimestampType) columnType;
            timestampParser = TimestampParser.of(
                    getFormatFromTimestampTypeWithDepracationSuppressed(timestampColumnType),
                    getTimeZoneId(column));
        }
        return new TimestampColumnSetter(pageBuilder, column, defaultValue, timestampParser);
    }
    if (columnType instanceof JsonType) {
        return new JsonColumnSetter(
                pageBuilder,
                column,
                defaultValue,
                TimestampFormatter.of(getTimestampFormatForFormatter(column), getTimeZoneId(column)));
    }

    throw new ConfigException("Unknown column type: " + columnType);
}
use of org.embulk.spi.time.TimestampParser in project embulk by embulk.
the class Timestamps method newTimestampColumnParsers.
/**
 * Builds an array of timestamp parsers aligned with the columns of {@code schema}.
 *
 * <p>The array has {@code schema.getColumnCount()} slots. The slot at a column's position
 * in {@code schema.getColumns()} holds a parser when that column's type is a
 * {@code TimestampType}; all other slots stay {@code null}.
 *
 * @param parserTask task passed to {@code TimestampParser.of} for every timestamp column
 * @param schema column configuration to scan
 * @return the parsers, indexed by column position
 */
public static TimestampParser[] newTimestampColumnParsers(TimestampParser.Task parserTask, SchemaConfig schema) {
    final TimestampParser[] parsersByPosition = new TimestampParser[schema.getColumnCount()];
    int position = -1;
    for (final ColumnConfig columnConfig : schema.getColumns()) {
        position++;
        if (!(columnConfig.getType() instanceof TimestampType)) {
            // Non-timestamp columns keep a null slot.
            continue;
        }
        final TimestampColumnOption columnOption =
                columnConfig.getOption().loadConfig(TimestampColumnOption.class);
        parsersByPosition[position] = TimestampParser.of(parserTask, columnOption);
    }
    return parsersByPosition;
}
Aggregations