Search in sources :

Example 1 with TimestampParser

use of org.embulk.spi.time.TimestampParser in project embulk by embulk.

the class ConfigInputPlugin method run.

/**
 * Writes the rows configured for task {@code taskIndex} to {@code output}.
 *
 * <p>Every row is a list of JSON nodes addressed by column index. A missing node or a JSON
 * null becomes a null cell; any other node is converted according to the column type
 * being visited.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema columns visited for every row
 * @param taskIndex index into the task's configured value lists
 * @param output destination handed to the {@code PageBuilder}
 * @return a fresh task report from {@code Exec.newTaskReport()}
 * @throws DataException if a timestamp or JSON cell fails to parse
 */
@Override
public TaskReport run(TaskSource taskSource, Schema schema, int taskIndex, PageOutput output) {
    final PluginTask task = taskSource.loadTask(PluginTask.class);
    final List<List<JsonNode>> rows = task.getValues().get(taskIndex);
    final TimestampParser[] parsersByColumn = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser jsonValueParser = new JsonParser();
    try (final PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        for (final List<JsonNode> row : rows) {
            schema.visitColumns(new ColumnVisitor() {

                @Override
                public void booleanColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setBoolean(column, cell.asBoolean());
                }

                @Override
                public void longColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setLong(column, cell.asLong());
                }

                @Override
                public void doubleColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setDouble(column, cell.asDouble());
                }

                @Override
                public void stringColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setString(column, cell.asText());
                }

                @Override
                public void timestampColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    try {
                        // The parser is looked up by the same index used to fetch the cell.
                        builder.setTimestamp(column, parsersByColumn[column.getIndex()].parse(cell.asText()));
                    } catch (TimestampParseException parseFailure) {
                        throw new DataException(parseFailure);
                    }
                }

                @Override
                public void jsonColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        builder.setNull(column);
                        return;
                    }
                    try {
                        // The node is serialized with toString() and re-parsed by the JsonParser.
                        builder.setJson(column, jsonValueParser.parse(cell.toString()));
                    } catch (JsonParseException parseFailure) {
                        throw new DataException(parseFailure);
                    }
                }

                // Returns the row's node for this column, or null when it is absent or a JSON null.
                private JsonNode cellOrNull(Column column) {
                    final JsonNode cell = row.get(column.getIndex());
                    return (cell == null || cell.isNull()) ? null : cell;
                }
            });
            builder.addRecord();
        }
        builder.finish();
    }
    return Exec.newTaskReport();
}
Also used : TimestampParser(org.embulk.spi.time.TimestampParser) JsonNode(com.fasterxml.jackson.databind.JsonNode) PageBuilder(org.embulk.spi.PageBuilder) JsonParseException(org.embulk.spi.json.JsonParseException) TimestampParseException(org.embulk.spi.time.TimestampParseException) DataException(org.embulk.spi.DataException) ColumnVisitor(org.embulk.spi.ColumnVisitor) Column(org.embulk.spi.Column) List(java.util.List) JsonParser(org.embulk.spi.json.JsonParser)

Example 2 with TimestampParser

use of org.embulk.spi.time.TimestampParser in project embulk by embulk.

the class CsvParserPlugin method run.

/**
 * Parses CSV input into records: tokenizes each file of {@code input}, converts every column
 * of each record while visiting {@code schema}, and adds the records to a {@code PageBuilder}
 * that writes to {@code output}.
 *
 * <p>A record that raises {@code CsvTokenizer.InvalidFormatException},
 * {@code CsvTokenizer.InvalidValueException} or {@code CsvRecordValidateException} is either
 * turned into a {@code DataException} (when {@code task.getStopOnInvalidRecord()} is true) or
 * logged with {@code log.warn} and skipped.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema columns visited for every record
 * @param input file input wrapped in a {@code LineDecoder} for the tokenizer
 * @param output destination handed to the {@code PageBuilder}
 * @throws DataException if a record is invalid and stop-on-invalid-record is enabled
 */
@Override
public void run(TaskSource taskSource, final Schema schema, FileInput input, PageOutput output) {
    PluginTask task = taskSource.loadTask(PluginTask.class);
    // indexed by column.getIndex() in timestampColumn below
    final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser jsonParser = new JsonParser();
    final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
    // task options are read once up front so the anonymous visitor can capture them as finals
    final boolean allowOptionalColumns = task.getAllowOptionalColumns();
    final boolean allowExtraColumns = task.getAllowExtraColumns();
    final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
    final int skipHeaderLines = task.getSkipHeaderLines();
    try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        while (tokenizer.nextFile()) {
            // skip the header lines for each file
            for (int skipHeaderLineNumber = skipHeaderLines; skipHeaderLineNumber > 0; skipHeaderLineNumber--) {
                if (!tokenizer.skipHeaderLine()) {
                    // skipHeaderLine() returned false: stop skipping early
                    break;
                }
            }
            if (!tokenizer.nextRecord()) {
                // empty file
                continue;
            }
            // one iteration per record of the current file; left when hasNextRecord is false
            while (true) {
                boolean hasNextRecord;
                try {
                    // each callback consumes the next column token of the current record via nextColumn()
                    schema.visitColumns(new ColumnVisitor() {

                        public void booleanColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                // true only when the token is in TRUE_STRINGS; any other non-null token is false
                                pageBuilder.setBoolean(column, TRUE_STRINGS.contains(v));
                            }
                        }

                        public void longColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setLong(column, Long.parseLong(v));
                                } catch (NumberFormatException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        public void doubleColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setDouble(column, Double.parseDouble(v));
                                } catch (NumberFormatException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        public void stringColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                pageBuilder.setString(column, v);
                            }
                        }

                        public void timestampColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(v));
                                } catch (TimestampParseException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        public void jsonColumn(Column column) {
                            String v = nextColumn();
                            if (v == null) {
                                pageBuilder.setNull(column);
                            } else {
                                try {
                                    pageBuilder.setJson(column, jsonParser.parse(v));
                                } catch (JsonParseException e) {
                                    // TODO support default value
                                    throw new CsvRecordValidateException(e);
                                }
                            }
                        }

                        // Returns the next column token, or null. With optional columns allowed,
                        // a record that has run out of columns yields null without asking the
                        // tokenizer for another column.
                        private String nextColumn() {
                            if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
                                // TODO warning
                                return null;
                            }
                            return tokenizer.nextColumnOrNull();
                        }
                    });
                    // Advance to the next record BEFORE addRecord(): if this throws and the
                    // exception is rethrown, the current record is never added.
                    try {
                        hasNextRecord = tokenizer.nextRecord();
                    } catch (CsvTokenizer.TooManyColumnsException ex) {
                        if (allowExtraColumns) {
                            // discard the remainder of the line; the returned text is currently unused
                            String tooManyColumnsLine = tokenizer.skipCurrentLine();
                            // TODO warning
                            hasNextRecord = tokenizer.nextRecord();
                        } else {
                            // this line will be skipped at the following catch section
                            throw ex;
                        }
                    }
                    pageBuilder.addRecord();
                } catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException e) {
                    // drop the offending line, then either abort with a DataException or log and continue
                    String skippedLine = tokenizer.skipCurrentLine();
                    long lineNumber = tokenizer.getCurrentLineNumber();
                    if (stopOnInvalidRecord) {
                        throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), e);
                    }
                    log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), skippedLine));
                    // exec.notice().skippedLine(skippedLine);
                    hasNextRecord = tokenizer.nextRecord();
                }
                if (!hasNextRecord) {
                    break;
                }
            }
        }
        // called once, after every file has been processed
        pageBuilder.finish();
    }
}
Also used : TimestampParser(org.embulk.spi.time.TimestampParser) PageBuilder(org.embulk.spi.PageBuilder) JsonParseException(org.embulk.spi.json.JsonParseException) TimestampParseException(org.embulk.spi.time.TimestampParseException) DataException(org.embulk.spi.DataException) ColumnVisitor(org.embulk.spi.ColumnVisitor) Column(org.embulk.spi.Column) LineDecoder(org.embulk.spi.util.LineDecoder) JsonParser(org.embulk.spi.json.JsonParser)

Example 3 with TimestampParser

use of org.embulk.spi.time.TimestampParser in project embulk by embulk.

the class DynamicColumnSetterFactory method newColumnSetter.

/**
 * Creates the {@code DynamicColumnSetter} that matches the type of {@code column}.
 *
 * <p>String and JSON setters additionally receive a {@code TimestampFormatter}; the timestamp
 * setter receives a {@code TimestampParser} whose format comes either from the column's
 * {@code TimestampType} or from {@code getTimestampFormatForParser(column)}, depending on
 * {@code useColumnForTimestampMetadata}.
 *
 * @param pageBuilder builder the returned setter is constructed with
 * @param column column whose type selects the setter implementation
 * @return a setter for the column's type
 * @throws ConfigException if the column type is none of the handled types
 */
public DynamicColumnSetter newColumnSetter(PageBuilder pageBuilder, Column column) {
    final Type columnType = column.getType();

    if (columnType instanceof BooleanType) {
        return new BooleanColumnSetter(pageBuilder, column, defaultValue);
    }
    if (columnType instanceof LongType) {
        return new LongColumnSetter(pageBuilder, column, defaultValue);
    }
    if (columnType instanceof DoubleType) {
        return new DoubleColumnSetter(pageBuilder, column, defaultValue);
    }
    if (columnType instanceof StringType) {
        final TimestampFormatter stringFormatter =
                TimestampFormatter.of(getTimestampFormatForFormatter(column), getTimeZoneId(column));
        return new StringColumnSetter(pageBuilder, column, defaultValue, stringFormatter);
    }
    if (columnType instanceof TimestampType) {
        // TODO use flexible time format like Ruby's Time.parse
        // Format source depends on the flag; see https://github.com/embulk/embulk/issues/935
        final TimestampParser timestampParser = this.useColumnForTimestampMetadata
                ? TimestampParser.of(
                        getFormatFromTimestampTypeWithDepracationSuppressed((TimestampType) columnType),
                        getTimeZoneId(column))
                : TimestampParser.of(getTimestampFormatForParser(column), getTimeZoneId(column));
        return new TimestampColumnSetter(pageBuilder, column, defaultValue, timestampParser);
    }
    if (columnType instanceof JsonType) {
        final TimestampFormatter jsonFormatter =
                TimestampFormatter.of(getTimestampFormatForFormatter(column), getTimeZoneId(column));
        return new JsonColumnSetter(pageBuilder, column, defaultValue, jsonFormatter);
    }

    throw new ConfigException("Unknown column type: " + columnType);
}
Also used : TimestampFormatter(org.embulk.spi.time.TimestampFormatter) JsonType(org.embulk.spi.type.JsonType) LongType(org.embulk.spi.type.LongType) LongColumnSetter(org.embulk.spi.util.dynamic.LongColumnSetter) StringType(org.embulk.spi.type.StringType) TimestampParser(org.embulk.spi.time.TimestampParser) BooleanType(org.embulk.spi.type.BooleanType) ConfigException(org.embulk.config.ConfigException) JsonColumnSetter(org.embulk.spi.util.dynamic.JsonColumnSetter) TimestampType(org.embulk.spi.type.TimestampType) DoubleType(org.embulk.spi.type.DoubleType) Type(org.embulk.spi.type.Type) DoubleColumnSetter(org.embulk.spi.util.dynamic.DoubleColumnSetter) StringColumnSetter(org.embulk.spi.util.dynamic.StringColumnSetter) BooleanColumnSetter(org.embulk.spi.util.dynamic.BooleanColumnSetter) TimestampColumnSetter(org.embulk.spi.util.dynamic.TimestampColumnSetter)

Example 4 with TimestampParser

use of org.embulk.spi.time.TimestampParser in project embulk by embulk.

the class Timestamps method newTimestampColumnParsers.

/**
 * Builds an array of timestamp parsers aligned with the columns of {@code schema}.
 *
 * <p>The array has {@code schema.getColumnCount()} slots. The slot for each column whose
 * type is a {@code TimestampType} holds a parser built from {@code parserTask} and that
 * column's {@code TimestampColumnOption}; every other slot stays {@code null}.
 *
 * @param parserTask task passed to {@code TimestampParser.of} for every timestamp column
 * @param schema column configuration to scan
 * @return parsers indexed by column position, {@code null} for non-timestamp columns
 */
public static TimestampParser[] newTimestampColumnParsers(TimestampParser.Task parserTask, SchemaConfig schema) {
    final TimestampParser[] parsersByPosition = new TimestampParser[schema.getColumnCount()];
    int position = -1;
    for (final ColumnConfig columnConfig : schema.getColumns()) {
        // Advance first so the guard below can skip without losing the position.
        position++;
        if (!(columnConfig.getType() instanceof TimestampType)) {
            continue;
        }
        final TimestampColumnOption columnOption =
                columnConfig.getOption().loadConfig(TimestampColumnOption.class);
        parsersByPosition[position] = TimestampParser.of(parserTask, columnOption);
    }
    return parsersByPosition;
}
Also used : ColumnConfig(org.embulk.spi.ColumnConfig) TimestampParser(org.embulk.spi.time.TimestampParser) TimestampType(org.embulk.spi.type.TimestampType)

Aggregations

TimestampParser (org.embulk.spi.time.TimestampParser): 4, Column (org.embulk.spi.Column): 2, ColumnVisitor (org.embulk.spi.ColumnVisitor): 2, DataException (org.embulk.spi.DataException): 2, PageBuilder (org.embulk.spi.PageBuilder): 2, JsonParseException (org.embulk.spi.json.JsonParseException): 2, JsonParser (org.embulk.spi.json.JsonParser): 2, TimestampParseException (org.embulk.spi.time.TimestampParseException): 2, TimestampType (org.embulk.spi.type.TimestampType): 2, JsonNode (com.fasterxml.jackson.databind.JsonNode): 1, List (java.util.List): 1, ConfigException (org.embulk.config.ConfigException): 1, ColumnConfig (org.embulk.spi.ColumnConfig): 1, TimestampFormatter (org.embulk.spi.time.TimestampFormatter): 1, BooleanType (org.embulk.spi.type.BooleanType): 1, DoubleType (org.embulk.spi.type.DoubleType): 1, JsonType (org.embulk.spi.type.JsonType): 1, LongType (org.embulk.spi.type.LongType): 1, StringType (org.embulk.spi.type.StringType): 1, Type (org.embulk.spi.type.Type): 1