Search in sources :

Example 1 with JsonParser

Use of org.embulk.spi.json.JsonParser in the project embulk by embulk.

Shown below: the method run of the class ConfigInputPlugin.

/**
 * Writes the rows configured for one task to {@code output} as records.
 *
 * <p>The rows are read from {@code task.getValues().get(taskIndex)}. For every row, each
 * column of {@code schema} is visited and the JSON node found at the column's index is
 * converted to the column's type; a missing ({@code null}) or JSON-null node is written
 * as a null value.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema columns to visit for each row
 * @param taskIndex index selecting this task's rows from the configured values
 * @param output destination handed to the {@code PageBuilder}
 * @return the report created by {@code Exec.newTaskReport()}
 * @throws DataException if a timestamp or JSON value cannot be parsed
 */
@Override
public TaskReport run(TaskSource taskSource, Schema schema, int taskIndex, PageOutput output) {
    final PluginTask task = taskSource.loadTask(PluginTask.class);
    final List<List<JsonNode>> rows = task.getValues().get(taskIndex);
    final TimestampParser[] parsersByColumn = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser parser = new JsonParser();
    try (final PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        for (final List<JsonNode> row : rows) {
            // One visitor per row: each callback converts the node stored at the column's index.
            schema.visitColumns(new ColumnVisitor() {

                public void booleanColumn(Column column) {
                    final JsonNode node = nodeFor(column);
                    if (isAbsent(node)) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setBoolean(column, node.asBoolean());
                }

                public void longColumn(Column column) {
                    final JsonNode node = nodeFor(column);
                    if (isAbsent(node)) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setLong(column, node.asLong());
                }

                public void doubleColumn(Column column) {
                    final JsonNode node = nodeFor(column);
                    if (isAbsent(node)) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setDouble(column, node.asDouble());
                }

                public void stringColumn(Column column) {
                    final JsonNode node = nodeFor(column);
                    if (isAbsent(node)) {
                        builder.setNull(column);
                        return;
                    }
                    builder.setString(column, node.asText());
                }

                public void timestampColumn(Column column) {
                    final JsonNode node = nodeFor(column);
                    if (isAbsent(node)) {
                        builder.setNull(column);
                        return;
                    }
                    try {
                        // The parser is selected by column index, the text comes from the node.
                        final TimestampParser columnParser = parsersByColumn[column.getIndex()];
                        builder.setTimestamp(column, columnParser.parse(node.asText()));
                    } catch (TimestampParseException cause) {
                        throw new DataException(cause);
                    }
                }

                public void jsonColumn(Column column) {
                    final JsonNode node = nodeFor(column);
                    if (isAbsent(node)) {
                        builder.setNull(column);
                        return;
                    }
                    try {
                        // The node is serialized back to text and re-parsed by the Embulk JSON parser.
                        builder.setJson(column, parser.parse(node.toString()));
                    } catch (JsonParseException cause) {
                        throw new DataException(cause);
                    }
                }

                // Looks up the node of the current row that belongs to the given column.
                private JsonNode nodeFor(Column column) {
                    return row.get(column.getIndex());
                }

                // True when the node has to be written as a null value.
                private boolean isAbsent(JsonNode node) {
                    return node == null || node.isNull();
                }
            });
            builder.addRecord();
        }
        builder.finish();
    }
    return Exec.newTaskReport();
}
Also used : TimestampParser(org.embulk.spi.time.TimestampParser) JsonNode(com.fasterxml.jackson.databind.JsonNode) PageBuilder(org.embulk.spi.PageBuilder) JsonParseException(org.embulk.spi.json.JsonParseException) TimestampParseException(org.embulk.spi.time.TimestampParseException) DataException(org.embulk.spi.DataException) ColumnVisitor(org.embulk.spi.ColumnVisitor) Column(org.embulk.spi.Column) List(java.util.List) JsonParser(org.embulk.spi.json.JsonParser)

Example 2 with JsonParser

Use of org.embulk.spi.json.JsonParser in the project embulk by embulk.

Shown below: the method run of the class CsvParserPlugin.

/**
 * Parses the CSV files read from {@code input} and writes their rows to {@code output}.
 *
 * <p>For each file, up to {@code task.getSkipHeaderLines()} header lines are skipped first.
 * Every record is then converted column by column according to {@code schema}; a column
 * whose text is {@code null} is written as a null value. When a record is rejected
 * (invalid format, invalid value, or a value that fails conversion), the current line is
 * skipped and either a {@code DataException} is thrown or a warning is logged, depending
 * on {@code task.getStopOnInvalidRecord()}.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema columns to visit for each record
 * @param input file input wrapped by the line decoder and CSV tokenizer
 * @param output destination handed to the {@code PageBuilder}
 * @throws DataException if a record is invalid and stopping on invalid records is enabled
 */
@Override
public void run(TaskSource taskSource, final Schema schema, FileInput input, PageOutput output) {
    PluginTask task = taskSource.loadTask(PluginTask.class);
    final TimestampParser[] parsersByColumn = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser parser = new JsonParser();
    final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
    final boolean optionalColumnsAllowed = task.getAllowOptionalColumns();
    final boolean extraColumnsAllowed = task.getAllowExtraColumns();
    final boolean haltOnInvalidRecord = task.getStopOnInvalidRecord();
    final int headerLineCount = task.getSkipHeaderLines();
    try (final PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        while (tokenizer.nextFile()) {
            // Skip the header lines of each file; stop early once no header line could be skipped.
            int headerLinesLeft = headerLineCount;
            while (headerLinesLeft > 0 && tokenizer.skipHeaderLine()) {
                headerLinesLeft--;
            }
            if (!tokenizer.nextRecord()) {
                // empty file
                continue;
            }
            boolean moreRecords;
            do {
                try {
                    schema.visitColumns(new ColumnVisitor() {

                        public void booleanColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                builder.setNull(column);
                                return;
                            }
                            builder.setBoolean(column, TRUE_STRINGS.contains(text));
                        }

                        public void longColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                builder.setNull(column);
                                return;
                            }
                            try {
                                builder.setLong(column, Long.parseLong(text));
                            } catch (NumberFormatException cause) {
                                // TODO support default value
                                throw new CsvRecordValidateException(cause);
                            }
                        }

                        public void doubleColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                builder.setNull(column);
                                return;
                            }
                            try {
                                builder.setDouble(column, Double.parseDouble(text));
                            } catch (NumberFormatException cause) {
                                // TODO support default value
                                throw new CsvRecordValidateException(cause);
                            }
                        }

                        public void stringColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                builder.setNull(column);
                                return;
                            }
                            builder.setString(column, text);
                        }

                        public void timestampColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                builder.setNull(column);
                                return;
                            }
                            try {
                                final TimestampParser columnParser = parsersByColumn[column.getIndex()];
                                builder.setTimestamp(column, columnParser.parse(text));
                            } catch (TimestampParseException cause) {
                                // TODO support default value
                                throw new CsvRecordValidateException(cause);
                            }
                        }

                        public void jsonColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                builder.setNull(column);
                                return;
                            }
                            try {
                                builder.setJson(column, parser.parse(text));
                            } catch (JsonParseException cause) {
                                // TODO support default value
                                throw new CsvRecordValidateException(cause);
                            }
                        }

                        // Returns the next column's text, or null when optional columns are
                        // allowed and the record has no further column.
                        private String nextColumn() {
                            final boolean columnMissing = optionalColumnsAllowed && !tokenizer.hasNextColumn();
                            // TODO warning (when a column is missing)
                            return columnMissing ? null : tokenizer.nextColumnOrNull();
                        }
                    });
                    // The tokenizer is advanced before the record is added, so an exception
                    // raised while advancing leaves the current record un-added.
                    try {
                        moreRecords = tokenizer.nextRecord();
                    } catch (CsvTokenizer.TooManyColumnsException tooManyColumns) {
                        if (!extraColumnsAllowed) {
                            // this line will be skipped at the following catch section
                            throw tooManyColumns;
                        }
                        // Discard the rest of the over-long line; its text is not used.
                        tokenizer.skipCurrentLine();
                        // TODO warning
                        moreRecords = tokenizer.nextRecord();
                    }
                    builder.addRecord();
                } catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException invalid) {
                    // Skip the line first, then read the line number, in that order.
                    final String skippedLine = tokenizer.skipCurrentLine();
                    final long lineNumber = tokenizer.getCurrentLineNumber();
                    if (haltOnInvalidRecord) {
                        throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), invalid);
                    }
                    log.warn(String.format("Skipped line %d (%s): %s", lineNumber, invalid.getMessage(), skippedLine));
                    // exec.notice().skippedLine(skippedLine);
                    moreRecords = tokenizer.nextRecord();
                }
            } while (moreRecords);
        }
        builder.finish();
    }
}
Also used : TimestampParser(org.embulk.spi.time.TimestampParser) PageBuilder(org.embulk.spi.PageBuilder) JsonParseException(org.embulk.spi.json.JsonParseException) TimestampParseException(org.embulk.spi.time.TimestampParseException) DataException(org.embulk.spi.DataException) ColumnVisitor(org.embulk.spi.ColumnVisitor) Column(org.embulk.spi.Column) LineDecoder(org.embulk.spi.util.LineDecoder) JsonParser(org.embulk.spi.json.JsonParser)

Aggregations

Column (org.embulk.spi.Column)2 ColumnVisitor (org.embulk.spi.ColumnVisitor)2 DataException (org.embulk.spi.DataException)2 PageBuilder (org.embulk.spi.PageBuilder)2 JsonParseException (org.embulk.spi.json.JsonParseException)2 JsonParser (org.embulk.spi.json.JsonParser)2 TimestampParseException (org.embulk.spi.time.TimestampParseException)2 TimestampParser (org.embulk.spi.time.TimestampParser)2 JsonNode (com.fasterxml.jackson.databind.JsonNode)1 List (java.util.List)1 LineDecoder (org.embulk.spi.util.LineDecoder)1