Search in sources :

Example 1 with JsonParseException

use of org.embulk.spi.json.JsonParseException in project embulk by embulk.

The run method of the class ConfigInputPlugin.

/**
 * Writes the rows configured for one task to {@code output} as page records.
 *
 * <p>The rows are taken from {@code task.getValues().get(taskIndex)}. Every cell is looked up by
 * the column's index and stored with the setter matching the column type; a cell that is missing
 * ({@code null}) or a JSON null is stored as null.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema columns to visit for every row
 * @param taskIndex index into the task's configured values selecting the rows to emit
 * @param output destination handed to the {@link PageBuilder}
 * @return a new, empty task report obtained from {@code Exec.newTaskReport()}
 * @throws DataException if a timestamp cell or a JSON cell cannot be parsed
 */
@Override
public TaskReport run(TaskSource taskSource, Schema schema, int taskIndex, PageOutput output) {
    final PluginTask task = taskSource.loadTask(PluginTask.class);
    final List<List<JsonNode>> rows = task.getValues().get(taskIndex);
    final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser jsonParser = new JsonParser();
    try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        for (final List<JsonNode> row : rows) {
            schema.visitColumns(new ColumnVisitor() {

                public void booleanColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        pageBuilder.setNull(column);
                        return;
                    }
                    pageBuilder.setBoolean(column, cell.asBoolean());
                }

                public void longColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        pageBuilder.setNull(column);
                        return;
                    }
                    pageBuilder.setLong(column, cell.asLong());
                }

                public void doubleColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        pageBuilder.setNull(column);
                        return;
                    }
                    pageBuilder.setDouble(column, cell.asDouble());
                }

                public void stringColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        pageBuilder.setNull(column);
                        return;
                    }
                    pageBuilder.setString(column, cell.asText());
                }

                public void timestampColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        pageBuilder.setNull(column);
                        return;
                    }
                    try {
                        // Each column has its own parser, addressed by the column index.
                        pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(cell.asText()));
                    } catch (TimestampParseException ex) {
                        throw new DataException(ex);
                    }
                }

                public void jsonColumn(Column column) {
                    final JsonNode cell = cellOrNull(column);
                    if (cell == null) {
                        pageBuilder.setNull(column);
                        return;
                    }
                    try {
                        // The node is serialized back to text and re-parsed by the JsonParser.
                        pageBuilder.setJson(column, jsonParser.parse(cell.toString()));
                    } catch (JsonParseException ex) {
                        throw new DataException(ex);
                    }
                }

                // Returns the row's cell for the column, or null when it is absent or a JSON null.
                private JsonNode cellOrNull(Column column) {
                    final JsonNode cell = row.get(column.getIndex());
                    if (cell == null || cell.isNull()) {
                        return null;
                    }
                    return cell;
                }
            });
            pageBuilder.addRecord();
        }
        pageBuilder.finish();
    }
    return Exec.newTaskReport();
}
Also used : TimestampParser(org.embulk.spi.time.TimestampParser) JsonNode(com.fasterxml.jackson.databind.JsonNode) PageBuilder(org.embulk.spi.PageBuilder) JsonParseException(org.embulk.spi.json.JsonParseException) TimestampParseException(org.embulk.spi.time.TimestampParseException) DataException(org.embulk.spi.DataException) ColumnVisitor(org.embulk.spi.ColumnVisitor) Column(org.embulk.spi.Column) List(java.util.List) JsonParser(org.embulk.spi.json.JsonParser)

Example 2 with JsonParseException

use of org.embulk.spi.json.JsonParseException in project embulk by embulk.

The run method of the class CsvParserPlugin.

/**
 * Reads CSV records from {@code input} and writes them to {@code output} as page records.
 *
 * <p>For each file reported by the tokenizer, up to {@code task.getSkipHeaderLines()} header lines
 * are skipped, then every record is converted column by column following {@code schema}. A column
 * whose text is {@code null} is stored as null. A record rejected with an invalid-format,
 * invalid-value or record-validation error either aborts the run with a {@link DataException}
 * (when {@code task.getStopOnInvalidRecord()} is true) or is logged as a warning and skipped.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema columns to visit for every record
 * @param input file input wrapped by the line decoder and tokenizer
 * @param output destination handed to the {@link PageBuilder}
 * @throws DataException if a record is invalid and stop-on-invalid-record is enabled
 */
@Override
public void run(TaskSource taskSource, final Schema schema, FileInput input, PageOutput output) {
    PluginTask task = taskSource.loadTask(PluginTask.class);
    final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
    final JsonParser jsonParser = new JsonParser();
    final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
    final boolean allowOptionalColumns = task.getAllowOptionalColumns();
    final boolean allowExtraColumns = task.getAllowExtraColumns();
    final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
    final int headerLineCount = task.getSkipHeaderLines();
    try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        while (tokenizer.nextFile()) {
            // skip the header lines for each file; give up as soon as skipHeaderLine() reports false
            int headersLeft = headerLineCount;
            while (headersLeft > 0 && tokenizer.skipHeaderLine()) {
                headersLeft--;
            }
            if (!tokenizer.nextRecord()) {
                // empty file
                continue;
            }
            boolean moreRecords;
            do {
                try {
                    schema.visitColumns(new ColumnVisitor() {

                        public void booleanColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                pageBuilder.setNull(column);
                                return;
                            }
                            pageBuilder.setBoolean(column, TRUE_STRINGS.contains(text));
                        }

                        public void longColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                pageBuilder.setNull(column);
                                return;
                            }
                            try {
                                pageBuilder.setLong(column, Long.parseLong(text));
                            } catch (NumberFormatException e) {
                                // TODO support default value
                                throw new CsvRecordValidateException(e);
                            }
                        }

                        public void doubleColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                pageBuilder.setNull(column);
                                return;
                            }
                            try {
                                pageBuilder.setDouble(column, Double.parseDouble(text));
                            } catch (NumberFormatException e) {
                                // TODO support default value
                                throw new CsvRecordValidateException(e);
                            }
                        }

                        public void stringColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                pageBuilder.setNull(column);
                                return;
                            }
                            pageBuilder.setString(column, text);
                        }

                        public void timestampColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                pageBuilder.setNull(column);
                                return;
                            }
                            try {
                                pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(text));
                            } catch (TimestampParseException e) {
                                // TODO support default value
                                throw new CsvRecordValidateException(e);
                            }
                        }

                        public void jsonColumn(Column column) {
                            final String text = nextColumn();
                            if (text == null) {
                                pageBuilder.setNull(column);
                                return;
                            }
                            try {
                                pageBuilder.setJson(column, jsonParser.parse(text));
                            } catch (JsonParseException e) {
                                // TODO support default value
                                throw new CsvRecordValidateException(e);
                            }
                        }

                        // Reads the next column's text; yields null for a column that is absent
                        // when optional columns are allowed.
                        private String nextColumn() {
                            if (!allowOptionalColumns || tokenizer.hasNextColumn()) {
                                return tokenizer.nextColumnOrNull();
                            }
                            // TODO warning
                            return null;
                        }
                    });
                    // Advance to the next record before committing the current one, so that a
                    // failure here is handled by the catch below without adding the record.
                    try {
                        moreRecords = tokenizer.nextRecord();
                    } catch (CsvTokenizer.TooManyColumnsException tooMany) {
                        if (!allowExtraColumns) {
                            // this line will be skipped at the following catch section
                            throw tooMany;
                        }
                        // Extra columns are tolerated: skip over the current line.
                        tokenizer.skipCurrentLine();
                        // TODO warning
                        moreRecords = tokenizer.nextRecord();
                    }
                    pageBuilder.addRecord();
                } catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException invalid) {
                    final String skippedLine = tokenizer.skipCurrentLine();
                    final long lineNumber = tokenizer.getCurrentLineNumber();
                    if (stopOnInvalidRecord) {
                        throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), invalid);
                    }
                    log.warn(String.format("Skipped line %d (%s): %s", lineNumber, invalid.getMessage(), skippedLine));
                    // exec.notice().skippedLine(skippedLine);
                    moreRecords = tokenizer.nextRecord();
                }
            } while (moreRecords);
        }
        pageBuilder.finish();
    }
}
Also used : TimestampParser(org.embulk.spi.time.TimestampParser) PageBuilder(org.embulk.spi.PageBuilder) JsonParseException(org.embulk.spi.json.JsonParseException) TimestampParseException(org.embulk.spi.time.TimestampParseException) DataException(org.embulk.spi.DataException) ColumnVisitor(org.embulk.spi.ColumnVisitor) Column(org.embulk.spi.Column) LineDecoder(org.embulk.spi.util.LineDecoder) JsonParser(org.embulk.spi.json.JsonParser)

Example 3 with JsonParseException

use of org.embulk.spi.json.JsonParseException in project embulk by embulk.

The run method of the class JsonParserPlugin.

/**
 * Reads JSON values from each input file and writes every map value as a single-column record.
 *
 * <p>Each value produced by the JSON stream must be a map; it is stored in the schema's first
 * column. A non-map value either aborts the run with a {@link DataException} (when
 * {@code task.getStopOnInvalidRecord()} is true) or is logged as a warning and skipped. An
 * {@link IOException} or {@link JsonParseException} raised while reading a file is rethrown as a
 * {@link DataException}, except in preview mode after at least one record of that file has been
 * accepted, in which case reading stops and the records collected so far are kept.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema schema whose column 0 receives the JSON records
 * @param input file input to read JSON from
 * @param output destination for the built pages
 * @throws DataException on an invalid record with stop-on-invalid-record enabled, or on a
 *     read/parse failure that is not ignored for preview
 */
@Override
public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutput output) {
    PluginTask task = taskSource.loadTask(PluginTask.class);
    final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
    // record column
    final Column column = schema.getColumn(0);
    try (PageBuilder pageBuilder = newPageBuilder(schema, output);
        FileInputInputStream in = new FileInputInputStream(input)) {
        while (in.nextFile()) {
            // Tracked per file: decides whether a later parse failure may be ignored in preview.
            boolean evenOneJsonParsed = false;
            try (JsonParser.Stream stream = newJsonStream(in, task)) {
                Value value;
                while ((value = stream.next()) != null) {
                    try {
                        if (!value.isMapValue()) {
                            // The message states the actual requirement: only map values are accepted.
                            throw new JsonRecordValidateException(String.format("A Json record must represent map value but it's %s", value.getValueType().name()));
                        }
                        pageBuilder.setJson(column, value);
                        pageBuilder.addRecord();
                        evenOneJsonParsed = true;
                    } catch (JsonRecordValidateException e) {
                        if (stopOnInvalidRecord) {
                            throw new DataException(String.format("Invalid record: %s", value.toJson()), e);
                        }
                        log.warn(String.format("Skipped record (%s): %s", e.getMessage(), value.toJson()));
                    }
                }
            } catch (IOException | JsonParseException e) {
                if (Exec.isPreview() && evenOneJsonParsed) {
                    // ignore in preview if at least one JSON is already parsed.
                    break;
                }
                throw new DataException(e);
            }
        }
        pageBuilder.finish();
    }
}
Also used : PageBuilder(org.embulk.spi.PageBuilder) IOException(java.io.IOException) JsonParseException(org.embulk.spi.json.JsonParseException) DataException(org.embulk.spi.DataException) FileInputInputStream(org.embulk.spi.util.FileInputInputStream) Column(org.embulk.spi.Column) Value(org.msgpack.value.Value) JsonParser(org.embulk.spi.json.JsonParser)

Aggregations

Column (org.embulk.spi.Column): 3, DataException (org.embulk.spi.DataException): 3, PageBuilder (org.embulk.spi.PageBuilder): 3, JsonParseException (org.embulk.spi.json.JsonParseException): 3, JsonParser (org.embulk.spi.json.JsonParser): 3, ColumnVisitor (org.embulk.spi.ColumnVisitor): 2, TimestampParseException (org.embulk.spi.time.TimestampParseException): 2, TimestampParser (org.embulk.spi.time.TimestampParser): 2, JsonNode (com.fasterxml.jackson.databind.JsonNode): 1, IOException (java.io.IOException): 1, List (java.util.List): 1, FileInputInputStream (org.embulk.spi.util.FileInputInputStream): 1, LineDecoder (org.embulk.spi.util.LineDecoder): 1, Value (org.msgpack.value.Value): 1