Search in sources :

Example 1 with PageReader

Use of org.embulk.spi.PageReader in project MiscellaneousStudy by mikoto2000.

Method add of class MyPageOutput.

/**
 * Copies every record of {@code page} into {@code pageBuilder}, prepending a
 * line-number column and appending a generated string column.
 *
 * <p>Output column layout written by this method:
 * <ul>
 *   <li>0: current value of {@code lineNumber}</li>
 *   <li>1-2: input columns 0-1, read as long</li>
 *   <li>3-4: input columns 2-3, read as timestamp</li>
 *   <li>5: input column 4, read as string</li>
 *   <li>6: generated text containing the line number</li>
 * </ul>
 *
 * @param page the page whose records are read
 */
@Override
public void add(Page page) {
    System.out.println("start MyPageOutput#add!");
    // try-with-resources closes the reader when this call returns.
    try (PageReader pageReader = new PageReader(inputSchema)) {
        pageReader.setPage(page);
        System.out.println("record count: " + PageReader.getRecordCount(page));
        while (pageReader.nextRecord()) {
            // Line-number column
            pageBuilder.setLong(0, lineNumber);
            // Input columns
            pageBuilder.setLong(1, pageReader.getLong(0));
            pageBuilder.setLong(2, pageReader.getLong(1));
            pageBuilder.setTimestamp(3, pageReader.getTimestamp(2));
            pageBuilder.setTimestamp(4, pageReader.getTimestamp(3));
            pageBuilder.setString(5, pageReader.getString(4));
            // Additional string column; lineNumber is advanced here, once per record
            pageBuilder.setString(6, "Additional String" + lineNumber++ + "!");
            // Append the edited record
            pageBuilder.addRecord();
        }
    }
}
Also used : PageReader(org.embulk.spi.PageReader)

Example 2 with PageReader

Use of org.embulk.spi.PageReader in project embulk by embulk.

Method toObjects of class Pages.

// TODO use streaming and return Iterable
/**
 * Reads every record from every page in {@code pages} and collects the
 * per-record result of {@code toObjects(PageReader)} into an immutable list.
 *
 * @param schema the schema used to construct the {@link PageReader}
 * @param pages the pages to read, visited in iteration order
 * @return the collected records, in the order they were read
 */
public static List<Object[]> toObjects(Schema schema, Iterable<Page> pages) {
    final ImmutableList.Builder<Object[]> rows = ImmutableList.builder();
    final Iterator<Page> remainingPages = pages.iterator();
    // The reader is closed by try-with-resources once all pages are consumed.
    try (PageReader recordReader = new PageReader(schema)) {
        while (remainingPages.hasNext()) {
            final Page currentPage = remainingPages.next();
            recordReader.setPage(currentPage);
            while (recordReader.nextRecord()) {
                rows.add(toObjects(recordReader));
            }
        }
    }
    return rows.build();
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) PageReader(org.embulk.spi.PageReader) Page(org.embulk.spi.Page)

Example 3 with PageReader

Use of org.embulk.spi.PageReader in project embulk by embulk.

Method open of class RemoveColumnsFilterPlugin.

/**
 * Builds the {@link PageOutput} for this filter: a {@code PageConverter} that is
 * given a reader over {@code inputSchema}, a builder targeting
 * {@code outputSchema}/{@code output}, and the task's index mapping.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param inputSchema schema passed to the {@link PageReader}
 * @param outputSchema schema passed to the {@link PageBuilder}
 * @param output downstream output passed to the {@link PageBuilder}
 * @return the converter wrapping the reader and builder
 */
@Override
public PageOutput open(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) {
    final PluginTask pluginTask = taskSource.loadTask(PluginTask.class);
    // Arguments are evaluated left to right: reader, then builder, then mapping.
    return new PageConverter(
            new PageReader(inputSchema),
            new PageBuilder(getBufferAllocator(), outputSchema, output),
            pluginTask.getIndexMapping());
}
Also used : PageReader(org.embulk.spi.PageReader) PageBuilder(org.embulk.spi.PageBuilder)

Example 4 with PageReader

Use of org.embulk.spi.PageReader in project embulk by embulk.

Method open of class CsvFormatterPlugin.

/**
 * Opens a CSV-formatting {@link PageOutput} that writes each record of every
 * added page as one delimited line to {@code output}.
 *
 * <p>Before returning, this method starts a new file on the encoder and, when
 * {@code task.getHeaderLine()} is true, writes the header line.
 *
 * @param taskSource source from which the {@code PluginTask} is loaded
 * @param schema schema of the pages to format; drives the per-column visitor
 * @param output file output wrapped by the {@link LineEncoder}
 * @return a page output that formats records and forwards finish/close to the encoder
 */
@Override
public PageOutput open(TaskSource taskSource, final Schema schema, FileOutput output) {
    final PluginTask task = taskSource.loadTask(PluginTask.class);
    final LineEncoder encoder = new LineEncoder(output, task);
    final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
    final char delimiter = task.getDelimiterChar();
    final QuotePolicy quotePolicy = task.getQuotePolicy();
    // A NUL quote character falls back to a double quote.
    final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"';
    // Fallback escape: backslash when quoting is disabled, otherwise the quote character.
    final char escape = task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : quote);
    final String newlineInField = task.getNewlineInField().getString();
    final String nullString = task.getNullString();
    // create a file
    encoder.nextFile();
    // write header
    if (task.getHeaderLine()) {
        writeHeader(schema, encoder, delimiter, quotePolicy, quote, escape, newlineInField, nullString);
    }
    return new PageOutput() {

        private final PageReader pageReader = new PageReader(schema);

        private final String delimiterString = String.valueOf(delimiter);

        // Holds no per-record state (it always reads the reader's current record),
        // so a single instance is reused instead of allocating one per record.
        private final ColumnVisitor columnWriter = new ColumnVisitor() {

            @Override
            public void booleanColumn(Column column) {
                addDelimiter(column);
                if (!pageReader.isNull(column)) {
                    addValue(Boolean.toString(pageReader.getBoolean(column)));
                } else {
                    addNullString();
                }
            }

            @Override
            public void longColumn(Column column) {
                addDelimiter(column);
                if (!pageReader.isNull(column)) {
                    addValue(Long.toString(pageReader.getLong(column)));
                } else {
                    addNullString();
                }
            }

            @Override
            public void doubleColumn(Column column) {
                addDelimiter(column);
                if (!pageReader.isNull(column)) {
                    addValue(Double.toString(pageReader.getDouble(column)));
                } else {
                    addNullString();
                }
            }

            @Override
            public void stringColumn(Column column) {
                addDelimiter(column);
                if (!pageReader.isNull(column)) {
                    addValue(pageReader.getString(column));
                } else {
                    addNullString();
                }
            }

            @Override
            public void timestampColumn(Column column) {
                addDelimiter(column);
                if (!pageReader.isNull(column)) {
                    Timestamp value = pageReader.getTimestamp(column);
                    addValue(timestampFormatters[column.getIndex()].format(value));
                } else {
                    addNullString();
                }
            }

            @Override
            public void jsonColumn(Column column) {
                addDelimiter(column);
                if (!pageReader.isNull(column)) {
                    Value value = pageReader.getJson(column);
                    addValue(value.toJson());
                } else {
                    addNullString();
                }
            }

            // Emits the delimiter before every column except the first.
            private void addDelimiter(Column column) {
                if (column.getIndex() != 0) {
                    encoder.addText(delimiterString);
                }
            }

            // Writes a non-null value after escaping/quoting it.
            private void addValue(String v) {
                encoder.addText(setEscapeAndQuoteValue(v, delimiter, quotePolicy, quote, escape, newlineInField, nullString));
            }

            // Writes the configured null string as-is (no escaping/quoting applied here).
            private void addNullString() {
                encoder.addText(nullString);
            }
        };

        @Override
        public void add(Page page) {
            pageReader.setPage(page);
            while (pageReader.nextRecord()) {
                schema.visitColumns(columnWriter);
                encoder.addNewLine();
            }
        }

        @Override
        public void finish() {
            encoder.finish();
        }

        @Override
        public void close() {
            // Close the reader as well as the encoder; the encoder is closed
            // even if closing the reader throws.
            try {
                pageReader.close();
            } finally {
                encoder.close();
            }
        }
    };
}
Also used : TimestampFormatter(org.embulk.spi.time.TimestampFormatter) LineEncoder(org.embulk.spi.util.LineEncoder) PageReader(org.embulk.spi.PageReader) Page(org.embulk.spi.Page) Timestamp(org.embulk.spi.time.Timestamp) ColumnVisitor(org.embulk.spi.ColumnVisitor) PageOutput(org.embulk.spi.PageOutput) Column(org.embulk.spi.Column) Value(org.msgpack.value.Value)

Aggregations

PageReader (org.embulk.spi.PageReader): 4, Page (org.embulk.spi.Page): 2, ImmutableList (com.google.common.collect.ImmutableList): 1, Column (org.embulk.spi.Column): 1, ColumnVisitor (org.embulk.spi.ColumnVisitor): 1, PageBuilder (org.embulk.spi.PageBuilder): 1, PageOutput (org.embulk.spi.PageOutput): 1, Timestamp (org.embulk.spi.time.Timestamp): 1, TimestampFormatter (org.embulk.spi.time.TimestampFormatter): 1, LineEncoder (org.embulk.spi.util.LineEncoder): 1, Value (org.msgpack.value.Value): 1