use of org.embulk.spi.PageOutput in project embulk by embulk.
the class Filters method open.
/**
 * Wraps {@code output} with every filter plugin, building the chain from the last plugin back to the first.
 *
 * <p>The plugin at index {@code i} is opened with {@code taskSources.get(i)}, {@code filterSchemas.get(i)}
 * and {@code filterSchemas.get(i + 1)}, so {@code filterSchemas} is indexed one position past the
 * last plugin index.
 *
 * @param plugins filter plugins, in the order pages should pass through them
 * @param taskSources task source for each plugin, by index
 * @param filterSchemas schemas passed to each plugin's {@code open} call, by index
 * @param output the output the last plugin is opened on
 * @return the output opened by the first plugin, or {@code output} itself when {@code plugins} is empty
 */
public static PageOutput open(List<FilterPlugin> plugins, List<TaskSource> taskSources, List<Schema> filterSchemas, PageOutput output) {
    PageOutput chained = output;
    // Iterate backwards so that each plugin is opened on top of the plugins that follow it.
    for (int index = plugins.size() - 1; index >= 0; index--) {
        final FilterPlugin plugin = plugins.get(index);
        chained = plugin.open(taskSources.get(index), filterSchemas.get(index), filterSchemas.get(index + 1), chained);
    }
    return chained;
}
use of org.embulk.spi.PageOutput in project embulk by embulk.
the class GuessExecutor method guessParserConfig.
/**
 * Runs the guess plugins over {@code sample} repeatedly, merging each round's guess into the next
 * round's configuration, until a round changes nothing or 10 rounds have run.
 *
 * <p>Each round runs a {@code FileInputRunner} over the sample with the parser type overridden to
 * {@code system_guess}. The round's result is taken from the {@code GuessedNoticeError} caught
 * around the transaction; a round that completes without that error raises an
 * {@code AssertionError}.
 *
 * @param sample buffer holding the sample data to guess from
 * @param config base configuration; it is deep-copied before being merged, never modified directly
 * @param guessPlugins plugin types stored under the parser's {@code guess_plugins} key
 * @param guessParserSampleBufferBytes value stored under the parser's
 *        {@code guess_parser_sample_buffer_bytes} key
 * @return the accumulated guessed configuration
 * @throws NoSampleException if the input transaction reports zero tasks
 */
private ConfigDiff guessParserConfig(Buffer sample, ConfigSource config, List<PluginType> guessPlugins, final int guessParserSampleBufferBytes) {
    ConfigDiff accumulated = Exec.newConfigDiff();
    // repeat guessing up to 10 times
    int round = 0;
    while (round < 10) {
        // include the guess accumulated so far in the config used to run the guess input
        final ConfigSource mergedConfig = config.deepCopy().merge(accumulated);
        final ConfigSource runConfig = mergedConfig.deepCopy();
        runConfig.getNestedOrSetEmpty("parser")
                // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
                .set("type", "system_guess")
                .set("guess_plugins", guessPlugins)
                .set("orig_config", mergedConfig)
                .set("guess_parser_sample_buffer_bytes", guessParserSampleBufferBytes);

        // run FileInputPlugin
        final FileInputRunner runner = new FileInputRunner(new BufferFileInputPlugin(sample));
        ConfigDiff nextGuess;
        try {
            runner.transaction(runConfig, new InputPlugin.Control() {
                public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount) {
                    if (taskCount == 0) {
                        throw new NoSampleException("No input files to guess");
                    }
                    // Output that rejects any page: receiving one is treated as an error here.
                    final PageOutput rejectingOutput = new PageOutput() {
                        @Override
                        public void add(Page page) {
                            // TODO exception class
                            throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
                        }

                        @Override
                        public void finish() {
                        }

                        @Override
                        public void close() {
                        }
                    };
                    runner.run(inputTaskSource, null, 0, rejectingOutput);
                    throw new AssertionError("Guess executor must throw GuessedNoticeError");
                }
            });
            throw new AssertionError("Guess executor must throw GuessedNoticeError");
        } catch (GuessedNoticeError notice) {
            // merge this round's guess onto a copy of the accumulated one
            nextGuess = accumulated.deepCopy().merge(notice.getGuessedConfig());
        }

        if (accumulated.equals(nextGuess)) {
            // not changed
            return accumulated;
        }
        accumulated = nextGuess;
        round++;
    }
    return accumulated;
}
use of org.embulk.spi.PageOutput in project embulk by embulk.
the class CsvFormatterPlugin method open.
/**
 * Opens a {@link PageOutput} that writes every record of the pages it receives as one delimited line.
 *
 * <p>Before the output is returned, a file is started on the encoder and, when the task's
 * header-line option is on, the header row is written.
 *
 * @param taskSource source the {@code PluginTask} is loaded from
 * @param schema columns of the incoming pages
 * @param output file output wrapped by the {@code LineEncoder} that receives the text
 * @return a page output whose {@code finish} and {@code close} delegate to the encoder
 */
@Override
public PageOutput open(TaskSource taskSource, final Schema schema, FileOutput output) {
    final PluginTask task = taskSource.loadTask(PluginTask.class);
    final LineEncoder lineEncoder = new LineEncoder(output, task);
    final TimestampFormatter[] formatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
    final char delimiter = task.getDelimiterChar();
    final QuotePolicy quotePolicy = task.getQuotePolicy();

    // A '\0' quote character in the task falls back to the double quote.
    final char quote;
    if (task.getQuoteChar() != '\0') {
        quote = task.getQuoteChar();
    } else {
        quote = '"';
    }

    // Without an explicit escape character: backslash when quoting is off, otherwise the quote itself.
    final char fallbackEscape = (quotePolicy == QuotePolicy.NONE) ? '\\' : quote;
    final char escape = task.getEscapeChar().or(fallbackEscape);
    final String newlineInField = task.getNewlineInField().getString();
    final String nullString = task.getNullString();

    // create a file
    lineEncoder.nextFile();

    // write header
    if (task.getHeaderLine()) {
        writeHeader(schema, lineEncoder, delimiter, quotePolicy, quote, escape, newlineInField, nullString);
    }

    return new PageOutput() {
        private final PageReader reader = new PageReader(schema);
        private final String separator = String.valueOf(delimiter);

        // Writes one column of the reader's current record; it keeps no state of its own.
        private final ColumnVisitor rowWriter = new ColumnVisitor() {
            public void booleanColumn(Column column) {
                writeSeparator(column);
                if (reader.isNull(column)) {
                    writeNull();
                } else {
                    writeValue(Boolean.toString(reader.getBoolean(column)));
                }
            }

            public void longColumn(Column column) {
                writeSeparator(column);
                if (reader.isNull(column)) {
                    writeNull();
                } else {
                    writeValue(Long.toString(reader.getLong(column)));
                }
            }

            public void doubleColumn(Column column) {
                writeSeparator(column);
                if (reader.isNull(column)) {
                    writeNull();
                } else {
                    writeValue(Double.toString(reader.getDouble(column)));
                }
            }

            public void stringColumn(Column column) {
                writeSeparator(column);
                if (reader.isNull(column)) {
                    writeNull();
                } else {
                    writeValue(reader.getString(column));
                }
            }

            public void timestampColumn(Column column) {
                writeSeparator(column);
                if (reader.isNull(column)) {
                    writeNull();
                } else {
                    Timestamp timestamp = reader.getTimestamp(column);
                    writeValue(formatters[column.getIndex()].format(timestamp));
                }
            }

            public void jsonColumn(Column column) {
                writeSeparator(column);
                if (reader.isNull(column)) {
                    writeNull();
                } else {
                    Value json = reader.getJson(column);
                    writeValue(json.toJson());
                }
            }

            // Every column except the one at index 0 is preceded by the delimiter.
            private void writeSeparator(Column column) {
                if (column.getIndex() != 0) {
                    lineEncoder.addText(separator);
                }
            }

            // Non-null values go through the escape/quote helper before being written.
            private void writeValue(String text) {
                lineEncoder.addText(setEscapeAndQuoteValue(text, delimiter, quotePolicy, quote, escape, newlineInField, nullString));
            }

            // Null values are written as the configured null string, bypassing the escape/quote helper.
            private void writeNull() {
                lineEncoder.addText(nullString);
            }
        };

        public void add(Page page) {
            reader.setPage(page);
            while (reader.nextRecord()) {
                schema.visitColumns(rowWriter);
                lineEncoder.addNewLine();
            }
        }

        public void finish() {
            lineEncoder.finish();
        }

        public void close() {
            lineEncoder.close();
        }
    };
}
use of org.embulk.spi.PageOutput in project embulk by embulk.
the class Executors method process.
/**
 * Runs one task: opens the output plugin, chains the filters in front of it, runs the input plugin
 * into that chain, commits the output, and reports each stage to {@code callback}.
 *
 * <p>Callback order on success: {@code started}, {@code inputCommitted}, {@code outputCommitted}.
 * A {@code null} task report from either the input plugin or the output commit is replaced by
 * {@code exec.newTaskReport()} before it is handed to the callback.
 *
 * @param exec session used to create replacement task reports
 * @param taskIndex index of the task, passed to both the output and the input plugin
 * @param inputPlugin plugin that produces the pages
 * @param inputSchema schema passed to the input plugin
 * @param inputTaskSource task source passed to the input plugin
 * @param filterPlugins filters placed between input and output
 * @param filterSchemas schemas passed to {@code Filters.open}
 * @param filterTaskSources task sources passed to {@code Filters.open}
 * @param outputPlugin plugin that consumes the filtered pages
 * @param outputSchema schema passed to the output plugin
 * @param outputTaskSource task source passed to the output plugin
 * @param callback receiver of the progress notifications
 */
public static void process(ExecSession exec, int taskIndex, InputPlugin inputPlugin, Schema inputSchema, TaskSource inputTaskSource, List<FilterPlugin> filterPlugins, List<Schema> filterSchemas, List<TaskSource> filterTaskSources, OutputPlugin outputPlugin, Schema outputSchema, TaskSource outputTaskSource, ProcessStateCallback callback) {
    final TransactionalPageOutput transaction = PluginWrappers.transactionalPageOutput(outputPlugin.open(outputTaskSource, outputSchema, taskIndex));
    callback.started();
    // Resources are released in reverse declaration order: the abort guard first, then the close guard.
    // NOTE(review): presumably the abort guard aborts the transaction unless dontAbort() was called — confirm.
    try (CloseResource closeGuard = new CloseResource(transaction);
            AbortTransactionResource abortGuard = new AbortTransactionResource(transaction)) {
        final PageOutput filterChain = Filters.open(filterPlugins, filterTaskSources, filterSchemas, transaction);
        // From here on the close guard is pointed at the filter chain rather than the bare transaction.
        closeGuard.closeThis(filterChain);

        final TaskReport inputReport = inputPlugin.run(inputTaskSource, inputSchema, taskIndex, filterChain);
        callback.inputCommitted(inputReport != null ? inputReport : exec.newTaskReport());

        final TaskReport outputReport = transaction.commit();
        abortGuard.dontAbort();
        // TODO check output.finish() is called. wrap or abstract
        callback.outputCommitted(outputReport != null ? outputReport : exec.newTaskReport());
    }
}
use of org.embulk.spi.PageOutput in project embulk by embulk.
the class PreviewExecutor method doPreview.
/**
 * Runs the input plugin through the filter chain into a sampling output and returns the preview
 * carried by the {@code PreviewedNoticeError} that ends the run.
 *
 * <p>The result is not returned through the normal call path: it is read from the caught
 * {@code PreviewedNoticeError}. If the input transaction returns without that error being thrown,
 * an {@code AssertionError} is raised.
 *
 * @param task preview task supplying the input config, the filter configs and the sample row count
 * @param input input plugin to run
 * @param filterPlugins filters applied between the input and the sampling output
 * @return the preview result taken from the caught {@code PreviewedNoticeError}
 * @throws NoSampleException when the filter transaction returns normally, or when the last task
 *         raises it
 */
@SuppressWarnings("checkstyle:OverloadMethodsDeclarationOrder")
private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins) {
try {
input.transaction(task.getInputConfig(), new InputPlugin.Control() {
public List<TaskReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount) {
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas) {
// This local shadows the outer run()'s inputSchema parameter with the first filter schema.
Schema inputSchema = filterSchemas.get(0);
// The last filter schema describes the records that reach the sampling output.
Schema outputSchema = filterSchemas.get(filterSchemas.size() - 1);
PageOutput out = new SamplingPageOutput(task.getSampleRows(), outputSchema);
try {
// Try the tasks in index order.
for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
try {
// NOTE(review): `out` is reassigned here on every iteration, so from the second task on
// the new filter chain wraps the chain opened for the previous task rather than the bare
// sampling output. Pages of later tasks would then pass through the filters more than
// once. Confirm whether this is intended.
out = Filters.open(filterPlugins, filterTasks, filterSchemas, out);
input.run(inputTask, inputSchema, taskIndex, out);
} catch (NoSampleException ex) {
// A task without samples is ignored unless it is the last one; then the exception propagates.
if (taskIndex == taskCount - 1) {
throw ex;
}
}
}
} finally {
// Closes whatever `out` refers to at this point (the most recently opened chain, if any).
out.close();
}
}
});
// program never reaches here because SamplingPageOutput.finish throws an error.
throw new NoSampleException("No input records to preview");
}
});
throw new AssertionError("PreviewExecutor executor must throw PreviewedNoticeError");
} catch (PreviewedNoticeError previewed) {
// The preview result travels inside the notice error.
return previewed.getPreviewResult();
}
}
Aggregations