use of org.embulk.config.ConfigDiff in project embulk by embulk.
the class BulkLoader method doRun.
/**
 * Runs one bulk load described by {@code config} and returns its result.
 *
 * <p>The input, filter, executor and output transactions are opened one inside
 * another through their control callbacks, so the innermost callback (the output
 * plugin's {@code run}) is where the tasks are actually executed. A
 * {@code TransactionStage} marker is recorded on {@code state} before each
 * transaction is entered and again after each one returns.
 *
 * <p>Any {@code Throwable} raised along the way ends in one of three outcomes:
 * a "skipped execution" result, a normal result carrying the exception as a
 * warning (when {@code state} reports all tasks and all transactions committed),
 * or a rethrow of the exception built by
 * {@code state.buildPartialExecuteException}.
 *
 * @param config the configuration, loaded into a {@code BulkLoaderTask} and later
 *     passed to {@code cleanupCommittedTransaction}
 * @return the execution result built from {@code state}
 */
private ExecutionResult doRun(ConfigSource config) {
final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
final ExecutorPlugin exec = newExecutorPlugin(task);
final ProcessPluginSet plugins = new ProcessPluginSet(task);
final LoaderState state = newLoaderState(Exec.getLogger(BulkLoader.class), plugins);
// The stage is recorded before each plugin transaction is entered.
state.setTransactionStage(TransactionStage.INPUT_BEGIN);
try {
// Outermost transaction: the input plugin. Everything else runs inside its Control callback.
ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {
public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount) {
state.setInputTaskSource(inputTask);
state.setTransactionStage(TransactionStage.FILTER_BEGIN);
// Second level: the filter plugins, starting from the input schema.
Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
public void run(final List<TaskSource> filterTasks, final List<Schema> schemas) {
state.setSchemas(schemas);
state.setFilterTaskSources(filterTasks);
state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
// Third level: the executor plugin, given the last schema of the filter chain.
exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor) {
state.setExecutorSchema(executorSchema);
state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
// Innermost level: the output plugin. Its callback is where the tasks are run.
@SuppressWarnings("checkstyle:LineLength") ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
public List<TaskReport> run(final TaskSource outputTask) {
state.setOutputTaskSource(outputTask);
state.initialize(inputTaskCount, outputTaskCount);
state.setTransactionStage(TransactionStage.RUN);
// Execute only if state does not already report every task as committed.
if (!state.isAllTasksCommitted()) {
// NOTE(review): the original note here read "inputTaskCount == 0"; presumably that is
// the case in which the check above is already true and execution is skipped - confirm.
execute(task, executor, state);
}
// Anything still uncommitted after execution fails the whole run.
if (!state.isAllTasksCommitted()) {
throw new RuntimeException(String.format("%d input tasks and %d output tasks failed", state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
}
state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
return state.getAllOutputTaskReports();
}
});
// Each level records its result and its commit stage as the nested call returns.
state.setOutputConfigDiff(outputConfigDiff);
state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
}
});
state.setTransactionStage(TransactionStage.FILTER_COMMIT);
}
});
state.setTransactionStage(TransactionStage.INPUT_COMMIT);
return state.getAllInputTaskReports();
}
});
state.setInputConfigDiff(inputConfigDiff);
state.setTransactionStage(TransactionStage.CLEANUP);
cleanupCommittedTransaction(config, state);
return state.buildExecuteResult();
} catch (Throwable ex) {
if (isSkippedTransaction(ex)) {
// NOTE(review): the cast relies on isSkippedTransaction(ex) implying that ex itself is a
// SkipTransactionException - verify against that helper.
ConfigDiff configDiff = ((SkipTransactionException) ex).getConfigDiff();
return state.buildExecuteResultOfSkippedExecution(configDiff);
} else if (state.isAllTasksCommitted() && state.isAllTransactionsCommitted()) {
// Everything was committed before the failure, so the exception is not rethrown;
// it is handed to the result builder as a warning instead.
return state.buildExecuteResultWithWarningException(ex);
}
// Otherwise the run is incomplete: rethrow, passing the original exception and the session along.
throw state.buildPartialExecuteException(ex, Exec.session());
}
}
use of org.embulk.config.ConfigDiff in project embulk by embulk.
the class GuessExecutor method doGuess.
/**
 * Guesses the input configuration and returns it wrapped under the {@code "in"} key.
 *
 * <p>The input plugin is created from the nested {@code "in"} section of
 * {@code config}. If it is a {@code ConfigurableGuessInputPlugin}, its
 * two-argument {@code guess} is called with the {@code "exec"} section (or an
 * empty one) and the input section; otherwise the plugin's single-argument
 * {@code guess} is used.
 *
 * @param config the whole configuration; its {@code "in"} section is read with
 *     {@code getNested} and its {@code "exec"} section with {@code getNestedOrGetEmpty}
 * @return a new {@code ConfigDiff} whose {@code "in"} entry holds the guessed input configuration
 * @throws UnsupportedOperationException if calling {@code guess(ConfigSource)} on the plugin
 *     raises {@code AbstractMethodError}; the error is attached as the cause
 */
private ConfigDiff doGuess(ConfigSource config) {
    ConfigSource inputConfig = config.getNested("in");
    ConfigSource execConfig = config.getNestedOrGetEmpty("exec");
    InputPlugin input = newInputPlugin(inputConfig);

    ConfigDiff inputGuessed;
    if (input instanceof ConfigurableGuessInputPlugin) {
        inputGuessed = ((ConfigurableGuessInputPlugin) input).guess(execConfig, inputConfig);
    } else {
        try {
            inputGuessed = input.guess(inputConfig);
        } catch (AbstractMethodError ex) {
            // for backward compatibility with embulk v0.4 interface
            // Keep the linkage error as the cause so its stack trace is not lost.
            throw new UnsupportedOperationException(
                    input.getClass().getSimpleName() + ".guess(ConfigSource) is not implemented. This input plugin does not support guessing.",
                    ex);
        }
    }

    ConfigDiff wrapped = Exec.newConfigDiff();
    wrapped.getNestedOrSetEmpty("in").merge(inputGuessed);
    return wrapped;
}
use of org.embulk.config.ConfigDiff in project embulk by embulk.
the class GuessExecutor method guessParserConfig.
/**
 * Guesses parser configuration from a sample buffer, repeating until the guess stops changing.
 *
 * <p>Each round merges what has been guessed so far into a copy of {@code config},
 * sets {@code parser.type} to {@code system_guess} (with the guess plugins, the
 * original configuration and the sample buffer size alongside), and runs a
 * {@code FileInputRunner} over the sample. The guessed configuration arrives as a
 * {@code GuessedNoticeError}; a run that returns normally is reported with an
 * {@code AssertionError}. Guessing is repeated at most 10 times.
 *
 * @param sample the sample data fed to a {@code BufferFileInputPlugin}
 * @param config the configuration each round starts from
 * @param guessPlugins stored under {@code parser.guess_plugins}
 * @param guessParserSampleBufferBytes stored under {@code parser.guess_parser_sample_buffer_bytes}
 * @return the last guessed configuration, returned early once a round changes nothing
 */
private ConfigDiff guessParserConfig(Buffer sample, ConfigSource config, List<PluginType> guessPlugins, final int guessParserSampleBufferBytes) {
    // repeat guessing up to 10 times
    final int maxRounds = 10;
    ConfigDiff settled = Exec.newConfigDiff();

    int round = 0;
    while (round < maxRounds) {
        // include last-guessed config to run guess input
        final ConfigSource originalConfig = config.deepCopy().merge(settled);
        final ConfigSource guessInputConfig = originalConfig.deepCopy();
        guessInputConfig.getNestedOrSetEmpty("parser")
                // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
                .set("type", "system_guess")
                .set("guess_plugins", guessPlugins)
                .set("orig_config", originalConfig)
                .set("guess_parser_sample_buffer_bytes", guessParserSampleBufferBytes);

        // run FileInputPlugin
        final FileInputRunner runner = new FileInputRunner(new BufferFileInputPlugin(sample));
        ConfigDiff candidate;
        try {
            runner.transaction(guessInputConfig, new InputPlugin.Control() {
                public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount) {
                    if (taskCount == 0) {
                        throw new NoSampleException("No input files to guess");
                    }
                    runner.run(inputTaskSource, null, 0, new PageOutput() {
                        @Override
                        public void add(Page page) {
                            // TODO exception class
                            throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
                        }

                        @Override
                        public void finish() {
                        }

                        @Override
                        public void close() {
                        }
                    });
                    // Reaching this line means the run returned without delivering a guess.
                    throw new AssertionError("Guess executor must throw GuessedNoticeError");
                }
            });
            // Same check for the transaction as a whole.
            throw new AssertionError("Guess executor must throw GuessedNoticeError");
        } catch (GuessedNoticeError notice) {
            // merge to the last-guessed config
            candidate = settled.deepCopy().merge(notice.getGuessedConfig());
        }

        if (settled.equals(candidate)) {
            // not changed
            return settled;
        }
        settled = candidate;
        round++;
    }
    return settled;
}
use of org.embulk.config.ConfigDiff in project embulk by embulk.
the class TestCsvGuessPlugin method assertGuessByResource.
/**
 * Runs a parser guess over a resource and asserts the result equals an expected YAML resource.
 *
 * <p>All three resource names are prefixed with {@code RESOURCE_NAME_PREFIX}. The
 * guess is run with the {@code "json"} guess plugin excluded.
 *
 * @param embulk the testing harness used to load resources and run the guess
 * @param seedYamlResourceName resource holding the seed parser configuration
 * @param sourceCsvResourceName resource used as the guess input
 * @param resultResourceName resource holding the expected guessed configuration
 * @throws IOException declared for the resource loading and guess calls
 */
static void assertGuessByResource(TestingEmbulk embulk, String seedYamlResourceName, String sourceCsvResourceName, String resultResourceName) throws IOException {
    final ConfigSource seed = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + seedYamlResourceName);
    final ConfigSource execConfig = embulk.newConfig().set("exclude_guess_plugins", ImmutableList.of("json"));
    final String sourcePath = RESOURCE_NAME_PREFIX + sourceCsvResourceName;

    final ConfigDiff guessed = embulk.parserBuilder()
            .parser(seed)
            .exec(execConfig)
            .inputResource(sourcePath)
            .guess();

    final DataSource expected = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + resultResourceName);
    assertThat(guessed, is(expected));
}
use of org.embulk.config.ConfigDiff in project embulk by embulk.
the class TestCsvAllStringsGuessPlugin method testSimple.
/**
 * Guesses {@code test_simple.csv} with the {@code csv_all_strings} guess plugin enabled and
 * the {@code csv} guess plugin excluded, and compares the result with
 * {@code test_simple_guessed.yml}.
 */
@Test
public void testSimple() throws Exception {
    final ConfigSource execConfig = embulk.newConfig()
            .set("guess_plugins", ImmutableList.of("csv_all_strings"))
            .set("exclude_guess_plugins", ImmutableList.of("csv"));
    final String inputPath = RESOURCE_NAME_PREFIX + "test_simple.csv";

    final ConfigDiff guessed = embulk.parserBuilder()
            .exec(execConfig)
            .inputResource(inputPath)
            .guess();

    final DataSource expected = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + "test_simple_guessed.yml");
    assertThat(guessed, is(expected));
}
Aggregations