Search in sources:

Example 1 with ConfigDiff

use of org.embulk.config.ConfigDiff in project embulk by embulk.

the class BulkLoader method doRun.

/**
 * Runs one bulk load by nesting four plugin transactions -- input, filters, executor, output --
 * and recording the current {@code TransactionStage} on a shared {@code LoaderState} at every step.
 *
 * <p>Each transaction is entered from inside the control callback of the previous one, so the
 * "BEGIN" stages are set on the way in and the "COMMIT" stages on the way out, in reverse order.
 *
 * @param config the configuration loaded into a {@code BulkLoaderTask}; it is also handed to
 *     {@code cleanupCommittedTransaction} once all transactions have returned
 * @return the result built by {@code state.buildExecuteResult()} on the normal path, or one of the
 *     "skipped" / "warning" results built in the catch block
 * @throws RuntimeException presumably -- the method rethrows whatever
 *     {@code state.buildPartialExecuteException} returns; its concrete type is not visible here
 */
private ExecutionResult doRun(ConfigSource config) {
    final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
    final ExecutorPlugin exec = newExecutorPlugin(task);
    final ProcessPluginSet plugins = new ProcessPluginSet(task);
    final LoaderState state = newLoaderState(Exec.getLogger(BulkLoader.class), plugins);
    state.setTransactionStage(TransactionStage.INPUT_BEGIN);
    try {
        // Outermost transaction: the input plugin. Its callback drives everything else.
        ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {

            public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount) {
                state.setInputTaskSource(inputTask);
                state.setTransactionStage(TransactionStage.FILTER_BEGIN);
                // Second level: the filter plugins, started from the input schema.
                Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {

                    public void run(final List<TaskSource> filterTasks, final List<Schema> schemas) {
                        state.setSchemas(schemas);
                        state.setFilterTaskSources(filterTasks);
                        state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
                        // Third level: the executor plugin, given the last schema of the filter chain.
                        exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {

                            public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor) {
                                state.setExecutorSchema(executorSchema);
                                state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
                                // Innermost level: the output plugin. The actual task execution happens in its callback.
                                @SuppressWarnings("checkstyle:LineLength") ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {

                                    public List<TaskReport> run(final TaskSource outputTask) {
                                        state.setOutputTaskSource(outputTask);
                                        state.initialize(inputTaskCount, outputTaskCount);
                                        state.setTransactionStage(TransactionStage.RUN);
                                        if (!state.isAllTasksCommitted()) {
                                            // NOTE(review): the original comment here reads "inputTaskCount == 0".
                                            // Presumably it describes the opposite case -- with zero input tasks
                                            // everything already counts as committed and execute() is skipped. Confirm.
                                            execute(task, executor, state);
                                        }
                                        // Re-check after execution: any task still uncommitted fails the whole run.
                                        if (!state.isAllTasksCommitted()) {
                                            throw new RuntimeException(String.format("%d input tasks and %d output tasks failed", state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
                                        }
                                        state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
                                        return state.getAllOutputTaskReports();
                                    }
                                });
                                state.setOutputConfigDiff(outputConfigDiff);
                                state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
                            }
                        });
                        state.setTransactionStage(TransactionStage.FILTER_COMMIT);
                    }
                });
                state.setTransactionStage(TransactionStage.INPUT_COMMIT);
                return state.getAllInputTaskReports();
            }
        });
        state.setInputConfigDiff(inputConfigDiff);
        state.setTransactionStage(TransactionStage.CLEANUP);
        cleanupCommittedTransaction(config, state);
        return state.buildExecuteResult();
    } catch (Throwable ex) {
        if (isSkippedTransaction(ex)) {
            // NOTE(review): the cast assumes isSkippedTransaction(ex) is only true for a
            // SkipTransactionException -- confirm against that helper.
            ConfigDiff configDiff = ((SkipTransactionException) ex).getConfigDiff();
            return state.buildExecuteResultOfSkippedExecution(configDiff);
        } else if (state.isAllTasksCommitted() && state.isAllTransactionsCommitted()) {
            // ignore the exception: all tasks and all transactions are committed, so a result
            // carrying the exception is returned instead of rethrowing it.
            return state.buildExecuteResultWithWarningException(ex);
        }
        // Anything else: rethrow as the exception built from the recorded state and the current session.
        throw state.buildPartialExecuteException(ex, Exec.session());
    }
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) TaskReport(org.embulk.config.TaskReport) ExecutorPlugin(org.embulk.spi.ExecutorPlugin) Schema(org.embulk.spi.Schema) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConfigDiff(org.embulk.config.ConfigDiff) TaskSource(org.embulk.config.TaskSource)

Example 2 with ConfigDiff

use of org.embulk.config.ConfigDiff in project embulk by embulk.

the class GuessExecutor method doGuess.

/**
 * Asks the input plugin configured under {@code "in"} to guess its configuration and returns the
 * guess wrapped under an {@code "in"} key of a fresh {@code ConfigDiff}.
 *
 * <p>Plugins that implement {@code ConfigurableGuessInputPlugin} are called with both the
 * {@code "exec"} section (empty when absent) and the {@code "in"} section; all other plugins are
 * called with the {@code "in"} section only.
 *
 * @param config the configuration holding the {@code "in"} section and, optionally, {@code "exec"}
 * @return a new {@code ConfigDiff} whose {@code "in"} entry is merged with the plugin's guess
 * @throws UnsupportedOperationException if the plugin's {@code guess(ConfigSource)} call fails with
 *     an {@code AbstractMethodError}; that error is attached as the cause
 */
private ConfigDiff doGuess(ConfigSource config) {
    ConfigSource inputConfig = config.getNested("in");
    ConfigSource execConfig = config.getNestedOrGetEmpty("exec");
    InputPlugin input = newInputPlugin(inputConfig);
    ConfigDiff inputGuessed;
    if (input instanceof ConfigurableGuessInputPlugin) {
        inputGuessed = ((ConfigurableGuessInputPlugin) input).guess(execConfig, inputConfig);
    } else {
        try {
            inputGuessed = input.guess(inputConfig);
        } catch (AbstractMethodError ex) {
            // for backward compatibility with embulk v0.4 interface
            // Keep the original error as the cause so the failing plugin frame stays in the stack trace.
            throw new UnsupportedOperationException(
                    input.getClass().getSimpleName() + ".guess(ConfigSource) is not implemented. This input plugin does not support guessing.",
                    ex);
        }
    }
    ConfigDiff wrapped = Exec.newConfigDiff();
    wrapped.getNestedOrSetEmpty("in").merge(inputGuessed);
    return wrapped;
}
Also used : ConfigSource(org.embulk.config.ConfigSource) InputPlugin(org.embulk.spi.InputPlugin) ConfigDiff(org.embulk.config.ConfigDiff)

Example 3 with ConfigDiff

use of org.embulk.config.ConfigDiff in project embulk by embulk.

the class GuessExecutor method guessParserConfig.

/**
 * Repeatedly runs the guess parser over {@code sample}, feeding each round's result back into the
 * next round, until the guess stops changing or 10 rounds have been run.
 *
 * @param sample the buffer wrapped in a {@code BufferFileInputPlugin} and used as the input of every round
 * @param config the base configuration; it is deep-copied each round and never modified here
 * @param guessPlugins the value stored under {@code parser.guess_plugins} for each round
 * @param guessParserSampleBufferBytes the value stored under
 *     {@code parser.guess_parser_sample_buffer_bytes} for each round
 * @return the last accumulated guess
 */
private ConfigDiff guessParserConfig(Buffer sample, ConfigSource config, List<PluginType> guessPlugins, final int guessParserSampleBufferBytes) {
    ConfigDiff lastGuessed = Exec.newConfigDiff();
    // repeat guessing up to 10 times
    int round = 0;
    while (round < 10) {
        // Each round starts from the caller's config overlaid with everything guessed so far.
        ConfigSource baseConfig = config.deepCopy().merge(lastGuessed);
        ConfigSource runConfig = baseConfig.deepCopy();
        // Override in.parser.type so that FileInputRunner.run uses GuessParserPlugin.
        runConfig.getNestedOrSetEmpty("parser")
                .set("type", "system_guess")
                .set("guess_plugins", guessPlugins)
                .set("orig_config", baseConfig)
                .set("guess_parser_sample_buffer_bytes", guessParserSampleBufferBytes);

        // Run the file input over the sample buffer.
        final FileInputRunner runner = new FileInputRunner(new BufferFileInputPlugin(sample));
        ConfigDiff merged;
        try {
            runner.transaction(runConfig, new InputPlugin.Control() {

                @Override
                public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount) {
                    if (taskCount == 0) {
                        throw new NoSampleException("No input files to guess");
                    }
                    // The output sink must never receive a page during guessing.
                    final PageOutput rejectingOutput = new PageOutput() {

                        @Override
                        public void add(Page page) {
                            // TODO exception class
                            throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
                        }

                        @Override
                        public void finish() {
                        }

                        @Override
                        public void close() {
                        }
                    };
                    runner.run(taskSource, null, 0, rejectingOutput);
                    throw new AssertionError("Guess executor must throw GuessedNoticeError");
                }
            });
            // The result is delivered through GuessedNoticeError, so returning normally is a bug.
            throw new AssertionError("Guess executor must throw GuessedNoticeError");
        } catch (GuessedNoticeError notice) {
            // merge to the last-guessed config
            merged = lastGuessed.deepCopy().merge(notice.getGuessedConfig());
        }

        if (lastGuessed.equals(merged)) {
            // not changed
            return lastGuessed;
        }
        lastGuessed = merged;
        round++;
    }
    return lastGuessed;
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) FileInputRunner(org.embulk.spi.FileInputRunner) Schema(org.embulk.spi.Schema) Page(org.embulk.spi.Page) ConfigSource(org.embulk.config.ConfigSource) PageOutput(org.embulk.spi.PageOutput) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConfigDiff(org.embulk.config.ConfigDiff) TaskSource(org.embulk.config.TaskSource)

Example 4 with ConfigDiff

use of org.embulk.config.ConfigDiff in project embulk by embulk.

the class TestCsvGuessPlugin method assertGuessByResource.

/**
 * Guesses a parser configuration for a CSV resource and asserts that it matches an expected YAML resource.
 *
 * <p>All three resource names are resolved relative to {@code RESOURCE_NAME_PREFIX}. The guess is
 * run with {@code exclude_guess_plugins} set to {@code ["json"]}.
 *
 * @param embulk the testing harness used to load resources and run the guess
 * @param seedYamlResourceName the YAML resource passed to the parser builder as the seed
 * @param sourceCsvResourceName the CSV resource used as guess input
 * @param resultResourceName the YAML resource holding the expected guess
 * @throws IOException declared by the original signature; the throwing call is not identifiable from here
 */
static void assertGuessByResource(TestingEmbulk embulk, String seedYamlResourceName, String sourceCsvResourceName, String resultResourceName) throws IOException {
    final ConfigSource parserSeed = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + seedYamlResourceName);

    final ConfigDiff actual = embulk.parserBuilder()
            .parser(parserSeed)
            .exec(embulk.newConfig().set("exclude_guess_plugins", ImmutableList.of("json")))
            .inputResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName)
            .guess();

    // Loaded after guessing, as in the original single-expression assertion.
    final DataSource expected = (DataSource) embulk.loadYamlResource(RESOURCE_NAME_PREFIX + resultResourceName);
    assertThat(actual, is(expected));
}
Also used : ConfigSource(org.embulk.config.ConfigSource) ConfigDiff(org.embulk.config.ConfigDiff) DataSource(org.embulk.config.DataSource)

Example 5 with ConfigDiff

use of org.embulk.config.ConfigDiff in project embulk by embulk.

the class TestCsvAllStringsGuessPlugin method testSimple.

/**
 * Guesses {@code test_simple.csv} with only the {@code csv_all_strings} guess plugin enabled
 * ({@code csv} excluded) and checks the result against {@code test_simple_guessed.yml}.
 */
@Test
public void testSimple() throws Exception {
    final ConfigSource execConfig = embulk.newConfig()
            .set("guess_plugins", ImmutableList.of("csv_all_strings"))
            .set("exclude_guess_plugins", ImmutableList.of("csv"));

    final ConfigDiff actual = embulk.parserBuilder()
            .exec(execConfig)
            .inputResource(RESOURCE_NAME_PREFIX + "test_simple.csv")
            .guess();

    final DataSource expected = (DataSource) embulk.loadYamlResource(RESOURCE_NAME_PREFIX + "test_simple_guessed.yml");
    assertThat(actual, is(expected));
}
Also used : ConfigSource(org.embulk.config.ConfigSource) ConfigDiff(org.embulk.config.ConfigDiff) DataSource(org.embulk.config.DataSource) Test(org.junit.Test)

Aggregations

ConfigDiff (org.embulk.config.ConfigDiff): 9, ConfigSource (org.embulk.config.ConfigSource): 6, InputPlugin (org.embulk.spi.InputPlugin): 4, ImmutableList (com.google.common.collect.ImmutableList): 3, List (java.util.List): 3, TaskSource (org.embulk.config.TaskSource): 3, Schema (org.embulk.spi.Schema): 3, IOException (java.io.IOException): 2, ArrayList (java.util.ArrayList): 2, DataSource (org.embulk.config.DataSource): 2, TaskReport (org.embulk.config.TaskReport): 2, ExecutorPlugin (org.embulk.spi.ExecutorPlugin): 2, ExecutionResult (org.embulk.exec.ExecutionResult): 1, FileInputRunner (org.embulk.spi.FileInputRunner): 1, Page (org.embulk.spi.Page): 1, PageOutput (org.embulk.spi.PageOutput): 1, Test (org.junit.Test): 1