Search in sources :

Example 6 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

the class TestRenameFilterPlugin method checkConfigExceptionIfUnknownRenamingOperatorName.

/**
 * A "rules" entry naming an operator the filter does not know is expected to make
 * {@code filter.transaction} fail with a {@code ConfigException}.
 */
@Test
public void checkConfigExceptionIfUnknownRenamingOperatorName() {
    final ConfigSource configWithUnknownRule = Exec.newConfigSource()
            .set("rules", ImmutableList.of(ImmutableMap.of("rule", "some_unknown_renaming_operator")));

    // The control callback does nothing; only the exception raised by transaction() matters.
    final FilterPlugin.Control noOpControl = new FilterPlugin.Control() {

        public void run(TaskSource task, Schema schema) {
        }
    };

    try {
        filter.transaction(configWithUnknownRule, SCHEMA, noOpControl);
        // Reaching this line means no exception was raised at all.
        fail();
    } catch (Throwable thrown) {
        assertTrue(thrown instanceof ConfigException);
    }
}
Also used : ConfigSource(org.embulk.config.ConfigSource) FilterPlugin(org.embulk.spi.FilterPlugin) Schema(org.embulk.spi.Schema) ConfigException(org.embulk.config.ConfigException) SchemaConfigException(org.embulk.spi.SchemaConfigException) TaskSource(org.embulk.config.TaskSource) Test(org.junit.Test)

Example 7 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

the class TestRenameFilterPlugin method throwSchemaConfigExceptionIfColumnNotFound.

/**
 * A "columns" mapping whose source column is "not_found" is expected to make
 * {@code filter.transaction} fail with a {@code SchemaConfigException}.
 */
@Test
public void throwSchemaConfigExceptionIfColumnNotFound() {
    final ConfigSource configWithMissingColumn = Exec.newConfigSource()
            .set("columns", ImmutableMap.of("not_found", "any_name"));

    // The control callback does nothing; only the exception raised by transaction() matters.
    final FilterPlugin.Control noOpControl = new FilterPlugin.Control() {

        public void run(TaskSource task, Schema schema) {
        }
    };

    try {
        filter.transaction(configWithMissingColumn, SCHEMA, noOpControl);
        // Reaching this line means no exception was raised at all.
        fail();
    } catch (Throwable thrown) {
        assertTrue(thrown instanceof SchemaConfigException);
    }
}
Also used : ConfigSource(org.embulk.config.ConfigSource) FilterPlugin(org.embulk.spi.FilterPlugin) Schema(org.embulk.spi.Schema) TaskSource(org.embulk.config.TaskSource) SchemaConfigException(org.embulk.spi.SchemaConfigException) Test(org.junit.Test)

Example 8 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

the class TestRenameFilterPlugin method checkConfigExceptionIfUnknownListTypeOfRenamingOperator.

/**
 * A renaming rule given as a list (rather than a map) is expected to make
 * {@code filter.transaction} fail with a {@code ConfigException}.
 */
@Test
public void checkConfigExceptionIfUnknownListTypeOfRenamingOperator() {
    // A list [] shouldn't come as a renaming rule.
    final ConfigSource configWithListRule = Exec.newConfigSource()
            .set("rules", ImmutableList.of(ImmutableList.of("listed_operator1", "listed_operator2")));

    // The control callback does nothing; only the exception raised by transaction() matters.
    final FilterPlugin.Control noOpControl = new FilterPlugin.Control() {

        public void run(TaskSource task, Schema schema) {
        }
    };

    try {
        filter.transaction(configWithListRule, SCHEMA, noOpControl);
        // Reaching this line means no exception was raised at all.
        fail();
    } catch (Throwable thrown) {
        assertTrue(thrown instanceof ConfigException);
    }
}
Also used : ConfigSource(org.embulk.config.ConfigSource) FilterPlugin(org.embulk.spi.FilterPlugin) Schema(org.embulk.spi.Schema) ConfigException(org.embulk.config.ConfigException) SchemaConfigException(org.embulk.spi.SchemaConfigException) TaskSource(org.embulk.config.TaskSource) Test(org.junit.Test)

Example 9 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

the class BulkLoader method doRun.

/**
 * Runs one bulk load by nesting the input, filter, executor and output plugin
 * transactions inside one another, recording progress in a {@code LoaderState}.
 *
 * <p>Each nested callback stores the values it receives on {@code state} and advances
 * the transaction stage. The stage markers are set in this order on the success path:
 * INPUT_BEGIN, FILTER_BEGIN, EXECUTOR_BEGIN, OUTPUT_BEGIN, RUN, OUTPUT_COMMIT,
 * EXECUTOR_COMMIT, FILTER_COMMIT, INPUT_COMMIT, CLEANUP.
 *
 * <p>If anything is thrown, the method returns a "skipped" result when
 * {@code isSkippedTransaction(ex)} holds, returns a result carrying the exception as a
 * warning when {@code state} reports all tasks and all transactions committed, and
 * otherwise throws the exception built by {@code state.buildPartialExecuteException}.
 *
 * @param config configuration loaded into a {@code BulkLoaderTask}; also passed to
 *               {@code cleanupCommittedTransaction} after the input transaction returns
 * @return the execution result built by {@code state}
 */
private ExecutionResult doRun(ConfigSource config) {
    final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
    final ExecutorPlugin exec = newExecutorPlugin(task);
    final ProcessPluginSet plugins = new ProcessPluginSet(task);
    final LoaderState state = newLoaderState(Exec.getLogger(BulkLoader.class), plugins);
    state.setTransactionStage(TransactionStage.INPUT_BEGIN);
    try {
        // Outermost level: the input plugin's transaction wraps everything else.
        ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {

            public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount) {
                state.setInputTaskSource(inputTask);
                state.setTransactionStage(TransactionStage.FILTER_BEGIN);
                // Second level: the filter plugins' transaction, started from the input schema.
                Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {

                    public void run(final List<TaskSource> filterTasks, final List<Schema> schemas) {
                        state.setSchemas(schemas);
                        state.setFilterTaskSources(filterTasks);
                        state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
                        // Third level: the executor plugin receives last(schemas) and the input task count.
                        exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {

                            public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor) {
                                state.setExecutorSchema(executorSchema);
                                state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
                                // Innermost level: the output plugin's transaction, where the tasks are actually executed.
                                @SuppressWarnings("checkstyle:LineLength") ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {

                                    public List<TaskReport> run(final TaskSource outputTask) {
                                        state.setOutputTaskSource(outputTask);
                                        state.initialize(inputTaskCount, outputTaskCount);
                                        state.setTransactionStage(TransactionStage.RUN);
                                        // NOTE(review): the original comment here reads "inputTaskCount == 0";
                                        // presumably all tasks already count as committed in that case, so
                                        // execute() is skipped -- confirm against LoaderState.
                                        if (!state.isAllTasksCommitted()) {
                                            execute(task, executor, state);
                                        }
                                        // Checked again after execute(): any task still uncommitted aborts the run.
                                        if (!state.isAllTasksCommitted()) {
                                            throw new RuntimeException(String.format("%d input tasks and %d output tasks failed", state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
                                        }
                                        state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
                                        return state.getAllOutputTaskReports();
                                    }
                                });
                                state.setOutputConfigDiff(outputConfigDiff);
                                state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
                            }
                        });
                        state.setTransactionStage(TransactionStage.FILTER_COMMIT);
                    }
                });
                state.setTransactionStage(TransactionStage.INPUT_COMMIT);
                return state.getAllInputTaskReports();
            }
        });
        state.setInputConfigDiff(inputConfigDiff);
        state.setTransactionStage(TransactionStage.CLEANUP);
        cleanupCommittedTransaction(config, state);
        return state.buildExecuteResult();
    } catch (Throwable ex) {
        if (isSkippedTransaction(ex)) {
            // The cast relies on isSkippedTransaction(ex) implying ex is a SkipTransactionException.
            ConfigDiff configDiff = ((SkipTransactionException) ex).getConfigDiff();
            return state.buildExecuteResultOfSkippedExecution(configDiff);
        } else if (state.isAllTasksCommitted() && state.isAllTransactionsCommitted()) {
            // Everything was committed, so the exception is not rethrown; it is handed to
            // the result builder as a warning instead.
            return state.buildExecuteResultWithWarningException(ex);
        }
        // Neither skipped nor fully committed: throw the exception built from the recorded state.
        throw state.buildPartialExecuteException(ex, Exec.session());
    }
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) TaskReport(org.embulk.config.TaskReport) ExecutorPlugin(org.embulk.spi.ExecutorPlugin) Schema(org.embulk.spi.Schema) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConfigDiff(org.embulk.config.ConfigDiff) TaskSource(org.embulk.config.TaskSource)

Example 10 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

the class GuessExecutor method guessParserConfig.

/**
 * Repeatedly runs the guess input over {@code sample}, merging each round's guessed
 * configuration into the previous result, until a round changes nothing or 10 rounds
 * have been run.
 *
 * <p>Each round is expected to end with a {@code GuessedNoticeError} thrown out of
 * {@code input.transaction}; the guessed configuration is read from that error.
 *
 * @param sample buffer wrapped in a {@code BufferFileInputPlugin} as the input to guess from
 * @param config base configuration; a deep copy is merged with the latest guess each round
 * @param guessPlugins value stored under the parser's "guess_plugins" key
 * @param guessParserSampleBufferBytes value stored under the parser's
 *        "guess_parser_sample_buffer_bytes" key
 * @return the last guessed configuration diff
 */
private ConfigDiff guessParserConfig(Buffer sample, ConfigSource config, List<PluginType> guessPlugins, final int guessParserSampleBufferBytes) {
    ConfigDiff accumulated = Exec.newConfigDiff();

    // repeat guessing up to 10 times
    int round = 0;
    while (round < 10) {
        // include last-guessed config to run guess input
        final ConfigSource mergedConfig = config.deepCopy().merge(accumulated);
        final ConfigSource guessInputConfig = mergedConfig.deepCopy();

        // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
        guessInputConfig.getNestedOrSetEmpty("parser")
                .set("type", "system_guess")
                .set("guess_plugins", guessPlugins)
                .set("orig_config", mergedConfig)
                .set("guess_parser_sample_buffer_bytes", guessParserSampleBufferBytes);

        // run FileInputPlugin
        final FileInputRunner input = new FileInputRunner(new BufferFileInputPlugin(sample));
        final InputPlugin.Control guessControl = new InputPlugin.Control() {

            public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount) {
                if (taskCount == 0) {
                    throw new NoSampleException("No input files to guess");
                }

                // Any page reaching this output means the run did not go through the guess parser.
                final PageOutput rejectingOutput = new PageOutput() {

                    @Override
                    public void add(Page page) {
                        // TODO exception class
                        throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
                    }

                    @Override
                    public void finish() {
                    }

                    @Override
                    public void close() {
                    }
                };
                input.run(inputTaskSource, null, 0, rejectingOutput);
                throw new AssertionError("Guess executor must throw GuessedNoticeError");
            }
        };

        ConfigDiff candidate;
        try {
            input.transaction(guessInputConfig, guessControl);
            throw new AssertionError("Guess executor must throw GuessedNoticeError");
        } catch (GuessedNoticeError notice) {
            // merge to the last-guessed config
            candidate = accumulated.deepCopy().merge(notice.getGuessedConfig());
        }

        if (accumulated.equals(candidate)) {
            // not changed
            break;
        }
        accumulated = candidate;
        round++;
    }
    return accumulated;
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) FileInputRunner(org.embulk.spi.FileInputRunner) Schema(org.embulk.spi.Schema) Page(org.embulk.spi.Page) ConfigSource(org.embulk.config.ConfigSource) PageOutput(org.embulk.spi.PageOutput) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConfigDiff(org.embulk.config.ConfigDiff) TaskSource(org.embulk.config.TaskSource)

Aggregations

Schema (org.embulk.spi.Schema): 22, TaskSource (org.embulk.config.TaskSource): 12, Column (org.embulk.spi.Column): 10, ConfigSource (org.embulk.config.ConfigSource): 9, ConfigException (org.embulk.config.ConfigException): 8, List (java.util.List): 7, FilterPlugin (org.embulk.spi.FilterPlugin): 7, Test (org.junit.Test): 6, ImmutableList (com.google.common.collect.ImmutableList): 5, InputPlugin (org.embulk.spi.InputPlugin): 5, SchemaConfigException (org.embulk.spi.SchemaConfigException): 5, ArrayList (java.util.ArrayList): 4, ConfigDiff (org.embulk.config.ConfigDiff): 3, PageOutput (org.embulk.spi.PageOutput): 3, HashMap (java.util.HashMap): 2, TaskReport (org.embulk.config.TaskReport): 2, ExecutorPlugin (org.embulk.spi.ExecutorPlugin): 2, Page (org.embulk.spi.Page): 2, LineDecoder (org.embulk.spi.util.LineDecoder): 2, JsonNode (com.fasterxml.jackson.databind.JsonNode): 1