Search in sources :

Example 6 with TaskSource

use of org.embulk.config.TaskSource in project embulk by embulk.

The method doCleanup of the class BulkLoader.

/**
 * Invokes {@code cleanup} on the input plugin and then on the output plugin,
 * using the task sources, schemas and task reports recorded in {@code resume}.
 *
 * <p>Only the task reports that are present in {@code resume} are passed on as
 * successful reports; the task counts passed are the sizes of the complete
 * report lists, including absent entries.
 *
 * @param config configuration loaded as a {@code BulkLoaderTask} to build the plugin set
 * @param resume resume state that supplies task sources, schemas and task reports
 */
public void doCleanup(ConfigSource config, ResumeState resume) {
    final BulkLoaderTask loaderTask = config.loadConfig(BulkLoaderTask.class);
    // TODO don't create filter plugins
    final ProcessPluginSet pluginSet = new ProcessPluginSet(loaderTask);

    // Collect the reports of the input tasks that actually produced one.
    final ImmutableList.Builder<TaskReport> presentInputReports = ImmutableList.builder();
    for (Optional<TaskReport> report : resume.getInputTaskReports()) {
        if (report.isPresent()) {
            presentInputReports.add(report.get());
        }
    }

    // Same for the output side.
    final ImmutableList.Builder<TaskReport> presentOutputReports = ImmutableList.builder();
    for (Optional<TaskReport> report : resume.getOutputTaskReports()) {
        if (report.isPresent()) {
            presentOutputReports.add(report.get());
        }
    }

    // A FileInputRunner gets the task source extracted by its own static helper.
    final TaskSource inputTaskSource = (pluginSet.getInputPlugin() instanceof FileInputRunner)
            ? FileInputRunner.getFileInputTaskSource(resume.getInputTaskSource())
            : resume.getInputTaskSource();
    pluginSet.getInputPlugin().cleanup(
            inputTaskSource,
            resume.getInputSchema(),
            resume.getInputTaskReports().size(),
            presentInputReports.build());

    // A FileOutputRunner is handled the same way on the output side.
    final TaskSource outputTaskSource = (pluginSet.getOutputPlugin() instanceof FileOutputRunner)
            ? FileOutputRunner.getFileOutputTaskSource(resume.getOutputTaskSource())
            : resume.getOutputTaskSource();
    pluginSet.getOutputPlugin().cleanup(
            outputTaskSource,
            resume.getOutputSchema(),
            resume.getOutputTaskReports().size(),
            presentOutputReports.build());
}
Also used : TaskReport(org.embulk.config.TaskReport) ImmutableList(com.google.common.collect.ImmutableList) FileInputRunner(org.embulk.spi.FileInputRunner) FileOutputRunner(org.embulk.spi.FileOutputRunner) TaskSource(org.embulk.config.TaskSource)

Example 7 with TaskSource

use of org.embulk.config.TaskSource in project embulk by embulk.

The method doRun of the class BulkLoader.

/**
 * Runs one bulk load as a chain of nested transactions: input plugin, then
 * filters, then executor plugin, then output plugin. Each level records its
 * progress in a shared {@code LoaderState} through {@code setTransactionStage},
 * so the catch block at the end can decide how to report a failure.
 *
 * @param config configuration loaded as a {@code BulkLoaderTask}
 * @return the result built from the loader state; on failure this may be a
 *         "skipped" result or a result carrying the exception as a warning
 *         (see the catch block)
 */
private ExecutionResult doRun(ConfigSource config) {
    final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
    final ExecutorPlugin exec = newExecutorPlugin(task);
    final ProcessPluginSet plugins = new ProcessPluginSet(task);
    final LoaderState state = newLoaderState(Exec.getLogger(BulkLoader.class), plugins);
    state.setTransactionStage(TransactionStage.INPUT_BEGIN);
    try {
        // Level 1: input plugin transaction.
        ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {

            public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount) {
                state.setInputTaskSource(inputTask);
                state.setTransactionStage(TransactionStage.FILTER_BEGIN);
                // Level 2: filter plugins transaction, starting from the input schema.
                Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {

                    public void run(final List<TaskSource> filterTasks, final List<Schema> schemas) {
                        state.setSchemas(schemas);
                        state.setFilterTaskSources(filterTasks);
                        state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
                        // Level 3: executor transaction, given the last schema of the filter chain.
                        exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {

                            public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor) {
                                state.setExecutorSchema(executorSchema);
                                state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
                                // Level 4: output plugin transaction; the tasks are executed inside it.
                                @SuppressWarnings("checkstyle:LineLength") ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {

                                    public List<TaskReport> run(final TaskSource outputTask) {
                                        state.setOutputTaskSource(outputTask);
                                        state.initialize(inputTaskCount, outputTaskCount);
                                        state.setTransactionStage(TransactionStage.RUN);
                                        if (!state.isAllTasksCommitted()) {
                                            // NOTE(review): the original note here read "inputTaskCount == 0";
                                            // presumably isAllTasksCommitted() is already true right after
                                            // initialize() only when inputTaskCount == 0, in which case
                                            // execution is skipped - confirm against LoaderState.
                                            execute(task, executor, state);
                                        }
                                        // Anything still uncommitted after execution is a failure.
                                        if (!state.isAllTasksCommitted()) {
                                            throw new RuntimeException(String.format("%d input tasks and %d output tasks failed", state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
                                        }
                                        state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
                                        return state.getAllOutputTaskReports();
                                    }
                                });
                                state.setOutputConfigDiff(outputConfigDiff);
                                state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
                            }
                        });
                        state.setTransactionStage(TransactionStage.FILTER_COMMIT);
                    }
                });
                state.setTransactionStage(TransactionStage.INPUT_COMMIT);
                return state.getAllInputTaskReports();
            }
        });
        // All four transactions returned normally.
        state.setInputConfigDiff(inputConfigDiff);
        state.setTransactionStage(TransactionStage.CLEANUP);
        cleanupCommittedTransaction(config, state);
        return state.buildExecuteResult();
    } catch (Throwable ex) {
        if (isSkippedTransaction(ex)) {
            // A skipped transaction is reported as a result built from its config diff, not as an error.
            ConfigDiff configDiff = ((SkipTransactionException) ex).getConfigDiff();
            return state.buildExecuteResultOfSkippedExecution(configDiff);
        } else if (state.isAllTasksCommitted() && state.isAllTransactionsCommitted()) {
            // Everything was committed before the exception occurred, so it is not
            // rethrown; it is handed to the result builder as a warning instead.
            return state.buildExecuteResultWithWarningException(ex);
        }
        // Otherwise the run only partially completed; surface that to the caller.
        throw state.buildPartialExecuteException(ex, Exec.session());
    }
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) TaskReport(org.embulk.config.TaskReport) ExecutorPlugin(org.embulk.spi.ExecutorPlugin) Schema(org.embulk.spi.Schema) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConfigDiff(org.embulk.config.ConfigDiff) TaskSource(org.embulk.config.TaskSource)

Example 8 with TaskSource

use of org.embulk.config.TaskSource in project embulk by embulk.

The method guessParserConfig of the class GuessExecutor.

/**
 * Guesses the parser configuration for {@code sample} by running the guess
 * plugins repeatedly, feeding each round's result into the next one.
 *
 * <p>The loop ends as soon as a round yields a configuration equal to the
 * previous one, or after 10 rounds at most.
 *
 * @param sample sample bytes served to the file input runner
 * @param config base configuration; never modified, a deep copy is used per round
 * @param guessPlugins plugin types stored under the parser's {@code guess_plugins} key
 * @param guessParserSampleBufferBytes value stored under the parser's
 *        {@code guess_parser_sample_buffer_bytes} key
 * @return the last guessed configuration diff
 */
private ConfigDiff guessParserConfig(Buffer sample, ConfigSource config, List<PluginType> guessPlugins, final int guessParserSampleBufferBytes) {
    ConfigDiff lastGuessed = Exec.newConfigDiff();
    // repeat guessing up to 10 times
    for (int round = 0; round < 10; round++) {
        // include last-guessed config to run guess input
        final ConfigSource mergedConfig = config.deepCopy().merge(lastGuessed);
        final ConfigSource guessInputConfig = mergedConfig.deepCopy();
        // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
        guessInputConfig.getNestedOrSetEmpty("parser")
                .set("type", "system_guess")
                .set("guess_plugins", guessPlugins)
                .set("orig_config", mergedConfig)
                .set("guess_parser_sample_buffer_bytes", guessParserSampleBufferBytes);

        // run FileInputPlugin
        final FileInputRunner guessRunner = new FileInputRunner(new BufferFileInputPlugin(sample));
        ConfigDiff guessed;
        try {
            guessRunner.transaction(guessInputConfig, new InputPlugin.Control() {

                public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount) {
                    if (taskCount == 0) {
                        throw new NoSampleException("No input files to guess");
                    }
                    // No page is expected to reach the output while guessing.
                    final PageOutput rejectingOutput = new PageOutput() {

                        @Override
                        public void add(Page page) {
                            // TODO exception class
                            throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
                        }

                        @Override
                        public void finish() {
                        }

                        @Override
                        public void close() {
                        }
                    };
                    guessRunner.run(inputTaskSource, null, 0, rejectingOutput);
                    throw new AssertionError("Guess executor must throw GuessedNoticeError");
                }
            });
            throw new AssertionError("Guess executor must throw GuessedNoticeError");
        } catch (GuessedNoticeError notice) {
            // The guessed configuration is delivered through this error; merge it into the last guess.
            guessed = lastGuessed.deepCopy().merge(notice.getGuessedConfig());
        }

        if (lastGuessed.equals(guessed)) {
            // not changed
            return lastGuessed;
        }
        lastGuessed = guessed;
    }
    return lastGuessed;
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) FileInputRunner(org.embulk.spi.FileInputRunner) Schema(org.embulk.spi.Schema) Page(org.embulk.spi.Page) ConfigSource(org.embulk.config.ConfigSource) PageOutput(org.embulk.spi.PageOutput) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConfigDiff(org.embulk.config.ConfigDiff) TaskSource(org.embulk.config.TaskSource)

Example 9 with TaskSource

use of org.embulk.config.TaskSource in project embulk by embulk.

The method checkConfigExceptionIfUnknownStringTypeOfRenamingOperator of the class TestRenameFilterPlugin.

/**
 * Verifies that a plain string given as a renaming rule makes
 * {@code filter.transaction} throw a {@code ConfigException}.
 *
 * <p>Only {@code ConfigException} is caught. Any other exception propagates
 * with its own stack trace, and the "nothing was thrown" case fails with an
 * explicit message instead of being caught by the test's own handler.
 */
@Test
public void checkConfigExceptionIfUnknownStringTypeOfRenamingOperator() {
    // A simple string shouldn't come as a renaming rule.
    ConfigSource pluginConfig = Exec.newConfigSource().set("rules", ImmutableList.of("string_rule"));
    try {
        filter.transaction(pluginConfig, SCHEMA, new FilterPlugin.Control() {

            public void run(TaskSource task, Schema schema) {
            }
        });
    } catch (ConfigException expected) {
        // The invalid rule was rejected as required.
        return;
    }
    // Placed outside the try block so that no handler above can intercept it.
    fail("Expected ConfigException for a plain string renaming rule, but no exception was thrown");
}
Also used : ConfigSource(org.embulk.config.ConfigSource) FilterPlugin(org.embulk.spi.FilterPlugin) Schema(org.embulk.spi.Schema) ConfigException(org.embulk.config.ConfigException) SchemaConfigException(org.embulk.spi.SchemaConfigException) TaskSource(org.embulk.config.TaskSource) Test(org.junit.Test)

Example 10 with TaskSource

use of org.embulk.config.TaskSource in project embulk by embulk.

The method checkUniqueNumberSuffixRuleInternal of the class TestRenameFilterPlugin.

/**
 * Applies a single {@code unique_number_suffix} rule to a schema built from
 * {@code originalColumnNames} and asserts that the resulting column names
 * equal {@code expectedColumnNames} and that column types are unchanged.
 *
 * @param originalColumnNames column names of the input schema; every column is added as {@code STRING}
 * @param expectedColumnNames column names expected after the rule has been applied
 * @param delimiter set as the rule's {@code delimiter} unless it equals {@code DEFAULT}
 * @param digits set as the rule's {@code digits} when it is zero or greater
 * @param max_length set as the rule's {@code max_length} unless it is {@code -1}
 */
private void checkUniqueNumberSuffixRuleInternal(final String[] originalColumnNames, final String[] expectedColumnNames, String delimiter, int digits, int max_length) {
    // Build the input schema: one STRING column per given name.
    final Schema.Builder schemaBuilder = Schema.builder();
    for (int index = 0; index < originalColumnNames.length; ++index) {
        schemaBuilder.add(originalColumnNames[index], STRING);
    }
    final Schema originalSchema = schemaBuilder.build();

    // Assemble the rule; optional keys are left out when the caller passed the "unset" value.
    final HashMap<String, Object> rule = new HashMap<>();
    rule.put("rule", "unique_number_suffix");
    if (!delimiter.equals(DEFAULT)) {
        rule.put("delimiter", delimiter);
    }
    if (digits >= 0) {
        rule.put("digits", digits);
    }
    if (max_length != -1) {
        rule.put("max_length", max_length);
    }

    final ConfigSource pluginConfig = Exec.newConfigSource()
            .set("rules", ImmutableList.of(ImmutableMap.copyOf(rule)));

    filter.transaction(pluginConfig, originalSchema, new FilterPlugin.Control() {

        @Override
        public void run(TaskSource task, Schema newSchema) {
            // Gather the column names the filter produced.
            final ArrayList<String> actualNames = new ArrayList<>(newSchema.size());
            for (Column column : newSchema.getColumns()) {
                actualNames.add(column.getName());
            }
            final String[] actualNameArray = Iterables.toArray(actualNames, String.class);
            assertEquals(expectedColumnNames, actualNameArray);

            // Renaming must leave the column types untouched.
            for (int position = 0; position < expectedColumnNames.length; ++position) {
                final Column before = originalSchema.getColumn(position);
                final Column after = newSchema.getColumn(position);
                assertEquals(before.getType(), after.getType());
            }
        }
    });
}
Also used : FilterPlugin(org.embulk.spi.FilterPlugin) HashMap(java.util.HashMap) Schema(org.embulk.spi.Schema) ArrayList(java.util.ArrayList) ConfigSource(org.embulk.config.ConfigSource) Column(org.embulk.spi.Column) TaskSource(org.embulk.config.TaskSource)

Aggregations

TaskSource (org.embulk.config.TaskSource)17 ConfigSource (org.embulk.config.ConfigSource)12 Schema (org.embulk.spi.Schema)12 List (java.util.List)9 Test (org.junit.Test)9 ImmutableList (com.google.common.collect.ImmutableList)8 ArrayList (java.util.ArrayList)7 FilterPlugin (org.embulk.spi.FilterPlugin)7 TaskReport (org.embulk.config.TaskReport)5 InputPlugin (org.embulk.spi.InputPlugin)5 SchemaConfigException (org.embulk.spi.SchemaConfigException)4 ConfigDiff (org.embulk.config.ConfigDiff)3 ConfigException (org.embulk.config.ConfigException)3 PageOutput (org.embulk.spi.PageOutput)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 LinkedList (java.util.LinkedList)2 Column (org.embulk.spi.Column)2 ExecutorPlugin (org.embulk.spi.ExecutorPlugin)2 FileInputRunner (org.embulk.spi.FileInputRunner)2 Page (org.embulk.spi.Page)2