Search in sources :

Example 16 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

The method applyFirstCharacterTypesRule of the class RenameFilterPlugin.

/**
 * Renames columns whose first character is not allowed by a "first_character_types" rule.
 *
 * <p>A column name is affected when its first character is neither covered by one of the
 * {@code pass_types} keywords nor listed in {@code pass_characters}. An affected name either has
 * its first character substituted by {@code replace}, or gets {@code prefix} prepended. Column
 * types are carried over unchanged, and empty names are never modified.
 *
 * @param inputSchema schema whose column names are checked
 * @param rule rule supplying "replace", "prefix", "pass_types" and "pass_characters"
 * @return a new schema with the same column types and the adjusted names
 * @throws ConfigException if "replace" or "prefix" is not exactly one character, if both or
 *     neither of them are given, if "pass_characters" contains "\E", or if a pass type keyword
 *     is unknown
 */
private Schema applyFirstCharacterTypesRule(Schema inputSchema, FirstCharacterTypesRule rule) {
    final Optional<String> replace = rule.getReplace();
    final List<String> passTypes = rule.getPassTypes();
    final String passCharacters = rule.getPassCharacters();
    final Optional<String> prefix = rule.getPrefix();
    if (replace.isPresent() && replace.get().length() != 1) {
        throw new ConfigException("\"replace\" in \"first_character_types\" must contain just 1 character if specified");
    }
    if (prefix.isPresent() && prefix.get().length() != 1) {
        throw new ConfigException("\"prefix\" in \"first_character_types\" must contain just 1 character if specified");
    }
    if (prefix.isPresent() && replace.isPresent()) {
        throw new ConfigException("\"replace\" and \"prefix\" in \"first_character_types\" must not be specified together");
    }
    if ((!prefix.isPresent()) && (!replace.isPresent())) {
        throw new ConfigException("Either of \"replace\" or \"prefix\" must be specified in \"first_character_types\"");
    }
    // TODO(dmikurube): Revisit this for better escaping.
    // "\E" would terminate the \Q...\E quotation used below, so it is rejected up front.
    if (passCharacters.contains("\\E")) {
        throw new ConfigException("\"pass_characters\" in \"first_character_types\" must not contain \"\\E\"");
    }
    // Build a negated character class that matches one character which is NOT allowed to pass.
    final StringBuilder regexBuilder = new StringBuilder();
    regexBuilder.append("^[^");
    for (String target : passTypes) {
        if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
            regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
        } else {
            throw new ConfigException("\"" + target + "\" is an unknown character type keyword");
        }
    }
    if (!passCharacters.isEmpty()) {
        regexBuilder.append("\\Q");
        regexBuilder.append(passCharacters);
        regexBuilder.append("\\E");
    }
    regexBuilder.append("]");
    // Compile once instead of once per column. lookingAt() only inspects the beginning of the
    // name, so the rest of the name (including line terminators, which "." would not match)
    // cannot prevent the rule from applying.
    final java.util.regex.Pattern nonPassFirstCharacter = java.util.regex.Pattern.compile(regexBuilder.toString());
    final Schema.Builder schemaBuilder = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        String name = column.getName();
        if (nonPassFirstCharacter.matcher(name).lookingAt()) {
            if (replace.isPresent()) {
                // Substitute the offending first character.
                name = replace.get() + name.substring(1);
            } else if (prefix.isPresent()) {
                // Keep the whole name and put the prefix in front of it.
                name = prefix.get() + name;
            }
        }
        schemaBuilder.add(name, column.getType());
    }
    return schemaBuilder.build();
}
Also used : Column(org.embulk.spi.Column) Schema(org.embulk.spi.Schema) ConfigException(org.embulk.config.ConfigException)

Example 17 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

The method applyCharacterTypesRule of the class RenameFilterPlugin.

/**
 * Replaces every disallowed character in each column name according to a "character_types" rule.
 *
 * <p>A character is disallowed when it is neither covered by one of the {@code pass_types}
 * keywords nor listed in {@code pass_characters}. Each such character is substituted by the
 * single character given as {@code replace}, which is always inserted literally. Column types
 * are carried over unchanged.
 *
 * @param inputSchema schema whose column names are rewritten
 * @param rule rule supplying "replace", "pass_types" and "pass_characters"
 * @return a new schema with the same column types and the rewritten names
 * @throws ConfigException if "replace" is empty or longer than one character, if
 *     "pass_characters" contains "\E", or if a pass type keyword is unknown
 */
private Schema applyCharacterTypesRule(Schema inputSchema, CharacterTypesRule rule) {
    final List<String> passTypes = rule.getPassTypes();
    final String passCharacters = rule.getPassCharacters();
    final String replace = rule.getReplace();
    if (replace.isEmpty()) {
        throw new ConfigException("\"replace\" in \"character_types\" must not be explicitly empty");
    }
    if (replace.length() != 1) {
        throw new ConfigException("\"replace\" in \"character_types\" must contain just 1 character");
    }
    // TODO(dmikurube): Revisit this for better escaping.
    // "\E" would terminate the \Q...\E quotation used below, so it is rejected up front.
    if (passCharacters.contains("\\E")) {
        throw new ConfigException("\"pass_characters\" in \"character_types\" must not contain \"\\E\"");
    }
    // Build a negated character class that matches one character which is NOT allowed to pass.
    final StringBuilder regexBuilder = new StringBuilder();
    regexBuilder.append("[^");
    for (String target : passTypes) {
        if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
            regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
        } else {
            throw new ConfigException("\"" + target + "\" is an unknown character type keyword");
        }
    }
    if (!passCharacters.isEmpty()) {
        regexBuilder.append("\\Q");
        regexBuilder.append(passCharacters);
        regexBuilder.append("\\E");
    }
    regexBuilder.append("]");
    // Compile once instead of once per column.
    final java.util.regex.Pattern nonPassCharacter = java.util.regex.Pattern.compile(regexBuilder.toString());
    // "$" and "\" have a special meaning in regex replacement strings; quoting makes a
    // one-character "replace" such as "$" behave as a literal instead of raising an exception.
    final String literalReplacement = java.util.regex.Matcher.quoteReplacement(replace);
    final Schema.Builder schemaBuilder = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        final String renamed = nonPassCharacter.matcher(column.getName()).replaceAll(literalReplacement);
        schemaBuilder.add(renamed, column.getType());
    }
    return schemaBuilder.build();
}
Also used : Column(org.embulk.spi.Column) Schema(org.embulk.spi.Schema) ConfigException(org.embulk.config.ConfigException)

Example 18 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

The method transaction of the class RenameFilterPlugin.

/**
 * Computes the renamed output schema and hands it to the next stage.
 *
 * <p>Renaming happens in two phases: the explicit rename map is applied first, then each
 * configured rule is applied in list order, every rule seeing the result of the previous one.
 *
 * @param config filter configuration, loaded as a {@code PluginTask}
 * @param inputSchema schema to rename
 * @param control callback that receives the dumped task and the final schema
 */
@Override
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) {
    final PluginTask task = config.loadConfig(PluginTask.class);
    final Map<String, String> renameMap = task.getRenameMap();
    final List<ConfigSource> rulesList = task.getRulesList();

    // Every column named in the rename map must exist in the input schema;
    // lookupColumn throws SchemaConfigException otherwise.
    for (final String requestedName : renameMap.keySet()) {
        inputSchema.lookupColumn(requestedName);
    }

    // Phase 1: rename by the explicit map, before any rule is applied.
    final Schema.Builder renamedColumns = Schema.builder();
    for (final Column column : inputSchema.getColumns()) {
        final String originalName = column.getName();
        final String newName = renameMap.containsKey(originalName)
                ? renameMap.get(originalName)
                : originalName;
        renamedColumns.add(newName, column.getType());
    }

    // Phase 2: apply the rules one after another on the running schema.
    Schema schema = renamedColumns.build();
    for (final ConfigSource rule : rulesList) {
        schema = applyRule(rule, schema);
    }

    control.run(task.dump(), schema);
}
Also used : ConfigSource(org.embulk.config.ConfigSource) Column(org.embulk.spi.Column) Schema(org.embulk.spi.Schema)

Example 19 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

The method doResume of the class BulkLoader.

/**
 * Resumes a bulk load from a previously saved {@code ResumeState}.
 *
 * <p>The input, filter, executor and output stages are entered through nested callbacks, and
 * the current stage is recorded on the loader state before each step. Task reports saved in
 * {@code resume} are restored before deciding whether anything still has to be executed.
 *
 * @param config bulk loader configuration, loaded as a {@code BulkLoaderTask}
 * @param resume saved state providing the input/output task sources, the input schema and the
 *     earlier task reports
 * @return the execution result built from the loader state; for a skipped transaction, or when
 *     an exception occurs although everything was committed, a corresponding result is built
 *     instead of rethrowing
 */
private ExecutionResult doResume(ConfigSource config, final ResumeState resume) {
    final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
    final ExecutorPlugin exec = newExecutorPlugin(task);
    final ProcessPluginSet plugins = new ProcessPluginSet(task);
    final LoaderState state = newLoaderState(Exec.getLogger(BulkLoader.class), plugins);
    state.setTransactionStage(TransactionStage.INPUT_BEGIN);
    try {
        // Outermost level: resume the input plugin with the saved task source and schema.
        @SuppressWarnings("checkstyle:LineLength") ConfigDiff inputConfigDiff = plugins.getInputPlugin().resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputTaskReports().size(), new InputPlugin.Control() {

            public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount) {
                // TODO validate inputTask?
                // TODO validate inputSchema
                state.setInputTaskSource(inputTask);
                state.setTransactionStage(TransactionStage.FILTER_BEGIN);
                // Second level: run the filter transaction to obtain the filter tasks and schemas.
                Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {

                    public void run(final List<TaskSource> filterTasks, final List<Schema> schemas) {
                        state.setSchemas(schemas);
                        state.setFilterTaskSources(filterTasks);
                        state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
                        // Third level: the executor is given the last schema of the filter chain.
                        exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {

                            public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor) {
                                // TODO validate executorSchema
                                state.setExecutorSchema(executorSchema);
                                state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
                                // Innermost level: resume the output plugin with the saved output task source.
                                @SuppressWarnings("checkstyle:LineLength") ConfigDiff outputConfigDiff = plugins.getOutputPlugin().resume(resume.getOutputTaskSource(), executorSchema, outputTaskCount, new OutputPlugin.Control() {

                                    public List<TaskReport> run(final TaskSource outputTask) {
                                        // TODO validate outputTask?
                                        state.setOutputTaskSource(outputTask);
                                        // Bring back the task reports recorded in the resume state
                                        // before checking what is still uncommitted.
                                        restoreResumedTaskReports(resume, state);
                                        state.setTransactionStage(TransactionStage.RUN);
                                        // Execute only if some tasks are not committed yet.
                                        if (!state.isAllTasksCommitted()) {
                                            execute(task, executor, state);
                                        }
                                        // Anything still uncommitted after execution is a failure.
                                        if (!state.isAllTasksCommitted()) {
                                            throw new RuntimeException(String.format("%d input tasks and %d output tasks failed", state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
                                        }
                                        state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
                                        return state.getAllOutputTaskReports();
                                    }
                                });
                                state.setOutputConfigDiff(outputConfigDiff);
                                state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
                            }
                        });
                        state.setTransactionStage(TransactionStage.FILTER_COMMIT);
                    }
                });
                state.setTransactionStage(TransactionStage.INPUT_COMMIT);
                return state.getAllInputTaskReports();
            }
        });
        state.setInputConfigDiff(inputConfigDiff);
        state.setTransactionStage(TransactionStage.CLEANUP);
        cleanupCommittedTransaction(config, state);
        return state.buildExecuteResult();
    } catch (Throwable ex) {
        if (isSkippedTransaction(ex)) {
            // A skipped transaction carries its own ConfigDiff, which becomes the result.
            ConfigDiff configDiff = ((SkipTransactionException) ex).getConfigDiff();
            return state.buildExecuteResultOfSkippedExecution(configDiff);
        } else if (state.isAllTasksCommitted() && state.isAllTransactionsCommitted()) {
            // ignore the exception
            // Everything was committed, so the exception is attached to the result as a warning.
            return state.buildExecuteResultWithWarningException(ex);
        }
        // Otherwise report a partial execution built from the current state and session.
        throw state.buildPartialExecuteException(ex, Exec.session());
    }
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) TaskReport(org.embulk.config.TaskReport) ExecutorPlugin(org.embulk.spi.ExecutorPlugin) Schema(org.embulk.spi.Schema) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConfigDiff(org.embulk.config.ConfigDiff) TaskSource(org.embulk.config.TaskSource)

Example 20 with Schema

use of org.embulk.spi.Schema in project embulk by embulk.

The method doPreview of the class PreviewExecutor.

/**
 * Runs the input and filter plugins far enough to collect sample rows for a preview.
 *
 * <p>The result is not returned through the normal call chain: it is carried by a
 * {@code PreviewedNoticeError}, which is caught here and unwrapped. If the input transaction
 * returns normally, that is treated as a programming error.
 *
 * @param task preview task providing the input config, filter configs and sample row count
 * @param input input plugin to read from
 * @param filterPlugins filter plugins applied to the input
 * @return the preview result carried by the caught {@code PreviewedNoticeError}
 */
@SuppressWarnings("checkstyle:OverloadMethodsDeclarationOrder")
private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins) {
    try {
        input.transaction(task.getInputConfig(), new InputPlugin.Control() {

            public List<TaskReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount) {
                Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {

                    public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas) {
                        // NOTE: this local shadows the inputSchema parameter of the enclosing run();
                        // from here on the first schema of the filter chain is used.
                        Schema inputSchema = filterSchemas.get(0);
                        // The sampling output is created with the last schema of the filter chain.
                        Schema outputSchema = filterSchemas.get(filterSchemas.size() - 1);
                        PageOutput out = new SamplingPageOutput(task.getSampleRows(), outputSchema);
                        try {
                            for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
                                try {
                                    // "out" is reassigned on every iteration: Filters.open receives
                                    // the current output and its return value replaces it.
                                    out = Filters.open(filterPlugins, filterTasks, filterSchemas, out);
                                    input.run(inputTask, inputSchema, taskIndex, out);
                                } catch (NoSampleException ex) {
                                    // A task without samples is tolerated unless it is the last
                                    // task; in that case the exception is rethrown.
                                    if (taskIndex == taskCount - 1) {
                                        throw ex;
                                    }
                                }
                            }
                        } finally {
                            // Closes whatever "out" refers to at this point.
                            out.close();
                        }
                    }
                });
                // program never reaches here because SamplingPageOutput.finish throws an error.
                throw new NoSampleException("No input records to preview");
            }
        });
        // Reaching this line means no PreviewedNoticeError was thrown by the transaction.
        throw new AssertionError("PreviewExecutor executor must throw PreviewedNoticeError");
    } catch (PreviewedNoticeError previewed) {
        return previewed.getPreviewResult();
    }
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) PageOutput(org.embulk.spi.PageOutput) Schema(org.embulk.spi.Schema) ArrayList(java.util.ArrayList) List(java.util.List) TaskSource(org.embulk.config.TaskSource)

Aggregations

Schema (org.embulk.spi.Schema): 22, TaskSource (org.embulk.config.TaskSource): 12, Column (org.embulk.spi.Column): 10, ConfigSource (org.embulk.config.ConfigSource): 9, ConfigException (org.embulk.config.ConfigException): 8, List (java.util.List): 7, FilterPlugin (org.embulk.spi.FilterPlugin): 7, Test (org.junit.Test): 6, ImmutableList (com.google.common.collect.ImmutableList): 5, InputPlugin (org.embulk.spi.InputPlugin): 5, SchemaConfigException (org.embulk.spi.SchemaConfigException): 5, ArrayList (java.util.ArrayList): 4, ConfigDiff (org.embulk.config.ConfigDiff): 3, PageOutput (org.embulk.spi.PageOutput): 3, HashMap (java.util.HashMap): 2, TaskReport (org.embulk.config.TaskReport): 2, ExecutorPlugin (org.embulk.spi.ExecutorPlugin): 2, Page (org.embulk.spi.Page): 2, LineDecoder (org.embulk.spi.util.LineDecoder): 2, JsonNode (com.fasterxml.jackson.databind.JsonNode): 1