use of org.embulk.spi.Schema in project embulk by embulk.
the class RenameFilterPlugin method applyFirstCharacterTypesRule.
/**
 * Applies a "first_character_types" rule: renames every column whose first character is
 * not allowed by the rule.
 *
 * <p>The first character of a name is allowed when it belongs to one of the character types
 * listed in "pass_types" or is one of the characters in "pass_characters". For any other
 * non-empty name, the first character is substituted by "replace", or "prefix" is prepended
 * to the name. Exactly one of "replace" and "prefix" must be configured, each as a single
 * character. Empty names are never changed because they have no first character to test.
 *
 * @param inputSchema the schema whose column names are checked
 * @param rule the rule configuration
 * @return a new schema with the same column types and the adjusted column names
 * @throws ConfigException if "replace"/"prefix" are missing, both present or not exactly one
 *     character long, if "pass_characters" contains "\E", or if "pass_types" contains an
 *     unknown keyword
 */
private Schema applyFirstCharacterTypesRule(Schema inputSchema, FirstCharacterTypesRule rule) {
    final Optional<String> replace = rule.getReplace();
    final List<String> passTypes = rule.getPassTypes();
    final String passCharacters = rule.getPassCharacters();
    final Optional<String> prefix = rule.getPrefix();

    if (replace.isPresent() && replace.get().length() != 1) {
        throw new ConfigException("\"replace\" in \"first_character_types\" must contain just 1 character if specified");
    }
    if (prefix.isPresent() && prefix.get().length() != 1) {
        throw new ConfigException("\"prefix\" in \"first_character_types\" must contain just 1 character if specified");
    }
    if (prefix.isPresent() && replace.isPresent()) {
        throw new ConfigException("\"replace\" and \"prefix\" in \"first_character_types\" must not be specified together");
    }
    if ((!prefix.isPresent()) && (!replace.isPresent())) {
        throw new ConfigException("Either of \"replace\" or \"prefix\" must be specified in \"first_character_types\"");
    }
    // TODO(dmikurube): Revisit this for better escaping.
    // "\E" would terminate the \Q...\E quotation used below, so it is rejected up front.
    if (passCharacters.contains("\\E")) {
        throw new ConfigException("\"pass_characters\" in \"first_character_types\" must not contain \"\\E\"");
    }

    // Build a negated character class that matches a first character which is NOT allowed.
    final StringBuilder regexBuilder = new StringBuilder();
    regexBuilder.append("^[^");
    for (String target : passTypes) {
        if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
            regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
        } else {
            throw new ConfigException("\"" + target + "\" is an unknown character type keyword");
        }
    }
    if (!passCharacters.isEmpty()) {
        regexBuilder.append("\\Q");
        regexBuilder.append(passCharacters);
        regexBuilder.append("\\E");
    }
    regexBuilder.append("]");

    // Compile once instead of once per column. Only the first character is inspected
    // (lookingAt), so the rest of the name -- including any line terminators, which a
    // trailing ".*" would refuse to match -- cannot influence the decision.
    final java.util.regex.Pattern disallowedFirstCharacter =
            java.util.regex.Pattern.compile(regexBuilder.toString());

    final Schema.Builder schemaBuilder = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        String name = column.getName();
        if (disallowedFirstCharacter.matcher(name).lookingAt()) {
            if (replace.isPresent()) {
                name = replace.get() + name.substring(1);
            } else {
                // The validation above guarantees that "prefix" is present when "replace" is not.
                name = prefix.get() + name;
            }
        }
        schemaBuilder.add(name, column.getType());
    }
    return schemaBuilder.build();
}
use of org.embulk.spi.Schema in project embulk by embulk.
the class RenameFilterPlugin method applyCharacterTypesRule.
/**
 * Applies a "character_types" rule: substitutes every character of every column name that
 * is not allowed by the rule with the configured "replace" character.
 *
 * <p>A character is allowed when it belongs to one of the character types listed in
 * "pass_types" or is one of the characters in "pass_characters".
 *
 * @param inputSchema the schema whose column names are rewritten
 * @param rule the rule configuration
 * @return a new schema with the same column types and the rewritten column names
 * @throws ConfigException if "replace" is not exactly one character long, if
 *     "pass_characters" contains "\E", or if "pass_types" contains an unknown keyword
 */
private Schema applyCharacterTypesRule(Schema inputSchema, CharacterTypesRule rule) {
    final List<String> passTypes = rule.getPassTypes();
    final String passCharacters = rule.getPassCharacters();
    final String replace = rule.getReplace();

    if (replace.isEmpty()) {
        throw new ConfigException("\"replace\" in \"character_types\" must not be explicitly empty");
    }
    if (replace.length() != 1) {
        throw new ConfigException("\"replace\" in \"character_types\" must contain just 1 character");
    }
    // TODO(dmikurube): Revisit this for better escaping.
    // "\E" would terminate the \Q...\E quotation used below, so it is rejected up front.
    if (passCharacters.contains("\\E")) {
        throw new ConfigException("\"pass_characters\" in \"character_types\" must not contain \"\\E\"");
    }

    // Build a negated character class that matches any character which is NOT allowed.
    final StringBuilder regexBuilder = new StringBuilder();
    regexBuilder.append("[^");
    for (String target : passTypes) {
        if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
            regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
        } else {
            throw new ConfigException("\"" + target + "\" is an unknown character type keyword");
        }
    }
    if (!passCharacters.isEmpty()) {
        regexBuilder.append("\\Q");
        regexBuilder.append(passCharacters);
        regexBuilder.append("\\E");
    }
    regexBuilder.append("]");

    // Compile once instead of once per column.
    final java.util.regex.Pattern disallowedCharacter =
            java.util.regex.Pattern.compile(regexBuilder.toString());
    // "$" and "\" have a special meaning in regex replacement strings; quoting makes a
    // configured replacement such as "$" be inserted literally instead of failing.
    final String literalReplacement = java.util.regex.Matcher.quoteReplacement(replace);

    final Schema.Builder schemaBuilder = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        final String renamed = disallowedCharacter.matcher(column.getName()).replaceAll(literalReplacement);
        schemaBuilder.add(renamed, column.getType());
    }
    return schemaBuilder.build();
}
use of org.embulk.spi.Schema in project embulk by embulk.
the class RenameFilterPlugin method transaction.
/**
 * Computes the renamed output schema and hands it to the given control.
 *
 * <p>Renames configured through the rename map are applied first; afterwards every rule of
 * the rules list is applied in order, each one operating on the result of the previous one.
 *
 * @param config the filter configuration, loaded as a {@code PluginTask}
 * @param inputSchema the schema coming into this filter
 * @param control the control that is run with the dumped task and the resulting schema
 */
@Override
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) {
    final PluginTask task = config.loadConfig(PluginTask.class);
    final Map<String, String> columnRenames = task.getRenameMap();
    final List<ConfigSource> ruleConfigs = task.getRulesList();

    // Every column named in the rename map has to exist in the input schema;
    // lookupColumn throws SchemaConfigException otherwise.
    for (final String sourceName : columnRenames.keySet()) {
        inputSchema.lookupColumn(sourceName);
    }

    // Step 1: explicit per-column renames, applied before any rule.
    final Schema.Builder renamedColumns = Schema.builder();
    for (final Column column : inputSchema.getColumns()) {
        final String originalName = column.getName();
        final String newName = columnRenames.containsKey(originalName)
                ? columnRenames.get(originalName)
                : originalName;
        renamedColumns.add(newName, column.getType());
    }

    // Step 2: rules, chained so that each rule sees the output of the one before it.
    Schema schema = renamedColumns.build();
    for (final ConfigSource ruleConfig : ruleConfigs) {
        schema = applyRule(ruleConfig, schema);
    }

    control.run(task.dump(), schema);
}
use of org.embulk.spi.Schema in project embulk by embulk.
the class BulkLoader method doResume.
/**
 * Resumes a bulk load from a previously captured {@code ResumeState}.
 *
 * <p>The input, filter, executor and output transactions are nested inside each other as
 * callbacks, and the transaction stage stored in the loader state is advanced at every step.
 * The input and output plugins are entered through their {@code resume} methods using the
 * task sources taken from {@code resume}. After the task reports from {@code resume} have been
 * restored, tasks are executed only if not all of them are committed yet.
 *
 * @param config the configuration, loaded as a {@code BulkLoaderTask}
 * @param resume the state to resume from
 * @return the result built from the loader state: the regular result on success, the
 *     skipped-execution result when the transaction was skipped, or a result carrying the
 *     exception as a warning when everything was already committed
 */
private ExecutionResult doResume(ConfigSource config, final ResumeState resume) {
final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
final ExecutorPlugin exec = newExecutorPlugin(task);
final ProcessPluginSet plugins = new ProcessPluginSet(task);
final LoaderState state = newLoaderState(Exec.getLogger(BulkLoader.class), plugins);
state.setTransactionStage(TransactionStage.INPUT_BEGIN);
try {
// Outermost level: input plugin transaction, resumed with the saved input task source and schema.
@SuppressWarnings("checkstyle:LineLength") ConfigDiff inputConfigDiff = plugins.getInputPlugin().resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputTaskReports().size(), new InputPlugin.Control() {
public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount) {
// TODO validate inputTask?
// TODO validate inputSchema
state.setInputTaskSource(inputTask);
state.setTransactionStage(TransactionStage.FILTER_BEGIN);
// Second level: filter transactions, producing the list of schemas.
Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
public void run(final List<TaskSource> filterTasks, final List<Schema> schemas) {
state.setSchemas(schemas);
state.setFilterTaskSources(filterTasks);
state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
// Third level: executor transaction, started with the last of the filter schemas.
exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor) {
// TODO validate executorSchema
state.setExecutorSchema(executorSchema);
state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
// Innermost level: output plugin transaction, resumed with the saved output task source.
@SuppressWarnings("checkstyle:LineLength") ConfigDiff outputConfigDiff = plugins.getOutputPlugin().resume(resume.getOutputTaskSource(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
public List<TaskReport> run(final TaskSource outputTask) {
// TODO validate outputTask?
state.setOutputTaskSource(outputTask);
// Bring the task reports recorded in the resume state back into the loader state.
restoreResumedTaskReports(resume, state);
state.setTransactionStage(TransactionStage.RUN);
// Run tasks only when the restored reports do not already cover all of them.
if (!state.isAllTasksCommitted()) {
execute(task, executor, state);
}
// Still uncommitted tasks after the run means the resumed execution failed.
if (!state.isAllTasksCommitted()) {
throw new RuntimeException(String.format("%d input tasks and %d output tasks failed", state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
}
state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
return state.getAllOutputTaskReports();
}
});
state.setOutputConfigDiff(outputConfigDiff);
state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
}
});
state.setTransactionStage(TransactionStage.FILTER_COMMIT);
}
});
state.setTransactionStage(TransactionStage.INPUT_COMMIT);
return state.getAllInputTaskReports();
}
});
state.setInputConfigDiff(inputConfigDiff);
state.setTransactionStage(TransactionStage.CLEANUP);
cleanupCommittedTransaction(config, state);
return state.buildExecuteResult();
} catch (Throwable ex) {
// A skipped transaction is reported as a result, not as a failure.
if (isSkippedTransaction(ex)) {
ConfigDiff configDiff = ((SkipTransactionException) ex).getConfigDiff();
return state.buildExecuteResultOfSkippedExecution(configDiff);
} else if (state.isAllTasksCommitted() && state.isAllTransactionsCommitted()) {
// ignore the exception
return state.buildExecuteResultWithWarningException(ex);
}
// Anything else is rethrown wrapped by the loader state together with the current session.
throw state.buildPartialExecuteException(ex, Exec.session());
}
}
use of org.embulk.spi.Schema in project embulk by embulk.
the class PreviewExecutor method doPreview.
/**
 * Runs the input plugin through the filters into a sampling output and returns the preview.
 *
 * <p>The preview result is not returned through the normal call chain: it is taken from the
 * {@code PreviewedNoticeError} caught at the bottom of this method. Input tasks are tried in
 * index order; a {@code NoSampleException} from a task is swallowed unless it comes from the
 * last task, in which case it propagates.
 *
 * @param task the preview task providing the input config, filter configs and sample row count
 * @param input the input plugin to read from
 * @param filterPlugins the filter plugins applied between the input and the sampling output
 * @return the preview result carried by the caught {@code PreviewedNoticeError}
 */
@SuppressWarnings("checkstyle:OverloadMethodsDeclarationOrder")
private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins) {
    try {
        input.transaction(task.getInputConfig(), new InputPlugin.Control() {
            public List<TaskReport> run(final TaskSource inputTask, Schema schemaFromInput, final int taskCount) {
                Filters.transaction(filterPlugins, task.getFilterConfigs(), schemaFromInput, new Filters.Control() {
                    public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas) {
                        // The first schema is passed to the input plugin, the last one to the sampler.
                        final Schema firstSchema = filterSchemas.get(0);
                        final Schema lastSchema = filterSchemas.get(filterSchemas.size() - 1);
                        PageOutput sink = new SamplingPageOutput(task.getSampleRows(), lastSchema);
                        try {
                            int taskIndex = 0;
                            while (taskIndex < taskCount) {
                                try {
                                    // NOTE(review): the previous sink is wrapped again on every
                                    // iteration -- confirm that this nesting is intended.
                                    sink = Filters.open(filterPlugins, filterTasks, filterSchemas, sink);
                                    input.run(inputTask, firstSchema, taskIndex, sink);
                                } catch (NoSampleException ex) {
                                    // Only the last task is allowed to report "no sample".
                                    final boolean isLastTask = (taskIndex == taskCount - 1);
                                    if (isLastTask) {
                                        throw ex;
                                    }
                                }
                                taskIndex++;
                            }
                        } finally {
                            sink.close();
                        }
                    }
                });
                // program never reaches here because SamplingPageOutput.finish throws an error.
                throw new NoSampleException("No input records to preview");
            }
        });
        throw new AssertionError("PreviewExecutor executor must throw PreviewedNoticeError");
    } catch (PreviewedNoticeError previewed) {
        return previewed.getPreviewResult();
    }
}
Aggregations