use of org.embulk.config.TaskSource in project embulk by embulk.
the class BulkLoader method doCleanup.
/**
 * Invokes {@code cleanup} on the input plugin and then on the output plugin, using the
 * task sources, schemas and task reports stored in a resume state.
 *
 * <p>Only the task reports that are present in {@code resume} are passed on as the
 * successful reports, whereas the task count handed to each plugin is the size of the
 * complete report list. When a plugin is a {@code FileInputRunner} or
 * {@code FileOutputRunner}, its task source is obtained through the runner's static
 * {@code getFileInputTaskSource} / {@code getFileOutputTaskSource} helper.
 *
 * @param config configuration loaded as a {@code BulkLoaderTask} to build the plugin set
 * @param resume resume state providing task sources, schemas and task reports
 */
public void doCleanup(ConfigSource config, ResumeState resume) {
    // TODO don't create filter plugins
    final ProcessPluginSet plugins = new ProcessPluginSet(config.loadConfig(BulkLoaderTask.class));

    // Collect the reports that are actually present, input side first.
    final ImmutableList.Builder<TaskReport> presentInputReports = ImmutableList.builder();
    for (Optional<TaskReport> candidate : resume.getInputTaskReports()) {
        if (!candidate.isPresent()) {
            continue;
        }
        presentInputReports.add(candidate.get());
    }
    final ImmutableList.Builder<TaskReport> presentOutputReports = ImmutableList.builder();
    for (Optional<TaskReport> candidate : resume.getOutputTaskReports()) {
        if (!candidate.isPresent()) {
            continue;
        }
        presentOutputReports.add(candidate.get());
    }

    // Input plugin cleanup.
    final TaskSource inputTaskSource = (plugins.getInputPlugin() instanceof FileInputRunner)
            ? FileInputRunner.getFileInputTaskSource(resume.getInputTaskSource())
            : resume.getInputTaskSource();
    plugins.getInputPlugin().cleanup(
            inputTaskSource,
            resume.getInputSchema(),
            resume.getInputTaskReports().size(),
            presentInputReports.build());

    // Output plugin cleanup.
    final TaskSource outputTaskSource = (plugins.getOutputPlugin() instanceof FileOutputRunner)
            ? FileOutputRunner.getFileOutputTaskSource(resume.getOutputTaskSource())
            : resume.getOutputTaskSource();
    plugins.getOutputPlugin().cleanup(
            outputTaskSource,
            resume.getOutputSchema(),
            resume.getOutputTaskReports().size(),
            presentOutputReports.build());
}
use of org.embulk.config.TaskSource in project embulk by embulk.
the class BulkLoader method doRun.
/**
 * Runs one bulk-load transaction by nesting the input, filter, executor and output plugin
 * transactions inside each other, recording progress in a {@code LoaderState}.
 *
 * <p>The transaction stage stored in the state advances as each nested transaction is
 * entered (INPUT_BEGIN, FILTER_BEGIN, EXECUTOR_BEGIN, OUTPUT_BEGIN, RUN) and again on the
 * way out (OUTPUT_COMMIT, EXECUTOR_COMMIT, FILTER_COMMIT, INPUT_COMMIT, CLEANUP).
 *
 * <p>If anything is thrown, the outcome depends on the state: a skipped transaction yields
 * a "skipped" result, a failure after all tasks and transactions were committed yields a
 * result carrying the exception as a warning, and any other failure is rethrown as the
 * exception built by {@code state.buildPartialExecuteException}.
 *
 * @param config configuration loaded as a {@code BulkLoaderTask}
 * @return the execution result built by the loader state
 */
private ExecutionResult doRun(ConfigSource config) {
final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
final ExecutorPlugin exec = newExecutorPlugin(task);
final ProcessPluginSet plugins = new ProcessPluginSet(task);
final LoaderState state = newLoaderState(Exec.getLogger(BulkLoader.class), plugins);
state.setTransactionStage(TransactionStage.INPUT_BEGIN);
try {
// Outermost level: the input plugin transaction. Its callback opens the filter transaction.
ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {
public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount) {
state.setInputTaskSource(inputTask);
state.setTransactionStage(TransactionStage.FILTER_BEGIN);
// Second level: the filter transaction, started from the input schema.
Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
public void run(final List<TaskSource> filterTasks, final List<Schema> schemas) {
state.setSchemas(schemas);
state.setFilterTaskSources(filterTasks);
state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
// Third level: the executor transaction, given the last schema of the filter chain.
exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor) {
state.setExecutorSchema(executorSchema);
state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
// Innermost level: the output plugin transaction, inside which the tasks are executed.
@SuppressWarnings("checkstyle:LineLength") ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
public List<TaskReport> run(final TaskSource outputTask) {
state.setOutputTaskSource(outputTask);
state.initialize(inputTaskCount, outputTaskCount);
state.setTransactionStage(TransactionStage.RUN);
// Execute only when some task is still uncommitted. The original note here read
// "inputTaskCount == 0", presumably naming the case in which nothing is pending
// right after initialize() - TODO confirm.
if (!state.isAllTasksCommitted()) {
execute(task, executor, state);
}
// Anything still uncommitted after execution is reported as a failure with the counts.
if (!state.isAllTasksCommitted()) {
throw new RuntimeException(String.format("%d input tasks and %d output tasks failed", state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
}
// The stage is switched before returning the reports to the output plugin.
state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
return state.getAllOutputTaskReports();
}
});
// The output plugin transaction has returned; keep its config diff.
state.setOutputConfigDiff(outputConfigDiff);
state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
}
});
// The executor transaction has returned.
state.setTransactionStage(TransactionStage.FILTER_COMMIT);
}
});
// The filter transaction has returned.
state.setTransactionStage(TransactionStage.INPUT_COMMIT);
return state.getAllInputTaskReports();
}
});
// The input plugin transaction has returned; keep its config diff, then clean up.
state.setInputConfigDiff(inputConfigDiff);
state.setTransactionStage(TransactionStage.CLEANUP);
cleanupCommittedTransaction(config, state);
return state.buildExecuteResult();
} catch (Throwable ex) {
// NOTE(review): the cast below assumes isSkippedTransaction(ex) is true only when ex itself
// is a SkipTransactionException - confirm against the helper.
if (isSkippedTransaction(ex)) {
ConfigDiff configDiff = ((SkipTransactionException) ex).getConfigDiff();
return state.buildExecuteResultOfSkippedExecution(configDiff);
} else if (state.isAllTasksCommitted() && state.isAllTransactionsCommitted()) {
// Everything was committed before the failure, so the exception is not rethrown;
// it is handed to the result builder as a warning instead.
return state.buildExecuteResultWithWarningException(ex);
}
// Otherwise the failure is rethrown as the exception built from the current state.
throw state.buildPartialExecuteException(ex, Exec.session());
}
}
use of org.embulk.config.TaskSource in project embulk by embulk.
the class GuessExecutor method guessParserConfig.
/**
 * Guesses parser configuration from a sample buffer by running the guess parser repeatedly.
 *
 * <p>Each round merges the previously guessed diff into a copy of {@code config}, overrides
 * the {@code parser} section so that the {@code system_guess} parser type is used, and runs
 * a {@code FileInputRunner} over the sample. The run is expected to end with a
 * {@code GuessedNoticeError} whose guessed config is merged into the accumulated diff.
 * Guessing stops as soon as a round changes nothing, or after 10 rounds.
 *
 * @param sample buffer holding the sample data
 * @param config base configuration to guess on top of
 * @param guessPlugins guess plugins passed to the parser as {@code guess_plugins}
 * @param guessParserSampleBufferBytes value passed as {@code guess_parser_sample_buffer_bytes}
 * @return the accumulated guessed configuration diff
 */
private ConfigDiff guessParserConfig(Buffer sample, ConfigSource config, List<PluginType> guessPlugins, final int guessParserSampleBufferBytes) {
    ConfigDiff lastGuessed = Exec.newConfigDiff();

    // repeat guessing up to 10 times
    for (int round = 0; round < 10; round++) {
        // include last-guessed config to run guess input
        final ConfigSource originalConfig = config.deepCopy().merge(lastGuessed);
        final ConfigSource guessInputConfig = originalConfig.deepCopy();

        // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
        guessInputConfig.getNestedOrSetEmpty("parser")
                .set("type", "system_guess")
                .set("guess_plugins", guessPlugins)
                .set("orig_config", originalConfig)
                .set("guess_parser_sample_buffer_bytes", guessParserSampleBufferBytes);

        // run FileInputPlugin
        final FileInputRunner input = new FileInputRunner(new BufferFileInputPlugin(sample));
        final InputPlugin.Control guessControl = new InputPlugin.Control() {
            @Override
            public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount) {
                if (taskCount == 0) {
                    throw new NoSampleException("No input files to guess");
                }

                // Pages must never reach this output while guessing a parser.
                final PageOutput rejectingOutput = new PageOutput() {
                    @Override
                    public void add(Page page) {
                        // TODO exception class
                        throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
                    }

                    @Override
                    public void finish() {
                    }

                    @Override
                    public void close() {
                    }
                };
                input.run(inputTaskSource, null, 0, rejectingOutput);
                throw new AssertionError("Guess executor must throw GuessedNoticeError");
            }
        };

        ConfigDiff guessed;
        try {
            input.transaction(guessInputConfig, guessControl);
            throw new AssertionError("Guess executor must throw GuessedNoticeError");
        } catch (GuessedNoticeError error) {
            // merge to the last-guessed config
            guessed = lastGuessed.deepCopy().merge(error.getGuessedConfig());
        }

        if (lastGuessed.equals(guessed)) {
            // not changed
            break;
        }
        lastGuessed = guessed;
    }
    return lastGuessed;
}
use of org.embulk.config.TaskSource in project embulk by embulk.
the class TestRenameFilterPlugin method checkConfigExceptionIfUnknownStringTypeOfRenamingOperator.
/**
 * Verifies that a plain string given as a renaming rule makes the filter's
 * {@code transaction} throw a {@code ConfigException}.
 *
 * <p>Only {@code ConfigException} is caught: any other throwable propagates with its own
 * stack trace, and the "nothing was thrown" case fails with an explicit message instead of
 * being caught again and reported as an uninformative assertion failure.
 */
@Test
public void checkConfigExceptionIfUnknownStringTypeOfRenamingOperator() {
    // A simple string shouldn't come as a renaming rule.
    ConfigSource pluginConfig = Exec.newConfigSource().set("rules", ImmutableList.of("string_rule"));
    try {
        filter.transaction(pluginConfig, SCHEMA, new FilterPlugin.Control() {
            @Override
            public void run(TaskSource task, Schema schema) {
                // Nothing to do: the configuration is expected to be rejected.
            }
        });
    } catch (ConfigException expected) {
        // The invalid rule was rejected as required.
        return;
    }
    // Reached only when transaction() returned normally.
    fail("Expected ConfigException for a plain string renaming rule, but nothing was thrown");
}
use of org.embulk.config.TaskSource in project embulk by embulk.
the class TestRenameFilterPlugin method checkUniqueNumberSuffixRuleInternal.
/**
 * Runs the filter with a single {@code unique_number_suffix} rule and asserts the outcome.
 *
 * <p>The input schema has one STRING column per entry of {@code originalColumnNames}. The
 * optional rule parameters are set only when they differ from their sentinel values:
 * {@code delimiter} not equal to {@code DEFAULT}, {@code digits} non-negative, and
 * {@code max_length} other than -1. The resulting column names must equal
 * {@code expectedColumnNames} and each column must keep its original type.
 *
 * @param originalColumnNames column names of the schema given to the filter
 * @param expectedColumnNames column names expected after renaming
 * @param delimiter value for the rule's {@code delimiter}, or {@code DEFAULT} to omit it
 * @param digits value for the rule's {@code digits}, or a negative number to omit it
 * @param max_length value for the rule's {@code max_length}, or -1 to omit it
 */
private void checkUniqueNumberSuffixRuleInternal(final String[] originalColumnNames, final String[] expectedColumnNames, String delimiter, int digits, int max_length) {
    // Build the schema to be renamed.
    final Schema.Builder schemaBuilder = Schema.builder();
    for (int i = 0; i < originalColumnNames.length; i++) {
        schemaBuilder.add(originalColumnNames[i], STRING);
    }
    final Schema originalSchema = schemaBuilder.build();

    // Assemble the rule, leaving out parameters that carry their sentinel value.
    final HashMap<String, Object> ruleParameters = new HashMap<>();
    ruleParameters.put("rule", "unique_number_suffix");
    if (!delimiter.equals(DEFAULT)) {
        ruleParameters.put("delimiter", delimiter);
    }
    if (digits >= 0) {
        ruleParameters.put("digits", digits);
    }
    if (max_length != -1) {
        ruleParameters.put("max_length", max_length);
    }
    final ConfigSource pluginConfig = Exec.newConfigSource()
            .set("rules", ImmutableList.of(ImmutableMap.copyOf(ruleParameters)));

    // Callback that checks the renamed schema handed over by the filter.
    final FilterPlugin.Control verifier = new FilterPlugin.Control() {
        @Override
        public void run(TaskSource task, Schema newSchema) {
            final ArrayList<String> resolvedNames = new ArrayList<>(newSchema.size());
            for (Column column : newSchema.getColumns()) {
                resolvedNames.add(column.getName());
            }
            assertEquals(expectedColumnNames, resolvedNames.toArray(new String[0]));

            // Renaming must not alter column types.
            for (int index = 0; index < expectedColumnNames.length; index++) {
                assertEquals(originalSchema.getColumn(index).getType(), newSchema.getColumn(index).getType());
            }
        }
    };
    filter.transaction(pluginConfig, originalSchema, verifier);
}
Aggregations