Search in sources:

Example 1 with TaskReport

use of org.embulk.config.TaskReport in project embulk by embulk.

the class LocalFileOutputPlugin method open.

/**
 * Opens a transactional output that writes the buffers of one task to local files.
 *
 * <p>Every call to {@code nextFile()} on the returned object closes the file currently
 * open (if any) and opens a new one at
 * {@code pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix},
 * where {@code fileIndex} starts at 0 and grows by one per successfully opened file.
 *
 * @param taskSource task source from which the {@code PluginTask} settings are loaded
 * @param taskIndex index of the task; passed as the first argument to the sequence format
 * @return an output writing to sequentially numbered local files
 */
@Override
public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex) {
    PluginTask task = taskSource.loadTask(PluginTask.class);
    final String pathPrefix = task.getPathPrefix();
    final String pathSuffix = task.getFileNameExtension();
    final String sequenceFormat = task.getSequenceFormat();
    return new TransactionalFileOutput() {

        // Paths of every file this output attempted to open, in order.
        private final List<String> fileNames = new ArrayList<>();

        // Second argument of the sequence format for the next file to open.
        private int fileIndex = 0;

        // Stream of the file currently being written; null while no file is open.
        private FileOutputStream output = null;

        /**
         * Closes the current file, if any, and opens the next numbered file.
         *
         * @throws RuntimeException wrapping the FileNotFoundException if the file cannot be opened
         */
        @Override
        public void nextFile() {
            closeFile();
            String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
            log.info("Writing local file '{}'", path);
            fileNames.add(path);
            try {
                output = new FileOutputStream(new File(path));
            } catch (FileNotFoundException ex) {
                // TODO exception class
                throw new RuntimeException(ex);
            }
            fileIndex++;
        }

        /**
         * Closes the current stream, if one is open, and forgets it.
         *
         * <p>The reference is cleared even when close() fails so that later calls
         * never operate on a stream that has already been closed.
         */
        private void closeFile() {
            if (output == null) {
                return;
            }
            try {
                output.close();
            } catch (IOException ex) {
                throw new RuntimeException(ex);
            } finally {
                output = null;
            }
        }

        /**
         * Writes the buffer to the current file and releases the buffer.
         *
         * @param buffer data to write; always released, even when the write fails
         * @throws IllegalStateException if no file is currently open
         * @throws RuntimeException wrapping the IOException if the write fails
         */
        @Override
        public void add(Buffer buffer) {
            try {
                if (output == null) {
                    // Fail with an explicit message instead of a bare NullPointerException.
                    throw new IllegalStateException("No output file is open; call nextFile() before add()");
                }
                output.write(buffer.array(), buffer.offset(), buffer.limit());
            } catch (IOException ex) {
                throw new RuntimeException(ex);
            } finally {
                buffer.release();
            }
        }

        /** Closes the file currently open, if any. */
        @Override
        public void finish() {
            closeFile();
        }

        /** Closes the file currently open, if any; does nothing when already closed. */
        @Override
        public void close() {
            closeFile();
        }

        /** Does nothing: files that were already written are left as they are. */
        @Override
        public void abort() {
        }

        /**
         * Returns an empty task report.
         *
         * @return a new report with no entries set
         */
        @Override
        public TaskReport commit() {
            TaskReport report = Exec.newTaskReport();
            // report.set("file_sizes", fileSizes);
            return report;
        }
    };
}
Also used : Buffer(org.embulk.spi.Buffer) TaskReport(org.embulk.config.TaskReport) FileOutputStream(java.io.FileOutputStream) TransactionalFileOutput(org.embulk.spi.TransactionalFileOutput) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) List(java.util.List) IOException(java.io.IOException) File(java.io.File)

Example 2 with TaskReport

use of org.embulk.config.TaskReport in project embulk by embulk.

the class BulkLoader method doCleanup.

/**
 * Calls the cleanup hooks of the input and output plugins for a resumed state.
 *
 * <p>Only task reports that are present in the resume state are handed to the plugins,
 * while the task counts passed alongside are the sizes of the full report lists.
 *
 * @param config configuration from which the {@code BulkLoaderTask} is loaded
 * @param resume state holding the task sources, schemas and task reports to clean up
 */
public void doCleanup(ConfigSource config, ResumeState resume) {
    final BulkLoaderTask loaderTask = config.loadConfig(BulkLoaderTask.class);
    // TODO don't create filter plugins
    final ProcessPluginSet pluginSet = new ProcessPluginSet(loaderTask);

    // Gather the reports that are actually present on each side.
    final ImmutableList.Builder<TaskReport> presentInputReports = ImmutableList.builder();
    final ImmutableList.Builder<TaskReport> presentOutputReports = ImmutableList.builder();
    for (Optional<TaskReport> candidate : resume.getInputTaskReports()) {
        if (!candidate.isPresent()) {
            continue;
        }
        presentInputReports.add(candidate.get());
    }
    for (Optional<TaskReport> candidate : resume.getOutputTaskReports()) {
        if (!candidate.isPresent()) {
            continue;
        }
        presentOutputReports.add(candidate.get());
    }

    // A FileInputRunner gets its task source via getFileInputTaskSource instead of the raw one.
    final TaskSource inputTaskSource = (plugins(pluginSet).getInputPlugin() instanceof FileInputRunner)
            ? FileInputRunner.getFileInputTaskSource(resume.getInputTaskSource())
            : resume.getInputTaskSource();
    pluginSet.getInputPlugin().cleanup(
            inputTaskSource,
            resume.getInputSchema(),
            resume.getInputTaskReports().size(),
            presentInputReports.build());

    // Same selection for the output side with FileOutputRunner.
    final TaskSource outputTaskSource = (pluginSet.getOutputPlugin() instanceof FileOutputRunner)
            ? FileOutputRunner.getFileOutputTaskSource(resume.getOutputTaskSource())
            : resume.getOutputTaskSource();
    pluginSet.getOutputPlugin().cleanup(
            outputTaskSource,
            resume.getOutputSchema(),
            resume.getOutputTaskReports().size(),
            presentOutputReports.build());
}

// Identity helper that keeps the plugin-set lookup readable inside the ternary above.
private static ProcessPluginSet plugins(ProcessPluginSet pluginSet) {
    return pluginSet;
}
Also used : TaskReport(org.embulk.config.TaskReport) ImmutableList(com.google.common.collect.ImmutableList) FileInputRunner(org.embulk.spi.FileInputRunner) FileOutputRunner(org.embulk.spi.FileOutputRunner) TaskSource(org.embulk.config.TaskSource)

Example 3 with TaskReport

use of org.embulk.config.TaskReport in project embulk by embulk.

the class BulkLoader method doRun.

/**
 * Runs one bulk load by nesting the input, filter, executor and output transactions.
 *
 * <p>Each plugin's transaction is started from inside the control callback of the previous
 * one (input -> filters -> executor -> output), and the current stage is recorded on the
 * {@code LoaderState} before and after every step.
 *
 * @param config configuration from which the {@code BulkLoaderTask} is loaded
 * @return the result built from the loader state: the normal result on success, the
 *         skipped-execution result when the failure is a skipped transaction, or a result
 *         carrying the exception as a warning when everything had already been committed
 * @throws RuntimeException (as built by {@code state.buildPartialExecuteException}) for any
 *         other failure — NOTE(review): exact exception type is not visible here
 */
private ExecutionResult doRun(ConfigSource config) {
    final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
    final ExecutorPlugin exec = newExecutorPlugin(task);
    final ProcessPluginSet plugins = new ProcessPluginSet(task);
    final LoaderState state = newLoaderState(Exec.getLogger(BulkLoader.class), plugins);
    state.setTransactionStage(TransactionStage.INPUT_BEGIN);
    try {
        // Outermost transaction: the input plugin. Everything else runs inside its control callback.
        ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {

            public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount) {
                state.setInputTaskSource(inputTask);
                state.setTransactionStage(TransactionStage.FILTER_BEGIN);
                // Second level: the filter plugins, starting from the input schema.
                Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {

                    public void run(final List<TaskSource> filterTasks, final List<Schema> schemas) {
                        state.setSchemas(schemas);
                        state.setFilterTaskSources(filterTasks);
                        state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
                        // Third level: the executor plugin, given the last schema of the filter chain.
                        exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {

                            public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor) {
                                state.setExecutorSchema(executorSchema);
                                state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
                                // Innermost level: the output plugin; its control callback runs the tasks.
                                @SuppressWarnings("checkstyle:LineLength") ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {

                                    public List<TaskReport> run(final TaskSource outputTask) {
                                        state.setOutputTaskSource(outputTask);
                                        state.initialize(inputTaskCount, outputTaskCount);
                                        state.setTransactionStage(TransactionStage.RUN);
                                        // Execution is skipped when the state already reports all tasks as
                                        // committed right after initialize() — presumably the
                                        // inputTaskCount == 0 case the original note refers to.
                                        if (!state.isAllTasksCommitted()) {
                                            // inputTaskCount == 0
                                            execute(task, executor, state);
                                        }
                                        // Anything still uncommitted after execution is a failure.
                                        if (!state.isAllTasksCommitted()) {
                                            throw new RuntimeException(String.format("%d input tasks and %d output tasks failed", state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
                                        }
                                        state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
                                        return state.getAllOutputTaskReports();
                                    }
                                });
                                state.setOutputConfigDiff(outputConfigDiff);
                                state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
                            }
                        });
                        state.setTransactionStage(TransactionStage.FILTER_COMMIT);
                    }
                });
                state.setTransactionStage(TransactionStage.INPUT_COMMIT);
                return state.getAllInputTaskReports();
            }
        });
        state.setInputConfigDiff(inputConfigDiff);
        state.setTransactionStage(TransactionStage.CLEANUP);
        cleanupCommittedTransaction(config, state);
        return state.buildExecuteResult();
    } catch (Throwable ex) {
        if (isSkippedTransaction(ex)) {
            // A skipped transaction carries its own ConfigDiff and is reported as a result, not an error.
            ConfigDiff configDiff = ((SkipTransactionException) ex).getConfigDiff();
            return state.buildExecuteResultOfSkippedExecution(configDiff);
        } else if (state.isAllTasksCommitted() && state.isAllTransactionsCommitted()) {
            // ignore the exception
            // Everything was committed before the failure, so the exception is attached to the
            // result instead of being thrown.
            return state.buildExecuteResultWithWarningException(ex);
        }
        // Otherwise surface the failure together with the partial state and the current session.
        throw state.buildPartialExecuteException(ex, Exec.session());
    }
}
Also used : InputPlugin(org.embulk.spi.InputPlugin) TaskReport(org.embulk.config.TaskReport) ExecutorPlugin(org.embulk.spi.ExecutorPlugin) Schema(org.embulk.spi.Schema) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConfigDiff(org.embulk.config.ConfigDiff) TaskSource(org.embulk.config.TaskSource)

Example 4 with TaskReport

use of org.embulk.config.TaskReport in project embulk by embulk.

the class TestFileOutputRunner method testMockFormatterIteration.

/**
 * Writes one built page set through a FileOutputRunner backed by mock plugins and checks
 * that the transaction completed and that both records reached the mock formatter intact.
 */
@Test
public void testMockFormatterIteration() {
    final MockFileOutputPlugin mockOutput = new MockFileOutputPlugin();
    final FileOutputRunner runner = new FileOutputRunner(mockOutput);

    // One column per type under test.
    final ImmutableList<ImmutableMap<String, Object>> columnConfigs = ImmutableList.of(
            ImmutableMap.<String, Object>of("name", "col1", "type", "boolean", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col2", "type", "long", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col3", "type", "double", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col4", "type", "string", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col5", "type", "timestamp", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col6", "type", "json", "option", ImmutableMap.of()));
    final ConfigSource config = Exec.newConfigSource()
            .set("type", "unused?")
            .set("formatter", ImmutableMap.of("type", "mock", "columns", columnConfigs));
    final Schema schema = config.getNested("formatter")
            .loadConfig(MockParserPlugin.PluginTask.class)
            .getSchemaConfig()
            .toSchema();

    runner.transaction(config, schema, 1, new OutputPlugin.Control() {

        public List<TaskReport> run(final TaskSource outputTask) {
            final TransactionalPageOutput pageOutput = runner.open(outputTask, schema, 1);
            boolean done = false;
            try {
                final ImmutableMapValue json = newMap(
                        newString("_c1"), newBoolean(true),
                        newString("_c2"), newInteger(10),
                        newString("_c3"), newString("embulk"),
                        newString("_c4"), newMap(newString("k"), newString("v")));
                // Two identical rows.
                for (Page built : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
                        true, 2L, 3.0D, "45", Timestamp.ofEpochMilli(678L), json,
                        true, 2L, 3.0D, "45", Timestamp.ofEpochMilli(678L), json)) {
                    pageOutput.add(built);
                }
                pageOutput.commit();
                done = true;
            } finally {
                // Abort only when the commit was not reached; close in every case.
                if (!done) {
                    pageOutput.abort();
                }
                pageOutput.close();
            }
            return new ArrayList<TaskReport>();
        }
    });

    assertEquals(true, mockOutput.transactionCompleted);
    assertEquals(2, MockFormatterPlugin.records.size());
    for (List<Object> row : MockFormatterPlugin.records) {
        assertEquals(Boolean.TRUE, row.get(0));
        assertEquals(2L, row.get(1));
        assertEquals(3.0D, (Double) row.get(2), 0.1D);
        assertEquals("45", row.get(3));
        assertEquals(678L, ((Timestamp) row.get(4)).toEpochMilli());
        assertEquals("{\"_c1\":true,\"_c2\":10,\"_c3\":\"embulk\",\"_c4\":{\"k\":\"v\"}}", row.get(5).toString());
    }
}
Also used : TaskReport(org.embulk.config.TaskReport) ImmutableMap(com.google.common.collect.ImmutableMap) ConfigSource(org.embulk.config.ConfigSource) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ImmutableMapValue(org.msgpack.value.ImmutableMapValue) TaskSource(org.embulk.config.TaskSource) Test(org.junit.Test)

Example 5 with TaskReport

use of org.embulk.config.TaskReport in project embulk by embulk.

the class TestFileOutputRunner method testTransactionAborted.

/**
 * Feeds a null page into the output opened by a FileOutputRunner and checks that the
 * mock file output plugin does not report its transaction as completed.
 */
@Test
public void testTransactionAborted() {
    final MockFileOutputPlugin mockOutput = new MockFileOutputPlugin();
    final FileOutputRunner runner = new FileOutputRunner(mockOutput);

    // One column per type, same layout as the other runner test.
    final ImmutableList<ImmutableMap<String, Object>> columnConfigs = ImmutableList.of(
            ImmutableMap.<String, Object>of("name", "col1", "type", "boolean", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col2", "type", "long", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col3", "type", "double", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col4", "type", "string", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col5", "type", "timestamp", "option", ImmutableMap.of()),
            ImmutableMap.<String, Object>of("name", "col6", "type", "json", "option", ImmutableMap.of()));
    final ConfigSource config = Exec.newConfigSource()
            .set("type", "unused?")
            .set("formatter", ImmutableMap.of("type", "mock", "columns", columnConfigs));
    final Schema schema = config.getNested("formatter")
            .loadConfig(MockParserPlugin.PluginTask.class)
            .getSchemaConfig()
            .toSchema();

    try {
        runner.transaction(config, schema, 1, new OutputPlugin.Control() {

            public List<TaskReport> run(final TaskSource outputTask) {
                final TransactionalPageOutput pageOutput = runner.open(outputTask, schema, 1);
                boolean done = false;
                try {
                    pageOutput.add(null);
                    pageOutput.commit();
                    done = true;
                } finally {
                    // Abort only when the commit was not reached; close in every case.
                    if (!done) {
                        pageOutput.abort();
                    }
                    pageOutput.close();
                }
                return new ArrayList<TaskReport>();
            }
        });
    } catch (NullPointerException ignored) {
        // Just passing through.
    }

    assertEquals(false, mockOutput.transactionCompleted);
}
Also used : TaskReport(org.embulk.config.TaskReport) ImmutableMap(com.google.common.collect.ImmutableMap) ConfigSource(org.embulk.config.ConfigSource) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) TaskSource(org.embulk.config.TaskSource) Test(org.junit.Test)

Aggregations

TaskReport (org.embulk.config.TaskReport): 9, ImmutableList (com.google.common.collect.ImmutableList): 5, List (java.util.List): 5, TaskSource (org.embulk.config.TaskSource): 5, ArrayList (java.util.ArrayList): 3, ImmutableMap (com.google.common.collect.ImmutableMap): 2, ConfigDiff (org.embulk.config.ConfigDiff): 2, ConfigSource (org.embulk.config.ConfigSource): 2, ExecutorPlugin (org.embulk.spi.ExecutorPlugin): 2, InputPlugin (org.embulk.spi.InputPlugin): 2, Schema (org.embulk.spi.Schema): 2, Test (org.junit.Test): 2, File (java.io.File): 1, FileNotFoundException (java.io.FileNotFoundException): 1, FileOutputStream (java.io.FileOutputStream): 1, IOException (java.io.IOException): 1, SamplingParserPlugin (org.embulk.exec.SamplingParserPlugin): 1, AbortTransactionResource (org.embulk.spi.AbortTransactionResource): 1, Buffer (org.embulk.spi.Buffer): 1, CloseResource (org.embulk.spi.CloseResource): 1