Search in sources:

Example 1 with Task

Use of org.dkpro.lab.task.Task in the project dkpro-lab by dkpro.

Class MultiThreadBatchTaskEngine, method executeConfiguration.

@Override
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext, Map<String, Object> aConfig, Set<String> aExecutedSubtasks) throws ExecutionException, LifeCycleException {
    if (log.isTraceEnabled()) {
        // Show all subtasks executed so far
        for (String est : aExecutedSubtasks) {
            log.trace("-- Already executed: " + est);
        }
    }
    // Set up initial scope used by sub-batch-tasks using the inherited scope. The scope is
    // extended as the subtasks of this batch are executed with the present configuration.
    // FIXME: That means that sub-batch-tasks in two different configurations cannot see
    // each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
    // maintained *across* configurations, so maybe the scope should also be maintained
    // *across* configurations? - REC 2014-06-15
    Set<String> scope = new HashSet<>();
    if (aConfiguration.getScope() != null) {
        scope.addAll(aConfiguration.getScope());
    }
    // Configure subtasks
    for (Task task : aConfiguration.getTasks()) {
        // Now the setup is complete
        aContext.getLifeCycleManager().configure(aContext, task, aConfig);
    }
    Queue<Task> queue = new LinkedList<>(aConfiguration.getTasks());
    // keeps track of the execution threads;
    // TODO MW: do we really need this or can we work with the futures list only?
    Map<Task, ExecutionThread> threads = new HashMap<>();
    // keeps track of submitted Futures and their associated tasks
    Map<Future<?>, Task> futures = new HashMap<Future<?>, Task>();
    // re-created at the start of each loop iteration with the exceptions from the previous run
    ConcurrentMap<Task, Throwable> exceptionsFromLastLoop = null;
    ConcurrentMap<Task, Throwable> exceptionsFromCurrentLoop = new ConcurrentHashMap<>();
    int outerLoopCounter = 0;
    // main loop
    do {
        outerLoopCounter++;
        threads.clear();
        futures.clear();
        ExecutorService executor = Executors.newFixedThreadPool(maxThreads);
        // set the exceptions from the last loop
        exceptionsFromLastLoop = new ConcurrentHashMap<>(exceptionsFromCurrentLoop);
        // Fix MW: Clear exceptionsFromCurrentLoop; otherwise the loop will run at most twice.
        exceptionsFromCurrentLoop.clear();
        // process all tasks from the queue
        while (!queue.isEmpty()) {
            Task task = queue.poll();
            TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext, task, aConfig, aExecutedSubtasks);
            // Check whether a subtask execution compatible with the present configuration
            // already exists ...
            if (execution == null) {
                // ... otherwise execute it with the present configuration
                log.info("Executing task [" + task.getType() + "]");
                // set scope here so that the inherited scopes are considered
                if (task instanceof BatchTask) {
                    ((BatchTask) task).setScope(scope);
                }
                ExecutionThread thread = new ExecutionThread(aContext, task, aConfig, aExecutedSubtasks);
                threads.put(task, thread);
                futures.put(executor.submit(thread), task);
            } else {
                log.debug("Using existing execution [" + execution.getId() + "]");
                // Record new/existing execution
                aExecutedSubtasks.add(execution.getId());
                scope.add(execution.getId());
            }
        }
        // try and get results from all futures to check for failed executions
        for (Map.Entry<Future<?>, Task> entry : futures.entrySet()) {
            try {
                entry.getKey().get();
            } catch (java.util.concurrent.ExecutionException ex) {
                Task task = entry.getValue();
                // TODO MW: add a retry-counter here to prevent endless loops?
                log.info("Task exec failed for [" + task.getType() + "]");
                // record the failed task, so that it can be re-added to the queue
                exceptionsFromCurrentLoop.put(task, ex);
            } catch (InterruptedException ex) {
                // thread interrupted, exit
                throw new RuntimeException(ex);
            }
        }
        log.debug("Calling shutdown");
        executor.shutdown();
        log.debug("All threads finished");
        // collect the results
        for (Map.Entry<Task, ExecutionThread> entry : threads.entrySet()) {
            Task task = entry.getKey();
            ExecutionThread thread = entry.getValue();
            TaskContextMetadata execution = thread.getTaskContextMetadata();
            // no metadata was recorded - the task probably failed
            if (execution == null) {
                Throwable exception = exceptionsFromCurrentLoop.get(task);
                if (!(exception instanceof UnresolvedImportException) && !(exception instanceof java.util.concurrent.ExecutionException)) {
                    throw new RuntimeException(exception);
                }
                exceptionsFromCurrentLoop.put(task, exception);
                // re-add to the queue
                queue.add(task);
            } else {
                // Record new/existing execution
                aExecutedSubtasks.add(execution.getId());
                scope.add(execution.getId());
            }
        }
    } // finish if the same tasks failed again
    while (!exceptionsFromCurrentLoop.keySet().equals(exceptionsFromLastLoop.keySet()));
    if (!exceptionsFromCurrentLoop.isEmpty()) {
        // collect all details
        StringBuilder details = new StringBuilder();
        for (Throwable throwable : exceptionsFromCurrentLoop.values()) {
            details.append("\n -");
            details.append(throwable.getMessage());
        }
        // we re-throw the first exception
        Throwable next = exceptionsFromCurrentLoop.values().iterator().next();
        if (next instanceof RuntimeException) {
            throw (RuntimeException) next;
        }
        // otherwise wrap it
        throw new RuntimeException(details.toString(), next);
    }
    log.info("MultiThreadBatchTask completed successfully. Total number of outer loop runs: " + outerLoopCounter);
}
Also used: Task (org.dkpro.lab.task.Task), BatchTask (org.dkpro.lab.task.BatchTask), TaskContextMetadata (org.dkpro.lab.task.TaskContextMetadata), UnresolvedImportException (org.dkpro.lab.storage.UnresolvedImportException), HashMap (java.util.HashMap), HashSet (java.util.HashSet), LinkedList (java.util.LinkedList), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), ConcurrentMap (java.util.concurrent.ConcurrentMap), ExecutorService (java.util.concurrent.ExecutorService), Future (java.util.concurrent.Future)
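
Stripped of the dkpro-lab specifics, the method implements a retry-until-stable loop: each round submits all pending tasks to a thread pool, collects the failures, re-queues them, and stops when the same set of tasks fails in two consecutive rounds. A minimal standalone sketch of that pattern, with hypothetical Job/runAll names that are not part of the dkpro-lab API:

import java.util.*;
import java.util.concurrent.*;

// Hypothetical illustration of the retry-until-stable pattern; not part of the dkpro-lab API.
public class RetryUntilStable {

    // Stand-in for a task that may fail as long as its dependencies have not run yet.
    interface Job {
        String id();
        void run(Set<String> alreadyDone) throws Exception;
    }

    static Set<String> runAll(Collection<Job> jobs, int maxThreads) throws InterruptedException {
        Set<String> done = ConcurrentHashMap.newKeySet();
        Queue<Job> queue = new LinkedList<>(jobs);
        Set<String> failedLastRound = Collections.emptySet();
        while (!queue.isEmpty()) {
            ExecutorService executor = Executors.newFixedThreadPool(maxThreads);
            Map<Future<?>, Job> futures = new HashMap<>();
            // submit everything that is still pending
            for (Job job; (job = queue.poll()) != null; ) {
                final Job j = job;
                futures.put(executor.submit(() -> { j.run(done); return null; }), j);
            }
            // wait for all submissions and collect this round's failures
            Set<String> failedThisRound = new HashSet<>();
            for (Map.Entry<Future<?>, Job> e : futures.entrySet()) {
                try {
                    e.getKey().get();
                    done.add(e.getValue().id());
                } catch (ExecutionException ex) {
                    failedThisRound.add(e.getValue().id());
                    queue.add(e.getValue()); // re-queue for the next round
                }
            }
            executor.shutdown();
            // the same jobs failed twice in a row: no further progress is possible
            if (!failedThisRound.isEmpty() && failedThisRound.equals(failedLastRound)) {
                throw new IllegalStateException("Unresolvable jobs: " + failedThisRound);
            }
            failedLastRound = failedThisRound;
        }
        return done;
    }
}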

Example 2 with Task

Use of org.dkpro.lab.task.Task in the project dkpro-lab by dkpro.

Class FoldDimensionBundleTest, method testFoldInjection.

@Test
public void testFoldInjection() throws Exception {
    File repo = new File("target/repository/" + getClass().getSimpleName() + "/" + name.getMethodName());
    FileUtils.deleteDirectory(repo);
    repo.mkdirs();
    ((FileSystemStorageService) Lab.getInstance().getStorageService()).setStorageRoot(repo);
    Dimension<String> baseData = Dimension.create("base", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10");
    FoldDimensionBundle<String> foldBundle = new FoldDimensionBundle<String>("fold", baseData, 3);
    String expected = "0 - [1, 4, 7, 10] [2, 5, 8, 3, 6, 9]\n" + "1 - [2, 5, 8] [1, 4, 7, 10, 3, 6, 9]\n" + "2 - [3, 6, 9] [1, 4, 7, 10, 2, 5, 8]\n";
    ParameterSpace pSpace = new ParameterSpace(foldBundle);
    final StringBuilder actual = new StringBuilder();
    Task testTask = new ExecutableTaskBase() {

        int n = 0;

        @Discriminator
        Collection<String> fold_validation;

        @Discriminator
        Collection<String> fold_training;

        @Override
        public void execute(TaskContext aContext) throws Exception {
            System.out.printf("%d training  : %s\n", n, fold_training);
            System.out.printf("%d validation: %s\n", n, fold_validation);
            actual.append(String.format("%d - %s %s\n", n, fold_validation, fold_training));
            n++;
        }
    };
    DefaultBatchTask batchTask = new DefaultBatchTask();
    batchTask.setParameterSpace(pSpace);
    batchTask.addTask(testTask);
    Lab.getInstance().run(batchTask);
    assertEquals(3, pSpace.getStepCount());
    assertEquals(expected, actual.toString());
}
Also used: Task (org.dkpro.lab.task.Task), DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask), ExecutableTaskBase (org.dkpro.lab.task.impl.ExecutableTaskBase), FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle), ParameterSpace (org.dkpro.lab.task.ParameterSpace), TaskContext (org.dkpro.lab.engine.TaskContext), FileSystemStorageService (org.dkpro.lab.storage.filesystem.FileSystemStorageService), Collection (java.util.Collection), File (java.io.File), Test (org.junit.Test)
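
The expected string encodes a plain round-robin split: item i goes to bucket i % 3, the bucket matching the fold index is the validation set, and the remaining buckets are concatenated as the training set. A small standalone sketch that reproduces exactly those three lines (an assumption about how the folds are formed, not the actual FoldDimensionBundle implementation):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Hypothetical sketch of the round-robin fold split encoded in the expected string above.
public class FoldSplitSketch {
    public static void main(String[] args) {
        List<String> data = Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10");
        int folds = 3;
        // distribute the items round-robin over the folds
        List<List<String>> buckets = new ArrayList<>();
        for (int f = 0; f < folds; f++) {
            buckets.add(new ArrayList<String>());
        }
        for (int i = 0; i < data.size(); i++) {
            buckets.get(i % folds).add(data.get(i));
        }
        // in each step, one bucket is the validation set and the other buckets form the training set
        for (int f = 0; f < folds; f++) {
            List<String> training = new ArrayList<>();
            for (int g = 0; g < folds; g++) {
                if (g != f) {
                    training.addAll(buckets.get(g));
                }
            }
            System.out.printf("%d - %s %s\n", f, buckets.get(f), training);
        }
    }
}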

Example 3 with Task

Use of org.dkpro.lab.task.Task in the project dkpro-lab by dkpro.

Class MultiThreadTaskPerformanceTest, method testRandomWiring.

@Test
public void testRandomWiring() throws Exception {
    Random random = new Random(0);
    List<List<Task>> layersOfTasks = new ArrayList<>();
    int layerSize = 100;
    int layersNumber = 4;
    int importsInEachLayer = 100;
    // create the layers and fill each with tasks
    for (int j = 0; j < layersNumber; j++) {
        // add a new layer if needed
        if (layersOfTasks.size() <= j) {
            layersOfTasks.add(new ArrayList<Task>());
        }
        for (int i = 0; i < layerSize; i++) {
            Task t = new DummyTask();
            ((ExecutableTaskBase) t).setType(String.format("%d-%d", j, i));
            layersOfTasks.get(j).add(t);
        }
    }
    // wire tasks in layers
    for (int l = 1; l < layersNumber; l++) {
        for (int j = l - 1; j >= 0; j--) {
            for (int i = 0; i < importsInEachLayer; i++) {
                Task t1 = layersOfTasks.get(l).get(random.nextInt(layerSize));
                Task t2 = layersOfTasks.get(j).get(random.nextInt(layerSize));
                t1.addImport(t2, "DATA");
            }
        }
    }
    // shuffle all tasks
    List<Task> allTasksShuffled = new ArrayList<>();
    for (List<Task> tasks : layersOfTasks) {
        allTasksShuffled.addAll(tasks);
    }
    Collections.shuffle(allTasksShuffled);
    for (Task t : allTasksShuffled) {
        batchTask.addTask(t);
    }
    Lab.getInstance().run(batchTask);
}
Also used: Task (org.dkpro.lab.task.Task), DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask), ExecutableTaskBase (org.dkpro.lab.task.impl.ExecutableTaskBase), Test (org.junit.Test)
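
This test always terminates because imports are only ever added from a layer l to a strictly lower layer j, so the task graph is acyclic by construction. A standalone sketch (hypothetical code, not part of dkpro-lab) that rebuilds the same wiring and verifies that a valid execution order exists:

import java.util.*;

// Hypothetical sketch: rebuild the layered wiring and order it with a simple Kahn-style pass.
public class LayeredWiringSketch {
    public static void main(String[] args) {
        Random random = new Random(0);
        int layerSize = 100, layersNumber = 4, importsInEachLayer = 100;
        int tasks = layerSize * layersNumber; // task id = layer * layerSize + index within layer
        List<Set<Integer>> dependsOn = new ArrayList<>();
        for (int t = 0; t < tasks; t++) {
            dependsOn.add(new HashSet<Integer>());
        }
        for (int l = 1; l < layersNumber; l++) {
            for (int j = l - 1; j >= 0; j--) {
                for (int i = 0; i < importsInEachLayer; i++) {
                    int importer = l * layerSize + random.nextInt(layerSize);
                    int imported = j * layerSize + random.nextInt(layerSize);
                    dependsOn.get(importer).add(imported); // edge always points to a lower layer
                }
            }
        }
        // repeatedly "execute" every task whose dependencies have all been executed
        Set<Integer> done = new HashSet<>();
        while (done.size() < tasks) {
            int before = done.size();
            for (int t = 0; t < tasks; t++) {
                if (!done.contains(t) && done.containsAll(dependsOn.get(t))) {
                    done.add(t);
                }
            }
            if (done.size() == before) {
                throw new IllegalStateException("cycle detected");
            }
        }
        System.out.println("all " + tasks + " tasks could be ordered");
    }
}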

Example 4 with Task

Use of org.dkpro.lab.task.Task in the project dkpro-lab by dkpro.

Class BatchTaskEngine, method executeConfiguration.

/**
 * Execute the subtasks of the given batch task with the given parameter configuration.
 *
 * @param aConfiguration
 *            the batch task whose subtasks are executed.
 * @param aContext
 *            the context of the current batch task.
 * @param aConfig
 *            the current parameter configuration.
 * @param aExecutedSubtasks
 *            already executed subtasks.
 */
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext, Map<String, Object> aConfig, Set<String> aExecutedSubtasks) throws ExecutionException, LifeCycleException {
    if (log.isTraceEnabled()) {
        // Show all subtasks executed so far
        for (String est : aExecutedSubtasks) {
            log.trace("-- Already executed: " + est);
        }
    }
    // Set up initial scope used by sub-batch-tasks using the inherited scope. The scope is
    // extended as the subtasks of this batch are executed with the present configuration.
    // FIXME: That means that sub-batch-tasks in two different configurations cannot see
    // each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
    // maintained *across* configurations, so maybe the scope should also be maintained
    // *across* configurations? - REC 2014-06-15
    Set<String> scope = new HashSet<String>();
    if (aConfiguration.getScope() != null) {
        scope.addAll(aConfiguration.getScope());
    }
    // Configure subtasks
    for (Task task : aConfiguration.getTasks()) {
        aContext.getLifeCycleManager().configure(aContext, task, aConfig);
    }
    Queue<Task> queue = new LinkedList<Task>(aConfiguration.getTasks());
    Set<Task> loopDetection = new HashSet<Task>();
    List<UnresolvedImportException> deferralReasons = new ArrayList<UnresolvedImportException>();
    while (!queue.isEmpty()) {
        Task task = queue.poll();
        try {
            // Check if a subtask execution compatible with the present configuration
            // already exists ...
            TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext, task, aConfig, aExecutedSubtasks);
            if (execution == null) {
                // ... otherwise execute it with the present configuration
                log.info("Executing task [" + task.getType() + "]");
                // set scope here so that tasks added to scope in this loop are considered
                if (task instanceof BatchTask) {
                    ((BatchTask) task).setScope(scope);
                }
                execution = runNewExecution(aContext, task, aConfig, aExecutedSubtasks);
            } else {
                log.debug("Using existing execution [" + execution.getId() + "]");
            }
            // Record new/existing execution
            aExecutedSubtasks.add(execution.getId());
            scope.add(execution.getId());
            loopDetection.clear();
            deferralReasons.clear();
        } catch (UnresolvedImportException e) {
            // Add task back to queue
            log.debug("Deferring execution of task [" + task.getType() + "]: " + e.getMessage());
            queue.add(task);
            // Detect endless loop
            if (loopDetection.contains(task)) {
                StringBuilder details = new StringBuilder();
                for (UnresolvedImportException r : deferralReasons) {
                    details.append("\n -");
                    details.append(r.getMessage());
                }
                // the same task was deferred again without any progress in between;
                // give up and report the collected deferral reasons
                throw new UnresolvedImportException(e, details.toString());
            }
            // Record failed execution
            loopDetection.add(task);
            deferralReasons.add(e);
        }
    }
}
Also used: Task (org.dkpro.lab.task.Task), BatchTask (org.dkpro.lab.task.BatchTask), TaskContextMetadata (org.dkpro.lab.task.TaskContextMetadata), UnresolvedImportException (org.dkpro.lab.storage.UnresolvedImportException), ArrayList (java.util.ArrayList), HashSet (java.util.HashSet), LinkedHashSet (java.util.LinkedHashSet), LinkedList (java.util.LinkedList)
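
A compact way to read the deferral logic above: a task whose imports cannot be resolved is simply put back into the queue, and the engine only gives up when the same task is deferred again without any successful execution in between. A minimal standalone sketch of that pattern, with hypothetical Step/tryRun names that are not part of dkpro-lab:

import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;

// Hypothetical sketch of the defer-and-detect-loop pattern; not dkpro-lab API.
public class DeferralSketch {

    // Stand-in for a task; returns false if a required import is not available yet.
    interface Step {
        String id();
        boolean tryRun(Set<String> done);
    }

    static void runAll(Collection<Step> steps) {
        Set<String> done = new HashSet<>();
        Queue<Step> queue = new LinkedList<>(steps);
        Set<Step> deferredSinceLastProgress = new HashSet<>();
        while (!queue.isEmpty()) {
            Step step = queue.poll();
            if (step.tryRun(done)) {
                done.add(step.id());
                deferredSinceLastProgress.clear(); // progress was made, reset loop detection
            } else {
                // add() returns false if the step was already deferred since the last progress:
                // the queue can never drain, so give up instead of looping forever
                if (!deferredSinceLastProgress.add(step)) {
                    throw new IllegalStateException("Unresolvable imports for " + step.id());
                }
                queue.add(step); // defer: try again after the other tasks have run
            }
        }
    }
}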

Example 5 with Task

Use of org.dkpro.lab.task.Task in the project dkpro-lab by dkpro.

Class PosExampleCrf, method run.

@Test
public void run() throws Exception {
    // Route logging through log4j
    System.setProperty("org.apache.uima.logger.class", "org.apache.uima.util.impl.Log4jLogger_impl");
    clean();
    Task preprocessingTask = new UimaTaskBase() {

        @Discriminator
        String corpusPath;

        {
            setType("Preprocessing");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            return createReaderDescription(NegraExportReader.class, NegraExportReader.PARAM_SOURCE_LOCATION, corpusPath, NegraExportReader.PARAM_LANGUAGE, "de");
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File xmiDir = aContext.getFolder("XMI", AccessMode.READWRITE);
            return createEngineDescription(createEngineDescription(SnowballStemmer.class), createEngineDescription(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, xmiDir.getAbsolutePath(), XmiWriter.PARAM_COMPRESSION, CompressionMethod.GZIP));
        }
    };
    Task featureExtractionTask = new UimaTaskBase() {

        {
            setType("FeatureExtraction");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File xmiDir = aContext.getFolder("XMI", AccessMode.READONLY);
            return createReaderDescription(XmiReader.class, XmiReader.PARAM_SOURCE_LOCATION, xmiDir.getAbsolutePath(), XmiReader.PARAM_PATTERNS, new String[] { "[+]**/*.xmi.gz" });
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File modelDir = aContext.getFolder("MODEL", AccessMode.READWRITE);
            return createEngineDescription(createEngineDescription(ExamplePosAnnotator.class, ExamplePosAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME, DefaultMalletCRFDataWriterFactory.class.getName(), DefaultMalletCRFDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelDir.getAbsolutePath()));
        }
    };
    Task trainingTask = new ExecutableTaskBase() {

        {
            setType("TrainingTask");
        }

        @Override
        public void execute(TaskContext aContext) throws Exception {
            File dir = aContext.getFolder("MODEL", AccessMode.READWRITE);
            JarClassifierBuilder<?> classifierBuilder = JarClassifierBuilder.fromTrainingDirectory(dir);
            classifierBuilder.trainClassifier(dir, new String[0]);
            classifierBuilder.packageClassifier(dir);
        }
    };
    Task analysisTask = new UimaTaskBase() {

        {
            setType("AnalysisTask");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            return createReaderDescription(TextReader.class, TextReader.PARAM_SOURCE_LOCATION, "src/test/resources/text", TextReader.PARAM_PATTERNS, new String[] { "[+]**/*.txt" }, TextReader.PARAM_LANGUAGE, "de");
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File model = new File(aContext.getFolder("MODEL", AccessMode.READONLY), "model.jar");
            File tsv = new File(aContext.getFolder("TSV", AccessMode.READWRITE), "output.tsv");
            return createEngineDescription(createEngineDescription(BreakIteratorSegmenter.class), createEngineDescription(SnowballStemmer.class), createEngineDescription(ExamplePosAnnotator.class, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, model.getAbsolutePath()), createEngineDescription(ImsCwbWriter.class, ImsCwbWriter.PARAM_TARGET_LOCATION, tsv));
        }
    };
    ParameterSpace pSpace = new ParameterSpace(Dimension.create("corpusPath", CORPUS_PATH));
    featureExtractionTask.addImport(preprocessingTask, "XMI");
    trainingTask.addImport(featureExtractionTask, "MODEL");
    analysisTask.addImport(trainingTask, "MODEL");
    DefaultBatchTask batch = new DefaultBatchTask();
    batch.setParameterSpace(pSpace);
    batch.setExecutionPolicy(ExecutionPolicy.USE_EXISTING);
    batch.addTask(preprocessingTask);
    batch.addTask(featureExtractionTask);
    batch.addTask(trainingTask);
    batch.addTask(analysisTask);
    Lab.getInstance().run(batch);
}
Also used: Task (org.dkpro.lab.task.Task), DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask), ExecutableTaskBase (org.dkpro.lab.task.impl.ExecutableTaskBase), UimaTaskBase (org.dkpro.lab.uima.task.impl.UimaTaskBase), ParameterSpace (org.dkpro.lab.task.ParameterSpace), TaskContext (org.dkpro.lab.engine.TaskContext), ExamplePosAnnotator (org.dkpro.lab.ml.example.ExamplePosAnnotator), SnowballStemmer (de.tudarmstadt.ukp.dkpro.core.snowball.SnowballStemmer), BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter), XmiWriter (de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiWriter), ImsCwbWriter (de.tudarmstadt.ukp.dkpro.core.io.imscwb.ImsCwbWriter), File (java.io.File), Test (org.junit.Test)
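
The core wiring idea in this example is that addImport(producer, key) makes the producer's output folder for that key visible to the consumer, which then reads it via getFolder(key, AccessMode.READONLY), exactly the pattern used for "XMI" and "MODEL" above. Below is a minimal sketch reduced to two plain tasks, using only the calls already shown in these examples; the method name, the "DATA" key, the dummy dimension, and the file name are made up for illustration.

@Test
public void runMinimalImportExample() throws Exception {
    Task producerTask = new ExecutableTaskBase() {

        {
            setType("Producer");
        }

        @Override
        public void execute(TaskContext aContext) throws Exception {
            File out = aContext.getFolder("DATA", AccessMode.READWRITE);
            try (PrintWriter writer = new PrintWriter(new File(out, "numbers.txt"), "UTF-8")) {
                writer.println("1 2 3");
            }
        }
    };
    Task consumerTask = new ExecutableTaskBase() {

        {
            setType("Consumer");
        }

        @Override
        public void execute(TaskContext aContext) throws Exception {
            // the imported folder appears under the same key, but read-only
            File in = aContext.getFolder("DATA", AccessMode.READONLY);
            try (Scanner scanner = new Scanner(new File(in, "numbers.txt"), "UTF-8")) {
                System.out.println(scanner.nextLine());
            }
        }
    };
    // make the producer's DATA folder visible to the consumer
    consumerTask.addImport(producerTask, "DATA");
    DefaultBatchTask batch = new DefaultBatchTask();
    batch.setParameterSpace(new ParameterSpace(Dimension.create("dummy", "1")));
    batch.addTask(producerTask);
    batch.addTask(consumerTask);
    Lab.getInstance().run(batch);
}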

Aggregations

Task (org.dkpro.lab.task.Task): 8 usages
DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask): 6 usages
ExecutableTaskBase (org.dkpro.lab.task.impl.ExecutableTaskBase): 6 usages
Test (org.junit.Test): 6 usages
TaskContext (org.dkpro.lab.engine.TaskContext): 5 usages
ParameterSpace (org.dkpro.lab.task.ParameterSpace): 4 usages
File (java.io.File): 3 usages
ImsCwbWriter (de.tudarmstadt.ukp.dkpro.core.io.imscwb.ImsCwbWriter): 2 usages
XmiWriter (de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiWriter): 2 usages
SnowballStemmer (de.tudarmstadt.ukp.dkpro.core.snowball.SnowballStemmer): 2 usages
BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 2 usages
HashSet (java.util.HashSet): 2 usages
LinkedList (java.util.LinkedList): 2 usages
ExamplePosAnnotator (org.dkpro.lab.ml.example.ExamplePosAnnotator): 2 usages
UnresolvedImportException (org.dkpro.lab.storage.UnresolvedImportException): 2 usages
BatchTask (org.dkpro.lab.task.BatchTask): 2 usages
TaskContextMetadata (org.dkpro.lab.task.TaskContextMetadata): 2 usages
UimaTaskBase (org.dkpro.lab.uima.task.impl.UimaTaskBase): 2 usages
ArrayList (java.util.ArrayList): 1 usage
Collection (java.util.Collection): 1 usage