Search in sources :

Example 1 with BatchTask

use of org.dkpro.lab.task.BatchTask in project dkpro-lab by dkpro.

the class MultiThreadBatchTaskEngine method executeConfiguration.

@Override
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext, Map<String, Object> aConfig, Set<String> aExecutedSubtasks) throws ExecutionException, LifeCycleException {
    if (log.isTraceEnabled()) {
        // Show all subtasks executed so far
        for (String est : aExecutedSubtasks) {
            log.trace("-- Already executed: " + est);
        }
    }
    // Set up initial scope used by sub-batch-tasks using the inherited scope. The scope is
    // extended as the subtasks of this batch are executed with the present configuration.
    // FIXME: That means that sub-batch-tasks in two different configurations cannot see
    // each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
    // maintained *across* configurations, so maybe the scope should also be maintained
    // *across* configurations? - REC 2014-06-15
    Set<String> scope = new HashSet<>();
    if (aConfiguration.getScope() != null) {
        scope.addAll(aConfiguration.getScope());
    }
    // Configure subtasks
    for (Task task : aConfiguration.getTasks()) {
        // Now the setup is complete
        aContext.getLifeCycleManager().configure(aContext, task, aConfig);
    }
    Queue<Task> queue = new LinkedList<>(aConfiguration.getTasks());
    // keeps track of the execution threads;
    // TODO MW: do we really need this or can we work with the futures list only?
    Map<Task, ExecutionThread> threads = new HashMap<>();
    // keeps track of submitted Futures and their associated tasks
    Map<Future<?>, Task> futures = new HashMap<Future<?>, Task>();
    // will be instantiated with all exceptions from current loop
    ConcurrentMap<Task, Throwable> exceptionsFromLastLoop = null;
    ConcurrentMap<Task, Throwable> exceptionsFromCurrentLoop = new ConcurrentHashMap<>();
    int outerLoopCounter = 0;
    // main loop
    do {
        outerLoopCounter++;
        threads.clear();
        futures.clear();
        ExecutorService executor = Executors.newFixedThreadPool(maxThreads);
        // set the exceptions from the last loop
        exceptionsFromLastLoop = new ConcurrentHashMap<>(exceptionsFromCurrentLoop);
        // Fix MW: Clear exceptionsFromCurrentLoop; otherwise the loop with run at most twice.
        exceptionsFromCurrentLoop.clear();
        // process all tasks from the queue
        while (!queue.isEmpty()) {
            Task task = queue.poll();
            TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext, task, aConfig, aExecutedSubtasks);
            // does already exist ...
            if (execution == null) {
                // ... otherwise execute it with the present configuration
                log.info("Executing task [" + task.getType() + "]");
                // set scope here so that the inherited scopes are considered
                if (task instanceof BatchTask) {
                    ((BatchTask) task).setScope(scope);
                }
                ExecutionThread thread = new ExecutionThread(aContext, task, aConfig, aExecutedSubtasks);
                threads.put(task, thread);
                futures.put(executor.submit(thread), task);
            } else {
                log.debug("Using existing execution [" + execution.getId() + "]");
                // Record new/existing execution
                aExecutedSubtasks.add(execution.getId());
                scope.add(execution.getId());
            }
        }
        // try and get results from all futures to check for failed executions
        for (Map.Entry<Future<?>, Task> entry : futures.entrySet()) {
            try {
                entry.getKey().get();
            } catch (java.util.concurrent.ExecutionException ex) {
                Task task = entry.getValue();
                // TODO MW: add a retry-counter here to prevent endless loops?
                log.info("Task exec failed for [" + task.getType() + "]");
                // record the failed task, so that it can be re-added to the queue
                exceptionsFromCurrentLoop.put(task, ex);
            } catch (InterruptedException ex) {
                // thread interrupted, exit
                throw new RuntimeException(ex);
            }
        }
        log.debug("Calling shutdown");
        executor.shutdown();
        log.debug("All threads finished");
        // collect the results
        for (Map.Entry<Task, ExecutionThread> entry : threads.entrySet()) {
            Task task = entry.getKey();
            ExecutionThread thread = entry.getValue();
            TaskContextMetadata execution = thread.getTaskContextMetadata();
            // probably failed
            if (execution == null) {
                Throwable exception = exceptionsFromCurrentLoop.get(task);
                if (!(exception instanceof UnresolvedImportException) && !(exception instanceof java.util.concurrent.ExecutionException)) {
                    throw new RuntimeException(exception);
                }
                exceptionsFromCurrentLoop.put(task, exception);
                // re-add to the queue
                queue.add(task);
            } else {
                // Record new/existing execution
                aExecutedSubtasks.add(execution.getId());
                scope.add(execution.getId());
            }
        }
    } while (// finish if the same tasks failed again
    !exceptionsFromCurrentLoop.keySet().equals(exceptionsFromLastLoop.keySet()));
    if (!exceptionsFromCurrentLoop.isEmpty()) {
        // collect all details
        StringBuilder details = new StringBuilder();
        for (Throwable throwable : exceptionsFromCurrentLoop.values()) {
            details.append("\n -");
            details.append(throwable.getMessage());
        }
        // we re-throw the first exception
        Throwable next = exceptionsFromCurrentLoop.values().iterator().next();
        if (next instanceof RuntimeException) {
            throw (RuntimeException) next;
        }
        // otherwise wrap it
        throw new RuntimeException(details.toString(), next);
    }
    log.info("MultiThreadBatchTask completed successfully. Total number of outer loop runs: " + outerLoopCounter);
}
Also used : Task(org.dkpro.lab.task.Task) BatchTask(org.dkpro.lab.task.BatchTask) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TaskContextMetadata(org.dkpro.lab.task.TaskContextMetadata) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashSet(java.util.HashSet) BatchTask(org.dkpro.lab.task.BatchTask) LinkedList(java.util.LinkedList) UnresolvedImportException(org.dkpro.lab.storage.UnresolvedImportException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) Map(java.util.Map)

Example 2 with BatchTask

use of org.dkpro.lab.task.BatchTask in project dkpro-lab by dkpro.

the class BatchTaskEngine method executeConfiguration.

/**
 * Locate the latest task execution compatible with the given task configuration.
 *
 * @param aContext
 *            the context of the current batch task.
 * @param aConfig
 *            the current parameter configuration.
 * @param aExecutedSubtasks
 *            already executed subtasks.
 */
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext, Map<String, Object> aConfig, Set<String> aExecutedSubtasks) throws ExecutionException, LifeCycleException {
    if (log.isTraceEnabled()) {
        // Show all subtasks executed so far
        for (String est : aExecutedSubtasks) {
            log.trace("-- Already executed: " + est);
        }
    }
    // Set up initial scope used by sub-batch-tasks using the inherited scope. The scope is
    // extended as the subtasks of this batch are executed with the present configuration.
    // FIXME: That means that sub-batch-tasks in two different configurations cannot see
    // each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
    // maintained *across* configurations, so maybe the scope should also be maintained
    // *across* configurations? - REC 2014-06-15
    Set<String> scope = new HashSet<String>();
    if (aConfiguration.getScope() != null) {
        scope.addAll(aConfiguration.getScope());
    }
    // Configure subtasks
    for (Task task : aConfiguration.getTasks()) {
        aContext.getLifeCycleManager().configure(aContext, task, aConfig);
    }
    Queue<Task> queue = new LinkedList<Task>(aConfiguration.getTasks());
    Set<Task> loopDetection = new HashSet<Task>();
    List<UnresolvedImportException> deferralReasons = new ArrayList<UnresolvedImportException>();
    while (!queue.isEmpty()) {
        Task task = queue.poll();
        try {
            // Check if a subtask execution compatible with the present configuration has
            // does already exist ...
            TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext, task, aConfig, aExecutedSubtasks);
            if (execution == null) {
                // ... otherwise execute it with the present configuration
                log.info("Executing task [" + task.getType() + "]");
                // set scope here so that tasks added to scope in this loop are considered
                if (task instanceof BatchTask) {
                    ((BatchTask) task).setScope(scope);
                }
                execution = runNewExecution(aContext, task, aConfig, aExecutedSubtasks);
            } else {
                log.debug("Using existing execution [" + execution.getId() + "]");
            }
            // Record new/existing execution
            aExecutedSubtasks.add(execution.getId());
            scope.add(execution.getId());
            loopDetection.clear();
            deferralReasons.clear();
        } catch (UnresolvedImportException e) {
            // Add task back to queue
            log.debug("Deferring execution of task [" + task.getType() + "]: " + e.getMessage());
            queue.add(task);
            // Detect endless loop
            if (loopDetection.contains(task)) {
                StringBuilder details = new StringBuilder();
                for (UnresolvedImportException r : deferralReasons) {
                    details.append("\n -");
                    details.append(r.getMessage());
                }
                // needs to be executed first
                throw new UnresolvedImportException(e, details.toString());
            }
            // Record failed execution
            loopDetection.add(task);
            deferralReasons.add(e);
        }
    }
}
Also used : Task(org.dkpro.lab.task.Task) BatchTask(org.dkpro.lab.task.BatchTask) BatchTask(org.dkpro.lab.task.BatchTask) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) UnresolvedImportException(org.dkpro.lab.storage.UnresolvedImportException) TaskContextMetadata(org.dkpro.lab.task.TaskContextMetadata) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 3 with BatchTask

use of org.dkpro.lab.task.BatchTask in project dkpro-lab by dkpro.

the class BatchTaskEngine method run.

@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    if (!(aConfiguration instanceof BatchTask)) {
        throw new ExecutionException("This engine can only execute [" + BatchTask.class.getName() + "]");
    }
    // Create persistence service for injection into analysis components
    TaskContext ctx = null;
    try {
        ctx = contextFactory.createContext(aConfiguration);
        // Now the setup is complete
        ctx.getLifeCycleManager().initialize(ctx, aConfiguration);
        // Start recording
        ctx.getLifeCycleManager().begin(ctx, aConfiguration);
        try {
            BatchTask cfg = (BatchTask) aConfiguration;
            ParameterSpace parameterSpace = cfg.getParameterSpace();
            // Try to calculate the parameter space size.
            int estimatedSize = 1;
            for (Dimension<?> d : parameterSpace.getDimensions()) {
                if (d instanceof FixedSizeDimension) {
                    FixedSizeDimension fsd = (FixedSizeDimension) d;
                    if (fsd.size() > 0) {
                        estimatedSize *= fsd.size();
                    }
                }
            }
            // A subtask execution may apply to multiple parameter space coordinates!
            Set<String> executedSubtasks = new LinkedHashSet<String>();
            ProgressMeter progress = new ProgressMeter(estimatedSize);
            for (Map<String, Object> config : parameterSpace) {
                if (cfg.getConfiguration() != null) {
                    for (Entry<String, Object> e : cfg.getConfiguration().entrySet()) {
                        if (!config.containsKey(e.getKey())) {
                            config.put(e.getKey(), e.getValue());
                        }
                    }
                }
                log.info("== Running new configuration [" + ctx.getId() + "] ==");
                List<String> keys = new ArrayList<String>(config.keySet());
                for (String key : keys) {
                    log.info("[" + key + "]: [" + StringUtils.abbreviateMiddle(Util.toString(config.get(key)), "…", 150) + "]");
                }
                executeConfiguration(cfg, ctx, config, executedSubtasks);
                progress.next();
                log.info("Completed configuration " + progress);
            }
            // Set the subtask property and persist again, so the property is available to
            // reports
            cfg.setAttribute(SUBTASKS_KEY, executedSubtasks.toString());
            cfg.persist(ctx);
        } catch (LifeCycleException e) {
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw e;
        } catch (UnresolvedImportException e) {
            // HACK - pass unresolved import exceptions up to the outer batch task
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw e;
        } catch (Throwable e) {
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw new ExecutionException(e);
        }
        // End recording (here the reports will nbe done)
        ctx.getLifeCycleManager().complete(ctx, aConfiguration);
        return ctx.getId();
    } finally {
        if (ctx != null) {
            ctx.getLifeCycleManager().destroy(ctx, aConfiguration);
        }
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TaskContext(org.dkpro.lab.engine.TaskContext) BatchTask(org.dkpro.lab.task.BatchTask) ArrayList(java.util.ArrayList) LifeCycleException(org.dkpro.lab.engine.LifeCycleException) FixedSizeDimension(org.dkpro.lab.task.FixedSizeDimension) UnresolvedImportException(org.dkpro.lab.storage.UnresolvedImportException) ParameterSpace(org.dkpro.lab.task.ParameterSpace) ProgressMeter(org.dkpro.lab.ProgressMeter) ExecutionException(org.dkpro.lab.engine.ExecutionException)

Aggregations

UnresolvedImportException (org.dkpro.lab.storage.UnresolvedImportException)3 BatchTask (org.dkpro.lab.task.BatchTask)3 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 LinkedHashSet (java.util.LinkedHashSet)2 LinkedList (java.util.LinkedList)2 Task (org.dkpro.lab.task.Task)2 TaskContextMetadata (org.dkpro.lab.task.TaskContextMetadata)2 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 ConcurrentMap (java.util.concurrent.ConcurrentMap)1 ExecutorService (java.util.concurrent.ExecutorService)1 Future (java.util.concurrent.Future)1 ProgressMeter (org.dkpro.lab.ProgressMeter)1 ExecutionException (org.dkpro.lab.engine.ExecutionException)1 LifeCycleException (org.dkpro.lab.engine.LifeCycleException)1 TaskContext (org.dkpro.lab.engine.TaskContext)1 FixedSizeDimension (org.dkpro.lab.task.FixedSizeDimension)1 ParameterSpace (org.dkpro.lab.task.ParameterSpace)1