use of org.dkpro.lab.task.BatchTask in project dkpro-lab by dkpro.
the class MultiThreadBatchTaskEngine method executeConfiguration.
/**
 * Executes the subtasks of the given batch task for one parameter configuration, running
 * them concurrently on a fixed-size thread pool of {@code maxThreads} threads.
 * <p>
 * Every subtask is first configured through the life-cycle manager. The method then works in
 * rounds: all queued tasks that have no existing compatible execution are submitted to a fresh
 * executor; after all futures have been awaited, tasks whose execution failed are put back
 * into the queue for the next round. The rounds stop as soon as the set of failed tasks is
 * identical to the set of failed tasks of the previous round (which includes the case that
 * nothing failed). If failures remain at that point, a {@link RuntimeException} is thrown.
 *
 * @param aConfiguration
 *            the batch task whose subtasks are executed.
 * @param aContext
 *            the context of the current batch task.
 * @param aConfig
 *            the current parameter configuration.
 * @param aExecutedSubtasks
 *            already executed subtasks; the ids of executions used or created here are added.
 * @throws RuntimeException
 *             if the calling thread is interrupted while waiting for a task, if a task ends
 *             without execution metadata and without a recorded failure, or if tasks are
 *             still failing when the rounds stop.
 */
@Override
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext, Map<String, Object> aConfig, Set<String> aExecutedSubtasks) throws ExecutionException, LifeCycleException {
    if (log.isTraceEnabled()) {
        // Show all subtasks executed so far
        for (String est : aExecutedSubtasks) {
            log.trace("-- Already executed: " + est);
        }
    }

    // Set up initial scope used by sub-batch-tasks using the inherited scope. The scope is
    // extended as the subtasks of this batch are executed with the present configuration.
    // FIXME: That means that sub-batch-tasks in two different configurations cannot see
    // each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
    // maintained *across* configurations, so maybe the scope should also be maintained
    // *across* configurations? - REC 2014-06-15
    Set<String> scope = new HashSet<>();
    if (aConfiguration.getScope() != null) {
        scope.addAll(aConfiguration.getScope());
    }

    // Configure subtasks
    for (Task task : aConfiguration.getTasks()) {
        aContext.getLifeCycleManager().configure(aContext, task, aConfig);
    }

    Queue<Task> queue = new LinkedList<>(aConfiguration.getTasks());

    // keeps track of the execution threads;
    // TODO MW: do we really need this or can we work with the futures list only?
    Map<Task, ExecutionThread> threads = new HashMap<>();

    // keeps track of submitted Futures and their associated tasks
    Map<Future<?>, Task> futures = new HashMap<>();

    // failures of the previous round / of the round currently being processed
    ConcurrentMap<Task, Throwable> exceptionsFromLastLoop = null;
    ConcurrentMap<Task, Throwable> exceptionsFromCurrentLoop = new ConcurrentHashMap<>();

    int outerLoopCounter = 0;

    // main loop
    do {
        outerLoopCounter++;

        threads.clear();
        futures.clear();

        // remember the failures of the previous round, then start the current round with an
        // empty failure map; otherwise the loop would run at most twice
        exceptionsFromLastLoop = new ConcurrentHashMap<>(exceptionsFromCurrentLoop);
        exceptionsFromCurrentLoop.clear();

        ExecutorService executor = Executors.newFixedThreadPool(maxThreads);
        try {
            // process all tasks from the queue
            while (!queue.isEmpty()) {
                Task task = queue.poll();

                TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext, task, aConfig, aExecutedSubtasks);

                if (execution == null) {
                    // no compatible execution exists: execute with the present configuration
                    log.info("Executing task [" + task.getType() + "]");

                    // set scope here so that the inherited scopes are considered
                    if (task instanceof BatchTask) {
                        ((BatchTask) task).setScope(scope);
                    }

                    ExecutionThread thread = new ExecutionThread(aContext, task, aConfig, aExecutedSubtasks);
                    threads.put(task, thread);
                    futures.put(executor.submit(thread), task);
                } else {
                    log.debug("Using existing execution [" + execution.getId() + "]");

                    // Record existing execution
                    aExecutedSubtasks.add(execution.getId());
                    scope.add(execution.getId());
                }
            }

            // try and get results from all futures to check for failed executions
            for (Map.Entry<Future<?>, Task> entry : futures.entrySet()) {
                try {
                    entry.getKey().get();
                } catch (java.util.concurrent.ExecutionException ex) {
                    Task task = entry.getValue();
                    // TODO MW: add a retry-counter here to prevent endless loops?
                    log.info("Task exec failed for [" + task.getType() + "]");

                    // record the failed task, so that it can be re-added to the queue
                    exceptionsFromCurrentLoop.put(task, ex);
                } catch (InterruptedException ex) {
                    // restore the interrupt flag so callers further up can still observe the
                    // interruption, then exit
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(ex);
                }
            }

            log.debug("Calling shutdown");
        } finally {
            // Always release the pool, also when this round is left through an exception;
            // otherwise the worker threads of the fixed pool would never terminate.
            executor.shutdown();
        }
        log.debug("All threads finished");

        // collect the results
        for (Map.Entry<Task, ExecutionThread> entry : threads.entrySet()) {
            Task task = entry.getKey();
            ExecutionThread thread = entry.getValue();
            TaskContextMetadata execution = thread.getTaskContextMetadata();

            if (execution == null) {
                // no metadata available: probably failed
                Throwable exception = exceptionsFromCurrentLoop.get(task);
                if (!(exception instanceof UnresolvedImportException) && !(exception instanceof java.util.concurrent.ExecutionException)) {
                    // Only reachable when nothing usable was recorded for the task, so give
                    // the error a message instead of an empty wrapper.
                    throw new RuntimeException("Task [" + task.getType() + "] finished without execution metadata and without a recorded failure", exception);
                }

                // the failure is already recorded in exceptionsFromCurrentLoop;
                // re-add the task to the queue for the next round
                queue.add(task);
            } else {
                // Record new execution
                aExecutedSubtasks.add(execution.getId());
                scope.add(execution.getId());
            }
        }
        // finish if the same tasks failed again
    } while (!exceptionsFromCurrentLoop.keySet().equals(exceptionsFromLastLoop.keySet()));

    if (!exceptionsFromCurrentLoop.isEmpty()) {
        // collect all details
        StringBuilder details = new StringBuilder();
        for (Throwable throwable : exceptionsFromCurrentLoop.values()) {
            details.append("\n -");
            details.append(throwable.getMessage());
        }

        // we re-throw the first exception
        Throwable next = exceptionsFromCurrentLoop.values().iterator().next();
        if (next instanceof RuntimeException) {
            throw (RuntimeException) next;
        }

        // otherwise wrap it
        throw new RuntimeException(details.toString(), next);
    }

    log.info("MultiThreadBatchTask completed successfully. Total number of outer loop runs: " + outerLoopCounter);
}
use of org.dkpro.lab.task.BatchTask in project dkpro-lab by dkpro.
the class BatchTaskEngine method executeConfiguration.
/**
 * Runs every subtask of the given batch task under a single parameter configuration.
 * <p>
 * All subtasks are configured through the life-cycle manager and then processed from a queue.
 * For each subtask an existing compatible execution is looked up with
 * {@code getExistingExecution}; if there is none, the subtask is run with
 * {@code runNewExecution}. A subtask that raises an {@code UnresolvedImportException} is
 * appended to the queue again. If the same subtask is deferred a second time without any
 * subtask having been recorded in between, the exception is rethrown together with the
 * messages of the deferrals collected so far.
 *
 * @param aConfiguration
 *            the batch task whose subtasks are executed.
 * @param aContext
 *            the context of the current batch task.
 * @param aConfig
 *            the current parameter configuration.
 * @param aExecutedSubtasks
 *            already executed subtasks; the ids of executions used or created here are added.
 */
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext, Map<String, Object> aConfig, Set<String> aExecutedSubtasks) throws ExecutionException, LifeCycleException {
    if (log.isTraceEnabled()) {
        // Dump the ids of everything executed before this configuration
        for (String executedId : aExecutedSubtasks) {
            log.trace("-- Already executed: " + executedId);
        }
    }

    // The scope handed to nested batch tasks starts out as the inherited scope and grows
    // with every subtask recorded below.
    // FIXME: That means that sub-batch-tasks in two different configurations cannot see
    // each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
    // maintained *across* configurations, so maybe the scope should also be maintained
    // *across* configurations? - REC 2014-06-15
    Set<String> scope = new HashSet<>();
    if (aConfiguration.getScope() != null) {
        scope.addAll(aConfiguration.getScope());
    }

    // Apply the configuration to all subtasks before any of them runs
    for (Task subtask : aConfiguration.getTasks()) {
        aContext.getLifeCycleManager().configure(aContext, subtask, aConfig);
    }

    Queue<Task> pending = new LinkedList<>(aConfiguration.getTasks());
    // tasks deferred since the last successfully recorded execution
    Set<Task> deferredTasks = new HashSet<>();
    // the reasons for those deferrals, in order of occurrence
    List<UnresolvedImportException> reasons = new ArrayList<>();

    while (!pending.isEmpty()) {
        Task current = pending.poll();
        try {
            // Reuse an execution compatible with the present configuration if one exists
            TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext, current, aConfig, aExecutedSubtasks);
            if (execution != null) {
                log.debug("Using existing execution [" + execution.getId() + "]");
            } else {
                // ... otherwise execute the task with the present configuration
                log.info("Executing task [" + current.getType() + "]");

                // set the scope only now so that ids added earlier in this loop are included
                if (current instanceof BatchTask) {
                    ((BatchTask) current).setScope(scope);
                }

                execution = runNewExecution(aContext, current, aConfig, aExecutedSubtasks);
            }

            // Record new/existing execution
            aExecutedSubtasks.add(execution.getId());
            scope.add(execution.getId());

            // progress was made, so earlier deferrals get another chance
            deferredTasks.clear();
            reasons.clear();
        } catch (UnresolvedImportException e) {
            // Put the task back at the end of the queue
            log.debug("Deferring execution of task [" + current.getType() + "]: " + e.getMessage());
            pending.add(current);

            // add() returns false when the task was already deferred without any progress
            // in between, i.e. we are going in circles
            if (!deferredTasks.add(current)) {
                StringBuilder details = new StringBuilder();
                for (UnresolvedImportException reason : reasons) {
                    details.append("\n -").append(reason.getMessage());
                }
                throw new UnresolvedImportException(e, details.toString());
            }

            // first deferral of this task: remember why
            reasons.add(e);
        }
    }
}
use of org.dkpro.lab.task.BatchTask in project dkpro-lab by dkpro.
the class BatchTaskEngine method run.
/**
 * Runs the given batch task over its complete parameter space.
 * <p>
 * A context is created for the task, the life-cycle manager is asked to initialize and begin
 * it, and {@code executeConfiguration} is called once per parameter configuration. Entries of
 * the task's own configuration are copied into each parameter configuration unless the key is
 * already present. Afterwards the collected subtask ids are stored under
 * {@code SUBTASKS_KEY}, the task is persisted and the life cycle is completed. The context is
 * destroyed in any case once it has been created.
 *
 * @param aConfiguration
 *            the task to run; must be a {@code BatchTask}.
 * @return the id of the context the batch task was run in.
 * @throws ExecutionException
 *             if the task is not a {@code BatchTask}, or wrapping any failure other than a
 *             {@code LifeCycleException} or {@code UnresolvedImportException} raised while
 *             the configurations are executed.
 * @throws LifeCycleException
 *             passed on unchanged after the failure has been reported to the life-cycle
 *             manager.
 */
@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    if (!(aConfiguration instanceof BatchTask)) {
        throw new ExecutionException("This engine can only execute [" + BatchTask.class.getName() + "]");
    }

    TaskContext context = null;
    try {
        context = contextFactory.createContext(aConfiguration);

        // Set up the task, then start recording
        context.getLifeCycleManager().initialize(context, aConfiguration);
        context.getLifeCycleManager().begin(context, aConfiguration);

        try {
            BatchTask batch = (BatchTask) aConfiguration;
            ParameterSpace space = batch.getParameterSpace();

            // Estimate the number of configurations from the dimensions of known size
            int expectedConfigurations = 1;
            for (Dimension<?> dimension : space.getDimensions()) {
                if (!(dimension instanceof FixedSizeDimension)) {
                    continue;
                }
                FixedSizeDimension fixed = (FixedSizeDimension) dimension;
                if (fixed.size() > 0) {
                    expectedConfigurations *= fixed.size();
                }
            }

            // A subtask execution may apply to multiple parameter space coordinates!
            Set<String> subtaskIds = new LinkedHashSet<String>();
            ProgressMeter meter = new ProgressMeter(expectedConfigurations);

            for (Map<String, Object> config : space) {
                // Fill in values from the task's own configuration for keys not set yet
                if (batch.getConfiguration() != null) {
                    for (Entry<String, Object> fallback : batch.getConfiguration().entrySet()) {
                        if (config.containsKey(fallback.getKey())) {
                            continue;
                        }
                        config.put(fallback.getKey(), fallback.getValue());
                    }
                }

                log.info("== Running new configuration [" + context.getId() + "] ==");
                List<String> parameterNames = new ArrayList<String>(config.keySet());
                for (String name : parameterNames) {
                    String rendered = StringUtils.abbreviateMiddle(Util.toString(config.get(name)), "…", 150);
                    log.info("[" + name + "]: [" + rendered + "]");
                }

                executeConfiguration(batch, context, config, subtaskIds);

                meter.next();
                log.info("Completed configuration " + meter);
            }

            // Store the subtask ids and persist again so that reports can read the attribute
            batch.setAttribute(SUBTASKS_KEY, subtaskIds.toString());
            batch.persist(context);
        } catch (LifeCycleException e) {
            context.getLifeCycleManager().fail(context, aConfiguration, e);
            throw e;
        } catch (UnresolvedImportException e) {
            // HACK - pass unresolved import exceptions up to the outer batch task
            context.getLifeCycleManager().fail(context, aConfiguration, e);
            throw e;
        } catch (Throwable e) {
            context.getLifeCycleManager().fail(context, aConfiguration, e);
            throw new ExecutionException(e);
        }

        // End recording (this is where the reports are run)
        context.getLifeCycleManager().complete(context, aConfiguration);

        return context.getId();
    } finally {
        if (context != null) {
            context.getLifeCycleManager().destroy(context, aConfiguration);
        }
    }
}
Aggregations