Use of org.dkpro.lab.task.Task in project dkpro-lab by dkpro.
The class MultiThreadBatchTaskEngine, method executeConfiguration.
@Override
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext,
        Map<String, Object> aConfig, Set<String> aExecutedSubtasks)
    throws ExecutionException, LifeCycleException
{
    if (log.isTraceEnabled()) {
        // Show all subtasks executed so far
        for (String est : aExecutedSubtasks) {
            log.trace("-- Already executed: " + est);
        }
    }

    // Set up initial scope used by sub-batch-tasks using the inherited scope. The scope is
    // extended as the subtasks of this batch are executed with the present configuration.
    // FIXME: That means that sub-batch-tasks in two different configurations cannot see
    // each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
    // maintained *across* configurations, so maybe the scope should also be maintained
    // *across* configurations? - REC 2014-06-15
    Set<String> scope = new HashSet<>();
    if (aConfiguration.getScope() != null) {
        scope.addAll(aConfiguration.getScope());
    }

    // Configure subtasks
    for (Task task : aConfiguration.getTasks()) {
        // Now the setup is complete
        aContext.getLifeCycleManager().configure(aContext, task, aConfig);
    }

    Queue<Task> queue = new LinkedList<>(aConfiguration.getTasks());

    // Keeps track of the execution threads;
    // TODO MW: do we really need this or can we work with the futures list only?
    Map<Task, ExecutionThread> threads = new HashMap<>();

    // Keeps track of submitted Futures and their associated tasks
    Map<Future<?>, Task> futures = new HashMap<>();

    // Will be populated with all exceptions from the current loop
    ConcurrentMap<Task, Throwable> exceptionsFromLastLoop = null;
    ConcurrentMap<Task, Throwable> exceptionsFromCurrentLoop = new ConcurrentHashMap<>();

    int outerLoopCounter = 0;

    // Main loop
    do {
        outerLoopCounter++;

        threads.clear();
        futures.clear();
        ExecutorService executor = Executors.newFixedThreadPool(maxThreads);

        // Remember the exceptions from the last loop
        exceptionsFromLastLoop = new ConcurrentHashMap<>(exceptionsFromCurrentLoop);

        // Fix MW: Clear exceptionsFromCurrentLoop; otherwise the loop will run at most twice.
        exceptionsFromCurrentLoop.clear();

        // Process all tasks from the queue
        while (!queue.isEmpty()) {
            Task task = queue.poll();

            // Check if a subtask execution compatible with the present configuration
            // does already exist ...
            TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext,
                    task, aConfig, aExecutedSubtasks);
            if (execution == null) {
                // ... otherwise execute it with the present configuration
                log.info("Executing task [" + task.getType() + "]");

                // Set scope here so that the inherited scopes are considered
                if (task instanceof BatchTask) {
                    ((BatchTask) task).setScope(scope);
                }

                ExecutionThread thread = new ExecutionThread(aContext, task, aConfig,
                        aExecutedSubtasks);
                threads.put(task, thread);
                futures.put(executor.submit(thread), task);
            }
            else {
                log.debug("Using existing execution [" + execution.getId() + "]");

                // Record new/existing execution
                aExecutedSubtasks.add(execution.getId());
                scope.add(execution.getId());
            }
        }

        // Try to get the results from all futures to check for failed executions
        for (Map.Entry<Future<?>, Task> entry : futures.entrySet()) {
            try {
                entry.getKey().get();
            }
            catch (java.util.concurrent.ExecutionException ex) {
                Task task = entry.getValue();
                // TODO MW: add a retry-counter here to prevent endless loops?
                log.info("Task exec failed for [" + task.getType() + "]");
                // Record the failed task, so that it can be re-added to the queue
                exceptionsFromCurrentLoop.put(task, ex);
            }
            catch (InterruptedException ex) {
                // Thread interrupted, exit
                throw new RuntimeException(ex);
            }
        }

        log.debug("Calling shutdown");
        executor.shutdown();
        log.debug("All threads finished");

        // Collect the results
        for (Map.Entry<Task, ExecutionThread> entry : threads.entrySet()) {
            Task task = entry.getKey();
            ExecutionThread thread = entry.getValue();
            TaskContextMetadata execution = thread.getTaskContextMetadata();

            // Probably failed
            if (execution == null) {
                Throwable exception = exceptionsFromCurrentLoop.get(task);
                if (!(exception instanceof UnresolvedImportException)
                        && !(exception instanceof java.util.concurrent.ExecutionException)) {
                    throw new RuntimeException(exception);
                }
                exceptionsFromCurrentLoop.put(task, exception);

                // Re-add to the queue
                queue.add(task);
            }
            else {
                // Record new/existing execution
                aExecutedSubtasks.add(execution.getId());
                scope.add(execution.getId());
            }
        }
    }
    // Finish if exactly the same tasks failed again, i.e. no progress was made
    while (!exceptionsFromCurrentLoop.keySet().equals(exceptionsFromLastLoop.keySet()));

    if (!exceptionsFromCurrentLoop.isEmpty()) {
        // Collect all details
        StringBuilder details = new StringBuilder();
        for (Throwable throwable : exceptionsFromCurrentLoop.values()) {
            details.append("\n -");
            details.append(throwable.getMessage());
        }

        // We re-throw the first exception
        Throwable next = exceptionsFromCurrentLoop.values().iterator().next();
        if (next instanceof RuntimeException) {
            throw (RuntimeException) next;
        }

        // Otherwise wrap it
        throw new RuntimeException(details.toString(), next);
    }
    log.info("MultiThreadBatchTask completed successfully. Total number of outer loop runs: "
            + outerLoopCounter);
}
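The control flow above is easier to see in isolation: submit everything, collect failures from the futures, re-queue the failed tasks, and stop once a whole round makes no progress (the same set of tasks fails twice in a row). The following self-contained sketch reproduces that retry-until-stable pattern with hypothetical names (SimpleTask, executeAll); it illustrates the loop structure only and is not dkpro-lab API:

import java.util.*;
import java.util.concurrent.*;

public class RetryUntilStable {
    // Hypothetical stand-in for a unit of work; not a dkpro-lab type.
    interface SimpleTask {
        void run() throws Exception;
    }

    static void executeAll(List<SimpleTask> tasks, int maxThreads) {
        Queue<SimpleTask> queue = new LinkedList<>(tasks);
        Map<SimpleTask, Throwable> lastFailures;
        Map<SimpleTask, Throwable> failures = new ConcurrentHashMap<>();
        do {
            ExecutorService executor = Executors.newFixedThreadPool(maxThreads);
            // Remember last round's failures, then start this round with a clean slate
            lastFailures = new HashMap<>(failures);
            failures.clear();
            Map<Future<?>, SimpleTask> futures = new HashMap<>();
            while (!queue.isEmpty()) {
                SimpleTask task = queue.poll();
                futures.put(executor.submit(() -> { task.run(); return null; }), task);
            }
            for (Map.Entry<Future<?>, SimpleTask> e : futures.entrySet()) {
                try {
                    e.getKey().get();
                }
                catch (ExecutionException ex) {
                    failures.put(e.getValue(), ex);
                    queue.add(e.getValue()); // failed: retry in the next round
                }
                catch (InterruptedException ex) {
                    throw new RuntimeException(ex);
                }
            }
            executor.shutdown();
            // Stop when no progress was made, i.e. exactly the same tasks failed again
        } while (!failures.keySet().equals(lastFailures.keySet()));
        if (!failures.isEmpty()) {
            throw new RuntimeException("Tasks failed: " + failures.keySet(),
                    failures.values().iterator().next());
        }
    }
}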
Use of org.dkpro.lab.task.Task in project dkpro-lab by dkpro.
The class FoldDimensionBundleTest, method testFoldInjection.
@Test
public void testFoldInjection() throws Exception {
    File repo = new File("target/repository/" + getClass().getSimpleName() + "/"
            + name.getMethodName());
    FileUtils.deleteDirectory(repo);
    repo.mkdirs();
    ((FileSystemStorageService) Lab.getInstance().getStorageService()).setStorageRoot(repo);

    Dimension<String> baseData = Dimension.create("base", "1", "2", "3", "4", "5", "6", "7",
            "8", "9", "10");
    FoldDimensionBundle<String> foldBundle = new FoldDimensionBundle<String>("fold", baseData, 3);

    String expected = "0 - [1, 4, 7, 10] [2, 5, 8, 3, 6, 9]\n"
            + "1 - [2, 5, 8] [1, 4, 7, 10, 3, 6, 9]\n"
            + "2 - [3, 6, 9] [1, 4, 7, 10, 2, 5, 8]\n";

    ParameterSpace pSpace = new ParameterSpace(foldBundle);
    final StringBuilder actual = new StringBuilder();

    Task testTask = new ExecutableTaskBase() {
        int n = 0;

        @Discriminator
        Collection<String> fold_validation;

        @Discriminator
        Collection<String> fold_training;

        @Override
        public void execute(TaskContext aContext) throws Exception {
            System.out.printf("%d training : %s\n", n, fold_training);
            System.out.printf("%d validation: %s\n", n, fold_validation);
            actual.append(String.format("%d - %s %s\n", n, fold_validation, fold_training));
            n++;
        }
    };

    DefaultBatchTask batchTask = new DefaultBatchTask();
    batchTask.setParameterSpace(pSpace);
    batchTask.addTask(testTask);

    Lab.getInstance().run(batchTask);

    assertEquals(3, pSpace.getStepCount());
    assertEquals(expected, actual.toString());
}
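The expected string encodes how the base data is distributed over the folds: element i lands in fold i % 3 (round-robin), the validation set of step k is fold k, and the training set is the remaining folds in order. A small standalone sketch, independent of the dkpro-lab classes, that reproduces exactly that string:

import java.util.*;

public class FoldSplitSketch {
    public static void main(String[] args) {
        List<String> base = Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10");
        int numFolds = 3;

        // Deal the elements into folds round-robin: element i -> fold i % numFolds
        List<List<String>> folds = new ArrayList<>();
        for (int k = 0; k < numFolds; k++) {
            folds.add(new ArrayList<>());
        }
        for (int i = 0; i < base.size(); i++) {
            folds.get(i % numFolds).add(base.get(i));
        }

        // Step k validates on fold k and trains on all remaining folds in order
        for (int k = 0; k < numFolds; k++) {
            List<String> training = new ArrayList<>();
            for (int j = 0; j < numFolds; j++) {
                if (j != k) {
                    training.addAll(folds.get(j));
                }
            }
            System.out.printf("%d - %s %s%n", k, folds.get(k), training);
        }
    }
}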
Use of org.dkpro.lab.task.Task in project dkpro-lab by dkpro.
The class MultiThreadTaskPerformanceTest, method testRandomWiring.
@Test
public void testRandomWiring() throws Exception {
    Random random = new Random(0);
    List<List<Task>> layersOfTasks = new ArrayList<>();
    int layerSize = 100;
    int layersNumber = 4;
    int importsInEachLayer = 100;

    // create the layers (layersNumber of them) and fill each with tasks
    for (int j = 0; j < layersNumber; j++) {
        // add a new layer if needed
        if (layersOfTasks.size() <= j) {
            layersOfTasks.add(new ArrayList<Task>());
        }
        for (int i = 0; i < layerSize; i++) {
            Task t = new DummyTask();
            ((ExecutableTaskBase) t).setType(String.format("%d-%d", j, i));
            layersOfTasks.get(j).add(t);
        }
    }

    // wire tasks across layers: each layer randomly imports from every earlier layer
    for (int l = 1; l < layersNumber; l++) {
        for (int j = l - 1; j >= 0; j--) {
            for (int i = 0; i < importsInEachLayer; i++) {
                Task t1 = layersOfTasks.get(l).get(random.nextInt(layerSize));
                Task t2 = layersOfTasks.get(j).get(random.nextInt(layerSize));
                t1.addImport(t2, "DATA");
            }
        }
    }

    // shuffle all tasks
    List<Task> allTasksShuffled = new ArrayList<>();
    for (List<Task> tasks : layersOfTasks) {
        allTasksShuffled.addAll(tasks);
    }
    Collections.shuffle(allTasksShuffled);
    for (Task t : allTasksShuffled) {
        batchTask.addTask(t);
    }

    Lab.getInstance().run(batchTask);
}
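The test relies on a DummyTask class and a batchTask field that are defined elsewhere in the test class and are not shown here. A minimal stand-in consistent with the calls above would be a no-op ExecutableTaskBase; this is a sketch, and the project's actual DummyTask may differ:

public static class DummyTask extends ExecutableTaskBase {
    @Override
    public void execute(TaskContext aContext) throws Exception {
        // Intentionally empty: the test only exercises import wiring and the
        // scheduler's ability to order the randomly wired tasks.
    }
}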
Use of org.dkpro.lab.task.Task in project dkpro-lab by dkpro.
The class BatchTaskEngine, method executeConfiguration.
/**
 * Execute all subtasks of the given batch task with the present parameter configuration,
 * re-using compatible existing executions where possible.
 *
 * @param aConfiguration
 *            the batch task whose subtasks are to be executed.
 * @param aContext
 *            the context of the current batch task.
 * @param aConfig
 *            the current parameter configuration.
 * @param aExecutedSubtasks
 *            already executed subtasks.
 */
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext,
        Map<String, Object> aConfig, Set<String> aExecutedSubtasks)
    throws ExecutionException, LifeCycleException
{
    if (log.isTraceEnabled()) {
        // Show all subtasks executed so far
        for (String est : aExecutedSubtasks) {
            log.trace("-- Already executed: " + est);
        }
    }

    // Set up initial scope used by sub-batch-tasks using the inherited scope. The scope is
    // extended as the subtasks of this batch are executed with the present configuration.
    // FIXME: That means that sub-batch-tasks in two different configurations cannot see
    // each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
    // maintained *across* configurations, so maybe the scope should also be maintained
    // *across* configurations? - REC 2014-06-15
    Set<String> scope = new HashSet<String>();
    if (aConfiguration.getScope() != null) {
        scope.addAll(aConfiguration.getScope());
    }

    // Configure subtasks
    for (Task task : aConfiguration.getTasks()) {
        aContext.getLifeCycleManager().configure(aContext, task, aConfig);
    }

    Queue<Task> queue = new LinkedList<Task>(aConfiguration.getTasks());
    Set<Task> loopDetection = new HashSet<Task>();

    List<UnresolvedImportException> deferralReasons = new ArrayList<UnresolvedImportException>();
    while (!queue.isEmpty()) {
        Task task = queue.poll();

        try {
            // Check if a subtask execution compatible with the present configuration
            // does already exist ...
            TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext,
                    task, aConfig, aExecutedSubtasks);
            if (execution == null) {
                // ... otherwise execute it with the present configuration
                log.info("Executing task [" + task.getType() + "]");

                // Set scope here so that tasks added to scope in this loop are considered
                if (task instanceof BatchTask) {
                    ((BatchTask) task).setScope(scope);
                }

                execution = runNewExecution(aContext, task, aConfig, aExecutedSubtasks);
            }
            else {
                log.debug("Using existing execution [" + execution.getId() + "]");
            }

            // Record new/existing execution
            aExecutedSubtasks.add(execution.getId());
            scope.add(execution.getId());
            loopDetection.clear();
            deferralReasons.clear();
        }
        catch (UnresolvedImportException e) {
            // Add task back to queue
            log.debug("Deferring execution of task [" + task.getType() + "]: " + e.getMessage());
            queue.add(task);

            // Detect endless loop: the task was already deferred once without any
            // successful execution in between
            if (loopDetection.contains(task)) {
                StringBuilder details = new StringBuilder();
                for (UnresolvedImportException r : deferralReasons) {
                    details.append("\n -");
                    details.append(r.getMessage());
                }
                // Give up: some import depends on a task that is never executed first
                throw new UnresolvedImportException(e, details.toString());
            }

            // Record failed execution
            loopDetection.add(task);
            deferralReasons.add(e);
        }
    }
}
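In contrast to the multi-threaded engine above, this single-threaded variant defers a task the moment one of its imports cannot be resolved, and detects cycles by checking whether a task comes around a second time without any successful execution in between. A self-contained sketch of that deferral pattern, using hypothetical names (Job, DeferredException, tryRun) rather than dkpro-lab types:

import java.util.*;

public class DeferralSketch {
    // Hypothetical stand-ins; not dkpro-lab types.
    interface Job {
        String name();
    }

    static class DeferredException extends Exception {
        DeferredException(String message) {
            super(message);
        }
    }

    static void runAll(Queue<Job> queue) throws DeferredException {
        Set<Job> loopDetection = new HashSet<>();
        List<DeferredException> deferralReasons = new ArrayList<>();
        while (!queue.isEmpty()) {
            Job job = queue.poll();
            try {
                tryRun(job);
                // Progress was made, so reset the cycle detector
                loopDetection.clear();
                deferralReasons.clear();
            }
            catch (DeferredException e) {
                queue.add(job);
                // Same job deferred again with no progress in between: we are in a cycle
                if (loopDetection.contains(job)) {
                    throw new DeferredException("Unresolvable dependencies: " + deferralReasons);
                }
                loopDetection.add(job);
                deferralReasons.add(e);
            }
        }
    }

    static void tryRun(Job job) throws DeferredException {
        // Placeholder: would execute the job, or throw DeferredException
        // if one of its inputs has not been produced yet.
    }
}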
Use of org.dkpro.lab.task.Task in project dkpro-lab by dkpro.
The class PosExampleCrf, method run.
@Test
public void run() throws Exception {
    // Route logging through log4j
    System.setProperty("org.apache.uima.logger.class",
            "org.apache.uima.util.impl.Log4jLogger_impl");

    clean();

    Task preprocessingTask = new UimaTaskBase() {
        @Discriminator
        String corpusPath;

        {
            setType("Preprocessing");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext)
            throws ResourceInitializationException, IOException
        {
            return createReader(NegraExportReader.class,
                    NegraExportReader.PARAM_SOURCE_LOCATION, corpusPath,
                    NegraExportReader.PARAM_LANGUAGE, "de");
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext)
            throws ResourceInitializationException, IOException
        {
            File xmiDir = aContext.getFolder("XMI", AccessMode.READWRITE);
            return createEngine(
                    createEngineDescription(SnowballStemmer.class),
                    createEngineDescription(XmiWriter.class,
                            XmiWriter.PARAM_TARGET_LOCATION, xmiDir.getAbsolutePath(),
                            XmiWriter.PARAM_COMPRESSION, CompressionMethod.GZIP));
        }
    };

    Task featureExtractionTask = new UimaTaskBase() {
        {
            setType("FeatureExtraction");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext)
            throws ResourceInitializationException, IOException
        {
            File xmiDir = aContext.getFolder("XMI", AccessMode.READONLY);
            return createReader(XmiReader.class,
                    XmiReader.PARAM_SOURCE_LOCATION, xmiDir.getAbsolutePath(),
                    XmiReader.PARAM_PATTERNS, new String[] { "[+]**/*.xmi.gz" });
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext)
            throws ResourceInitializationException, IOException
        {
            File modelDir = aContext.getFolder("MODEL", AccessMode.READWRITE);
            return createEngine(createEngineDescription(ExamplePosAnnotator.class,
                    ExamplePosAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
                    DefaultMalletCRFDataWriterFactory.class.getName(),
                    DefaultMalletCRFDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
                    modelDir.getAbsolutePath()));
        }
    };

    Task trainingTask = new ExecutableTaskBase() {
        {
            setType("TrainingTask");
        }

        @Override
        public void execute(TaskContext aContext) throws Exception {
            File dir = aContext.getFolder("MODEL", AccessMode.READWRITE);
            JarClassifierBuilder<?> classifierBuilder = JarClassifierBuilder
                    .fromTrainingDirectory(dir);
            classifierBuilder.trainClassifier(dir, new String[0]);
            classifierBuilder.packageClassifier(dir);
        }
    };

    Task analysisTask = new UimaTaskBase() {
        {
            setType("AnalysisTask");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext)
            throws ResourceInitializationException, IOException
        {
            return createReaderDescription(TextReader.class,
                    TextReader.PARAM_SOURCE_LOCATION, "src/test/resources/text",
                    TextReader.PARAM_PATTERNS, new String[] { "[+]**/*.txt" },
                    TextReader.PARAM_LANGUAGE, "de");
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext)
            throws ResourceInitializationException, IOException
        {
            File model = new File(aContext.getFolder("MODEL", AccessMode.READONLY), "model.jar");
            File tsv = new File(aContext.getFolder("TSV", AccessMode.READWRITE), "output.tsv");
            return createEngine(
                    createEngineDescription(BreakIteratorSegmenter.class),
                    createEngineDescription(SnowballStemmer.class),
                    createEngineDescription(ExamplePosAnnotator.class,
                            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
                            model.getAbsolutePath()),
                    createEngineDescription(ImsCwbWriter.class,
                            ImsCwbWriter.PARAM_TARGET_LOCATION, tsv));
        }
    };

    ParameterSpace pSpace = new ParameterSpace(Dimension.create("corpusPath", CORPUS_PATH));

    featureExtractionTask.addImport(preprocessingTask, "XMI");
    trainingTask.addImport(featureExtractionTask, "MODEL");
    analysisTask.addImport(trainingTask, "MODEL");

    DefaultBatchTask batch = new DefaultBatchTask();
    batch.setParameterSpace(pSpace);
    batch.setExecutionPolicy(ExecutionPolicy.USE_EXISTING);
    batch.addTask(preprocessingTask);
    batch.addTask(featureExtractionTask);
    batch.addTask(trainingTask);
    batch.addTask(analysisTask);

    Lab.getInstance().run(batch);
}
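The four tasks form a linear pipeline through their imports: featureExtractionTask reads the XMI folder written by preprocessingTask, trainingTask reads the MODEL folder written by featureExtractionTask, and analysisTask reads the packaged model written by trainingTask. With ExecutionPolicy.USE_EXISTING, the batch engine may reuse a compatible earlier execution of a task instead of re-running it; this is the path on which getExistingExecution in the engine code above returns a non-null TaskContextMetadata.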