Use of org.dkpro.lab.task.impl.ExecutableTaskBase in project dkpro-lab by dkpro.
The class FoldDimensionBundleTest, method testFoldInjection.
/**
 * Runs a single task over a {@code FoldDimensionBundle} built from ten base values with a
 * fold count of 3 and checks that the validation/training collections injected into the
 * task on each step match the expected split.
 */
@Test
public void testFoldInjection() throws Exception {
    // Point the lab's storage at a fresh directory specific to this test method.
    String repoPath = "target/repository/" + getClass().getSimpleName() + "/" + name.getMethodName();
    File storageRoot = new File(repoPath);
    FileUtils.deleteDirectory(storageRoot);
    storageRoot.mkdirs();
    FileSystemStorageService storage = (FileSystemStorageService) Lab.getInstance().getStorageService();
    storage.setStorageRoot(storageRoot);

    // Ten base values wrapped in a fold bundle named "fold".
    Dimension<String> baseValues = Dimension.create("base", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10");
    FoldDimensionBundle<String> folds = new FoldDimensionBundle<String>("fold", baseValues, 3);
    ParameterSpace parameterSpace = new ParameterSpace(folds);

    // Collects one line per executed step: "<step> - <validation> <training>".
    final StringBuilder observed = new StringBuilder();
    Task recordingTask = new ExecutableTaskBase() {
        // Counts how many times execute() has been called on this task instance.
        int n = 0;

        // Field names must stay as they are: they are matched against the "fold" bundle.
        @Discriminator
        Collection<String> fold_validation;

        @Discriminator
        Collection<String> fold_training;

        @Override
        public void execute(TaskContext aContext) throws Exception {
            System.out.printf("%d training : %s\n", n, fold_training);
            System.out.printf("%d validation: %s\n", n, fold_validation);
            String line = String.format("%d - %s %s\n", n, fold_validation, fold_training);
            observed.append(line);
            n++;
        }
    };

    DefaultBatchTask batch = new DefaultBatchTask();
    batch.setParameterSpace(parameterSpace);
    batch.addTask(recordingTask);
    Lab.getInstance().run(batch);

    String expected = "0 - [1, 4, 7, 10] [2, 5, 8, 3, 6, 9]\n"
            + "1 - [2, 5, 8] [1, 4, 7, 10, 3, 6, 9]\n"
            + "2 - [3, 6, 9] [1, 4, 7, 10, 2, 5, 8]\n";
    assertEquals(3, parameterSpace.getStepCount());
    assertEquals(expected, observed.toString());
}
Use of org.dkpro.lab.task.impl.ExecutableTaskBase in project dkpro-lab by dkpro.
The class MultiThreadBatchTaskTest, method testUnresolvable.
/**
 * Sets up two tasks that each import the "DUMMY" key from the other and runs them in one
 * batch; the test passes only if an {@code UnresolvedImportException} is thrown.
 */
@Test(expected = UnresolvedImportException.class)
public void testUnresolvable() throws Exception {
    ParameterSpace parameterSpace = new ParameterSpace(Dimension.create("param", "1", "2", "3"));

    // Two separate anonymous task classes, each doing nothing when executed.
    Task first = new ExecutableTaskBase() {
        @Discriminator
        private String param;

        @Override
        public void execute(TaskContext aContext) throws Exception {
            // Nothing to do
        }
    };
    Task second = new ExecutableTaskBase() {
        @Discriminator
        private String param;

        @Override
        public void execute(TaskContext aContext) throws Exception {
            // Nothing to do
        }
    };

    // Mutual imports: each task depends on the other's "DUMMY" output.
    second.addImport(first, "DUMMY");
    first.addImport(second, "DUMMY");

    DefaultBatchTask batch = new DefaultBatchTask();
    batch.setParameterSpace(parameterSpace);
    batch.addTask(first);
    batch.addTask(second);
    Lab.getInstance().run(batch);
}
Use of org.dkpro.lab.task.impl.ExecutableTaskBase in project dkpro-lab by dkpro.
The class MultiThreadTaskPerformanceTest, method testRandomWiring.
/**
 * Creates several layers of dummy tasks, randomly wires every layer to each earlier layer
 * via "DATA" imports, and submits all tasks to the batch task in shuffled order.
 *
 * A single seeded {@link Random} drives both the wiring and the shuffle so that every run
 * builds the same task graph and submits the tasks in the same order.
 */
@Test
public void testRandomWiring() throws Exception {
    Random random = new Random(0);
    int layerSize = 100;
    int layersNumber = 4;
    int importsInEachLayer = 100;

    // create layersNumber layers, each holding layerSize tasks typed "<layer>-<index>"
    List<List<Task>> layersOfTasks = new ArrayList<>();
    for (int j = 0; j < layersNumber; j++) {
        List<Task> layer = new ArrayList<>();
        for (int i = 0; i < layerSize; i++) {
            Task t = new DummyTask();
            ((ExecutableTaskBase) t).setType(String.format("%d-%d", j, i));
            layer.add(t);
        }
        layersOfTasks.add(layer);
    }

    // wire tasks in layers: a task only ever imports from a task in an earlier layer
    for (int l = 1; l < layersNumber; l++) {
        for (int j = l - 1; j >= 0; j--) {
            for (int i = 0; i < importsInEachLayer; i++) {
                Task t1 = layersOfTasks.get(l).get(random.nextInt(layerSize));
                Task t2 = layersOfTasks.get(j).get(random.nextInt(layerSize));
                t1.addImport(t2, "DATA");
            }
        }
    }

    // shuffle all tasks, reusing the seeded generator so the order is reproducible
    List<Task> allTasksShuffled = new ArrayList<>();
    for (List<Task> tasks : layersOfTasks) {
        allTasksShuffled.addAll(tasks);
    }
    Collections.shuffle(allTasksShuffled, random);

    // batchTask is declared outside this method (not visible in this excerpt)
    for (Task t : allTasksShuffled) {
        batchTask.addTask(t);
    }
    Lab.getInstance().run(batchTask);
}
Use of org.dkpro.lab.task.impl.ExecutableTaskBase in project dkpro-lab by dkpro.
The class PosExampleCrf, method run.
/**
 * Builds a four-task pipeline (preprocessing, feature extraction, training, analysis),
 * chains the tasks through "XMI" and "MODEL" imports, and runs them as one batch over a
 * parameter space with a single "corpusPath" dimension.
 */
@Test
public void run() throws Exception {
    // Route logging through log4j
    System.setProperty("org.apache.uima.logger.class", "org.apache.uima.util.impl.Log4jLogger_impl");
    clean();

    // Reads the corpus given by the "corpusPath" parameter and writes gzipped XMI.
    Task preprocessing = new UimaTaskBase() {
        @Discriminator
        String corpusPath;

        {
            setType("Preprocessing");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            return createReader(NegraExportReader.class, NegraExportReader.PARAM_SOURCE_LOCATION, corpusPath, NegraExportReader.PARAM_LANGUAGE, "de");
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File xmiFolder = aContext.getFolder("XMI", AccessMode.READWRITE);
            String xmiPath = xmiFolder.getAbsolutePath();
            return createEngine(createEngine(SnowballStemmer.class), createEngine(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, xmiPath, XmiWriter.PARAM_COMPRESSION, CompressionMethod.GZIP));
        }
    };

    // Reads the XMI files back and writes training data into the "MODEL" folder.
    Task featureExtraction = new UimaTaskBase() {
        {
            setType("FeatureExtraction");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File xmiFolder = aContext.getFolder("XMI", AccessMode.READONLY);
            String xmiPath = xmiFolder.getAbsolutePath();
            String[] xmiPatterns = new String[] { "[+]**/*.xmi.gz" };
            return createReader(XmiReader.class, XmiReader.PARAM_SOURCE_LOCATION, xmiPath, XmiReader.PARAM_PATTERNS, xmiPatterns);
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File modelFolder = aContext.getFolder("MODEL", AccessMode.READWRITE);
            String writerFactoryName = DefaultMalletCRFDataWriterFactory.class.getName();
            String modelPath = modelFolder.getAbsolutePath();
            return createEngine(createEngineDescription(ExamplePosAnnotator.class, ExamplePosAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME, writerFactoryName, DefaultMalletCRFDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelPath));
        }
    };

    // Trains and packages a classifier inside the "MODEL" folder.
    Task training = new ExecutableTaskBase() {
        {
            setType("TrainingTask");
        }

        @Override
        public void execute(TaskContext aContext) throws Exception {
            File modelFolder = aContext.getFolder("MODEL", AccessMode.READWRITE);
            JarClassifierBuilder<?> builder = JarClassifierBuilder.fromTrainingDirectory(modelFolder);
            String[] noTrainingArgs = new String[0];
            builder.trainClassifier(modelFolder, noTrainingArgs);
            builder.packageClassifier(modelFolder);
        }
    };

    // Applies the packaged model to the test texts and writes the result to the "TSV" folder.
    Task analysis = new UimaTaskBase() {
        {
            setType("AnalysisTask");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            String[] textPatterns = new String[] { "[+]**/*.txt" };
            return createReaderDescription(TextReader.class, TextReader.PARAM_SOURCE_LOCATION, "src/test/resources/text", TextReader.PARAM_PATTERNS, textPatterns, TextReader.PARAM_LANGUAGE, "de");
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File modelFolder = aContext.getFolder("MODEL", AccessMode.READONLY);
            File tsvFolder = aContext.getFolder("TSV", AccessMode.READWRITE);
            File model = new File(modelFolder, "model.jar");
            File tsv = new File(tsvFolder, "output.tsv");
            String modelJarPath = model.getAbsolutePath();
            return createEngine(createEngineDescription(BreakIteratorSegmenter.class), createEngineDescription(SnowballStemmer.class), createEngineDescription(ExamplePosAnnotator.class, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelJarPath), createEngineDescription(ImsCwbWriter.class, ImsCwbWriter.PARAM_TARGET_LOCATION, tsv));
        }
    };

    // Chain the tasks: each one imports the output key of its predecessor.
    featureExtraction.addImport(preprocessing, "XMI");
    training.addImport(featureExtraction, "MODEL");
    analysis.addImport(training, "MODEL");

    ParameterSpace parameterSpace = new ParameterSpace(Dimension.create("corpusPath", CORPUS_PATH));

    DefaultBatchTask pipeline = new DefaultBatchTask();
    pipeline.setParameterSpace(parameterSpace);
    pipeline.setExecutionPolicy(ExecutionPolicy.USE_EXISTING);
    pipeline.addTask(preprocessing);
    pipeline.addTask(featureExtraction);
    pipeline.addTask(training);
    pipeline.addTask(analysis);
    Lab.getInstance().run(pipeline);
}
Use of org.dkpro.lab.task.impl.ExecutableTaskBase in project dkpro-lab by dkpro.
The class PosExampleMaxEnt, method run.
/**
 * Builds a four-task pipeline (preprocessing, feature extraction, training, analysis),
 * chains the tasks through "XMI" and "MODEL" imports, and runs them as one batch over a
 * parameter space with "corpusPath", "iterations" and "cutoff" dimensions.
 */
@Test
public void run() throws Exception {
    // Route logging through log4j
    System.setProperty("org.apache.uima.logger.class", "org.apache.uima.util.impl.Log4jLogger_impl");
    clean();

    // Reads the corpus given by the "corpusPath" parameter and writes gzipped XMI.
    Task preprocessing = new UimaTaskBase() {
        @Discriminator
        String corpusPath;

        {
            setType("Preprocessing");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            return createReader(NegraExportReader.class, NegraExportReader.PARAM_SOURCE_LOCATION, corpusPath, NegraExportReader.PARAM_LANGUAGE, "de");
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File xmiFolder = aContext.getFolder("XMI", AccessMode.READWRITE);
            String xmiPath = xmiFolder.getAbsolutePath();
            return createEngine(createEngine(SnowballStemmer.class), createEngine(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, xmiPath, XmiWriter.PARAM_COMPRESSION, CompressionMethod.GZIP));
        }
    };

    // Reads the XMI files back and writes training data into the "MODEL" folder.
    Task featureExtraction = new UimaTaskBase() {
        {
            setType("FeatureExtraction");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File xmiFolder = aContext.getFolder("XMI", AccessMode.READONLY);
            String xmiPath = xmiFolder.getAbsolutePath();
            String[] xmiPatterns = new String[] { "[+]**/*.xmi.gz" };
            return createReader(XmiReader.class, XmiReader.PARAM_SOURCE_LOCATION, xmiPath, XmiReader.PARAM_PATTERNS, xmiPatterns);
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File modelFolder = aContext.getFolder("MODEL", AccessMode.READWRITE);
            String viterbiFactoryName = ViterbiDataWriterFactory.class.getName();
            String modelPath = modelFolder.getAbsolutePath();
            String maxentFactoryName = DefaultMaxentDataWriterFactory.class.getName();
            return createEngine(createEngineDescription(ExamplePosAnnotator.class, ExamplePosAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME, viterbiFactoryName, ViterbiDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelPath, ViterbiDataWriterFactory.PARAM_DELEGATED_DATA_WRITER_FACTORY_CLASS, maxentFactoryName));
        }
    };

    // Trains and packages a classifier, passing "iterations" and "cutoff" as training arguments.
    Task training = new ExecutableTaskBase() {
        @Discriminator
        private int iterations;

        @Discriminator
        private int cutoff;

        {
            setType("TrainingTask");
        }

        @Override
        public void execute(TaskContext aContext) throws Exception {
            File modelFolder = aContext.getFolder("MODEL", AccessMode.READWRITE);
            JarClassifierBuilder<?> builder = JarClassifierBuilder.fromTrainingDirectory(modelFolder);
            String[] trainingArgs = new String[] { String.valueOf(iterations), String.valueOf(cutoff) };
            builder.trainClassifier(modelFolder, trainingArgs);
            builder.packageClassifier(modelFolder);
        }
    };

    // Applies the packaged model to the test texts and writes the result to the "TSV" folder.
    Task analysis = new UimaTaskBase() {
        {
            setType("AnalysisTask");
        }

        @Override
        public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            return createReaderDescription(TextReader.class, TextReader.PARAM_SOURCE_LOCATION, "src/test/resources/text/**/*.txt", TextReader.PARAM_LANGUAGE, "de");
        }

        @Override
        public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
            File modelFolder = aContext.getFolder("MODEL", AccessMode.READONLY);
            File tsvFolder = aContext.getFolder("TSV", AccessMode.READWRITE);
            File model = new File(modelFolder, "model.jar");
            File tsv = new File(tsvFolder, "output.tsv");
            String modelJarPath = model.getAbsolutePath();
            return createEngine(createEngineDescription(BreakIteratorSegmenter.class), createEngineDescription(SnowballStemmer.class), createEngineDescription(ExamplePosAnnotator.class, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelJarPath), createEngineDescription(ImsCwbWriter.class, ImsCwbWriter.PARAM_TARGET_LOCATION, tsv));
        }
    };

    // Chain the tasks: each one imports the output key of its predecessor.
    featureExtraction.addImport(preprocessing, "XMI");
    training.addImport(featureExtraction, "MODEL");
    analysis.addImport(training, "MODEL");

    ParameterSpace parameterSpace = new ParameterSpace(Dimension.create("corpusPath", CORPUS_PATH), Dimension.create("iterations", 20, 50, 100), Dimension.create("cutoff", 5));

    DefaultBatchTask pipeline = new DefaultBatchTask();
    pipeline.setParameterSpace(parameterSpace);
    pipeline.setExecutionPolicy(ExecutionPolicy.USE_EXISTING);
    pipeline.addTask(preprocessing);
    pipeline.addTask(featureExtraction);
    pipeline.addTask(training);
    pipeline.addTask(analysis);
    Lab.getInstance().run(pipeline);
}
Aggregations