use of org.dkpro.lab.task.ParameterSpace in project dkpro-lab by dkpro.
the class BatchTaskEngine method run.
/**
 * Runs a {@link BatchTask} by walking over every configuration produced by its parameter space.
 * <p>
 * For each configuration, entries from the batch task's own configuration are added for keys
 * the parameter space did not supply, the resulting key/value pairs are logged, and
 * {@code executeConfiguration} is invoked. Afterwards the collected subtask identifiers are
 * stored on the task under {@code SUBTASKS_KEY} and the task is persisted.
 *
 * @param aConfiguration
 *            the task to execute; must be a {@link BatchTask}
 * @return the id of the task context created for this run
 * @throws ExecutionException
 *             if the task is not a {@link BatchTask}, or wrapping any failure other than a
 *             {@link LifeCycleException} or {@link UnresolvedImportException}
 * @throws LifeCycleException
 *             re-thrown unchanged after the life-cycle manager has been notified of the failure
 */
@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    if (!(aConfiguration instanceof BatchTask)) {
        throw new ExecutionException("This engine can only execute [" + BatchTask.class.getName() + "]");
    }

    // The context stays null until creation succeeded, so the finally block knows whether
    // there is anything to destroy.
    TaskContext ctx = null;
    try {
        ctx = contextFactory.createContext(aConfiguration);

        // Setup is complete: initialize, then start recording.
        ctx.getLifeCycleManager().initialize(ctx, aConfiguration);
        ctx.getLifeCycleManager().begin(ctx, aConfiguration);

        try {
            BatchTask batch = (BatchTask) aConfiguration;
            ParameterSpace space = batch.getParameterSpace();

            // Estimate the number of configurations: product of the sizes of all fixed-size
            // dimensions that report a positive size. Other dimensions are ignored.
            int expectedRuns = 1;
            for (Dimension<?> dimension : space.getDimensions()) {
                if (!(dimension instanceof FixedSizeDimension)) {
                    continue;
                }
                FixedSizeDimension fixed = (FixedSizeDimension) dimension;
                if (fixed.size() > 0) {
                    expectedRuns *= fixed.size();
                }
            }

            // A subtask execution may apply to multiple parameter space coordinates, hence a
            // set that is shared across all configurations.
            Set<String> executedSubtasks = new LinkedHashSet<String>();
            ProgressMeter meter = new ProgressMeter(expectedRuns);

            for (Map<String, Object> coordinates : space) {
                // Fill in task-level settings for keys the parameter space did not provide.
                if (batch.getConfiguration() != null) {
                    for (Entry<String, Object> setting : batch.getConfiguration().entrySet()) {
                        if (coordinates.containsKey(setting.getKey())) {
                            continue;
                        }
                        coordinates.put(setting.getKey(), setting.getValue());
                    }
                }

                log.info("== Running new configuration [" + ctx.getId() + "] ==");
                // Iterate over a snapshot of the keys rather than the live key set.
                for (String key : new ArrayList<String>(coordinates.keySet())) {
                    String shown = StringUtils.abbreviateMiddle(Util.toString(coordinates.get(key)), "…", 150);
                    log.info("[" + key + "]: [" + shown + "]");
                }

                executeConfiguration(batch, ctx, coordinates, executedSubtasks);

                meter.next();
                log.info("Completed configuration " + meter);
            }

            // Set the subtask property and persist again, so the property is available to
            // reports.
            batch.setAttribute(SUBTASKS_KEY, executedSubtasks.toString());
            batch.persist(ctx);
        }
        catch (LifeCycleException e) {
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw e;
        }
        catch (UnresolvedImportException e) {
            // HACK - pass unresolved import exceptions up to the outer batch task
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw e;
        }
        catch (Throwable e) {
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw new ExecutionException(e);
        }

        // End recording (here the reports will be done)
        ctx.getLifeCycleManager().complete(ctx, aConfiguration);
        return ctx.getId();
    }
    finally {
        if (ctx != null) {
            ctx.getLifeCycleManager().destroy(ctx, aConfiguration);
        }
    }
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class ExperimentBuilder method build.
/**
 * Collects the machine-learning adapter, feature mode, learning mode, feature set and reader
 * dimensions (in that order) and wires them into a new parameter space that can be handed to
 * an experiment.
 *
 * @return the parameter space filled with the provided information
 */
public ParameterSpace build() {
    // The initializer evaluates the getters top to bottom, which fixes the dimension order.
    Dimension<?>[] collected = new Dimension<?>[] {
        getAsDimensionMachineLearningAdapter(),
        getAsDimensionFeatureMode(),
        getAsDimensionLearningMode(),
        getAsDimensionFeatureSets(),
        getAsDimensionReaders()
    };

    ParameterSpace space = new ParameterSpace();
    space.setDimensions(collected);
    return space;
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class WekaDocumentPlain method getParameterSpace.
/**
 * Builds the parameter space for this demo: a "readers" bundle with a training and a test
 * reader, the {@code LM_SINGLE_LABEL} learning mode, the {@code FM_DOCUMENT} feature mode, one
 * feature set, and a "config" bundle with the Weka classifier settings.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException
 *             if a reader description cannot be created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Training and test data readers. Train/test will use both, while cross-validation will
    // only use the train part.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");

    Map<String, Object> readers = new HashMap<String, Object>();
    readers.put(DIM_READER_TRAIN, trainReader);
    readers.put(DIM_READER_TEST, testReader);

    // A single feature set: token ratio per document plus word n-grams (n = 1..3, top 20).
    Dimension<TcFeatureSet> featureSets = Dimension.create(
            DIM_FEATURE_SET,
            new TcFeatureSet(
                    "DummyFeatureSet",
                    TcFeatureFactory.create(TokenRatioPerDocument.class),
                    TcFeatureFactory.create(
                            WordNGram.class,
                            WordNGram.PARAM_NGRAM_USE_TOP_K, 20,
                            WordNGram.PARAM_NGRAM_MIN_N, 1,
                            WordNGram.PARAM_NGRAM_MAX_N, 3)));

    // Classifier settings: Weka adapter running NaiveBayes, with the adapter's own data
    // writer and sparse-feature preference.
    Map<String, Object> classifierSettings = new HashMap<>();
    classifierSettings.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
    classifierSettings.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    classifierSettings.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> classifierBundle = Dimension.createBundle("config",
            classifierSettings);

    Dimension<?> readerBundle = Dimension.createBundle("readers", readers);
    Dimension<?> learningMode = Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL);
    Dimension<?> featureMode = Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT);

    return new ParameterSpace(readerBundle, learningMode, featureMode, featureSets,
            classifierBundle);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class WekaDocumentPlain method main.
/**
 * Demo entry point: points DKPro home at {@code target/}, builds the parameter space and runs
 * the train/test experiment with it.
 *
 * @param args
 *            command line arguments (unused)
 * @throws Exception
 *             if building the parameter space or running the experiment fails
 */
public static void main(String[] args) throws Exception {
    DemoUtils.setDkproHome("target/");

    // Build the parameter space before instantiating the experiment.
    ParameterSpace space = getParameterSpace();
    new WekaDocumentPlain().runTrainTest(space);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class WekaManualFoldCrossValidation method getParameterSpace.
/**
 * Builds the parameter space for the cross-validation demo: a "readers" bundle containing only
 * a training reader, the {@code LM_SINGLE_LABEL} learning mode, the {@code FM_UNIT} feature
 * mode, a character n-gram feature set, a "config" bundle with the Weka classifier settings,
 * and the manual-folds switch.
 *
 * @param manualFolds
 *            value stored under {@code DIM_CROSS_VALIDATION_MANUAL_FOLDS}
 * @return the assembled parameter space
 * @throws ResourceInitializationException
 *             if the reader description cannot be created
 */
public static ParameterSpace getParameterSpace(boolean manualFolds) throws ResourceInitializationException {
    // NOTE(review): the reader language is "de" although a Brown corpus reader is used -
    // confirm this is intended.
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(
            BrownCorpusReader.class,
            BrownCorpusReader.PARAM_LANGUAGE, "de",
            BrownCorpusReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            BrownCorpusReader.PARAM_PATTERNS, INCLUDE_PREFIX + "*.xml");

    // Only the reader description is registered. The former preliminary
    // put(DIM_READER_TRAIN, BrownCorpusReader.class) was overwritten by this entry before the
    // map was ever read, so it has been dropped.
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    dimReaders.put(DIM_READER_TRAIN, readerTrain);

    // Character n-grams with n = 2..3, keeping the top 750.
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(
            DIM_FEATURE_SET,
            new TcFeatureSet(
                    TcFeatureFactory.create(
                            CharacterNGram.class,
                            CharacterNGram.PARAM_NGRAM_MIN_N, 2,
                            CharacterNGram.PARAM_NGRAM_MAX_N, 3,
                            CharacterNGram.PARAM_NGRAM_USE_TOP_K, 750)));

    // Classifier settings: Weka adapter running NaiveBayes, with the adapter's own data
    // writer and sparse-feature preference.
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
    config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);

    return new ParameterSpace(
            Dimension.createBundle("readers", dimReaders),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_UNIT),
            dimFeatureSets,
            mlas,
            /*
             * MANUAL CROSS VALIDATION FOLDS - i.e. the cas created by your reader will be used
             * as is to make folds
             */
            Dimension.create(DIM_CROSS_VALIDATION_MANUAL_FOLDS, manualFolds));
}
Aggregations