Search in sources :

Example 66 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-lab by dkpro.

the class BatchTaskEngine method run.

@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    if (!(aConfiguration instanceof BatchTask)) {
        throw new ExecutionException("This engine can only execute [" + BatchTask.class.getName() + "]");
    }
    // Create persistence service for injection into analysis components
    TaskContext ctx = null;
    try {
        ctx = contextFactory.createContext(aConfiguration);
        // Now the setup is complete
        ctx.getLifeCycleManager().initialize(ctx, aConfiguration);
        // Start recording
        ctx.getLifeCycleManager().begin(ctx, aConfiguration);
        try {
            BatchTask cfg = (BatchTask) aConfiguration;
            ParameterSpace parameterSpace = cfg.getParameterSpace();
            // Try to calculate the parameter space size.
            int estimatedSize = 1;
            for (Dimension<?> d : parameterSpace.getDimensions()) {
                if (d instanceof FixedSizeDimension) {
                    FixedSizeDimension fsd = (FixedSizeDimension) d;
                    if (fsd.size() > 0) {
                        estimatedSize *= fsd.size();
                    }
                }
            }
            // A subtask execution may apply to multiple parameter space coordinates!
            Set<String> executedSubtasks = new LinkedHashSet<String>();
            ProgressMeter progress = new ProgressMeter(estimatedSize);
            for (Map<String, Object> config : parameterSpace) {
                if (cfg.getConfiguration() != null) {
                    for (Entry<String, Object> e : cfg.getConfiguration().entrySet()) {
                        if (!config.containsKey(e.getKey())) {
                            config.put(e.getKey(), e.getValue());
                        }
                    }
                }
                log.info("== Running new configuration [" + ctx.getId() + "] ==");
                List<String> keys = new ArrayList<String>(config.keySet());
                for (String key : keys) {
                    log.info("[" + key + "]: [" + StringUtils.abbreviateMiddle(Util.toString(config.get(key)), "…", 150) + "]");
                }
                executeConfiguration(cfg, ctx, config, executedSubtasks);
                progress.next();
                log.info("Completed configuration " + progress);
            }
            // Set the subtask property and persist again, so the property is available to
            // reports
            cfg.setAttribute(SUBTASKS_KEY, executedSubtasks.toString());
            cfg.persist(ctx);
        } catch (LifeCycleException e) {
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw e;
        } catch (UnresolvedImportException e) {
            // HACK - pass unresolved import exceptions up to the outer batch task
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw e;
        } catch (Throwable e) {
            ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
            throw new ExecutionException(e);
        }
        // End recording (here the reports will nbe done)
        ctx.getLifeCycleManager().complete(ctx, aConfiguration);
        return ctx.getId();
    } finally {
        if (ctx != null) {
            ctx.getLifeCycleManager().destroy(ctx, aConfiguration);
        }
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TaskContext(org.dkpro.lab.engine.TaskContext) BatchTask(org.dkpro.lab.task.BatchTask) ArrayList(java.util.ArrayList) LifeCycleException(org.dkpro.lab.engine.LifeCycleException) FixedSizeDimension(org.dkpro.lab.task.FixedSizeDimension) UnresolvedImportException(org.dkpro.lab.storage.UnresolvedImportException) ParameterSpace(org.dkpro.lab.task.ParameterSpace) ProgressMeter(org.dkpro.lab.ProgressMeter) ExecutionException(org.dkpro.lab.engine.ExecutionException)

Example 67 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class ExperimentBuilder method build.

/**
 * Wires all provided information into a parameter space object that can be provided to an
 * experiment
 *
 * @return the parameter space filled with the provided information
 */
public ParameterSpace build() {
    List<Dimension<?>> dimensions = new ArrayList<>();
    dimensions.add(getAsDimensionMachineLearningAdapter());
    dimensions.add(getAsDimensionFeatureMode());
    dimensions.add(getAsDimensionLearningMode());
    dimensions.add(getAsDimensionFeatureSets());
    dimensions.add(getAsDimensionReaders());
    ParameterSpace ps = new ParameterSpace();
    ps.setDimensions(dimensions.toArray(new Dimension<?>[0]));
    return ps;
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) ArrayList(java.util.ArrayList) Dimension(org.dkpro.lab.task.Dimension)

Example 68 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaDocumentPlain method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    // train/test will use both, while cross-validation will only use the
    // train part
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TEST, readerTest);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet("DummyFeatureSet", TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 20, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
    config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas);
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) NaiveBayes(weka.classifiers.bayes.NaiveBayes) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter)

Example 69 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaDocumentPlain method main.

public static void main(String[] args) throws Exception {
    DemoUtils.setDkproHome("target/");
    ParameterSpace pSpace = getParameterSpace();
    WekaDocumentPlain experiment = new WekaDocumentPlain();
    experiment.runTrainTest(pSpace);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 70 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaManualFoldCrossValidation method getParameterSpace.

public static ParameterSpace getParameterSpace(boolean manualFolds) throws ResourceInitializationException {
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    dimReaders.put(DIM_READER_TRAIN, BrownCorpusReader.class);
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(BrownCorpusReader.class, BrownCorpusReader.PARAM_LANGUAGE, "de", BrownCorpusReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, BrownCorpusReader.PARAM_PATTERNS, INCLUDE_PREFIX + "*.xml");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(CharacterNGram.class, CharacterNGram.PARAM_NGRAM_MIN_N, 2, CharacterNGram.PARAM_NGRAM_MAX_N, 3, CharacterNGram.PARAM_NGRAM_USE_TOP_K, 750)));
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
    config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_UNIT), dimFeatureSets, mlas, /*
                 * MANUAL CROSS VALIDATION FOLDS - i.e. the cas created by your reader will be used
                 * as is to make folds
                 */
    Dimension.create(DIM_CROSS_VALIDATION_MANUAL_FOLDS, manualFolds));
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) NaiveBayes(weka.classifiers.bayes.NaiveBayes) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace)130 HashMap (java.util.HashMap)60 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)51 Map (java.util.Map)45 Test (org.junit.Test)44 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)42 File (java.io.File)26 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)21 DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask)12 ArrayList (java.util.ArrayList)10 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 TaskContext (org.dkpro.lab.engine.TaskContext)7 CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 List (java.util.List)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle)5 SMO (weka.classifiers.functions.SMO)5 Task (org.dkpro.lab.task.Task)4