Search in sources :

Example 16 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class SVMHMMSaveAndLoadModelTest method getParameterSpace.

private ParameterSpace getParameterSpace() throws ResourceInitializationException {
    DemoUtils.setDkproHome(this.getClass().getName());
    String trainFolder = "src/main/resources/data/brown_tei/";
    // configure training and test data reader dimension
    // train/test will use both, while cross-validation will only use the
    // train part
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(BrownCorpusReader.class, BrownCorpusReader.PARAM_LANGUAGE, "en", BrownCorpusReader.PARAM_SOURCE_LOCATION, trainFolder, BrownCorpusReader.PARAM_LANGUAGE, "en", BrownCorpusReader.PARAM_PATTERNS, "*.xml");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3), TcFeatureFactory.create(TokenRatioPerDocument.class)));
    Map<String, Object> wekaConfig = new HashMap<>();
    wekaConfig.put(DIM_CLASSIFICATION_ARGS, new Object[] { new SvmHmmAdapter() });
    wekaConfig.put(DIM_DATA_WRITER, new SvmHmmAdapter().getDataWriterClass().getName());
    wekaConfig.put(DIM_FEATURE_USE_SPARSE, new SvmHmmAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", wekaConfig);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_SEQUENCE), dimFeatureSets, mlas);
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) SvmHmmAdapter(org.dkpro.tc.ml.svmhmm.SvmHmmAdapter) HashMap(java.util.HashMap) Map(java.util.Map)

Example 17 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaComplexConfigurationSingleDemo method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, CORPUS_FILEPATH_TRAIN, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, COPRUS_FILEPATH_TEST, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TEST, readerTest);
    Map<String, Object> config1 = new HashMap<>();
    config1.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), SMO.class.getName(), "-C", "1.0", "-K", PolyKernel.class.getName() + " " + "-C -1 -E 2" });
    config1.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config1.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Map<String, Object> config2 = new HashMap<>();
    config2.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), RandomForest.class.getName(), "-I", "5" });
    config2.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config2.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Map<String, Object> config3 = new HashMap<>();
    config3.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), Bagging.class.getName(), "-I", "2", "-W", J48.class.getName(), "--", "-C", "0.5", "-M", "2" });
    config3.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config3.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config1, config2, config3);
    // We configure 2 sets of feature extractors, one consisting of 3 extractors, and one with
    // only 1
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)), new TcFeatureSet(TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
    // single-label feature selection (Weka specific options), reduces the feature set to 10
    Map<String, Object> dimFeatureSelection = new HashMap<String, Object>();
    dimFeatureSelection.put(DIM_FEATURE_SEARCHER_ARGS, asList(new String[] { Ranker.class.getName(), "-N", "10" }));
    dimFeatureSelection.put(DIM_ATTRIBUTE_EVALUATOR_ARGS, asList(new String[] { InfoGainAttributeEval.class.getName() }));
    dimFeatureSelection.put(DIM_APPLY_FEATURE_SELECTION, true);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas, Dimension.createBundle("featureSelection", dimFeatureSelection));
    return pSpace;
}
Also used : HashMap(java.util.HashMap) RandomForest(weka.classifiers.trees.RandomForest) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) J48(weka.classifiers.trees.J48) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) SMO(weka.classifiers.functions.SMO) ParameterSpace(org.dkpro.lab.task.ParameterSpace) PolyKernel(weka.classifiers.functions.supportVector.PolyKernel) HashMap(java.util.HashMap) Map(java.util.Map) Bagging(weka.classifiers.meta.Bagging)

Example 18 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaComplexConfigurationSingleDemo method main.

public static void main(String[] args) throws Exception {
    // This is used to ensure that the required DKPRO_HOME environment variable is set.
    // Ensures that people can run the experiments even if they haven't read the setup
    // instructions first :)
    // Don't use this in real experiments! Read the documentation and set DKPRO_HOME as
    // explained there.
    DemoUtils.setDkproHome(WekaComplexConfigurationSingleDemo.class.getSimpleName());
    ParameterSpace pSpace = getParameterSpace();
    WekaComplexConfigurationSingleDemo experiment = new WekaComplexConfigurationSingleDemo();
    experiment.runTrainTest(pSpace);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 19 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaUniformClassDistributionDemo method main.

public static void main(String[] args) throws Exception {
    // This is used to ensure that the required DKPRO_HOME environment variable is set.
    // Ensures that people can run the experiments even if they haven't read the setup
    // instructions first :)
    // Don't use this in real experiments! Read the documentation and set DKPRO_HOME as
    // explained there.
    DemoUtils.setDkproHome(WekaUniformClassDistributionDemo.class.getSimpleName());
    ParameterSpace pSpace = getParameterSpace();
    WekaUniformClassDistributionDemo experiment = new WekaUniformClassDistributionDemo();
    // experiment.runCrossValidation(pSpace);
    experiment.runTrainTest(pSpace);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 20 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaUniformClassDistributionDemo method getParameterSpace.

@SuppressWarnings("unchecked")
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    // train/test will use both, while cross-validation will only use the train part
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TEST, readerTest);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
    Dimension<List<String>> dimFeatureFilters = Dimension.create(DIM_FEATURE_FILTERS, Arrays.asList(new String[] { UniformClassDistributionFilter.class.getName() }));
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
    config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, dimFeatureFilters, mlas);
    return pSpace;
}
Also used : HashMap(java.util.HashMap) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) NaiveBayes(weka.classifiers.bayes.NaiveBayes) ParameterSpace(org.dkpro.lab.task.ParameterSpace) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace)130 HashMap (java.util.HashMap)60 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)51 Map (java.util.Map)45 Test (org.junit.Test)44 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)42 File (java.io.File)26 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)21 DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask)12 ArrayList (java.util.ArrayList)10 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 TaskContext (org.dkpro.lab.engine.TaskContext)7 CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 List (java.util.List)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle)5 SMO (weka.classifiers.functions.SMO)5 Task (org.dkpro.lab.task.Task)4