Search in sources :

Example 71 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaTwentyNewsgroupsInstanceWeightingDemo method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    // train/test will use both, while cross-validation will only use the train part
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(WeightedTwentyNewsgroupsCorpusReader.class, WeightedTwentyNewsgroupsCorpusReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, WeightedTwentyNewsgroupsCorpusReader.PARAM_WEIGHT_FILE_LOCATION, corpusFilePathTrain + weightsFile, WeightedTwentyNewsgroupsCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE, WeightedTwentyNewsgroupsCorpusReader.PARAM_PATTERNS, Arrays.asList(WeightedTwentyNewsgroupsCorpusReader.INCLUDE_PREFIX + "*/*.txt"));
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(WeightedTwentyNewsgroupsCorpusReader.class, WeightedTwentyNewsgroupsCorpusReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, WeightedTwentyNewsgroupsCorpusReader.PARAM_WEIGHT_FILE_LOCATION, corpusFilePathTest + weightsFile, WeightedTwentyNewsgroupsCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE, WeightedTwentyNewsgroupsCorpusReader.PARAM_PATTERNS, WeightedTwentyNewsgroupsCorpusReader.INCLUDE_PREFIX + "*/*.txt");
    dimReaders.put(DIM_READER_TEST, readerTest);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 2, WordNGram.PARAM_NGRAM_MAX_N, 3)));
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), SMO.class.getName() });
    config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas, Dimension.create(DIM_APPLY_INSTANCE_WEIGHTING, true));
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) SMO(weka.classifiers.functions.SMO) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter)

Example 72 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class MekaComplexConfigurationMultiDemo method main.

/*
     * Starts the experiment.
     */
public static void main(String[] args) throws Exception {
    // This is used to ensure that the required DKPRO_HOME environment variable is set.
    // Ensures that people can run the experiments even if they haven't read the setup
    // instructions first :)
    // Don't use this in real experiments! Read the documentation and set DKPRO_HOME as
    // explained there.
    DemoUtils.setDkproHome(MekaComplexConfigurationMultiDemo.class.getSimpleName());
    ParameterSpace pSpace = getParameterSpace();
    MekaComplexConfigurationMultiDemo experiment = new MekaComplexConfigurationMultiDemo();
    experiment.runTrainTest(pSpace);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 73 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class PairModeWekaDemo method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    // train/test will use both, while cross-validation will only use the train part
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(PairTwentyNewsgroupsReader.class, PairTwentyNewsgroupsReader.PARAM_LISTFILE, listFilePathTrain, PairTwentyNewsgroupsReader.PARAM_LANGUAGE_CODE, languageCode);
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(PairTwentyNewsgroupsReader.class, PairTwentyNewsgroupsReader.PARAM_LISTFILE, listFilePathTest, PairTwentyNewsgroupsReader.PARAM_LANGUAGE_CODE, languageCode);
    dimReaders.put(DIM_READER_TEST, readerTest);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(DiffNrOfTokensPairFeatureExtractor.class)));
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
    config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_PAIR), dimFeatureSets, mlas);
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) NaiveBayes(weka.classifiers.bayes.NaiveBayes) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter)

Example 74 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class PairModeWekaDemo method main.

public static void main(String[] args) throws Exception {
    // This is used to ensure that the required DKPRO_HOME environment variable is set.
    // Ensures that people can run the experiments even if they haven't read the setup
    // instructions first :)
    // Don't use this in real experiments! Read the documentation and set DKPRO_HOME as
    // explained there.
    DemoUtils.setDkproHome(PairModeWekaDemo.class.getSimpleName());
    ParameterSpace pSpace = getParameterSpace();
    PairModeWekaDemo experiment = new PairModeWekaDemo();
    experiment.runTrainTest(pSpace);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 75 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class MekaSaveAndApplyModelMultilabelDemo method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    // train/test will use both, while cross-validation will only use the
    // train part
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(ReutersCorpusReader.class, ReutersCorpusReader.PARAM_SOURCE_LOCATION, FILEPATH_TRAIN, ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, FILEPATH_GOLD_LABELS, ReutersCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE, ReutersCorpusReader.PARAM_PATTERNS, ReutersCorpusReader.INCLUDE_PREFIX + "*.txt");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(ReutersCorpusReader.class, ReutersCorpusReader.PARAM_SOURCE_LOCATION, FILEPATH_TEST, ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, FILEPATH_GOLD_LABELS, ReutersCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE, ReutersCorpusReader.PARAM_PATTERNS, ReutersCorpusReader.INCLUDE_PREFIX + "*.txt");
    dimReaders.put(DIM_READER_TEST, readerTest);
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new MekaAdapter(), MULAN.class.getName(), "-S", "RAkEL2", "-W", RandomForest.class.getName() });
    config.put(DIM_DATA_WRITER, new MekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new MekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 100, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_MULTI_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), Dimension.create(DIM_BIPARTITION_THRESHOLD, BIPARTITION_THRESHOLD), dimFeatureSets, mlas);
    return pSpace;
}
Also used : HashMap(java.util.HashMap) RandomForest(weka.classifiers.trees.RandomForest) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) MekaAdapter(org.dkpro.tc.ml.weka.MekaAdapter) MULAN(meka.classifiers.multilabel.MULAN) ParameterSpace(org.dkpro.lab.task.ParameterSpace) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace)130 HashMap (java.util.HashMap)60 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)51 Map (java.util.Map)45 Test (org.junit.Test)44 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)42 File (java.io.File)26 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)21 DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask)12 ArrayList (java.util.ArrayList)10 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 TaskContext (org.dkpro.lab.engine.TaskContext)7 CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 List (java.util.List)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle)5 SMO (weka.classifiers.functions.SMO)5 Task (org.dkpro.lab.task.Task)4