Search in sources :

Example 21 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaTwentyNewsgroupsInstanceWeightingDemo method main.

/**
 * Demo entry point: runs a train/test experiment followed by a cross-validation experiment
 * on the same parameter space.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if setup or one of the experiment runs fails
 */
public static void main(String[] args) throws Exception {
    // Demo-only convenience: makes sure the required DKPRO_HOME environment variable is set,
    // so the experiments can be run even if the setup instructions were skipped.
    // Do not rely on this in real experiments; read the documentation and set DKPRO_HOME
    // as explained there.
    final String demoName = WekaTwentyNewsgroupsInstanceWeightingDemo.class.getSimpleName();
    DemoUtils.setDkproHome(demoName);

    // The same parameter space is handed to both runs below.
    final ParameterSpace parameterSpace = getParameterSpace();

    final WekaTwentyNewsgroupsInstanceWeightingDemo demo = new WekaTwentyNewsgroupsInstanceWeightingDemo();
    demo.runTrainTest(parameterSpace);
    demo.runCrossValidation(parameterSpace);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 22 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class MekaComplexConfigurationMultiDemo method getParameterSpace.

/**
 * Assembles the parameter space for the multi-label Meka demo: train/test readers, three
 * alternative classifier configurations, one feature set and a feature-selection bundle.
 *
 * @return the fully configured parameter space
 * @throws ResourceInitializationException if a reader description cannot be created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader descriptions for training and test data; both use the same gold label file,
    // language and file pattern and differ only in their source location.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            ReutersCorpusReader.class,
            ReutersCorpusReader.PARAM_SOURCE_LOCATION, FILEPATH_TRAIN,
            ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, FILEPATH_GOLD_LABELS,
            ReutersCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            ReutersCorpusReader.PARAM_PATTERNS, ReutersCorpusReader.INCLUDE_PREFIX + "*.txt");
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            ReutersCorpusReader.class,
            ReutersCorpusReader.PARAM_SOURCE_LOCATION, FILEPATH_TEST,
            ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, FILEPATH_GOLD_LABELS,
            ReutersCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            ReutersCorpusReader.PARAM_PATTERNS, ReutersCorpusReader.INCLUDE_PREFIX + "*.txt");

    Map<String, Object> readers = new HashMap<String, Object>();
    readers.put(DIM_READER_TRAIN, trainReader);
    readers.put(DIM_READER_TEST, testReader);

    // Three alternative classifier configurations, bundled into a single "config" dimension.
    Map<String, Object> binaryRelevanceConfig = buildMekaConfig(BR.class.getName(), "-W", NaiveBayes.class.getName());
    Map<String, Object> classifierChainConfig = buildMekaConfig(CCq.class.getName(), "-P", "0.9");
    Map<String, Object> prunedSetsConfig = buildMekaConfig(PSUpdateable.class.getName(), "-B", "900", "-S", "9");
    Dimension<Map<String, Object>> classifierConfigs = Dimension.createBundle("config",
            binaryRelevanceConfig, classifierChainConfig, prunedSetsConfig);

    // A single feature set made up of two extractors: token ratio per document and
    // word n-grams (n = 1..3, top 600).
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(WordNGram.class,
                    WordNGram.PARAM_NGRAM_USE_TOP_K, 600,
                    WordNGram.PARAM_NGRAM_MIN_N, 1,
                    WordNGram.PARAM_NGRAM_MAX_N, 3));
    Dimension<TcFeatureSet> featureSets = Dimension.create(DIM_FEATURE_SET, featureSet);

    // Multi-label feature selection (Mulan specific options), configured to keep 10.
    String[] attributeEvaluatorArgs = { InfoGainAttributeEval.class.getName() };
    Map<String, Object> featureSelection = new HashMap<String, Object>();
    featureSelection.put(DIM_LABEL_TRANSFORMATION_METHOD, "BinaryRelevanceAttributeEvaluator");
    featureSelection.put(DIM_ATTRIBUTE_EVALUATOR_ARGS, asList(attributeEvaluatorArgs));
    featureSelection.put(DIM_NUM_LABELS_TO_KEEP, 10);
    featureSelection.put(DIM_APPLY_FEATURE_SELECTION, true);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_MULTI_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            Dimension.create(DIM_BIPARTITION_THRESHOLD, BIPARTITION_THRESHOLD),
            featureSets,
            classifierConfigs,
            Dimension.createBundle("featureSelection", featureSelection));
}

/**
 * Builds one classifier configuration map: the classification arguments (a fresh
 * {@code MekaAdapter} followed by the given classifier arguments), the adapter's data writer
 * class name and its sparse-feature setting.
 *
 * @param classifierArgs classifier class name followed by its options
 * @return a new map holding the three configuration entries
 */
private static Map<String, Object> buildMekaConfig(String... classifierArgs) {
    // The adapter goes first, the classifier name and its options follow unchanged.
    Object[] classificationArgs = new Object[classifierArgs.length + 1];
    classificationArgs[0] = new MekaAdapter();
    System.arraycopy(classifierArgs, 0, classificationArgs, 1, classifierArgs.length);

    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, classificationArgs);
    config.put(DIM_DATA_WRITER, new MekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new MekaAdapter().useSparseFeatures());
    return config;
}
Also used : HashMap(java.util.HashMap) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) BR(meka.classifiers.multilabel.BR) CCq(meka.classifiers.multilabel.CCq) NaiveBayes(weka.classifiers.bayes.NaiveBayes) MekaAdapter(org.dkpro.tc.ml.weka.MekaAdapter) ParameterSpace(org.dkpro.lab.task.ParameterSpace) PSUpdateable(meka.classifiers.multilabel.incremental.PSUpdateable) HashMap(java.util.HashMap) Map(java.util.Map)

Example 23 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class MekaSaveAndApplyModelMultilabelDemo method main.

/**
 * Demo entry point: runs the save-model experiment, then applies the stored model to a short
 * example text.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if setup, the save-model run or applying the model fails
 */
public static void main(String[] args) throws Exception {
    // Demo-only convenience: makes sure the required DKPRO_HOME environment variable is set,
    // so the experiments can be run even if the setup instructions were skipped.
    // Do not rely on this in real experiments; read the documentation and set DKPRO_HOME
    // as explained there.
    final String demoName = MekaSaveAndApplyModelMultilabelDemo.class.getSimpleName();
    DemoUtils.setDkproHome(demoName);

    final ParameterSpace parameterSpace = getParameterSpace();

    final MekaSaveAndApplyModelMultilabelDemo demo = new MekaSaveAndApplyModelMultilabelDemo();
    demo.runSaveModel(parameterSpace);
    demo.applyStoredModel("An example sentence. And another one.");
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 24 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class DynetSeq2SeqTrainTest method getParameterSpace.

/**
 * Assembles the parameter space for the Dynet sequence experiment: TEI train/test readers plus
 * the deep-learning dimensions (Python installation, embeddings, memory, user code).
 *
 * @param python3 value stored in the Python installation dimension
 * @return the fully configured parameter space
 * @throws ResourceInitializationException if a reader description cannot be created
 */
public static ParameterSpace getParameterSpace(String python3) throws ResourceInitializationException {
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            TeiReader.PARAM_PATTERNS, "*.xml");

    // Careful - the testing file needs at least 2 sequences, otherwise things will crash.
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTest,
            TeiReader.PARAM_PATTERNS, "*.xml");

    // Both readers are bundled into a single "readers" dimension.
    Map<String, Object> readers = new HashMap<String, Object>();
    readers.put(DIM_READER_TRAIN, trainReader);
    readers.put(DIM_READER_TEST, testReader);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_FEATURE_MODE, Constants.FM_SEQUENCE),
            Dimension.create(DIM_LEARNING_MODE, Constants.LM_SINGLE_LABEL),
            Dimension.create(DeepLearningConstants.DIM_PYTHON_INSTALLATION, python3),
            Dimension.create(DeepLearningConstants.DIM_PRETRAINED_EMBEDDINGS, "src/test/resources/wordvector/glove.6B.50d_250.txt"),
            Dimension.create(DeepLearningConstants.DIM_RAM_WORKING_MEMORY, "4096"),
            Dimension.create(DeepLearningConstants.DIM_VECTORIZE_TO_INTEGER, false),
            Dimension.create(DeepLearningConstants.DIM_USER_CODE, "src/main/resources/dynetCode/dynetPoStagger.py"));
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 25 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class KerasSeq2SeqCrossValidation method getParameterSpace.

/**
 * Assembles the parameter space for the Keras sequence experiment: a single TEI training
 * reader plus the deep-learning dimensions (Python installation, maximum length, user code).
 *
 * @return the fully configured parameter space
 * @throws ResourceInitializationException if the reader description cannot be created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Only a training reader is registered here; no test reader is configured.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            TeiReader.PARAM_PATTERNS, asList(INCLUDE_PREFIX + "a01.xml"));

    Map<String, Object> readers = new HashMap<String, Object>();
    readers.put(DIM_READER_TRAIN, trainReader);

    // NOTE(review): the Python installation path is hard-coded to /usr/local/bin/python3;
    // confirm it matches the machine this runs on.
    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_FEATURE_MODE, Constants.FM_SEQUENCE),
            Dimension.create(DIM_LEARNING_MODE, Constants.LM_SINGLE_LABEL),
            Dimension.create(DeepLearningConstants.DIM_PYTHON_INSTALLATION, "/usr/local/bin/python3"),
            Dimension.create(DeepLearningConstants.DIM_MAXIMUM_LENGTH, 75),
            Dimension.create(DeepLearningConstants.DIM_VECTORIZE_TO_INTEGER, true),
            Dimension.create(DeepLearningConstants.DIM_USER_CODE, "src/main/resources/kerasCode/seq/posTaggingLstm.py"));
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace)130 HashMap (java.util.HashMap)60 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)51 Map (java.util.Map)45 Test (org.junit.Test)44 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)42 File (java.io.File)26 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)21 DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask)12 ArrayList (java.util.ArrayList)10 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 TaskContext (org.dkpro.lab.engine.TaskContext)7 CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 List (java.util.List)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle)5 SMO (weka.classifiers.functions.SMO)5 Task (org.dkpro.lab.task.Task)4