Search in sources :

Example 36 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class SvmHmmBrownPosDemo method getParameterSpace.

/**
 * Assembles the parameter space for this demo.
 *
 * <p>The space bundles the reader dimension obtained from {@code getDimReaders()}, a single
 * feature set (token ratio per document plus character n-grams), and a "config" bundle that
 * carries the SVM-HMM adapter together with its classification arguments, data writer class
 * name and sparse-feature flag. Learning mode is single-label, feature mode is sequence.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException declared by the signature; presumably raised by
 *         the reader/feature factories - confirm against callers
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Training and test reader configuration comes from the shared helper.
    Map<String, Object> readers = getDimReaders();

    // Feature set: token ratio plus character 2- to 3-grams with USE_TOP_K set to 20.
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(CharacterNGram.class,
                    CharacterNGram.PARAM_NGRAM_USE_TOP_K, 20,
                    CharacterNGram.PARAM_NGRAM_MIN_N, 2,
                    CharacterNGram.PARAM_NGRAM_MAX_N, 3));
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(Constants.DIM_FEATURE_SET, featureSet);

    // Machine-learning adapter configuration, bundled as one dimension.
    Map<String, Object> adapterConfig = new HashMap<>();
    Object[] classificationArgs = { new SvmHmmAdapter(), "-c", "5.0", "-t", "1", "-m", "0" };
    adapterConfig.put(DIM_CLASSIFICATION_ARGS, classificationArgs);
    adapterConfig.put(DIM_DATA_WRITER, new SvmHmmAdapter().getDataWriterClass().getName());
    adapterConfig.put(DIM_FEATURE_USE_SPARSE, new SvmHmmAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> adapterDimension = Dimension.createBundle("config", adapterConfig);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(Constants.DIM_LEARNING_MODE, Constants.LM_SINGLE_LABEL),
            Dimension.create(Constants.DIM_FEATURE_MODE, Constants.FM_SEQUENCE),
            featureSetDimension,
            adapterDimension);
}
Also used : HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) SvmHmmAdapter(org.dkpro.tc.ml.svmhmm.SvmHmmAdapter) HashMap(java.util.HashMap) Map(java.util.Map)

Example 37 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaAblationDemo method getParameterSpace.

/**
 * Assembles the parameter space for the ablation demo.
 *
 * <p>Registers a training and a test reader (both {@code FolderwiseDataReader} over
 * {@code "*}{@code /*.txt"} patterns), a "config" bundle for the Weka adapter with
 * {@code NaiveBayes}, and the feature-set dimension produced by
 * {@code ExperimentUtil.getAblationTestFeatures} for three features. Learning mode is
 * single-label, feature mode is document.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException declared by the signature; presumably raised while
 *         creating the reader descriptions - confirm against callers
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader dimension: train/test will use both readers, while cross-validation
    // will only use the train part.
    Map<String, Object> readers = new HashMap<>();

    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    readers.put(DIM_READER_TRAIN, trainReader);

    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    readers.put(DIM_READER_TEST, testReader);

    // Machine-learning adapter configuration, bundled as one dimension.
    Map<String, Object> adapterConfig = new HashMap<>();
    Object[] classificationArgs = { new WekaAdapter(), NaiveBayes.class.getName() };
    adapterConfig.put(DIM_CLASSIFICATION_ARGS, classificationArgs);
    adapterConfig.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    adapterConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> adapterDimension = Dimension.createBundle("config", adapterConfig);

    // Feature-set dimension derived from the three listed features.
    Dimension<TcFeatureSet> ablationFeatureSets = ExperimentUtil.getAblationTestFeatures(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(EmoticonRatio.class),
            TcFeatureFactory.create(NumberOfHashTags.class));

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            ablationFeatureSets,
            adapterDimension);
}
Also used : EmoticonRatio(org.dkpro.tc.features.twitter.EmoticonRatio) HashMap(java.util.HashMap) TokenRatioPerDocument(org.dkpro.tc.features.maxnormalization.TokenRatioPerDocument) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) NaiveBayes(weka.classifiers.bayes.NaiveBayes) ParameterSpace(org.dkpro.lab.task.ParameterSpace) NumberOfHashTags(org.dkpro.tc.features.twitter.NumberOfHashTags) HashMap(java.util.HashMap) Map(java.util.Map)

Example 38 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class XgboostRegression method getParameterSpace.

/**
 * Assembles the parameter space for the Xgboost regression demo.
 *
 * <p>Registers a training and a test {@code LinewiseTextOutcomeReader} over the essay data
 * files, one feature set (sentence ratio and token ratio per document), and a "config"
 * bundle for the Xgboost adapter. Learning mode is regression, feature mode is document.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException declared by the signature; presumably raised while
 *         creating the reader descriptions - confirm against callers
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader dimension: train/test will use both readers, while cross-validation will only
    // use the train part. The reader is also responsible for setting the labels/outcome on
    // all documents/instances it creates.
    Map<String, Object> readers = new HashMap<>();

    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, "src/main/resources/data/essays/train/essay_train.txt",
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    readers.put(DIM_READER_TRAIN, trainReader);

    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, "src/main/resources/data/essays/test/essay_test.txt",
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    readers.put(DIM_READER_TEST, testReader);

    // Single feature set: sentence ratio plus token ratio per document.
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(SentenceRatioPerDocument.class),
            TcFeatureFactory.create(TokenRatioPerDocument.class));
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(DIM_FEATURE_SET, featureSet);

    // Machine-learning adapter configuration, bundled as one dimension.
    Map<String, Object> adapterConfig = new HashMap<>();
    Object[] classificationArgs = { new XgboostAdapter(), "booster=gbtree", "reg:linear" };
    adapterConfig.put(DIM_CLASSIFICATION_ARGS, classificationArgs);
    adapterConfig.put(DIM_DATA_WRITER, new XgboostAdapter().getDataWriterClass().getName());
    adapterConfig.put(DIM_FEATURE_USE_SPARSE, new XgboostAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> adapterDimension = Dimension.createBundle("config", adapterConfig);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_REGRESSION),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            featureSetDimension,
            adapterDimension);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) XgboostAdapter(org.dkpro.tc.ml.xgboost.XgboostAdapter) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map)

Example 39 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class KerasCrossValidation method runTest.

/**
 * Runs the Keras document cross-validation demo and checks that the expected id2outcome
 * files were produced.
 *
 * <p>If {@code getEnvironment()} throws, a message is printed to standard error and the
 * method returns without running the demo or any assertion.
 *
 * @throws Exception propagated from the demo run
 */
@Test
public void runTest() throws Exception {
    DemoUtils.setDkproHome(KerasDocumentCrossValidation.class.getSimpleName());

    String pythonPath;
    try {
        pythonPath = getEnvironment();
    } catch (Exception e) {
        // No usable environment: skip the remainder rather than fail.
        System.err.println("Failed to locate Python with Keras - will skip this test case");
        return;
    }

    ParameterSpace space = KerasDocumentCrossValidation.getParameterSpace(pythonPath);
    KerasDocumentCrossValidation.runCrossValidation(space);

    // The combined file in the CV folder + 2 single files in the per-fold folder
    final int expectedFiles = 3;
    assertEquals(expectedFiles, ContextMemoryReport.id2outcomeFiles.size());
    for (int i = 0; i < expectedFiles; i++) {
        assertTrue(ContextMemoryReport.id2outcomeFiles.get(i).exists());
    }
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) Test(org.junit.Test)

Example 40 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class KerasRegressionCrossValidation method runTest.

/**
 * Runs the Keras regression cross-validation demo and checks the Spearman correlation
 * computed from the first combined cross-validation id2outcome file.
 *
 * <p>If {@code getEnvironment()} throws, a message is printed to standard error and the
 * method returns without running the demo or any assertion.
 *
 * @throws Exception propagated from the demo run or the evaluation conversion
 */
@Test
public void runTest() throws Exception {
    DemoUtils.setDkproHome(KerasRegressionWassa.class.getSimpleName());

    String pythonPath;
    try {
        pythonPath = getEnvironment();
    } catch (Exception e) {
        // No usable environment: skip the remainder rather than fail.
        System.err.println("Failed to locate Python with Keras - will skip this test case");
        return;
    }

    ParameterSpace space = KerasRegression.getParameterSpace(pythonPath);
    KerasRegression.runCrossValidation(space);

    EvaluationData<Double> outcomes = Tc2LtlabEvalConverter.convertRegressionModeId2Outcome(
            ContextMemoryReport.crossValidationCombinedIdFiles.get(0));
    SpearmanCorrelation correlation = new SpearmanCorrelation(outcomes);

    // NOTE(review): the test expects a negative correlation - confirm this is intended.
    assertTrue(correlation.getResult() < 0.0);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) SpearmanCorrelation(de.unidue.ltl.evaluation.measures.correlation.SpearmanCorrelation) Test(org.junit.Test)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace)130 HashMap (java.util.HashMap)60 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)51 Map (java.util.Map)45 Test (org.junit.Test)44 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)42 File (java.io.File)26 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)21 DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask)12 ArrayList (java.util.ArrayList)10 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 TaskContext (org.dkpro.lab.engine.TaskContext)7 CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 List (java.util.List)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle)5 SMO (weka.classifiers.functions.SMO)5 Task (org.dkpro.lab.task.Task)4