Search in sources :

Example 1 with LibsvmAdapter

use of org.dkpro.tc.ml.libsvm.LibsvmAdapter in project dkpro-tc by dkpro.

the class LibsvmDocumentPlain method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TEST, readerTest);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet("DummyFeatureSet", TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter(), "-s", "1", "-c", "1000", "-t", "3" });
    config.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas);
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) LibsvmAdapter(org.dkpro.tc.ml.libsvm.LibsvmAdapter) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with LibsvmAdapter

use of org.dkpro.tc.ml.libsvm.LibsvmAdapter in project dkpro-tc by dkpro.

the class LibsvmBrownUnitPosDemo method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(TeiReader.class, TeiReader.PARAM_LANGUAGE, "en", TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, TeiReader.PARAM_PATTERNS, new String[] { INCLUDE_PREFIX + "*.xml", INCLUDE_PREFIX + "*.xml.gz" });
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(TeiReader.class, TeiReader.PARAM_LANGUAGE, "en", TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, TeiReader.PARAM_PATTERNS, new String[] { INCLUDE_PREFIX + "*.xml", INCLUDE_PREFIX + "*.xml.gz" });
    dimReaders.put(DIM_READER_TEST, readerTest);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(Constants.DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(CharacterNGram.class, CharacterNGram.PARAM_NGRAM_LOWER_CASE, false, CharacterNGram.PARAM_NGRAM_USE_TOP_K, 50)));
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter(), "-c", "10" });
    config.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_UNIT), dimFeatureSets, mlas);
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) LibsvmAdapter(org.dkpro.tc.ml.libsvm.LibsvmAdapter) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with LibsvmAdapter

use of org.dkpro.tc.ml.libsvm.LibsvmAdapter in project dkpro-tc by dkpro.

the class MultiSvmUsingWekaLibsvmLiblinear method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    // train/test will use both, while cross-validation will only use the
    // train part
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    // 
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TEST, readerTest);
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), SMO.class.getName(), "-C", "1.0", "-K", PolyKernel.class.getName() + " " + "-C -1 -E 2" });
    config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Map<String, Object> config2 = new HashMap<>();
    config2.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LiblinearAdapter(), "-s", "4", "-c", "100" });
    config2.put(DIM_DATA_WRITER, new LiblinearAdapter().getDataWriterClass().getName());
    config2.put(DIM_FEATURE_USE_SPARSE, new LiblinearAdapter().useSparseFeatures());
    Map<String, Object> config3 = new HashMap<>();
    config3.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter(), "-s", "1", "-c", "1000", "-t", "3" });
    config3.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
    config3.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config, config2, config3);
    Dimension<String> dimLearningMode = Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL);
    Dimension<String> dimFeatureMode = Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT);
    Dimension<TcFeatureSet> dimFeatureSet = Dimension.create(DIM_FEATURE_SET, getFeatureSet());
    ParameterSpace ps = new ParameterSpace(dimLearningMode, dimFeatureMode, dimFeatureMode, dimFeatureSet, mlas, Dimension.createBundle(DIM_READERS, dimReaders));
    return ps;
}
Also used : HashMap(java.util.HashMap) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) LiblinearAdapter(org.dkpro.tc.ml.liblinear.LiblinearAdapter) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) SMO(weka.classifiers.functions.SMO) ParameterSpace(org.dkpro.lab.task.ParameterSpace) LibsvmAdapter(org.dkpro.tc.ml.libsvm.LibsvmAdapter) PolyKernel(weka.classifiers.functions.supportVector.PolyKernel) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with LibsvmAdapter

use of org.dkpro.tc.ml.libsvm.LibsvmAdapter in project dkpro-tc by dkpro.

the class LibsvmSaveAndLoadModelDocumentSingleLabelTest method documentGetParameterSpaceSingleLabel.

private ParameterSpace documentGetParameterSpaceSingleLabel(boolean useClassificationArguments) throws ResourceInitializationException {
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, documentTrainFolder, FolderwiseDataReader.PARAM_LANGUAGE, "en", FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter(), "-c", "100" });
    config.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
    ParameterSpace pSpace;
    if (useClassificationArguments) {
        pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), mlas, dimFeatureSets);
    } else {
        config = new HashMap<>();
        config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter() });
        config.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
        config.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
        mlas = Dimension.createBundle("config", config);
        pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas);
    }
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) LibsvmAdapter(org.dkpro.tc.ml.libsvm.LibsvmAdapter) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with LibsvmAdapter

use of org.dkpro.tc.ml.libsvm.LibsvmAdapter in project dkpro-tc by dkpro.

the class MultiRegressionWekaLibsvmLiblinear method getParameterSpace.

public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // configure training and test data reader dimension
    // train/test will use both, while cross-validation will only use the train part
    // The reader is also responsible for setting the labels/outcome on all
    // documents/instances it creates.
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(LinewiseTextOutcomeReader.class, LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0, LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1, LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, "src/main/resources/data/essays/train/essay_train.txt", LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(LinewiseTextOutcomeReader.class, LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0, LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1, LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, "src/main/resources/data/essays/test/essay_test.txt", LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    dimReaders.put(DIM_READER_TEST, readerTest);
    Map<String, Object> xgboostConfig = new HashMap<>();
    xgboostConfig.put(DIM_CLASSIFICATION_ARGS, new Object[] { new XgboostAdapter(), "booster=gbtree", "reg:linear" });
    xgboostConfig.put(DIM_DATA_WRITER, new XgboostAdapter().getDataWriterClass().getName());
    xgboostConfig.put(DIM_FEATURE_USE_SPARSE, new XgboostAdapter().useSparseFeatures());
    Map<String, Object> liblinearConfig = new HashMap<>();
    liblinearConfig.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LiblinearAdapter(), "-s", "6" });
    liblinearConfig.put(DIM_DATA_WRITER, new LiblinearAdapter().getDataWriterClass().getName());
    liblinearConfig.put(DIM_FEATURE_USE_SPARSE, new LiblinearAdapter().useSparseFeatures());
    Map<String, Object> libsvmConfig = new HashMap<>();
    libsvmConfig.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter(), "-s", "3", "-c", "10" });
    libsvmConfig.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
    libsvmConfig.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
    Map<String, Object> wekaConfig = new HashMap<>();
    wekaConfig.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), LinearRegression.class.getName() });
    wekaConfig.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    wekaConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", xgboostConfig, liblinearConfig, libsvmConfig, wekaConfig);
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(SentenceRatioPerDocument.class), TcFeatureFactory.create(LengthFeatureNominal.class), TcFeatureFactory.create(TokenRatioPerDocument.class)));
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_REGRESSION), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas);
    return pSpace;
}
Also used : HashMap(java.util.HashMap) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) LiblinearAdapter(org.dkpro.tc.ml.liblinear.LiblinearAdapter) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) ParameterSpace(org.dkpro.lab.task.ParameterSpace) LibsvmAdapter(org.dkpro.tc.ml.libsvm.LibsvmAdapter) XgboostAdapter(org.dkpro.tc.ml.xgboost.XgboostAdapter) LinearRegression(weka.classifiers.functions.LinearRegression) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

HashMap (java.util.HashMap)7 Map (java.util.Map)7 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)7 ParameterSpace (org.dkpro.lab.task.ParameterSpace)7 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)2 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)2 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)1 LinearRegression (weka.classifiers.functions.LinearRegression)1 SMO (weka.classifiers.functions.SMO)1 PolyKernel (weka.classifiers.functions.supportVector.PolyKernel)1