Search in sources :

Example 41 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class XgboostUnit method getParameterSpace.

/**
 * Assembles the parameter space for the XGBoost setup in unit feature mode.
 *
 * <p>The space consists of a "readers" bundle (train and test TEI readers, both configured
 * for language "en"), the single-label learning mode, the unit feature mode, one feature
 * set (token ratio plus character n-grams) and a "config" bundle carrying the XGBoost
 * classification arguments, data writer class name and sparse-feature flag.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException
 *             declared by this method; presumably raised while the reader descriptions are
 *             created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    String[] trainPatterns = { INCLUDE_PREFIX + "*.xml", INCLUDE_PREFIX + "*.xml.gz" };
    String[] testPatterns = { "*.xml", "*.xml.gz" };

    // Readers for the training and the test data.
    Map<String, Object> readers = new HashMap<>();
    readers.put(DIM_READER_TRAIN,
            CollectionReaderFactory.createReaderDescription(
                    TeiReader.class,
                    TeiReader.PARAM_LANGUAGE, "en",
                    TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
                    TeiReader.PARAM_PATTERNS, trainPatterns));
    // NOTE(review): the test reader also points at corpusFilePathTrain and its patterns
    // carry no INCLUDE_PREFIX - confirm that both are intentional.
    readers.put(DIM_READER_TEST,
            CollectionReaderFactory.createReaderDescription(
                    TeiReader.class,
                    TeiReader.PARAM_LANGUAGE, "en",
                    TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
                    TeiReader.PARAM_PATTERNS, testPatterns));

    // Feature extractors: token ratio and the top 50 character n-grams (no lower-casing).
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(CharacterNGram.class,
                    CharacterNGram.PARAM_NGRAM_LOWER_CASE, false,
                    CharacterNGram.PARAM_NGRAM_USE_TOP_K, 50));
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(Constants.DIM_FEATURE_SET, featureSet);

    // Classifier-specific settings, bundled so they are varied together.
    Map<String, Object> classifierSettings = new HashMap<>();
    classifierSettings.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new XgboostAdapter(), "objective=multi:softmax" });
    classifierSettings.put(DIM_DATA_WRITER,
            new XgboostAdapter().getDataWriterClass().getName());
    classifierSettings.put(DIM_FEATURE_USE_SPARSE,
            new XgboostAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> classifierDimension = Dimension.createBundle("config", classifierSettings);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_UNIT),
            featureSetDimension,
            classifierDimension);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) XgboostAdapter(org.dkpro.tc.ml.xgboost.XgboostAdapter) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map)

Example 42 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelDocumentSingleLabelTest method documentRoundTripWekaSingleLabel.

/**
 * Writes a single-label document model into a fresh folder, loads it again and then checks
 * that every expected model file is present in that folder.
 */
@Test
public void documentRoundTripWekaSingleLabel() throws Exception {
    DemoUtils.setDkproHome(WekaSaveAndLoadModelDocumentSingleLabelTest.class.getSimpleName());

    File modelFolder = folder.newFolder();
    ParameterSpace parameterSpace = documentGetParameterSpaceSingleLabel();

    // Round trip: persist the model, then read it back.
    documentWriteModel(parameterSpace, modelFolder);
    documentLoadModelSingleLabel(modelFolder);

    // Every file checked below is looked up directly inside the model folder.
    String modelDir = modelFolder.getAbsolutePath() + "/";
    assertTrue(new File(modelDir + MODEL_CLASSIFIER).exists());
    assertTrue(new File(modelDir + META_COLLECTOR_OVERRIDE).exists());
    assertTrue(new File(modelDir + META_EXTRACTOR_OVERRIDE).exists());
    assertTrue(new File(modelDir + MODEL_META).exists());
    assertTrue(new File(modelDir + MODEL_FEATURE_MODE).exists());
    assertTrue(new File(modelDir + MODEL_LEARNING_MODE).exists());
    assertTrue(new File(modelDir + MODEL_BIPARTITION_THRESHOLD).exists());

    // NOTE(review): deleteOnExit cannot remove a directory that still contains files;
    // cleanup presumably relies on whatever manages `folder` - confirm.
    modelFolder.deleteOnExit();
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) File(java.io.File) Test(org.junit.Test)

Example 43 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelDocumentMultiLabelTest method documentGetParameterSpaceMultiLabel.

/**
 * Assembles the parameter space for the multi-label document experiment.
 *
 * <p>The space holds a "readers" bundle with a single training reader over the Reuters
 * corpus, the multi-label learning mode, the document feature mode, one feature set
 * (word n-grams plus token ratio), a bipartition threshold of "0.5" and a "config" bundle
 * with the Meka classification arguments, data writer class name and sparse-feature flag.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException
 *             declared by this method; presumably raised while the reader description is
 *             created
 */
private ParameterSpace documentGetParameterSpaceMultiLabel() throws ResourceInitializationException {
    // Training reader only; no test reader is registered here.
    Map<String, Object> readers = new HashMap<>();
    String textFilePattern = ReutersCorpusReader.INCLUDE_PREFIX + "*.txt";
    readers.put(DIM_READER_TRAIN,
            CollectionReaderFactory.createReaderDescription(
                    ReutersCorpusReader.class,
                    ReutersCorpusReader.PARAM_SOURCE_LOCATION, documentTrainFolderReuters,
                    ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, documentGoldLabelsReuters,
                    ReutersCorpusReader.PARAM_LANGUAGE, "en",
                    ReutersCorpusReader.PARAM_PATTERNS, textFilePattern));

    // Classifier-specific settings, bundled so they are varied together.
    Map<String, Object> classifierSettings = new HashMap<>();
    classifierSettings.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new MekaAdapter(), MULAN.class.getName(), "-S", "RAkEL2", "-W",
                    RandomForest.class.getName() });
    classifierSettings.put(DIM_DATA_WRITER,
            new MekaAdapter().getDataWriterClass().getName());
    classifierSettings.put(DIM_FEATURE_USE_SPARSE,
            new MekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> classifierDimension = Dimension.createBundle("config", classifierSettings);

    // Feature extractors: top 50 word n-grams of length 1 to 3, plus the token ratio.
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(WordNGram.class,
                    WordNGram.PARAM_NGRAM_USE_TOP_K, 50,
                    WordNGram.PARAM_NGRAM_MIN_N, 1,
                    WordNGram.PARAM_NGRAM_MAX_N, 3),
            TcFeatureFactory.create(TokenRatioPerDocument.class));
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(DIM_FEATURE_SET, featureSet);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_MULTI_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            featureSetDimension,
            Dimension.create(DIM_BIPARTITION_THRESHOLD, "0.5"),
            classifierDimension);
}
Also used : HashMap(java.util.HashMap) RandomForest(weka.classifiers.trees.RandomForest) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) MekaAdapter(org.dkpro.tc.ml.weka.MekaAdapter) MULAN(meka.classifiers.multilabel.MULAN) ParameterSpace(org.dkpro.lab.task.ParameterSpace) HashMap(java.util.HashMap) Map(java.util.Map)

Example 44 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelDocumentRegression method regressionGetParameterSpace.

/**
 * Assembles the parameter space for the regression experiment on the essay data.
 *
 * <p>The space holds a "readers" bundle with a single line-wise training reader (outcome in
 * column 0, text in column 1), the regression learning mode, the document feature mode,
 * one feature set (token ratio and sentence ratio) and the classification arguments
 * selecting Weka's linear regression.
 *
 * @return the assembled parameter space
 * @throws Exception
 *             declared by this method; presumably raised while the reader description is
 *             created
 */
private ParameterSpace regressionGetParameterSpace() throws Exception {
    // Training reader only; no test reader is registered here.
    Map<String, Object> readers = new HashMap<>();
    readers.put(DIM_READER_TRAIN,
            CollectionReaderFactory.createReaderDescription(
                    LinewiseTextOutcomeReader.class,
                    LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
                    LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
                    LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION,
                    "src/main/resources/data/essays/train/essay_train.txt",
                    LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en"));

    // The adapter and the classifier class name travel together as one argument list.
    List<Object> classifierArgs = Arrays.asList(
            new Object[] { new WekaAdapter(), LinearRegression.class.getName() });
    @SuppressWarnings("unchecked")
    Dimension<List<Object>> classificationArgsDimension = Dimension.create(DIM_CLASSIFICATION_ARGS, classifierArgs);

    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(SentenceRatioPerDocument.class));
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(DIM_FEATURE_SET, featureSet);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_REGRESSION),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            featureSetDimension,
            classificationArgsDimension);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) ArrayList(java.util.ArrayList) List(java.util.List) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter)

Example 45 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelDocumentRegression method documentRoundTripWekaRegression.

/**
 * Trains a regression model on scored essay texts, saves it into a fresh folder, loads it
 * again and then checks that every expected model file is present in that folder.
 */
@Test
public void documentRoundTripWekaRegression() throws Exception {
    DemoUtils.setDkproHome(WekaSaveAndLoadModelDocumentRegression.class.getSimpleName());

    File modelFolder = folder.newFolder();
    ParameterSpace parameterSpace = regressionGetParameterSpace();

    // Round trip: persist the model, then read it back.
    regressionExecuteSaveModel(parameterSpace, modelFolder);
    regressionLoadModel(modelFolder);

    // Every file checked below is looked up directly inside the model folder.
    String modelDir = modelFolder.getAbsolutePath() + "/";
    assertTrue(new File(modelDir + META_COLLECTOR_OVERRIDE).exists());
    assertTrue(new File(modelDir + META_EXTRACTOR_OVERRIDE).exists());
    assertTrue(new File(modelDir + MODEL_CLASSIFIER).exists());
    assertTrue(new File(modelDir + MODEL_META).exists());
    assertTrue(new File(modelDir + MODEL_FEATURE_MODE).exists());
    assertTrue(new File(modelDir + MODEL_LEARNING_MODE).exists());
    assertTrue(new File(modelDir + MODEL_BIPARTITION_THRESHOLD).exists());

    // NOTE(review): deleteOnExit cannot remove a directory that still contains files;
    // cleanup presumably relies on whatever manages `folder` - confirm.
    modelFolder.deleteOnExit();
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) File(java.io.File) Test(org.junit.Test)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace)130 HashMap (java.util.HashMap)60 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)51 Map (java.util.Map)45 Test (org.junit.Test)44 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)42 File (java.io.File)26 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)21 DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask)12 ArrayList (java.util.ArrayList)10 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 TaskContext (org.dkpro.lab.engine.TaskContext)7 CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 List (java.util.List)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle)5 SMO (weka.classifiers.functions.SMO)5 Task (org.dkpro.lab.task.Task)4