Search in sources:

Example 11 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

In the class CRFSuiteSaveAndLoadModelTest, the method loadModelArowParameters.

/**
 * Stores a CRFsuite model configured with the adaptive-regularization-of-weight-vector
 * algorithm into a temporary folder, then runs a {@code TcAnnotator} pointed at that folder
 * over a short two-sentence text and verifies the produced outcomes.
 *
 * <p>Expectations: exactly one {@code TextClassificationOutcome} per token (11 in total) and
 * every outcome value is contained in {@code postags} (declared outside this method).
 */
@Test
public void loadModelArowParameters() throws Exception {
    Map<String, Object> arowConfig = new HashMap<>();
    arowConfig.put(DIM_CLASSIFICATION_ARGS, new Object[] {
            new CrfSuiteAdapter(),
            CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR,
            "-p",
            "max_iterations=2" });
    arowConfig.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    // NOTE(review): the sparse-feature flag is taken from WekaAdapter although this test is
    // about CRFsuite — presumably a copy-paste leftover; confirm whether CrfSuiteAdapter
    // should be asked instead.
    arowConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> arowBundle = Dimension.createBundle("config", arowConfig);

    // Train and persist the model.
    File storedModel = folder.newFolder();
    ParameterSpace space = getParameterSpace(arowBundle);
    executeSaveModelIntoTemporyFolder(space, storedModel);

    // Prepare the document the stored model is applied to.
    JCas document = JCasFactory.createJCas();
    document.setDocumentText("This is an example text. It has 2 sentences.");
    document.setDocumentLanguage("en");

    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine annotator = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, storedModel.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    // Segmentation has to run first so the annotator finds sentences and tokens.
    segmenter.process(document);
    annotator.process(document);

    List<TextClassificationOutcome> predicted =
            new ArrayList<>(JCasUtil.select(document, TextClassificationOutcome.class));

    // 9 word tokens + 2 punctuation marks
    assertEquals(11, predicted.size());
    for (TextClassificationOutcome outcome : predicted) {
        assertTrue(postags.contains(outcome.getOutcome()));
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) CrfSuiteAdapter(org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 12 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

In the class LiblinearSaveAndLoadModelDocumentRegression, the method regressionGetParameterSpace.

/**
 * Assembles the parameter space used by the regression round-trip test.
 *
 * <p>The space consists of a training reader over
 * {@code src/main/resources/data/essays/train/essay_train.txt} (outcome in column 0, text in
 * column 1, language {@code "en"}), the regression learning mode, the document feature mode,
 * a feature set of three extractors and the Liblinear classification arguments
 * {@code "-s", "6"}.
 *
 * @return the parameter space described above
 * @throws Exception if the reader description cannot be created
 */
private ParameterSpace regressionGetParameterSpace() throws Exception {
    // PARAM_LANGUAGE used to be passed twice with the same value; one occurrence is enough.
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en",
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, "src/main/resources/data/essays/train/essay_train.txt");

    Map<String, Object> dimReaders = new HashMap<String, Object>();
    dimReaders.put(DIM_READER_TRAIN, readerTrain);

    @SuppressWarnings("unchecked")
    Dimension<List<Object>> dimClassificationArgs = Dimension.create(
            DIM_CLASSIFICATION_ARGS,
            Arrays.asList(new Object[] { new LiblinearAdapter(), "-s", "6" }));

    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(
            DIM_FEATURE_SET,
            new TcFeatureSet(
                    TcFeatureFactory.create(SentenceRatioPerDocument.class),
                    TcFeatureFactory.create(WordNGram.class),
                    TcFeatureFactory.create(TokenRatioPerDocument.class)));

    return new ParameterSpace(
            Dimension.createBundle("readers", dimReaders),
            Dimension.create(DIM_LEARNING_MODE, LM_REGRESSION),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            dimFeatureSets,
            dimClassificationArgs);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) ArrayList(java.util.ArrayList) List(java.util.List) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) LiblinearAdapter(org.dkpro.tc.ml.liblinear.LiblinearAdapter)

Example 13 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

In the class LiblinearSaveAndLoadModelDocumentRegression, the method documentRoundTripLiblinear.

/**
 * Round-trip test for a regression model trained on scored essay texts.
 *
 * <p>Sets the DKPro home from this test class' simple name, creates a fresh temporary folder,
 * hands the regression parameter space and that folder to {@code regressionExecuteSaveModel}
 * and finally passes the same folder to {@code regressionLoadModel}.
 */
@Test
public void documentRoundTripLiblinear() throws Exception {
    String homeName = LiblinearSaveAndLoadModelDocumentRegression.class.getSimpleName();
    DemoUtils.setDkproHome(homeName);

    File storedModel = folder.newFolder();
    regressionExecuteSaveModel(regressionGetParameterSpace(), storedModel);
    regressionLoadModel(storedModel);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) File(java.io.File) Test(org.junit.Test)

Example 14 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

In the class LibsvmSaveAndLoadModelDocumentRegression, the method documentRoundTripLiblinear.

/**
 * Round-trip test for a regression model trained on scored essay texts.
 *
 * <p>Sets the DKPro home from this test class' simple name, creates a fresh temporary folder,
 * hands the regression parameter space and that folder to {@code regressionExecuteSaveModel}
 * and finally passes the same folder to {@code regressionLoadModel}.
 *
 * <p>NOTE(review): the method name mentions Liblinear while the DKPro home is derived from
 * the Libsvm test class — presumably a copy-paste leftover; confirm before renaming.
 */
@Test
public void documentRoundTripLiblinear() throws Exception {
    String homeName = LibsvmSaveAndLoadModelDocumentRegression.class.getSimpleName();
    DemoUtils.setDkproHome(homeName);

    File storedModel = folder.newFolder();
    regressionExecuteSaveModel(regressionGetParameterSpace(), storedModel);
    regressionLoadModel(storedModel);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) File(java.io.File) Test(org.junit.Test)

Example 15 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

In the class SVMHMMSaveAndLoadModelTest, the method saveModel.

/**
 * Saves a model into a fresh temporary folder and asserts that each expected artifact exists
 * directly inside it: the classifier, the meta-collector and extractor overrides, the model
 * meta file, and the feature-mode and learning-mode files.
 */
@Test
public void saveModel() throws Exception {
    File targetDir = folder.newFolder();
    executeSaveModelIntoTemporyFolder(getParameterSpace(), targetDir);

    // Common prefix for every artifact path checked below.
    String prefix = targetDir.getAbsolutePath() + "/";

    // One assertion per line so a failing line number identifies the missing file.
    assertTrue(new File(prefix + MODEL_CLASSIFIER).exists());
    assertTrue(new File(prefix + META_COLLECTOR_OVERRIDE).exists());
    assertTrue(new File(prefix + META_EXTRACTOR_OVERRIDE).exists());
    assertTrue(new File(prefix + MODEL_META).exists());
    assertTrue(new File(prefix + MODEL_FEATURE_MODE).exists());
    assertTrue(new File(prefix + MODEL_LEARNING_MODE).exists());

    targetDir.deleteOnExit();
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) File(java.io.File) Test(org.junit.Test)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace)130 HashMap (java.util.HashMap)60 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)51 Map (java.util.Map)45 Test (org.junit.Test)44 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)42 File (java.io.File)26 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)21 DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask)12 ArrayList (java.util.ArrayList)10 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 TaskContext (org.dkpro.lab.engine.TaskContext)7 CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 List (java.util.List)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle)5 SMO (weka.classifiers.functions.SMO)5 Task (org.dkpro.lab.task.Task)4