Search in sources :

Example 91 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

The method runTrainTestFilter of the class CRFSuiteBrownPosDemoTest.

/**
 * Runs a train/test experiment that uses the CRFsuite adapter together with a
 * feature-filter dimension.
 *
 * @throws Exception if building the parameter space or running the experiment fails
 */
@SuppressWarnings("unchecked")
@Test
public void runTrainTestFilter() throws Exception {
    // Learner settings: classifier arguments, data writer and sparse-feature flag,
    // all taken from the CRFsuite adapter.
    Map<String, Object> learnerSettings = new HashMap<>();
    Object[] classificationArgs = { new CrfSuiteAdapter(), CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR };
    learnerSettings.put(DIM_CLASSIFICATION_ARGS, classificationArgs);
    learnerSettings.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    learnerSettings.put(DIM_FEATURE_USE_SPARSE, new CrfSuiteAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> learnerDimension = Dimension.createBundle("config", learnerSettings);

    // Single feature filter, referenced by its class name.
    List<String> filterClassNames = asList(FilterLuceneCharacterNgramStartingWithLetter.class.getName());
    Dimension<List<String>> filterDimension = Dimension.create(Constants.DIM_FEATURE_FILTERS, filterClassNames);

    // Sequence feature mode with single-label learning, then run train/test.
    javaExperiment.runTrainTest(CRFSuiteBrownPosDemoSimpleDkproReader.getParameterSpace(Constants.FM_SEQUENCE, Constants.LM_SINGLE_LABEL, learnerDimension, filterDimension));
}
Also used : HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) List(java.util.List) Arrays.asList(java.util.Arrays.asList) HashMap(java.util.HashMap) Map(java.util.Map) CrfSuiteAdapter(org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter) Test(org.junit.Test)

Example 92 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

The method saveModel of the class CRFSuiteSaveAndLoadModelTest.

/**
 * Trains a CRFsuite model, stores it in a fresh folder and checks that every
 * expected model file has been written.
 *
 * @throws Exception if the folder cannot be created or the experiment fails
 */
@Test
public void saveModel() throws Exception {
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new CrfSuiteAdapter(), CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR, "-p", "max_iterations=2" });
    config.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    // Read the sparse-feature flag from the adapter that is actually trained
    // (CRFsuite), not from an unrelated adapter.
    config.put(DIM_FEATURE_USE_SPARSE, new CrfSuiteAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);

    File modelFolder = folder.newFolder();
    ParameterSpace pSpace = getParameterSpace(mlas);
    executeSaveModelIntoTemporyFolder(pSpace, modelFolder);

    // Every one of these files must exist in the stored model folder.
    String[] expectedFiles = {
        MODEL_CLASSIFIER,
        MODEL_FEATURE_EXTRACTOR_CONFIGURATION,
        META_COLLECTOR_OVERRIDE,
        META_EXTRACTOR_OVERRIDE,
        MODEL_META,
        MODEL_TC_VERSION,
        MODEL_FEATURE_MODE,
        MODEL_LEARNING_MODE
    };
    for (String fileName : expectedFiles) {
        File expected = new File(modelFolder, fileName);
        assertTrue("Missing model file: " + expected.getAbsolutePath(), expected.exists());
    }

    // NOTE(review): File.deleteOnExit() cannot remove a non-empty directory;
    // presumably `folder` (declared elsewhere) handles the real cleanup — confirm.
    modelFolder.deleteOnExit();
}
Also used : HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) CrfSuiteAdapter(org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) Test(org.junit.Test)

Example 93 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

The method getParameterSpace of the class CRFSuiteSaveAndLoadModelTest.

/**
 * Builds the parameter space for the CRFsuite save/load tests: a Brown corpus
 * training reader, single-label learning, sequence feature mode, a fixed feature
 * set and the supplied machine-learning configuration.
 *
 * @param mlas dimension bundle holding the machine-learning adapter configuration
 * @return the assembled parameter space
 * @throws ResourceInitializationException if the reader description cannot be created
 */
private ParameterSpace getParameterSpace(Dimension<Map<String, Object>> mlas) throws ResourceInitializationException {
    DemoUtils.setDkproHome(this.getClass().getName());
    String trainFolder = "src/main/resources/data/brown_tei/";

    // Only a training reader is configured; no test reader is registered here.
    Map<String, Object> dimReaders = new HashMap<>();
    // Each reader parameter is passed exactly once (the language used to be given twice).
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(
            BrownCorpusReader.class,
            BrownCorpusReader.PARAM_LANGUAGE, "en",
            BrownCorpusReader.PARAM_SOURCE_LOCATION, trainFolder,
            BrownCorpusReader.PARAM_PATTERNS, "*.xml");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);

    // Feature set: top-50 character 1- to 3-grams, Brown cluster membership and
    // the token ratio feature.
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(
            TcFeatureFactory.create(CharacterNGram.class, CharacterNGram.PARAM_NGRAM_USE_TOP_K, 50, CharacterNGram.PARAM_NGRAM_MIN_N, 1, CharacterNGram.PARAM_NGRAM_MAX_N, 3),
            TcFeatureFactory.create(BrownClusterFeature.class, BrownClusterFeature.PARAM_BROWN_CLUSTERS_LOCATION, "src/test/resources/brownCluster/enTweetBrownC1000F40"),
            TcFeatureFactory.create(TokenRatioPerDocument.class)));

    return new ParameterSpace(
            Dimension.createBundle("readers", dimReaders),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_SEQUENCE),
            dimFeatureSets,
            mlas);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet)

Example 94 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

The method loadModelArow of the class CRFSuiteSaveAndLoadModelTest.

/**
 * Trains and stores a CRFsuite model, loads it through {@code TcAnnotator} and
 * checks that a short two-sentence text receives one outcome per token, each
 * contained in {@code postags}.
 *
 * @throws Exception if model creation, engine setup or processing fails
 */
@Test
public void loadModelArow() throws Exception {
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new CrfSuiteAdapter(), CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR, "-p", "max_iterations=2" });
    config.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    // Read the sparse-feature flag from the adapter that is actually trained
    // (CRFsuite), not from an unrelated adapter.
    config.put(DIM_FEATURE_USE_SPARSE, new CrfSuiteAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);

    // create a model
    File modelFolder = folder.newFolder();
    ParameterSpace pSpace = getParameterSpace(mlas);
    executeSaveModelIntoTemporyFolder(pSpace, modelFolder);

    // Input document for the stored model.
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("This is an example text. It has 2 sentences.");
    jcas.setDocumentLanguage("en");

    // Segment first, then classify: sentences act as sequences, tokens as units.
    AnalysisEngine tokenizer = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class, TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(), TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(), TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    tokenizer.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));
    // 9 token + 2 punctuation marks
    assertEquals(11, outcomes.size());
    for (TextClassificationOutcome o : outcomes) {
        String label = o.getOutcome();
        assertTrue("Unexpected outcome label: " + label, postags.contains(label));
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) CrfSuiteAdapter(org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 95 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

The method regressionGetParameterSpace of the class LibsvmSaveAndLoadModelDocumentRegression.

/**
 * Assembles the parameter space for the LIBSVM document-regression test: a
 * line-wise essay training reader, regression learning mode, document feature
 * mode, two ratio features and the LIBSVM adapter configuration.
 *
 * @return the assembled parameter space
 * @throws Exception if the reader description cannot be created
 */
private ParameterSpace regressionGetParameterSpace() throws Exception {
    // Training reader: outcome is taken from column 0, text from column 1.
    Map<String, Object> readers = new HashMap<>();
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, "src/main/resources/data/essays/train/essay_train.txt",
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    readers.put(DIM_READER_TRAIN, trainReader);

    // LIBSVM settings; "-s 3" presumably selects a regression SVM type — confirm
    // against the LIBSVM options.
    Map<String, Object> learnerSettings = new HashMap<>();
    Object[] classificationArgs = { new LibsvmAdapter(), "-s", "3" };
    learnerSettings.put(DIM_CLASSIFICATION_ARGS, classificationArgs);
    learnerSettings.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
    learnerSettings.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> learnerDimension = Dimension.createBundle("config", learnerSettings);

    // Two features: token ratio and sentence ratio.
    TcFeatureSet features = new TcFeatureSet(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(SentenceRatioPerDocument.class));
    Dimension<TcFeatureSet> featureDimension = Dimension.create(DIM_FEATURE_SET, features);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_REGRESSION),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            featureDimension,
            learnerDimension);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) LibsvmAdapter(org.dkpro.tc.ml.libsvm.LibsvmAdapter) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace): 130, HashMap (java.util.HashMap): 60, CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription): 51, Map (java.util.Map): 45, Test (org.junit.Test): 44, TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet): 42, File (java.io.File): 26, WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter): 21, DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask): 12, ArrayList (java.util.ArrayList): 10, LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter): 9, NaiveBayes (weka.classifiers.bayes.NaiveBayes): 9, TaskContext (org.dkpro.lab.engine.TaskContext): 7, CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter): 7, LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter): 7, List (java.util.List): 6, XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter): 6, FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle): 5, SMO (weka.classifiers.functions.SMO): 5, Task (org.dkpro.lab.task.Task): 4