Use of org.dkpro.tc.ml.weka.WekaAdapter in the dkpro-tc project by dkpro.
From class CRFSuiteSaveAndLoadModelTest, method saveModel.
/**
 * Builds a CRFSuite configuration bundle, runs
 * {@code executeSaveModelIntoTemporyFolder} against a fresh folder and asserts
 * that each expected model artifact exists in that folder afterwards.
 *
 * @throws Exception
 *             if any step of the setup or execution fails
 */
@Test
public void saveModel() throws Exception {
    // Configuration bundle: classifier arguments, data writer and sparse-feature flag.
    Map<String, Object> mlConfig = new HashMap<>();
    mlConfig.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new CrfSuiteAdapter(),
                    CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR, "-p",
                    "max_iterations=2" });
    mlConfig.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    // NOTE(review): the sparse flag is read from WekaAdapter although the classifier
    // and data writer are CRFSuite - confirm this is intentional.
    mlConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", mlConfig);

    File modelFolder = folder.newFolder();
    executeSaveModelIntoTemporyFolder(getParameterSpace(mlas), modelFolder);

    // Every one of these files must be present in the model folder.
    String[] expectedArtifacts = { MODEL_CLASSIFIER, MODEL_FEATURE_EXTRACTOR_CONFIGURATION,
            META_COLLECTOR_OVERRIDE, META_EXTRACTOR_OVERRIDE, MODEL_META, MODEL_TC_VERSION,
            MODEL_FEATURE_MODE, MODEL_LEARNING_MODE };
    String pathPrefix = modelFolder.getAbsolutePath() + "/";
    for (String artifact : expectedArtifacts) {
        assertTrue(new File(pathPrefix + artifact).exists());
    }

    modelFolder.deleteOnExit();
}
Use of org.dkpro.tc.ml.weka.WekaAdapter in the dkpro-tc project by dkpro.
From class CRFSuiteSaveAndLoadModelTest, method loadModelArow.
/**
 * Creates a model with the AROW CRFSuite algorithm, points a {@code TcAnnotator}
 * at the model folder and checks the outcomes it produces for a short two-sentence
 * text: there must be 11 of them and each label must be contained in
 * {@code postags}.
 *
 * @throws Exception
 *             if any step of the setup or execution fails
 */
@Test
public void loadModelArow() throws Exception {
    Map<String, Object> mlConfig = new HashMap<>();
    mlConfig.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new CrfSuiteAdapter(),
                    CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR, "-p",
                    "max_iterations=2" });
    mlConfig.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    // NOTE(review): the sparse flag is read from WekaAdapter although the classifier
    // and data writer are CRFSuite - confirm this is intentional.
    mlConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", mlConfig);

    // Create a model in a fresh folder.
    File modelDir = folder.newFolder();
    executeSaveModelIntoTemporyFolder(getParameterSpace(mlas), modelDir);

    // Input document to be annotated with the stored model.
    JCas document = JCasFactory.createJCas();
    document.setDocumentText("This is an example text. It has 2 sentences.");
    document.setDocumentLanguage("en");

    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine annotator = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelDir.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    segmenter.process(document);
    annotator.process(document);

    List<TextClassificationOutcome> predicted = new ArrayList<>(
            JCasUtil.select(document, TextClassificationOutcome.class));

    // 9 word tokens + 2 punctuation marks
    assertEquals(11, predicted.size());
    for (TextClassificationOutcome outcome : predicted) {
        assertTrue(postags.contains(outcome.getOutcome()));
    }
}
Use of org.dkpro.tc.ml.weka.WekaAdapter in the dkpro-tc project by dkpro.
From class WekaBrownUnitPosDemo, method getParameterSpace.
/**
 * Assembles the parameter space of the demo: a train and a test
 * {@code BrownCorpusReader}, a Weka {@code NaiveBayes} configuration bundle, a
 * feature set of {@code TokenRatioPerDocument} and {@code CharacterNGram}, with
 * learning mode {@code LM_SINGLE_LABEL} and feature mode {@code FM_UNIT}.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException
 *             declared by the reader-description factory calls
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader dimension: training and test data.
    String[] trainPatterns = { INCLUDE_PREFIX + "*.xml", INCLUDE_PREFIX + "*.xml.gz" };
    // NOTE(review): the test reader points at corpusFilePathTrain, the same location as
    // the training reader, and its patterns carry no INCLUDE_PREFIX - confirm both are intended.
    String[] testPatterns = { "*.xml", "*.xml.gz" };

    Map<String, Object> readers = new HashMap<>();
    readers.put(DIM_READER_TRAIN,
            CollectionReaderFactory.createReaderDescription(BrownCorpusReader.class,
                    BrownCorpusReader.PARAM_LANGUAGE, "en",
                    BrownCorpusReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
                    BrownCorpusReader.PARAM_PATTERNS, trainPatterns));
    readers.put(DIM_READER_TEST,
            CollectionReaderFactory.createReaderDescription(BrownCorpusReader.class,
                    BrownCorpusReader.PARAM_LANGUAGE, "en",
                    BrownCorpusReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
                    BrownCorpusReader.PARAM_PATTERNS, testPatterns));

    // Machine-learning configuration bundle: classifier, data writer, sparse flag.
    Map<String, Object> wekaSetup = new HashMap<>();
    wekaSetup.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
    wekaSetup.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    wekaSetup.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", wekaSetup);

    // Feature set dimension.
    TcFeatureSet features = new TcFeatureSet(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(CharacterNGram.class,
                    CharacterNGram.PARAM_NGRAM_USE_TOP_K, 50));
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(Constants.DIM_FEATURE_SET,
            features);

    return new ParameterSpace(
            Dimension.createBundle(DIM_READERS, readers),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_UNIT),
            dimFeatureSets,
            mlas);
}
Use of org.dkpro.tc.ml.weka.WekaAdapter in the dkpro-tc project by dkpro.
From class WekaSaveAndLoadModelDocumentPairRegression, method pairGetParameterSpace.
/**
 * Assembles the parameter space for the pair-regression setup: an
 * {@code STSReader} as training reader, Weka {@code SMOreg} as classification
 * arguments, {@code DiffNrOfTokensPairFeatureExtractor} as the only feature, with
 * learning mode {@code LM_REGRESSION} and feature mode {@code FM_PAIR}.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException
 *             declared by the reader-description factory call
 */
private static ParameterSpace pairGetParameterSpace() throws ResourceInitializationException {
    // Reader dimension: only a training reader is registered here.
    Map<String, Object> readers = new HashMap<>();
    Object trainReader = CollectionReaderFactory.createReaderDescription(STSReader.class,
            STSReader.PARAM_INPUT_FILE, pairTrainFiles,
            STSReader.PARAM_GOLD_FILE, pairGoldFiles);
    readers.put(DIM_READER_TRAIN, trainReader);

    // Classification arguments: the adapter followed by the Weka classifier class name.
    List<Object> smoRegArgs = Arrays.<Object>asList(new WekaAdapter(), SMOreg.class.getName());
    @SuppressWarnings("unchecked")
    Dimension<List<Object>> dimClassificationArgs = Dimension
            .create(Constants.DIM_CLASSIFICATION_ARGS, smoRegArgs);

    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET,
            new TcFeatureSet(TcFeatureFactory.create(DiffNrOfTokensPairFeatureExtractor.class)));

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_REGRESSION),
            Dimension.create(DIM_FEATURE_MODE, FM_PAIR),
            dimFeatureSets,
            dimClassificationArgs);
}
Use of org.dkpro.tc.ml.weka.WekaAdapter in the dkpro-tc project by dkpro.
From class WekaSaveAndLoadModelDocumentSingleLabelTest, method documentGetParameterSpaceSingleLabel.
/**
 * Assembles the parameter space for the single-label document setup: a
 * {@code FolderwiseDataReader} as training reader, a Weka {@code NaiveBayes}
 * configuration bundle, a feature set of {@code WordNGram} and
 * {@code TokenRatioPerDocument}, with learning mode {@code LM_SINGLE_LABEL} and
 * feature mode {@code FM_DOCUMENT}.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException
 *             declared by the reader-description factory call
 */
private ParameterSpace documentGetParameterSpaceSingleLabel() throws ResourceInitializationException {
    // Reader dimension: only a training reader is registered here.
    Map<String, Object> readers = new HashMap<>();
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, documentTrainFolder,
            FolderwiseDataReader.PARAM_LANGUAGE, "en",
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    readers.put(DIM_READER_TRAIN, trainReader);

    // Machine-learning configuration bundle: classifier, data writer, sparse flag.
    Map<String, Object> mlConfig = new HashMap<>();
    mlConfig.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
    mlConfig.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    mlConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", mlConfig);

    // Feature set: word n-grams (n from 1 to 3, top 50) plus the token ratio feature.
    TcFeatureSet features = new TcFeatureSet(
            TcFeatureFactory.create(WordNGram.class,
                    WordNGram.PARAM_NGRAM_USE_TOP_K, 50,
                    WordNGram.PARAM_NGRAM_MIN_N, 1,
                    WordNGram.PARAM_NGRAM_MAX_N, 3),
            TcFeatureFactory.create(TokenRatioPerDocument.class));
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, features);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            dimFeatureSets,
            mlas);
}
Aggregations