Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
Class WekaSaveAndLoadModelDocumentSingleLabelTest, method documentLoadModelSingleLabel:
/**
 * Runs a previously saved single-label document model over one short English text and
 * verifies the prediction.
 *
 * <p>The text is tokenized with {@code BreakIteratorSegmenter} and then classified by a
 * {@code TcAnnotator} that is pointed at {@code modelFolder}. Exactly one
 * {@code TextClassificationOutcome} with the value {@code "comp.graphics"} is expected.
 *
 * @param modelFolder folder holding the stored model; its absolute path is handed to the annotator
 * @throws Exception if an engine or the CAS cannot be created, or processing fails
 */
private static void documentLoadModelSingleLabel(File modelFolder) throws Exception {
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    String modelLocation = modelFolder.getAbsolutePath();
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelLocation);

    JCas document = JCasFactory.createJCas();
    document.setDocumentText("This is an example text");
    document.setDocumentLanguage("en");

    // Tokenize first, then let the loaded model annotate its outcome.
    segmenter.process(document);
    classifier.process(document);

    List<TextClassificationOutcome> predicted = new ArrayList<>();
    predicted.addAll(JCasUtil.select(document, TextClassificationOutcome.class));

    int predictedCount = predicted.size();
    assertEquals(1, predictedCount);

    String label = predicted.get(0).getOutcome();
    assertEquals("comp.graphics", label);
}
Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
Class WekaSaveAndLoadModelDocumentMultiLabelTest, method documentLoadModelMultiLabel:
/**
 * Runs a previously saved multi-label document model over one short English text and
 * verifies the predictions.
 *
 * <p>The text is tokenized with {@code BreakIteratorSegmenter} and then classified by a
 * {@code TcAnnotator} that is pointed at {@code modelFolder}. Two outcomes are expected, in
 * this order: {@code "grain"} followed by {@code "corn"}.
 *
 * @param modelFolder folder holding the stored model; its absolute path is handed to the annotator
 * @throws Exception if an engine or the CAS cannot be created, or processing fails
 */
private static void documentLoadModelMultiLabel(File modelFolder) throws Exception {
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    String modelLocation = modelFolder.getAbsolutePath();
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelLocation);

    JCas document = JCasFactory.createJCas();
    document.setDocumentText("This is an example text");
    document.setDocumentLanguage("en");

    // Tokenize first, then let the loaded model annotate its outcomes.
    segmenter.process(document);
    classifier.process(document);

    List<TextClassificationOutcome> predicted = new ArrayList<>();
    predicted.addAll(JCasUtil.select(document, TextClassificationOutcome.class));

    // Expected labels in the order the outcomes are returned.
    String[] expectedLabels = { "grain", "corn" };
    assertEquals(2, predicted.size());
    for (int i = 0; i < expectedLabels.length; i++) {
        assertEquals(expectedLabels[i], predicted.get(i).getOutcome());
    }
}
Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
Class WekaSaveAndLoadModelDocumentRegression, method regressionLoadModel:
/**
 * Loads a stored regression model and checks its prediction for the first document of the
 * regression test data.
 *
 * <p>The document is read by a {@code LinewiseTextOutcomeReader} (outcome in column 0, text in
 * column 1, language {@code "en"}) from the location held in {@code regressionTest}, segmented
 * with {@code BreakIteratorSegmenter} and classified by a {@code TcAnnotator} configured with
 * {@code Token} as unit annotation. Exactly one outcome is expected; it must parse as a number
 * strictly between 0.1 and 5.
 *
 * @param modelFolder folder holding the stored model; its absolute path is handed to the annotator
 * @throws UIMAException if a UIMA component cannot be created or processing fails
 * @throws IOException if the reader cannot access its input
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas jcas = JCasFactory.createJCas();

    // hasNext() is still called before getNext(), but its result is no longer discarded:
    // an empty input now fails here instead of surfacing as an obscure error further down.
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());

    segmenter.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());

    // The regression outcome is stored as text; a primitive avoids needless boxing.
    double predicted = Double.parseDouble(outcomes.get(0).getOutcome());
    assertTrue(predicted > 0.1 && predicted < 5);
}
Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
Class WekaSaveAndLoadModelUnitTest, method unitLoadModel:
/**
 * Loads a stored unit-level model and checks that it produces one non-empty outcome per token
 * of the first document read from {@code unitTrainFolder}.
 *
 * <p>The document is read by a {@code TeiReader} (pattern {@code "*.xml"}, language
 * {@code "en"}) and classified by a {@code TcAnnotator} configured with {@code Token} as unit
 * annotation.
 *
 * @param modelFolder folder holding the stored model; its absolute path is handed to the annotator
 * @throws Exception if a component cannot be created, or reading or processing fails
 */
private static void unitLoadModel(File modelFolder) throws Exception {
    CollectionReader teiReader = CollectionReaderFactory.createReader(TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, "*.xml");
    String modelLocation = modelFolder.getAbsolutePath();
    String unitType = Token.class.getName();
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelLocation,
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, unitType);

    JCas document = JCasFactory.createJCas();
    teiReader.getNext(document.getCas());

    // Sanity check: the number of tokens determines how many outcomes are expected.
    int tokenCount = JCasUtil.select(document, Token.class).size();
    assertEquals(163, tokenCount);

    classifier.process(document);

    // One outcome is expected for each of the 163 tokens.
    Collection<TextClassificationOutcome> predicted = JCasUtil.select(document,
            TextClassificationOutcome.class);
    assertEquals(163, predicted.size());

    // Every outcome must carry a non-empty value.
    for (TextClassificationOutcome prediction : predicted) {
        String label = prediction.getOutcome();
        assertTrue(!label.isEmpty());
    }
}
Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
Class XgboostSaveAndLoadModelDocumentSingleLabelTest, method documentLoadAndUseModel:
/**
 * Loads a stored document-level model, classifies every text file found below
 * {@code documentTestFolder} and compares the predictions against the expected labels.
 *
 * <p>Each document is read by a {@code TextReader} (pattern {@code "*&#47;*.txt"}, language
 * {@code "en"}), tokenized with {@code BreakIteratorSegmenter} and classified by a
 * {@code TcAnnotator}. Four documents are expected, each yielding a single outcome.
 *
 * @param modelFolder folder holding the stored model; its absolute path is handed to the annotator
 * @param evaluateWithClassificationArgs selects the expected labels: {@code true} expects
 *        emotional, neutral, neutral, neutral; {@code false} expects emotional four times
 * @throws Exception if a component cannot be created, or reading or processing fails
 */
private static void documentLoadAndUseModel(File modelFolder, boolean evaluateWithClassificationArgs) throws Exception {
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath());
    CollectionReader textReader = CollectionReaderFactory.createReader(TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION, documentTestFolder,
            TextReader.PARAM_LANGUAGE, "en",
            TextReader.PARAM_PATTERNS, Arrays.asList(TextReader.INCLUDE_PREFIX + "*/*.txt"));

    // Collect the single outcome of every document, in reading order.
    List<TextClassificationOutcome> predicted = new ArrayList<>();
    while (textReader.hasNext()) {
        JCas document = JCasFactory.createJCas();
        textReader.getNext(document.getCas());
        document.setDocumentLanguage("en");
        segmenter.process(document);
        classifier.process(document);
        TextClassificationOutcome single = JCasUtil.selectSingle(document,
                TextClassificationOutcome.class);
        predicted.add(single);
    }

    // The expected labels depend on whether the model was trained with classification arguments.
    String[] expectedLabels;
    if (evaluateWithClassificationArgs) {
        expectedLabels = new String[] { "emotional", "neutral", "neutral", "neutral" };
    } else {
        expectedLabels = new String[] { "emotional", "emotional", "emotional", "emotional" };
    }

    assertEquals(4, predicted.size());
    for (int i = 0; i < expectedLabels.length; i++) {
        assertEquals(expectedLabels[i], predicted.get(i).getOutcome());
    }
}
Aggregations