Search in sources :

Example 6 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelDocumentSingleLabelTest method documentLoadModelSingleLabel.

/**
 * Classifies a fixed English sentence with the model stored in {@code modelFolder} and
 * checks that exactly one outcome, labelled {@code comp.graphics}, is produced.
 *
 * @param modelFolder folder whose absolute path is handed to {@code TcAnnotator} as the
 *                    model location
 * @throws Exception propagated from engine creation, CAS creation or processing
 */
private static void documentLoadModelSingleLabel(File modelFolder) throws Exception {
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    String modelLocation = modelFolder.getAbsolutePath();
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelLocation);

    // Build the single in-memory document that gets classified.
    JCas document = JCasFactory.createJCas();
    document.setDocumentText("This is an example text");
    document.setDocumentLanguage("en");

    // Segment first, then run the classifier on the same CAS.
    segmenter.process(document);
    classifier.process(document);

    List<TextClassificationOutcome> predicted = new ArrayList<>();
    predicted.addAll(JCasUtil.select(document, TextClassificationOutcome.class));
    assertEquals(1, predicted.size());

    String label = predicted.get(0).getOutcome();
    assertEquals("comp.graphics", label);
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 7 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelDocumentMultiLabelTest method documentLoadModelMultiLabel.

/**
 * Classifies a fixed English sentence with the multi-label model stored in
 * {@code modelFolder} and checks that two outcomes are produced: {@code grain} first,
 * {@code corn} second.
 *
 * @param modelFolder folder whose absolute path is handed to {@code TcAnnotator} as the
 *                    model location
 * @throws Exception propagated from engine creation, CAS creation or processing
 */
private static void documentLoadModelMultiLabel(File modelFolder) throws Exception {
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    String modelLocation = modelFolder.getAbsolutePath();
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelLocation);

    JCas document = JCasFactory.createJCas();
    document.setDocumentText("This is an example text");
    document.setDocumentLanguage("en");

    segmenter.process(document);
    classifier.process(document);

    List<TextClassificationOutcome> predicted = new ArrayList<>();
    predicted.addAll(JCasUtil.select(document, TextClassificationOutcome.class));
    assertEquals(2, predicted.size());

    // The assertions are order-sensitive: index 0 must be "grain", index 1 "corn".
    String firstLabel = predicted.get(0).getOutcome();
    assertEquals("grain", firstLabel);
    String secondLabel = predicted.get(1).getOutcome();
    assertEquals("corn", secondLabel);
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 8 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelDocumentRegression method regressionLoadModel.

/**
 * Reads the next document from the {@code regressionTest} source, segments it, applies the
 * regression model stored in {@code modelFolder} and checks that a single numeric outcome
 * strictly between 0.1 and 5 is produced.
 *
 * @param modelFolder folder whose absolute path is handed to {@code TcAnnotator} as the
 *                    model location
 * @throws UIMAException propagated from reader/engine creation or processing
 * @throws IOException   propagated from reading the source document
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(LinewiseTextOutcomeReader.class, LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0, LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1, LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest, LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class, TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(), TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    JCas jcas = JCasFactory.createJCas();

    // hasNext() is still invoked before getNext(), exactly as before, but its result is no
    // longer discarded: an empty source now fails here instead of somewhere inside getNext().
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());

    segmenter.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());

    // The outcome is stored as text; parse straight to a primitive to avoid needless boxing.
    double predicted = Double.parseDouble(outcomes.get(0).getOutcome());
    assertTrue(predicted > 0.1 && predicted < 5);
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 9 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelUnitTest method unitLoadModel.

/**
 * Reads the next document delivered by a {@code TeiReader} over {@code unitTrainFolder},
 * applies the model stored in {@code modelFolder} with {@code Token} as the unit annotation,
 * and checks that there is one non-empty outcome per token.
 *
 * @param modelFolder folder whose absolute path is handed to {@code TcAnnotator} as the
 *                    model location
 * @throws Exception propagated from reader/engine creation or processing
 */
private static void unitLoadModel(File modelFolder) throws Exception {
    // Token count asserted for the document; the same number of outcomes is expected
    // after classification.
    final int expectedUnits = 163;

    CollectionReader reader = CollectionReaderFactory.createReader(TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, "*.xml");
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas document = JCasFactory.createJCas();
    reader.getNext(document.getCas());

    // Sanity check: the number of tokens determines how many outcomes are expected.
    int tokenCount = JCasUtil.select(document, Token.class).size();
    assertEquals(expectedUnits, tokenCount);

    classifier.process(document);

    // One outcome per token.
    Collection<TextClassificationOutcome> predicted = JCasUtil.select(document, TextClassificationOutcome.class);
    assertEquals(expectedUnits, predicted.size());

    // Every outcome must carry a non-empty label.
    for (TextClassificationOutcome outcome : predicted) {
        boolean hasLabel = !outcome.getOutcome().isEmpty();
        assertTrue(hasLabel);
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 10 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class XgboostSaveAndLoadModelDocumentSingleLabelTest method documentLoadAndUseModel.

/**
 * Classifies every document delivered by a {@code TextReader} over
 * {@code documentTestFolder} with the model stored in {@code modelFolder} and compares the
 * four resulting labels, in reader order, against the expectation selected by
 * {@code evaluateWithClassificationArgs}.
 *
 * @param modelFolder                    folder whose absolute path is handed to
 *                                       {@code TcAnnotator} as the model location
 * @param evaluateWithClassificationArgs selects which of the two expected label sequences
 *                                       is asserted
 * @throws Exception propagated from reader/engine creation or processing
 */
private static void documentLoadAndUseModel(File modelFolder, boolean evaluateWithClassificationArgs) throws Exception {
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath());
    CollectionReader reader = CollectionReaderFactory.createReader(TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION, documentTestFolder,
            TextReader.PARAM_LANGUAGE, "en",
            TextReader.PARAM_PATTERNS, Arrays.asList(TextReader.INCLUDE_PREFIX + "*/*.txt"));

    // Collect exactly one outcome per document, in the order the reader delivers them.
    List<TextClassificationOutcome> predicted = new ArrayList<>();
    while (reader.hasNext()) {
        JCas document = JCasFactory.createJCas();
        reader.getNext(document.getCas());
        document.setDocumentLanguage("en");
        segmenter.process(document);
        classifier.process(document);
        TextClassificationOutcome single = JCasUtil.selectSingle(document, TextClassificationOutcome.class);
        predicted.add(single);
    }

    // Expected label sequence for the selected configuration.
    String[] expectedLabels;
    if (evaluateWithClassificationArgs) {
        expectedLabels = new String[] { "emotional", "neutral", "neutral", "neutral" };
    } else {
        expectedLabels = new String[] { "emotional", "emotional", "emotional", "emotional" };
    }

    assertEquals(4, predicted.size());
    for (int i = 0; i < expectedLabels.length; i++) {
        assertEquals(expectedLabels[i], predicted.get(i).getOutcome());
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Aggregations

TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome)59 JCas (org.apache.uima.jcas.JCas)29 ArrayList (java.util.ArrayList)27 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)19 TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)18 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)16 CollectionReader (org.apache.uima.collection.CollectionReader)15 CollectionException (org.apache.uima.collection.CollectionException)9 CASException (org.apache.uima.cas.CASException)8 JCasId (org.dkpro.tc.api.type.JCasId)8 TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence)7 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)6 Test (org.junit.Test)6 DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)5 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)5 File (java.io.File)5 IOException (java.io.IOException)4 ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException)4 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)4 HashSet (java.util.HashSet)3