Search in sources :

Example 11 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class XgboostSaveAndLoadModelDocumentSingleLabelTest method unitLoadAndUseModel.

/**
 * Runs a {@link TcAnnotator} configured with the model stored in {@code modelFolder} over the
 * document {@code a02.xml} read by a {@link TeiReader}, then verifies that exactly 31 outcomes
 * were produced and that each of them carries one of the labels accepted by this test.
 *
 * @param modelFolder folder containing the previously saved model
 * @throws Exception if the engine or reader cannot be created, or reading/processing fails
 */
private static void unitLoadAndUseModel(File modelFolder) throws Exception {
    AnalysisEngine annotator = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    CollectionReader teiReader = CollectionReaderFactory.createReader(
            TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, Arrays.asList(TeiReader.INCLUDE_PREFIX + "a02.xml"));

    // Read a single document into a fresh CAS and let the loaded model annotate it.
    JCas document = JCasFactory.createJCas();
    document.setDocumentLanguage("en");
    teiReader.getNext(document.getCas());
    annotator.process(document);

    List<TextClassificationOutcome> predicted =
            new ArrayList<>(JCasUtil.select(document, TextClassificationOutcome.class));

    // Labels this test accepts as a prediction.
    Set<String> acceptedLabels = new HashSet<>(Arrays.asList(
            "AT", "NP", "pct", "WDT", "JJ", "VBD", "NNS", "TO",
            "VBN", "IN", "CC", "NN", "AP", "HVD"));

    assertEquals(31, predicted.size());
    for (TextClassificationOutcome prediction : predicted) {
        assertTrue(acceptedLabels.contains(prediction.getOutcome()));
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) HashSet(java.util.HashSet)

Example 12 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class LibsvmSaveAndLoadModelDocumentSingleLabelTest method unitLoadAndUseModel.

/**
 * Runs a {@link TcAnnotator} configured with the model stored in {@code modelFolder} over the
 * document {@code a02.xml} read by a {@link TeiReader}, then verifies that exactly 31 outcomes
 * were produced and that each position carries the expected label.
 *
 * @param modelFolder folder containing the previously saved model
 * @throws Exception if the engine or reader cannot be created, or reading/processing fails
 */
private static void unitLoadAndUseModel(File modelFolder) throws Exception {
    AnalysisEngine annotator = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    CollectionReader teiReader = CollectionReaderFactory.createReader(
            TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, Arrays.asList(TeiReader.INCLUDE_PREFIX + "a02.xml"));

    // Read a single document into a fresh CAS and let the loaded model annotate it.
    JCas document = JCasFactory.createJCas();
    document.setDocumentLanguage("en");
    teiReader.getNext(document.getCas());
    annotator.process(document);

    List<TextClassificationOutcome> predicted =
            new ArrayList<>(JCasUtil.select(document, TextClassificationOutcome.class));

    // Expected label for each outcome, in the order the outcomes are selected from the CAS.
    String[] expectedLabels = {
        "AT", "IN", "pct", "WDT", "NP", "VBD", "AT", "VBN", "RB", "pct",
        "NP", "CC", "AT", "pct", "JJ", "NN", "pct", "NP", "NN", "CC",
        "AP", "NN", "IN", "NNS", "JJ", "NP", "IN", "AT", "AT", "JJ",
        "pct"
    };

    assertEquals(31, predicted.size());
    for (int position = 0; position < expectedLabels.length; position++) {
        assertEquals(expectedLabels[position], predicted.get(position).getOutcome());
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 13 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class TestReaderSentenceToDocument method getNext.

/**
 * Populates the given CAS with the text stored at the current {@code offset}, attaches
 * document metadata, a {@link JCasId}, an outcome and a target spanning the whole text, and
 * then advances {@code offset} by one.
 *
 * @param aJCas the CAS to fill
 * @throws IOException declared by the overridden method
 * @throws CollectionException declared by the overridden method
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
    // Document text and language come first; the target below spans the full text.
    aJCas.setDocumentText(texts.get(offset));
    aJCas.setDocumentLanguage(LANGUAGE_CODE);

    // Several CASes are created out of a single file, so each one needs its own title and
    // URI; otherwise serialized CASes would overwrite each other.
    String sentenceName = "Sentence" + offset;
    DocumentMetaData metaData = DocumentMetaData.create(aJCas);
    metaData.setDocumentTitle(sentenceName);
    metaData.setDocumentUri(sentenceName);
    metaData.setDocumentId(String.valueOf(offset));

    JCasId casId = new JCasId(aJCas);
    casId.setId(jcasId);
    casId.addToIndexes();

    // Outcome / label for this document.
    TextClassificationOutcome label = new TextClassificationOutcome(aJCas);
    label.setOutcome(getTextClassificationOutcome(aJCas));
    label.addToIndexes();

    // A single target covering the entire document text.
    int textLength = aJCas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(aJCas, 0, textLength);
    wholeDocument.addToIndexes();

    offset++;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)

Example 14 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class TestReaderSingleLabelUnitReader method getNext.

/**
 * Reads the next document via the superclass, tags the CAS with a running {@link JCasId},
 * and adds one {@link TextClassificationTarget} and one {@link TextClassificationOutcome}
 * for every piece of the document text obtained by splitting on a single space.
 *
 * <p>Each annotation covers exactly the characters of its token: the token is located in the
 * document text starting from the end of the previous token.
 *
 * @param aCAS the CAS to fill
 * @throws IOException declared by the overridden method
 * @throws CollectionException if the JCas view of {@code aCAS} cannot be obtained; the
 *         underlying {@link CASException} is attached as the cause
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    super.getNext(aCAS);
    JCas jcas;
    try {
        jcas = aCAS.getJCas();
        JCasId id = new JCasId(jcas);
        id.setId(jcasId++);
        id.addToIndexes();
    } catch (CASException e) {
        // Preserve the original failure so the root cause stays visible to the caller.
        throw new CollectionException(e);
    }
    String documentText = aCAS.getDocumentText();
    // Position from which the next token is searched; always the end of the previous token,
    // so repeated tokens resolve to their own occurrence rather than an earlier one.
    int searchFrom = 0;
    for (String token : documentText.split(" ")) {
        // The token stems from splitting this very text, so it is always found here.
        int begin = documentText.indexOf(token, searchFrom);
        int end = begin + token.length();
        new TextClassificationTarget(jcas, begin, end).addToIndexes();
        new TextClassificationOutcome(jcas, begin, end).addToIndexes();
        searchFrom = end;
    }
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) CollectionException(org.apache.uima.collection.CollectionException) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) CASException(org.apache.uima.cas.CASException)

Example 15 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class LinewiseTextOutcomeReader method setTextClassificationOutcome.

/**
 * Creates a {@link TextClassificationOutcome} over the span {@code [begin, end)} of the given
 * CAS, assigns it the supplied outcome value and adds it to the CAS indexes.
 *
 * @param aJCas the CAS receiving the annotation
 * @param outcome the outcome value to store on the annotation
 * @param begin begin offset of the annotation
 * @param end end offset of the annotation
 * @throws IOException declared in the signature; not thrown by the statements in this method
 */
protected void setTextClassificationOutcome(JCas aJCas, String outcome, int begin, int end) throws IOException {
    TextClassificationOutcome annotation = new TextClassificationOutcome(aJCas, begin, end);
    annotation.setOutcome(outcome);
    annotation.addToIndexes();
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome)

Aggregations

TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 59, JCas (org.apache.uima.jcas.JCas): 29, ArrayList (java.util.ArrayList): 27, AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 19, TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 18, Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 16, CollectionReader (org.apache.uima.collection.CollectionReader): 15, CollectionException (org.apache.uima.collection.CollectionException): 9, CASException (org.apache.uima.cas.CASException): 8, JCasId (org.dkpro.tc.api.type.JCasId): 8, TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 7, AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 6, Test (org.junit.Test): 6, DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData): 5, Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 5, File (java.io.File): 5, IOException (java.io.IOException): 4, ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException): 4, TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 4, HashSet (java.util.HashSet): 3