Search in sources :

Example 56 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

In class SequenceOutcomeReaderTest, method testReader.

/**
 * Reads {@code posDummy.txt} through a {@link SequenceOutcomeReader} configured with
 * {@code PARAM_SEQUENCES_PER_CAS = 1} and verifies, for each of the three expected
 * sequences, the covered text of its targets and the outcome values it carries.
 */
@Test
public void testReader() throws Exception {
    CollectionReader reader = CollectionReaderFactory.createReader(
            SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/",
            SequenceOutcomeReader.PARAM_PATTERNS, "posDummy.txt",
            SequenceOutcomeReader.PARAM_SEQUENCES_PER_CAS, 1);

    // Expected content, one row per sequence in reading order.
    String[][] expectedTokens = {
        { "This", "is", "a", "test" },
        { "This2", "is2", "a2", "#test2", "!" },
        { "This3", "is3", "a3", "test3", "!", "!" }
    };
    String[][] expectedOutcomes = {
        { "DET", "VERB", "DET", "NOUN" },
        { "DET2", "VERB2", "DET2", "NOUN2", "PUNCT2" },
        { "DET3", "VERB3", "DET3", "NOUN3", "PUNCT3", "PUNCT3" }
    };

    List<List<String>> tokensPerSequence = new ArrayList<>();
    List<List<String>> outcomesPerSequence = new ArrayList<>();
    int sequenceCount = 0;

    // Drain the reader, using a fresh CAS for every getNext() call.
    while (reader.hasNext()) {
        JCas jcas = JCasFactory.createJCas();
        reader.getNext(jcas.getCas());

        Collection<TextClassificationSequence> sequences =
                JCasUtil.select(jcas, TextClassificationSequence.class);
        for (TextClassificationSequence seq : sequences) {
            // covered text of every target inside this sequence
            List<String> tokens = new ArrayList<>();
            for (TextClassificationTarget target
                    : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, seq)) {
                tokens.add(target.getCoveredText());
            }
            tokensPerSequence.add(tokens);

            // outcome value of every outcome annotation inside this sequence
            List<String> outcomes = new ArrayList<>();
            for (TextClassificationOutcome outcome
                    : JCasUtil.selectCovered(jcas, TextClassificationOutcome.class, seq)) {
                outcomes.add(outcome.getOutcome());
            }
            outcomesPerSequence.add(outcomes);
        }
        sequenceCount += sequences.size();
    }

    assertEquals(expectedTokens.length, sequenceCount);
    assertEquals(expectedTokens.length, tokensPerSequence.size());
    assertEquals(expectedTokens.length, outcomesPerSequence.size());

    for (int seq = 0; seq < expectedTokens.length; seq++) {
        // sequence (seq + 1) - tokens
        List<String> tokens = tokensPerSequence.get(seq);
        assertEquals(expectedTokens[seq].length, tokens.size());
        for (int i = 0; i < expectedTokens[seq].length; i++) {
            assertEquals(expectedTokens[seq][i], tokens.get(i));
        }
        // sequence (seq + 1) - outcomes
        List<String> outcomes = outcomesPerSequence.get(seq);
        for (int i = 0; i < expectedOutcomes[seq].length; i++) {
            assertEquals(expectedOutcomes[seq][i], outcomes.get(i));
        }
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) List(java.util.List) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) Test(org.junit.Test)

Example 57 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

In class SequenceOutcomeReaderTest, method testReaderIndexParameter.

/**
 * Reads {@code otherFormat.txt} through a {@link SequenceOutcomeReader} configured with
 * {@code PARAM_OUTCOME_INDEX = 1} and {@code PARAM_TOKEN_INDEX = 2}, and verifies the
 * covered text of the targets and the outcome values of the two expected sequences.
 */
@Test
public void testReaderIndexParameter() throws Exception {
    CollectionReader reader = CollectionReaderFactory.createReader(
            SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/",
            SequenceOutcomeReader.PARAM_PATTERNS, "otherFormat.txt",
            SequenceOutcomeReader.PARAM_OUTCOME_INDEX, 1,
            SequenceOutcomeReader.PARAM_TOKEN_INDEX, 2);

    // Expected content, one row per sequence in reading order.
    String[][] expectedTokens = {
        { "This", "is", "a", "test" },
        { "This2", "is2", "a2", "test2", "!2" }
    };
    String[][] expectedOutcomes = {
        { "DET", "VERB", "DET", "NOUN" },
        { "DET2", "VERB2", "DET2", "NOUN2", "PUNCT2" }
    };

    List<List<String>> tokensPerSequence = new ArrayList<>();
    List<List<String>> outcomesPerSequence = new ArrayList<>();
    int sequenceCount = 0;

    // Drain the reader, using a fresh CAS for every getNext() call.
    while (reader.hasNext()) {
        JCas jcas = JCasFactory.createJCas();
        reader.getNext(jcas.getCas());

        Collection<TextClassificationSequence> sequences =
                JCasUtil.select(jcas, TextClassificationSequence.class);
        for (TextClassificationSequence seq : sequences) {
            // covered text of every target inside this sequence
            List<String> tokens = new ArrayList<>();
            for (TextClassificationTarget target
                    : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, seq)) {
                tokens.add(target.getCoveredText());
            }
            tokensPerSequence.add(tokens);

            // outcome value of every outcome annotation inside this sequence
            List<String> outcomes = new ArrayList<>();
            for (TextClassificationOutcome outcome
                    : JCasUtil.selectCovered(jcas, TextClassificationOutcome.class, seq)) {
                outcomes.add(outcome.getOutcome());
            }
            outcomesPerSequence.add(outcomes);
        }
        sequenceCount += sequences.size();
    }

    assertEquals(expectedTokens.length, sequenceCount);
    assertEquals(expectedTokens.length, tokensPerSequence.size());
    assertEquals(expectedTokens.length, outcomesPerSequence.size());

    for (int seq = 0; seq < expectedTokens.length; seq++) {
        // sequence (seq + 1) - tokens
        List<String> tokens = tokensPerSequence.get(seq);
        assertEquals(expectedTokens[seq].length, tokens.size());
        for (int i = 0; i < expectedTokens[seq].length; i++) {
            assertEquals(expectedTokens[seq][i], tokens.get(i));
        }
        // sequence (seq + 1) - outcomes
        List<String> outcomes = outcomesPerSequence.get(seq);
        for (int i = 0; i < expectedOutcomes[seq].length; i++) {
            assertEquals(expectedOutcomes[seq][i], outcomes.get(i));
        }
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) List(java.util.List) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) Test(org.junit.Test)

Example 58 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

In class TcAnnotator, method processUnit.

/**
 * Runs the wrapped engine once per unit annotation and afterwards attaches the resulting
 * outcome to every unit.
 *
 * <p>For each annotation of the type named by {@code nameUnit}, a temporary
 * {@link TextClassificationTarget} and a {@link TextClassificationOutcome} carrying
 * {@code Constants.TC_OUTCOME_DUMMY_VALUE} are indexed over the unit's span, the engine is
 * run, and the outcome value found on the temporary outcome afterwards is recorded. The
 * temporary annotations are always removed again, even when the engine fails, so a failed
 * run does not leave dummy annotations behind in the CAS. In a second pass one permanent
 * outcome annotation per unit is indexed with the recorded value.
 *
 * @param aJCas the CAS holding the unit annotations; it is modified in place
 * @throws AnalysisEngineProcessException if the engine fails on any unit
 */
private void processUnit(JCas aJCas) throws AnalysisEngineProcessException {
    Type type = aJCas.getCas().getTypeSystem().getType(nameUnit);
    // Copy the selection into a list of its own: the loop below adds and removes
    // annotations, and the second pass needs index-based access.
    List<AnnotationFS> unitAnnotation = new ArrayList<AnnotationFS>(CasUtil.select(aJCas.getCas(), type));
    List<String> outcomes = new ArrayList<String>(unitAnnotation.size());

    // iterate the units and set on each a prepared dummy outcome
    for (AnnotationFS unit : unitAnnotation) {
        TextClassificationTarget target = new TextClassificationTarget(aJCas, unit.getBegin(), unit.getEnd());
        target.addToIndexes();
        TextClassificationOutcome dummyOutcome = new TextClassificationOutcome(aJCas, unit.getBegin(), unit.getEnd());
        dummyOutcome.setOutcome(Constants.TC_OUTCOME_DUMMY_VALUE);
        dummyOutcome.addToIndexes();
        try {
            engine.process(aJCas);
            // store the outcome (presumably overwritten by the engine - confirm against the engine's writer)
            outcomes.add(dummyOutcome.getOutcome());
        } finally {
            // always clean up, otherwise a failing engine leaves the temporary annotations indexed
            target.removeFromIndexes();
            dummyOutcome.removeFromIndexes();
        }
    }

    // iterate again to set for each unit the outcome
    for (int i = 0; i < unitAnnotation.size(); i++) {
        AnnotationFS unit = unitAnnotation.get(i);
        TextClassificationOutcome outcome = new TextClassificationOutcome(aJCas, unit.getBegin(), unit.getEnd());
        outcome.setOutcome(outcomes.get(i));
        outcome.addToIndexes();
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget)

Example 59 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

In class TcAnnotator, method processDocument.

/**
 * Makes sure the CAS contains a target and an outcome annotation and then runs the wrapped
 * engine on it.
 *
 * <p>If no {@link TextClassificationTarget} exists, one spanning the whole document text is
 * indexed. If no {@link TextClassificationOutcome} exists, one with an empty outcome value
 * is indexed.
 *
 * @param aJCas the CAS to process; it is modified in place
 * @throws AnalysisEngineProcessException wrapping any exception thrown by the engine
 */
private void processDocument(JCas aJCas) throws AnalysisEngineProcessException {
    boolean targetMissing = !JCasUtil.exists(aJCas, TextClassificationTarget.class);
    if (targetMissing) {
        // no target yet: treat the complete document as the single target
        int documentLength = aJCas.getDocumentText().length();
        TextClassificationTarget wholeDocument = new TextClassificationTarget(aJCas, 0, documentLength);
        wholeDocument.addToIndexes();
    }

    // we need an outcome annotation to be present
    boolean outcomeMissing = !JCasUtil.exists(aJCas, TextClassificationOutcome.class);
    if (outcomeMissing) {
        TextClassificationOutcome placeholder = new TextClassificationOutcome(aJCas);
        placeholder.setOutcome("");
        placeholder.addToIndexes();
    }

    // run the engine (presumably the one loaded from the model - confirm where 'engine' is set)
    try {
        engine.process(aJCas);
    } catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Aggregations

TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome) 59, JCas (org.apache.uima.jcas.JCas) 29, ArrayList (java.util.ArrayList) 27, AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine) 19, TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget) 18, Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) 16, CollectionReader (org.apache.uima.collection.CollectionReader) 15, CollectionException (org.apache.uima.collection.CollectionException) 9, CASException (org.apache.uima.cas.CASException) 8, JCasId (org.dkpro.tc.api.type.JCasId) 8, TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence) 7, AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException) 6, Test (org.junit.Test) 6, DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) 5, Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) 5, File (java.io.File) 5, IOException (java.io.IOException) 4, ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException) 4, TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException) 4, HashSet (java.util.HashSet) 3