Search in sources:

Example 1 with TextClassificationSequence

use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.

the class BrownCorpusReader method getNext.

/**
 * Lets the parent reader populate the CAS, then marks every {@link Sentence} as a
 * {@link TextClassificationSequence} and every {@link Token} covered by that sentence as a
 * {@link TextClassificationTarget} with an accompanying {@link TextClassificationOutcome}.
 *
 * @param cas the CAS to fill and annotate
 * @throws IOException propagated from the parent reader
 * @throws CollectionException if the JCas view cannot be obtained from {@code cas}
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
    super.getNext(cas);

    JCas view;
    try {
        view = cas.getJCas();
    } catch (CASException cause) {
        throw new CollectionException(cause);
    }

    for (Sentence sent : JCasUtil.select(view, Sentence.class)) {
        // One sequence annotation per sentence, spanning the whole sentence.
        new TextClassificationSequence(view, sent.getBegin(), sent.getEnd()).addToIndexes();

        for (Token tok : JCasUtil.selectCovered(view, Token.class, sent)) {
            int begin = tok.getBegin();
            int end = tok.getEnd();

            TextClassificationTarget target = new TextClassificationTarget(view, begin, end);
            // The token text becomes the suffix of this target's ID.
            target.setSuffix(tok.getCoveredText());
            target.addToIndexes();

            // The outcome shares the token's span; its value is derived from the target.
            TextClassificationOutcome label = new TextClassificationOutcome(view, begin, end);
            label.setOutcome(getTextClassificationOutcome(view, target));
            label.addToIndexes();
        }
    }
}
Also used : CollectionException(org.apache.uima.collection.CollectionException) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) CASException(org.apache.uima.cas.CASException) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 2 with TextClassificationSequence

use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.

the class SequenceOutcomeReaderTest method testSkipLineReader.

/**
 * Runs {@code SequenceOutcomeReader} over the dummy POS file with
 * {@code PARAM_SKIP_LINES_START_WITH_STRING} set to {@code "#"} and checks the token texts of
 * the second sequence that was read.
 */
@Test
public void testSkipLineReader() throws Exception {
    CollectionReader reader = CollectionReaderFactory.createReader(
            SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/posDummy.txt",
            SequenceOutcomeReader.PARAM_SKIP_LINES_START_WITH_STRING, "#");

    List<List<String>> tokensPerSequence = new ArrayList<>();
    List<List<String>> outcomesPerCas = new ArrayList<>();

    while (reader.hasNext()) {
        JCas jcas = JCasFactory.createJCas();
        reader.getNext(jcas.getCas());

        // Gather the covered text of every target inside each sequence annotation.
        for (TextClassificationSequence seq : JCasUtil.select(jcas, TextClassificationSequence.class)) {
            List<String> covered = new ArrayList<>();
            for (TextClassificationTarget t : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, seq)) {
                covered.add(t.getCoveredText());
            }
            tokensPerSequence.add(covered);
        }

        // Outcomes are gathered per CAS; this test does not assert on them.
        List<String> casOutcomes = new ArrayList<>();
        for (TextClassificationOutcome annotated : JCasUtil.select(jcas, TextClassificationOutcome.class)) {
            casOutcomes.add(annotated.getOutcome());
        }
        outcomesPerCas.add(casOutcomes);
    }

    // Second sequence: four tokens.
    List<String> secondSequence = tokensPerSequence.get(1);
    assertEquals(4, secondSequence.size());
    assertEquals("This2", secondSequence.get(0));
    assertEquals("is2", secondSequence.get(1));
    assertEquals("a2", secondSequence.get(2));
    assertEquals("!", secondSequence.get(3));
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) List(java.util.List) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) Test(org.junit.Test)

Example 3 with TextClassificationSequence

use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.

the class SequenceOutcomeReader method setTextClassificationSequence.

/**
 * Creates a {@link TextClassificationSequence} over the given span and adds it to the
 * indexes of the supplied JCas.
 *
 * @param aJCas the JCas that receives the annotation
 * @param begin begin offset of the sequence
 * @param end end offset of the sequence
 */
protected void setTextClassificationSequence(JCas aJCas, int begin, int end) {
    new TextClassificationSequence(aJCas, begin, end).addToIndexes();
}
Also used : TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence)

Example 4 with TextClassificationSequence

use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.

the class TestTaskUtils method initJCas.

/**
 * Builds a small JCas fixture: a {@code JCasId}, document meta data, six space-separated
 * tokens (each with a target and an outcome annotation) and two sequence annotations that
 * cover the first three and the last three tokens respectively.
 *
 * @param setUnitIdAsPartOfTheInstanceId if {@code true}, each target's suffix is set to its
 *            token text
 * @return the populated JCas
 * @throws Exception if the engine or the JCas cannot be created
 */
private JCas initJCas(boolean setUnitIdAsPartOfTheInstanceId) throws Exception {
    AnalysisEngine engine = AnalysisEngineFactory.createEngine(NoOpAnnotator.class);
    JCas jCas = engine.newJCas();

    JCasId casId = new JCasId(jCas);
    casId.setId(4711);
    casId.addToIndexes();

    DocumentMetaData metaData = new DocumentMetaData(jCas);
    metaData.setDocumentTitle("title");
    metaData.setDocumentId("4711");
    metaData.addToIndexes();

    // Each row is { token text, outcome label }.
    String[][] tokens = {
        { "a", "DT" },         // sequence 1
        { "car", "NN" },       // sequence 1
        { "drives", "VBZ" },   // sequence 1
        { "the", "DT" },       // sequence 2
        { "hedgehogs", "NN" }, // sequence 2
        { "dies", "VBZ" }      // sequence 2
    };

    // The document text is assembled while the annotations are created, so the current
    // buffer length is the begin offset of the next token.
    StringBuilder textBuffer = new StringBuilder();
    int lastIndex = tokens.length - 1;
    for (int i = 0; i < tokens.length; i++) {
        String word = tokens[i][0];
        String label = tokens[i][1];
        int begin = textBuffer.length();
        int end = begin + word.length();

        TextClassificationTarget target = new TextClassificationTarget(jCas, begin, end);
        if (setUnitIdAsPartOfTheInstanceId) {
            target.setSuffix(word);
        }
        target.setId(i);
        target.addToIndexes();

        TextClassificationOutcome outcome = new TextClassificationOutcome(jCas, begin, end);
        outcome.setOutcome(label);
        outcome.addToIndexes();

        textBuffer.append(word);
        if (i < lastIndex) {
            // Single blank between tokens, none after the last one.
            textBuffer.append(" ");
        }
    }
    String text = textBuffer.toString();
    jCas.setDocumentText(text);

    // End offset of the third token: three token lengths plus the two separating blanks.
    int lenSeq1 = tokens[0][0].length() + 1 + tokens[1][0].length() + 1 + tokens[2][0].length();

    TextClassificationSequence firstSequence = new TextClassificationSequence(jCas, 0, lenSeq1);
    firstSequence.addToIndexes();

    // The second sequence starts after the blank that follows the first sequence.
    TextClassificationSequence secondSequence = new TextClassificationSequence(jCas, lenSeq1 + 1, text.length());
    secondSequence.addToIndexes();

    return jCas;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 5 with TextClassificationSequence

use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.

the class FoldClassificationUnitCasMultiplier method setTargetAnnotation.

/**
 * Creates the classification annotations in the copied JCas from the buffered annotations.
 * When {@code useSequences} is set, each buffered annotation becomes a
 * {@link TextClassificationSequence} and the remembered targets are added to the indexes;
 * otherwise each buffered annotation becomes a {@link TextClassificationTarget}.
 *
 * @param copyJCas the JCas in which the new annotations are created
 */
private void setTargetAnnotation(JCas copyJCas) {
    if (!useSequences) {
        // Unit mode: one target per buffered annotation, numbered by the running counter.
        for (AnnotationFS source : buf) {
            TextClassificationTarget target = new TextClassificationTarget(copyJCas, source.getBegin(), source.getEnd());
            target.addToIndexes();
            target.setId(unitCounter++);
        }
        return;
    }

    // Sequence mode: one sequence per buffered annotation, numbered by the running counter.
    for (AnnotationFS source : buf) {
        TextClassificationSequence sequence = new TextClassificationSequence(copyJCas, source.getBegin(), source.getEnd());
        sequence.addToIndexes();
        sequence.setId(seqCounter);
        seqCounter++;

        // Re-add the remembered targets, then start over with an empty list so they are
        // not added again on a later iteration.
        for (TextClassificationTarget covered : seqModeUnitsCoveredBySequenceAnno) {
            covered.addToIndexes();
        }
        seqModeUnitsCoveredBySequenceAnno = new ArrayList<>();
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence)

Aggregations

TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 15, TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 11, ArrayList (java.util.ArrayList): 6, JCas (org.apache.uima.jcas.JCas): 6, TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 6, List (java.util.List): 4, CollectionReader (org.apache.uima.collection.CollectionReader): 4, Test (org.junit.Test): 3, Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 2, Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 2, AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 2, AnnotationFS (org.apache.uima.cas.text.AnnotationFS): 2, Instance (org.dkpro.tc.api.features.Instance): 2, JCasId (org.dkpro.tc.api.type.JCasId): 2, DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData): 1, File (java.io.File): 1, IOException (java.io.IOException): 1, AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 1, CASException (org.apache.uima.cas.CASException): 1, Type (org.apache.uima.cas.Type): 1