Search in sources :

Example 21 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method makeChainHead.

/**
 * Creates a feature structure of the given type in the CAS that owns {@code first}, sets its
 * {@code "first"} feature to {@code first} and adds the new feature structure to the CAS
 * indexes.
 *
 * @param aType
 *            the type of the feature structure to create.
 * @param first
 *            the annotation stored in the {@code "first"} feature; its CAS is the one the new
 *            feature structure is created in.
 */
private static void makeChainHead(Type aType, AnnotationFS first) {
    final CAS owningCas = first.getCAS();

    final FeatureStructure chainHead = owningCas.createFS(aType);
    FSUtil.setFeature(chainHead, "first", first);

    owningCas.addFsToIndexes(chainHead);
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) CAS(org.apache.uima.cas.CAS)

Example 22 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testZeroLengthSlotFeature2.

/**
 * Builds a one-sentence CAS with two {@code webanno.custom.SimpleSpan} annotations - one running
 * from the begin of the second token to the end of the third token, and one zero-width span
 * located at the end of the third token - links both to a link host placed on the first token
 * (link labels {@code "p1"} and {@code "p2"}), and then runs {@code writeAndAssertEquals}.
 */
@Test
public void testZeroLengthSlotFeature2() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokenList = new ArrayList<>(select(jcas, Token.class));
    Token hostToken = tokenList.get(0);
    Token secondToken = tokenList.get(1);
    Token thirdToken = tokenList.get(2);

    Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");

    // Span covering the second and third token
    AnnotationFS wideSpan = cas.createAnnotation(spanType, secondToken.getBegin(),
            thirdToken.getEnd());
    cas.addFsToIndexes(wideSpan);

    // Zero-width span: begin and end are both the end offset of the third token
    AnnotationFS emptySpan = cas.createAnnotation(spanType, thirdToken.getEnd(),
            thirdToken.getEnd());
    cas.addFsToIndexes(emptySpan);

    FeatureStructure firstLink = makeLinkFS(jcas, "p1", wideSpan);
    FeatureStructure secondLink = makeLinkFS(jcas, "p2", emptySpan);
    makeLinkHostFS(jcas, hostToken.getBegin(), hostToken.getEnd(), firstLink, secondLink);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS,
            asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
            asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES,
            asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS,
            asList("webanno.custom.SimpleSpan"));
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Test(org.junit.Test)

Example 23 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testMultiTokenStackedSlotFeature.

/**
 * Builds a one-sentence CAS with two {@code webanno.custom.SimpleSpan} annotations that share
 * the same offsets (begin of the second token to end of the third token), links both to a link
 * host placed on the first token (link labels {@code "p1"} and {@code "p2"}), and then runs
 * {@code writeAndAssertEquals}.
 */
@Test
public void testMultiTokenStackedSlotFeature() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokenList = new ArrayList<>(select(jcas, Token.class));
    Token hostToken = tokenList.get(0);
    Token secondToken = tokenList.get(1);
    Token thirdToken = tokenList.get(2);

    Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");

    // Two spans stacked on exactly the same multi-token range
    AnnotationFS lowerSpan = cas.createAnnotation(spanType, secondToken.getBegin(),
            thirdToken.getEnd());
    cas.addFsToIndexes(lowerSpan);
    AnnotationFS upperSpan = cas.createAnnotation(spanType, secondToken.getBegin(),
            thirdToken.getEnd());
    cas.addFsToIndexes(upperSpan);

    FeatureStructure firstLink = makeLinkFS(jcas, "p1", lowerSpan);
    FeatureStructure secondLink = makeLinkFS(jcas, "p2", upperSpan);
    makeLinkHostFS(jcas, hostToken.getBegin(), hostToken.getEnd(), firstLink, secondLink);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS,
            asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
            asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES,
            asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS,
            asList("webanno.custom.SimpleSpan"));
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Test(org.junit.Test)

Example 24 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testSimpleSameRoleSlotFeature.

/**
 * Builds a one-sentence CAS with one {@code webanno.custom.SimpleSpan} on the second token and
 * one on the third token, links both to a link host placed on the first token using the same
 * link label ({@code "p1"}) for both links, and then runs {@code writeAndAssertEquals}.
 */
@Test
public void testSimpleSameRoleSlotFeature() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokenList = new ArrayList<>(select(jcas, Token.class));
    Token hostToken = tokenList.get(0);
    Token secondToken = tokenList.get(1);
    Token thirdToken = tokenList.get(2);

    Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");

    // One single-token span on each of the second and third token
    AnnotationFS spanOnSecond = cas.createAnnotation(spanType, secondToken.getBegin(),
            secondToken.getEnd());
    cas.addFsToIndexes(spanOnSecond);
    AnnotationFS spanOnThird = cas.createAnnotation(spanType, thirdToken.getBegin(),
            thirdToken.getEnd());
    cas.addFsToIndexes(spanOnThird);

    // Both links deliberately carry the same label
    FeatureStructure firstLink = makeLinkFS(jcas, "p1", spanOnSecond);
    FeatureStructure secondLink = makeLinkFS(jcas, "p1", spanOnThird);
    makeLinkHostFS(jcas, hostToken.getBegin(), hostToken.getEnd(), firstLink, secondLink);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS,
            asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
            asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES,
            asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS,
            asList("webanno.custom.SimpleSpan"));
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Test(org.junit.Test)

Example 25 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class Tsv3XDeserializer method readContent.

/**
 * Reads the content section of the document line by line and populates the given document.
 * <p>
 * Each line is classified as a sentence header (starts with {@code PREFIX_TEXT}), an empty line
 * separating sentences, a token line, a sub-token line (ID contains a {@code "."}) or the end of
 * the input. The sequence of line kinds is validated against the kind of the previous line; an
 * unexpected sequence is reported as an {@link IOException} carrying the line number.
 * <p>
 * While reading, the document text is accumulated from the sentence headers, sentences and
 * tokens are created, and annotation columns are handed to {@code parseAnnotations}. Once the
 * input is exhausted, the document text is set on the JCas and all annotations collected on the
 * tokens and sub-tokens are added to the CAS indexes.
 *
 * @param aIn
 *            the reader to consume; its line number is used in error messages.
 * @param aDoc
 *            the document to populate.
 * @throws IOException
 *             if reading fails, if the lines do not appear in a valid order, or if a token line
 *             lacks the ID/offset columns or carries offsets that are not of the form
 *             {@code begin-end} with integer values.
 */
private void readContent(LineNumberReader aIn, TsvDocument aDoc) throws IOException {
    StringBuilder text = new StringBuilder();
    State prevState = State.INTER_SENTENCE_SPACE;
    State state = State.INTER_SENTENCE_SPACE;
    StringBuilder sentenceText = new StringBuilder();
    TsvSentence sentence = null;
    TsvToken token = null;
    String line = aIn.readLine();
    while (!State.END.equals(state)) {
        // These variables are only used in TOKEN and SUBTOKEN states.
        String[] fields = null;
        String id = null;
        String[] offsets = null;
        int begin = -1;
        int end = -1;
        // Determine the status of the current line
        if (startsWith(line, PREFIX_TEXT)) {
            state = State.SENTENCE;
        } else if (line == null) {
            state = State.END;
        } else if (isEmpty(line)) {
            state = State.INTER_SENTENCE_SPACE;
        } else {
            fields = splitPreserveAllTokens(line, FIELD_SEPARATOR);
            // A token line needs at least the ID and the offset column. Report a malformed
            // line as a format error with its line number instead of letting an
            // ArrayIndexOutOfBoundsException escape.
            if (fields.length < 2) {
                throw new IOException("Line " + aIn.getLineNumber() + ": Expected token ID and offsets but got [" + line + "]");
            }
            // Get token metadata
            id = fields[0];
            offsets = split(fields[1], "-");
            if (offsets == null || offsets.length < 2) {
                throw new IOException("Line " + aIn.getLineNumber() + ": Expected offsets in the form [begin-end] but got [" + fields[1] + "]");
            }
            try {
                // parseInt avoids the needless boxing/unboxing of Integer.valueOf
                begin = Integer.parseInt(offsets[0]);
                end = Integer.parseInt(offsets[1]);
            } catch (NumberFormatException e) {
                throw new IOException("Line " + aIn.getLineNumber() + ": Expected numeric offsets but got [" + fields[1] + "]", e);
            }
            // TOKEN or SUBTOKEN?
            if (id.contains(".")) {
                state = State.SUBTOKEN;
            } else {
                state = State.TOKEN;
            }
        }
        // Assert that the order of information in the file is correct
        switch(prevState) {
            case INTER_SENTENCE_SPACE:
                if (!State.SENTENCE.equals(state)) {
                    throw new IOException("Line " + aIn.getLineNumber() + ": Expected sentence header but got [" + state + "]");
                }
                break;
            case SENTENCE:
                if (!(State.SENTENCE.equals(state) || State.TOKEN.equals(state))) {
                    throw new IOException("Line " + aIn.getLineNumber() + ": Expected sentence header or token but got [" + state + "]");
                }
                break;
            case TOKEN:
            case SUBTOKEN:
                if (!(State.INTER_SENTENCE_SPACE.equals(state) || State.END.equals(state) || State.TOKEN.equals(state) || State.SUBTOKEN.equals(state))) {
                    throw new IOException("Line " + aIn.getLineNumber() + ": Expected token, sub-token or sentence break but got [" + state + "]");
                }
                break;
        }
        // Do the actual parsing
        switch(state) {
            case END:
            case INTER_SENTENCE_SPACE:
                // End of sentence action
                // The order check above only lets us get here after a TOKEN or SUBTOKEN line,
                // so a sentence has been created at this point.
                // The -1 here is to account for the tailing line break
                sentence.getUimaSentence().setEnd(text.length() - 1);
                sentence.getUimaSentence().addToIndexes();
                sentence = null;
                break;
            case TOKEN:
                // End of sentence header action
                if (State.SENTENCE.equals(prevState)) {
                    // If the text buffer already extends past the begin offset of the first
                    // token of this sentence, the extra character is expected to be the line
                    // break appended after the previous sentence text; strip it off.
                    if (text.length() > begin) {
                        assert text.length() == begin + 1;
                        assert text.charAt(text.length() - 1) == LINE_BREAK;
                        text.setLength(text.length() - 1);
                    }
                    // If the text buffer ends before the begin offset of the first token of
                    // this sentence, pad the buffer with spaces to fill the gap.
                    if (text.length() < begin) {
                        text.append(repeat(' ', begin - text.length()));
                    }
                    assert text.length() == begin;
                    assert sentence == null;
                    Sentence uimaSentence = new Sentence(aDoc.getJCas());
                    uimaSentence.setBegin(text.length());
                    sentence = aDoc.createSentence(uimaSentence);
                    text.append(sentenceText);
                    sentenceText.setLength(0);
                }
                // Token parsing action
                Token uimaToken = new Token(aDoc.getJCas(), begin, end);
                uimaToken.addToIndexes();
                token = sentence.createToken(uimaToken);
                // Read annotations from the columns
                parseAnnotations(aDoc, sentence, token, fields);
                break;
            case SUBTOKEN:
                // Read annotations from the columns
                TsvSubToken subToken = token.createSubToken(begin, end);
                parseAnnotations(aDoc, sentence, subToken, fields);
                break;
            case SENTENCE:
                // Header parsing action
                // Consecutive header lines accumulate in sentenceText, each followed by a
                // line break, until the first token line of the sentence is seen.
                String textFragment = substringAfter(line, "=");
                textFragment = unescapeText(aDoc.getFormatHeader(), textFragment);
                sentenceText.append(textFragment);
                sentenceText.append(LINE_BREAK);
                break;
        }
        prevState = state;
        line = aIn.readLine();
    }
    aDoc.getJCas().setDocumentText(text.toString());
    // After all data has been read, we also add the annotations with disambiguation ID to
    // the CAS indexes. This ensures we only add them after their final begin/end offsets
    // have been determined since most of these annotations are actually multi-token
    // annotations.
    CAS cas = aDoc.getJCas().getCas();
    // The set drops annotations collected more than once and keeps first-seen order.
    Set<FeatureStructure> fses = new LinkedHashSet<>();
    for (TsvSentence s : aDoc.getSentences()) {
        for (TsvToken t : s.getTokens()) {
            for (Type type : t.getUimaTypes()) {
                fses.addAll(t.getUimaAnnotations(type));
            }
            for (TsvSubToken st : t.getSubTokens()) {
                for (Type type : st.getUimaTypes()) {
                    fses.addAll(st.getUimaAnnotations(type));
                }
            }
        }
    }
    fses.forEach(cas::addFsToIndexes);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) TsvSubToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSubToken) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) TsvSentence(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence) IOException(java.io.IOException) TsvSubToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSubToken) FeatureStructure(org.apache.uima.cas.FeatureStructure) Type(org.apache.uima.cas.Type) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) CAS(org.apache.uima.cas.CAS) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) TsvSentence(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence)

Aggregations

FeatureStructure (org.apache.uima.cas.FeatureStructure)60 Type (org.apache.uima.cas.Type)38 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)37 ArrayList (java.util.ArrayList)29 JCas (org.apache.uima.jcas.JCas)20 Feature (org.apache.uima.cas.Feature)17 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)16 CAS (org.apache.uima.cas.CAS)16 Test (org.junit.Test)16 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)13 List (java.util.List)12 ArrayFS (org.apache.uima.cas.ArrayFS)8 Arrays.asList (java.util.Arrays.asList)6 LinkedHashMap (java.util.LinkedHashMap)6 CasUtil.getType (org.apache.uima.fit.util.CasUtil.getType)6 AnnotationLayer (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer)5 AnnotatorState (de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState)4 LogMessage (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage)4 TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)4 Map (java.util.Map)4