Examples with Type - org.apache.uima.cas.Type

Example 11 with Type

use of org.apache.uima.cas.Type in project webanno by webanno.

the class WebannoTsv3Writer method setSpanAnnoPerFeature.

/**
 * Collects the column values of one span annotation for the given unit and appends them to
 * the per-unit annotation list. Also records the column names of the layer in
 * {@code featurePerLayer}.
 * <p>
 * A slot feature contributes two values (roles and targets, each {@code ;}-separated); every
 * other feature contributes one value. If the layer yields no value at all, a {@code *}
 * placeholder is added.
 *
 * @param aAnnotationsPertype
 *            the values collected so far, keyed by unit; a new list of values is appended to
 *            the entry of {@code aUnit}
 * @param aType
 *            the type of the annotation whose features are written
 * @param aFs
 *            the annotation providing the feature values
 * @param aUnit
 *            the unit the values are recorded for
 * @param aIsMultiToken
 *            forwarded to {@code setAnnoFeature}
 * @param aIsFirst
 *            forwarded to {@code setAnnoFeature}
 */
private void setSpanAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType, AnnotationFS aFs, AnnotationUnit aUnit, boolean aIsMultiToken, boolean aIsFirst) {
    List<String> annoPerFeatures = new ArrayList<>();
    featurePerLayer.putIfAbsent(aType.getName(), new LinkedHashSet<>());
    // Disambiguation id of the annotation itself; reset to 0 when the unit is not ambiguous.
    int ref = getRefId(aType, aFs, aUnit);
    if (ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)) != null && ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)).equals(false)) {
        ref = 0;
    }
    // No entry for the first unit: fall back to the entry of the unit itself.
    if (ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)) == null && ambigUnits.get(aType.getName()).get(aUnit).equals(false)) {
        ref = 0;
    }
    for (Feature feature : aType.getFeatures()) {
        // Skip the built-in sofa/begin/end features and the relation/chain bookkeeping features.
        if (feature.toString().equals("uima.cas.AnnotationBase:sofa")
                || feature.toString().equals("uima.tcas.Annotation:begin")
                || feature.toString().equals("uima.tcas.Annotation:end")
                || feature.getShortName().equals(GOVERNOR)
                || feature.getShortName().equals(DEPENDENT)
                || feature.getShortName().equals(FIRST)
                || feature.getShortName().equals(NEXT)) {
            continue;
        }
        // if slot feature
        if (slotFeatures != null && slotFeatures.contains(feature.getName())) {
            FeatureStructure slotValue = aFs.getFeatureValue(feature);
            if (slotValue != null) {
                ArrayFS array = (ArrayFS) slotValue;
                StringBuilder sbRole = new StringBuilder();
                StringBuilder sbTarget = new StringBuilder();
                for (FeatureStructure linkFS : array.toArray()) {
                    String role = linkFS.getStringValue(linkFS.getType().getFeatureByBaseName("role"));
                    AnnotationFS targetFs = (AnnotationFS) linkFS.getFeatureValue(linkFS.getType().getFeatureByBaseName("target"));
                    Type tType = targetFs.getType();
                    AnnotationUnit firstUnit = getFirstUnit(targetFs);
                    // Kept in its own variable: the id of the slot target must not replace
                    // the id of the annotation itself, which later features still need.
                    int targetRef = getRefId(tType, targetFs, firstUnit);
                    // Check if the target is ambiguous or not
                    if (ambigUnits.get(tType.getName()).get(firstUnit).equals(false)) {
                        targetRef = 0;
                    }
                    if (role == null) {
                        role = "*";
                    } else {
                        // Escape special character
                        role = replaceEscapeChars(role);
                    }
                    // Separate this link from the ones already written.
                    if (sbRole.length() > 0) {
                        sbRole.append(";");
                        sbTarget.append(";");
                    }
                    sbRole.append(role);
                    // record the actual target type column number if slot target is
                    // uima.tcas.Annotation
                    int targetTypeNumber = 0;
                    if (slotFeatureTypes.get(feature).getName().equals(CAS.TYPE_NAME_ANNOTATION)) {
                        targetTypeNumber = layerMaps.get(tType);
                    }
                    sbTarget.append(unitsLineNumber.get(firstUnit))
                            .append(targetTypeNumber == 0 ? "" : "-" + targetTypeNumber)
                            .append(targetRef > 0 ? "[" + targetRef + "]" : "");
                }
                annoPerFeatures.add(sbRole.toString().isEmpty() ? "_" : sbRole.toString());
                annoPerFeatures.add(sbTarget.toString().isEmpty() ? "_" : sbTarget.toString());
            } else {
                // No slot value: write the empty marker for both the role and the target column
                annoPerFeatures.add("_");
                annoPerFeatures.add("_");
            }
            featurePerLayer.get(aType.getName()).add(ROLE + feature.getName() + "_" + slotLinkTypes.get(feature.getName()));
            featurePerLayer.get(aType.getName()).add(slotFeatureTypes.get(feature).getName());
        } else {
            String annotation = aFs.getFeatureValueAsString(feature);
            if (annotation == null) {
                annotation = "*";
            } else {
                // Escape special character
                annotation = replaceEscapeChars(annotation);
            }
            annotation = annotation + (ref > 0 ? "[" + ref + "]" : "");
            // only add BIO markers to multiple annotations
            setAnnoFeature(aIsMultiToken, aIsFirst, annoPerFeatures, annotation);
            featurePerLayer.get(aType.getName()).add(feature.getShortName());
        }
    }
    aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
    // If the layer does not have a feature at all, add a dummy * as a placeholder
    if (annoPerFeatures.size() == 0) {
        setAnnoFeature(aIsMultiToken, aIsFirst, annoPerFeatures, "*" + (ref > 0 ? "[" + ref + "]" : ""));
    }
    aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
}

Also used : ArrayList(java.util.ArrayList) Feature(org.apache.uima.cas.Feature) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) ArrayFS(org.apache.uima.cas.ArrayFS)

Example 12 with Type

use of org.apache.uima.cas.Type in project webanno by webanno.

the class Tsv3XCasDocumentBuilder method of.

/**
 * Builds a {@link TsvDocument} from the given CAS according to the given schema: creates the
 * sentences and tokens, registers chains, attaches every annotation of the schema's types to
 * the token or sub-token units it covers, and finally activates columns.
 *
 * @param aSchema
 *            the schema providing the chain head types, the UIMA types and the columns
 * @param aJCas
 *            the CAS to read sentences, tokens and annotations from
 * @return the populated document
 */
public static TsvDocument of(TsvSchema aSchema, JCas aJCas) {
    TsvFormatHeader format = new TsvFormatHeader("WebAnno TSV", "3.2");
    TsvDocument doc = new TsvDocument(format, aSchema, aJCas);
    // Fill document with all the sentences and tokens
    for (Sentence uimaSentence : select(aJCas, Sentence.class)) {
        TsvSentence sentence = doc.createSentence(uimaSentence);
        for (Token uimaToken : selectCovered(Token.class, uimaSentence)) {
            sentence.createToken(uimaToken);
        }
    }
    // Scan for chains: follow the "first"/"next" features from each chain head and collect the
    // elements in order
    for (Type headType : aSchema.getChainHeadTypes()) {
        for (FeatureStructure chainHead : CasUtil.selectFS(aJCas.getCas(), headType)) {
            List<AnnotationFS> elements = new ArrayList<>();
            AnnotationFS link = getFeature(chainHead, CHAIN_FIRST_FEAT, AnnotationFS.class);
            while (link != null) {
                elements.add(link);
                link = getFeature(link, CHAIN_NEXT_FEAT, AnnotationFS.class);
            }
            // Chains without elements are not registered
            if (!elements.isEmpty()) {
                Type elementType = headType.getFeatureByBaseName(CHAIN_FIRST_FEAT).getRange();
                doc.createChain(headType, elementType, elements);
            }
        }
    }
    // Build indexes over the token start and end positions such that we can quickly locate
    // tokens based on their offsets.
    NavigableMap<Integer, TsvToken> tokenBeginIndex = new TreeMap<>();
    NavigableMap<Integer, TsvToken> tokenEndIndex = new TreeMap<>();
    List<TsvToken> tokens = new ArrayList<>();
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            tokenBeginIndex.put(token.getBegin(), token);
            tokenEndIndex.put(token.getEnd(), token);
            tokens.add(token);
        }
    }
    // Attach the annotations of every schema type to the token and sub-token units they cover.
    for (Type type : aSchema.getUimaTypes()) {
        LayerType layerType = aSchema.getLayerType(type);
        boolean addDisambiguationIdIfStacked = SPAN.equals(layerType);
        for (AnnotationFS annotation : CasUtil.select(aJCas.getCas(), type)) {
            doc.activateType(annotation.getType());
            // Get the relevant begin and end offsets for the current annotation
            int begin = annotation.getBegin();
            int end = annotation.getEnd();
            // For relations, use the offsets of the relation target instead of the offsets
            // of the relation annotation itself.
            if (RELATION.equals(layerType)) {
                AnnotationFS targetFS = getFeature(annotation, FEAT_REL_TARGET, AnnotationFS.class);
                begin = targetFS.getBegin();
                end = targetFS.getEnd();
            }
            // Begin token: the token with the greatest begin offset not after "begin".
            // End token: the token with the smallest end offset not before "end".
            TsvToken beginToken = tokenBeginIndex.floorEntry(begin).getValue();
            TsvToken endToken = tokenEndIndex.ceilingEntry(end).getValue();
            // For zero-width annotations, use the token located via the end index as the begin
            // token as well, overriding the value obtained from the tokenBeginIndex.
            if (begin == end) {
                beginToken = endToken;
            }
            boolean singleToken = beginToken == endToken;
            boolean zeroWitdh = begin == end;
            boolean multiTokenCapable = SPAN.equals(layerType) || CHAIN.equals(layerType);
            // The annotation starts and ends exactly at token boundaries: it is mapped to the
            // begin token, and for multi-token capable layers it is also added to the end token.
            if (beginToken.getBegin() == begin && endToken.getEnd() == end) {
                doc.mapFS2Unit(annotation, beginToken);
                beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                if (multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            } else if (zeroWitdh) {
                // Zero-width annotation not aligned with the token boundaries: it is mapped to
                // a sub-token created on the begin token.
                TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                doc.mapFS2Unit(annotation, t);
                t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
            } else {
                // The annotation is misaligned with the token boundaries on at least one side.
                // If it starts inside the begin token, a sub-token covering the overlap is
                // created and the annotation is mapped to it.
                if (beginToken.getBegin() < begin) {
                    TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                    doc.mapFS2Unit(annotation, t);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                } else // If the sub-token is not ID-defining, then the begin token is ID-defining
                {
                    beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    doc.mapFS2Unit(annotation, beginToken);
                }
                // If the annotation ends inside the end token, a sub-token covering the overlap
                // is created; the annotation is mapped to it only when singleToken is false.
                if (endToken.getEnd() > end) {
                    TsvSubToken t = endToken.createSubToken(max(endToken.getBegin(), begin), end);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    if (!singleToken) {
                        doc.mapFS2Unit(annotation, t);
                    }
                } else if (!singleToken && multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            }
            // For multi-token capable layers, also add the annotation to every token strictly
            // between the begin token and the end token
            if (multiTokenCapable && !singleToken) {
                ListIterator<TsvToken> i = tokens.listIterator(tokens.indexOf(beginToken));
                TsvToken t;
                while ((t = i.next()) != endToken) {
                    if (t != beginToken) {
                        t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    }
                }
            }
            // Multi-token span annotations must get a disambiguation ID
            if (SPAN.equals(layerType) && !singleToken) {
                doc.addDisambiguationId(annotation);
            }
        }
    }
    // Scan all created units to see which columns actually contain values
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            scanUnitForActiveColumns(token);
            scanUnitForAmbiguousSlotReferences(token);
            for (TsvSubToken subToken : token.getSubTokens()) {
                scanUnitForActiveColumns(subToken);
                scanUnitForAmbiguousSlotReferences(subToken);
            }
        }
    }
    // Activate the placeholder columns for any active types for which no other columns are
    // active.
    Set<Type> activeTypesNeedingPlaceholders = new HashSet<>(doc.getActiveTypes());
    for (TsvColumn col : doc.getActiveColumns()) {
        activeTypesNeedingPlaceholders.remove(col.uimaType);
    }
    for (TsvColumn col : doc.getSchema().getColumns()) {
        if (PLACEHOLDER.equals(col.featureType) && activeTypesNeedingPlaceholders.contains(col.uimaType)) {
            doc.activateColumn(col);
        }
    }
    return doc;
}

Also used : TsvFormatHeader(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader) ArrayList(java.util.ArrayList) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) TsvSubToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSubToken) TsvSentence(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence) TreeMap(java.util.TreeMap) TsvSubToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSubToken) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) TsvSentence(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence) HashSet(java.util.HashSet)

Example 13 with Type

use of org.apache.uima.cas.Type in project webanno by webanno.

the class Tsv3XCasSchemaAnalyzer method analyze.

/**
 * Derives the TSV schema from a type system.
 * <p>
 * Only direct subtypes of the UIMA annotation type are considered. Types subsumed by the
 * document annotation type and the token and sentence types are skipped. Relation, chain and
 * span layers get their columns; incompatible types get none. A second pass registers as chain
 * head every direct subtype of the annotation base type whose "first" feature ranges over one
 * of the chain layer types found in the first pass.
 *
 * @param aTypeSystem
 *            the type system to inspect
 * @return the schema describing the columns and the chain head types
 */
public static TsvSchema analyze(TypeSystem aTypeSystem) {
    TsvSchema schema = new TsvSchema();
    Set<Type> chainLinkTypes = new HashSet<>();
    // Consider only direct subtypes of the UIMA Annotation type. Currently, WebAnno only
    // supports such layers.
    Type annotationRoot = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION);
    Type documentAnnotationRoot = aTypeSystem.getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
    for (Type layer : aTypeSystem.getDirectSubtypes(annotationRoot)) {
        if (aTypeSystem.subsumes(documentAnnotationRoot, layer)) {
            continue;
        }
        // Tokens and sentences are not treated as annotation layers.
        String layerName = layer.getName();
        boolean isToken = layerName.equals(Token.class.getName());
        if (isToken || layerName.equals(Sentence.class.getName())) {
            continue;
        }
        switch (schema.getLayerType(layer)) {
            case RELATION: {
                Feature sourceFeature = layer.getFeatureByBaseName(FEAT_REL_SOURCE);
                schema.addColumn(new TsvColumn(layer, RELATION, sourceFeature, RELATION_REF));
                generateColumns(aTypeSystem, schema, RELATION, layer);
                break;
            }
            case CHAIN: {
                Feature elementTypeFeature = layer.getFeatureByBaseName(COREFERENCE_TYPE_FEATURE);
                schema.addColumn(new TsvColumn(layer, CHAIN, elementTypeFeature, CHAIN_ELEMENT_TYPE));
                Feature linkTypeFeature = layer.getFeatureByBaseName(COREFERENCE_RELATION_FEATURE);
                schema.addColumn(new TsvColumn(layer, CHAIN, linkTypeFeature, CHAIN_LINK_TYPE));
                chainLinkTypes.add(layer);
                break;
            }
            case SPAN: {
                schema.addColumn(new TsvColumn(layer, SPAN));
                generateColumns(aTypeSystem, schema, SPAN, layer);
                break;
            }
            case INCOMPATIBLE:
                // Do not generate a column definition for incompatible types.
                break;
        }
    }
    // Scan again for the chain head types
    Type annotationBase = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION_BASE);
    for (Type candidate : aTypeSystem.getDirectSubtypes(annotationBase)) {
        Feature first = candidate.getFeatureByBaseName(CHAIN_FIRST_FEAT);
        if (first == null) {
            continue;
        }
        if (chainLinkTypes.contains(first.getRange())) {
            schema.addChainHeadType(candidate);
        }
    }
    return schema;
}

Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Feature(org.apache.uima.cas.Feature) HashSet(java.util.HashSet)

Example 14 with Type

use of org.apache.uima.cas.Type in project webanno by webanno.

the class Tsv3XDeserializer method parseColumnDeclaration.

/**
 * Parses a single column declaration from the TSV header into a {@link TsvColumn}.
 * <p>
 * The declaration is classified, in this order, as slot role, relation reference, chain element
 * type, chain link type, slot target or primitive feature column.
 *
 * @param aJCas
 *            the CAS whose type system is used to resolve type names
 * @param aLayerType
 *            the layer type the column belongs to
 * @param aUimaType
 *            the UIMA type the column belongs to
 * @param aIndex
 *            the index passed on to the created column
 * @param aColDecl
 *            the column declaration text
 * @param aPrevCol
 *            the previously parsed column; must be a slot role column when a slot target column
 *            is declared, may be {@code null} otherwise
 * @return the parsed column
 * @throws IOException
 *             if the declaration is malformed or refers to a type or feature which does not
 *             exist, or if a slot target declaration does not follow a slot role declaration
 */
private TsvColumn parseColumnDeclaration(JCas aJCas, LayerType aLayerType, Type aUimaType, int aIndex, String aColDecl, TsvColumn aPrevCol) throws IOException {
    TypeSystem ts = aJCas.getTypeSystem();
    TsvColumn column;
    if (SPAN.equals(aLayerType) && startsWith(aColDecl, HEADER_PREFIX_ROLE)) {
        // SLOT_ROLE - starts with "ROLE_"
        String[] subFields = splitPreserveAllTokens(aColDecl, '_');
        // Fields 1 and 2 are read below; report a truncated declaration as a format error
        // instead of failing with an ArrayIndexOutOfBoundsException.
        if (subFields.length < 3) {
            throw new IOException("Malformed slot role column declaration [" + aColDecl + "]");
        }
        String featureName = substringAfter(subFields[1], ":");
        Feature feat = aUimaType.getFeatureByBaseName(featureName);
        if (feat == null) {
            throw new IOException("CAS type [" + aUimaType.getName() + "] does not have a feature called [" + featureName + "]");
        }
        column = new TsvColumn(aIndex, aUimaType, aLayerType, featureName, SLOT_ROLE);
        String typeName = subFields[2];
        Type type = ts.getType(typeName);
        if (type == null) {
            throw new IOException("CAS does not contain a type called [" + typeName + "]");
        }
        column.setTargetTypeHint(type);
    } else if (RELATION.equals(aLayerType) && startsWith(aColDecl, HEADER_PREFIX_BASE_TYPE)) {
        // RELATION_REF - starts with "BT_"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, FEAT_REL_SOURCE, RELATION_REF);
        String typeName = substringAfter(aColDecl, HEADER_PREFIX_BASE_TYPE);
        Type type = ts.getType(typeName);
        if (type == null) {
            throw new IOException("CAS does not contain a type called [" + typeName + "]");
        }
        column.setTargetTypeHint(type);
    } else if (CHAIN.equals(aLayerType) && COREFERENCE_TYPE_FEATURE.equals(aColDecl)) {
        // CHAIN_ELEMENT_TYPE - "referenceType"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, COREFERENCE_TYPE_FEATURE, CHAIN_ELEMENT_TYPE);
    } else if (CHAIN.equals(aLayerType) && COREFERENCE_RELATION_FEATURE.equals(aColDecl)) {
        // CHAIN_LINK_TYPE - "referenceRelation"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, COREFERENCE_RELATION_FEATURE, CHAIN_LINK_TYPE);
    } else if ((SPAN.equals(aLayerType) && aColDecl.contains(".")) || ts.getType(aColDecl) != null) {
        // SLOT_TARGET - name of the link target type
        // NOTE(review): "&&" binds tighter than "||", so a declaration naming an existing type
        // is treated as a slot target for ANY layer type. The parentheses only make the
        // existing grouping explicit - confirm whether the SPAN restriction was meant to
        // apply to both alternatives.
        // Make sure the type name really exists in the target CAS.
        Type type = ts.getType(aColDecl);
        if (type == null) {
            throw new IOException("CAS type system does not contain a type named [" + aColDecl + "]");
        }
        // The slot target column takes its feature name from the preceding slot role column.
        if (aPrevCol == null || !SLOT_ROLE.equals(aPrevCol.featureType)) {
            throw new IOException("Slot target column declaration must follow slot role column declaration");
        }
        column = new TsvColumn(aIndex, aUimaType, aLayerType, aPrevCol.uimaFeature.getShortName(), SLOT_TARGET);
        column.setTargetTypeHint(type);
    } else if (aUimaType.getFeatureByBaseName(aColDecl) != null) {
        // PRIMITIVE - feature name
        column = new TsvColumn(aIndex, aUimaType, aLayerType, aColDecl, PRIMITIVE);
    } else {
        throw new IOException("Type [" + aUimaType.getName() + "] does not contain a feature called [" + aColDecl + "]");
    }
    return column;
}

Also used : TypeSystem(org.apache.uima.cas.TypeSystem) Type(org.apache.uima.cas.Type) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) IOException(java.io.IOException) FSUtil.setFeature(org.apache.uima.fit.util.FSUtil.setFeature) FSUtil.getFeature(org.apache.uima.fit.util.FSUtil.getFeature) Feature(org.apache.uima.cas.Feature)

Example 15 with Type

use of org.apache.uima.cas.Type in project webanno by webanno.

the class Conll2009Reader method convert.

/**
 * Reads sentences from the given reader and adds their text and annotations to the CAS.
 * <p>
 * For each sentence, the tokens are added to the document text separated by single spaces,
 * followed by a line break. Depending on the reader's flags, lemmas, part-of-speech tags,
 * morphological features, semantic predicates with their arguments, and dependency relations
 * are created as well. A sentence annotation spans the tokens of each sentence.
 *
 * @param aJCas
 *            the CAS to fill
 * @param aReader
 *            the source of the sentence data
 * @throws IOException
 *             if reading fails or the part-of-speech mapping cannot be configured
 */
public void convert(JCas aJCas, BufferedReader aReader) throws IOException {
    if (readPos) {
        try {
            posMappingProvider.configure(aJCas.getCas());
        } catch (AnalysisEngineProcessException e) {
            throw new IOException(e);
        }
    }
    JCasBuilder doc = new JCasBuilder(aJCas);
    for (List<String[]> rows = readSentence(aReader); rows != null; rows = readSentence(aReader)) {
        if (rows.isEmpty()) {
            // Nothing to add for an empty sentence, e.g. sentence markers following each other.
            continue;
        }
        final int sentenceBegin = doc.getPosition();
        int sentenceEnd = sentenceBegin;
        // Tokens, Lemma, POS
        Map<Integer, Token> tokenById = new HashMap<>();
        List<SemPred> predicates = new ArrayList<>();
        int rowsLeft = rows.size();
        for (String[] row : rows) {
            rowsLeft--;
            // Read token
            Token token = doc.add(row[FORM], Token.class);
            tokenById.put(Integer.valueOf(row[ID]), token);
            // Tokens are separated by a space, but no space follows the last token.
            if (rowsLeft > 0) {
                doc.add(" ");
            }
            // Read lemma
            if (!UNUSED.equals(row[LEMMA]) && readLemma) {
                Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd());
                lemma.setValue(row[LEMMA]);
                lemma.addToIndexes();
                token.setLemma(lemma);
            }
            // Read part-of-speech tag
            if (!UNUSED.equals(row[POS]) && readPos) {
                Type posTagType = posMappingProvider.getTagType(row[POS]);
                POS pos = (POS) aJCas.getCas().createAnnotation(posTagType, token.getBegin(), token.getEnd());
                pos.setPosValue(row[POS].intern());
                // WebAnno did not yet backport the coarse grained POS feature from
                // DKPro Core 1.9.0
                // POSUtils.assignCoarseValue(pos);
                pos.addToIndexes();
                token.setPos(pos);
            }
            // Read morphological features
            if (!UNUSED.equals(row[FEAT]) && readMorph) {
                MorphologicalFeatures morph = new MorphologicalFeatures(aJCas, token.getBegin(), token.getEnd());
                morph.setValue(row[FEAT]);
                morph.addToIndexes();
            }
            // Read semantic predicate
            if (!UNUSED.equals(row[PRED]) && readSemanticPredicate) {
                SemPred predicate = new SemPred(aJCas, token.getBegin(), token.getEnd());
                predicate.setCategory(row[PRED]);
                predicate.addToIndexes();
                predicates.add(predicate);
            }
            sentenceEnd = token.getEnd();
        }
        // Dependencies
        if (readDependency) {
            for (String[] row : rows) {
                if (UNUSED.equals(row[DEPREL])) {
                    continue;
                }
                int dependentId = Integer.parseInt(row[ID]);
                int headId = Integer.parseInt(row[HEAD]);
                // Model the root as a loop onto itself.
                // Not using ROOT here because WebAnno cannot deal with elevated
                // types
                int governorId = headId == 0 ? dependentId : headId;
                Dependency rel = new Dependency(aJCas);
                rel.setGovernor(tokenById.get(governorId));
                rel.setDependent(tokenById.get(dependentId));
                rel.setDependencyType(row[DEPREL]);
                rel.setBegin(rel.getDependent().getBegin());
                rel.setEnd(rel.getDependent().getEnd());
                // This is set via FSUtil because we still use the DKPro Core 1.7.0 JCas
                // classes
                FSUtil.setFeature(rel, "flavor", DependencyFlavor.BASIC);
                rel.addToIndexes();
            }
        }
        // Semantic arguments
        if (readSemanticPredicate) {
            // Get arguments for one predicate at a time; the p-th predicate reads its
            // arguments from column APRED + p.
            for (int p = 0; p < predicates.size(); p++) {
                List<SemArgLink> links = new ArrayList<>();
                for (String[] row : rows) {
                    if (UNUSED.equals(row[APRED + p])) {
                        continue;
                    }
                    Token token = tokenById.get(Integer.valueOf(row[ID]));
                    SemArg argument = new SemArg(aJCas, token.getBegin(), token.getEnd());
                    argument.addToIndexes();
                    SemArgLink link = new SemArgLink(aJCas);
                    link.setRole(row[APRED + p]);
                    link.setTarget(argument);
                    links.add(link);
                }
                predicates.get(p).setArguments(FSCollectionFactory.createFSArray(aJCas, links));
            }
        }
        // Sentence
        Sentence sentence = new Sentence(aJCas, sentenceBegin, sentenceEnd);
        sentence.addToIndexes();
        // One sentence per line.
        doc.add("\n");
    }
    doc.close();
}

Also used : MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SemArgLink(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) IOException(java.io.IOException) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) Type(org.apache.uima.cas.Type) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) JCasBuilder(org.apache.uima.fit.factory.JCasBuilder) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) SemPred(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) SemArg(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg)

Aggregations

Type (org.apache.uima.cas.Type)160 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)112 JCas (org.apache.uima.jcas.JCas)75 ArrayList (java.util.ArrayList)72 Feature (org.apache.uima.cas.Feature)62 Test (org.junit.Test)59 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)54 CAS (org.apache.uima.cas.CAS)47 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)40 FeatureStructure (org.apache.uima.cas.FeatureStructure)39 CasUtil.getType (org.apache.uima.fit.util.CasUtil.getType)29 List (java.util.List)25 IOException (java.io.IOException)17 HashMap (java.util.HashMap)15 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)14 LinkedHashMap (java.util.LinkedHashMap)14 NamedEntity (de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity)13 Map (java.util.Map)13 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)13 ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException)13