Search in sources :

Example 1 with FSArray

use of org.apache.uima.jcas.cas.FSArray in project webanno by webanno.

The method convert of the class Conll2009Writer.

/**
 * Writes every sentence of the given JCas to the writer in CONLL 2009 format: one
 * tab-separated row per token, followed by an empty line after each sentence.
 *
 * @param aJCas
 *            the JCas whose sentences, tokens, dependencies and semantic annotations are read.
 * @param aOut
 *            the writer that receives the rows.
 * @throws IllegalStateException
 *             if a token is the dependent of more than one basic dependency.
 */
private void convert(JCas aJCas, PrintWriter aOut) {
    // Document-wide lookups: predicates covered by each token, tokens covered by each argument.
    Map<Token, Collection<SemPred>> predicatesByToken = indexCovered(aJCas, Token.class, SemPred.class);
    Map<SemArg, Collection<Token>> tokensByArgument = indexCovered(aJCas, SemArg.class, Token.class);

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        // Insertion-ordered so that rows are written in token order.
        Map<Token, Row> rowsByToken = new LinkedHashMap<>();

        List<Token> sentenceTokens = selectCovered(Token.class, sentence);
        List<MorphologicalFeatures> sentenceFeats = selectCovered(MorphologicalFeatures.class, sentence);
        // FEATS are only attached when the sentence covers as many feature annotations as
        // tokens; they are then paired with the tokens by position.
        boolean featsAligned = sentenceFeats.size() == sentenceTokens.size();
        List<SemPred> sentencePreds = selectCovered(SemPred.class, sentence);
        int predicateCount = sentencePreds.size();

        // Build one row per token.
        int position = 0;
        for (Token token : sentenceTokens) {
            Row current = new Row();
            current.id = position + 1;
            current.token = token;
            current.args = new SemArgLink[predicateCount];
            if (featsAligned) {
                current.feats = sentenceFeats.get(position);
            }

            // Only the first semantic predicate found for a token is kept.
            Collection<SemPred> candidates = predicatesByToken.get(token);
            if (candidates != null && !candidates.isEmpty()) {
                current.pred = candidates.iterator().next();
            }

            rowsByToken.put(token, current);
            position++;
        }

        // Attach dependencies; a dependency without a flavor is treated like a basic one.
        List<Dependency> basicDependencies = selectCovered(Dependency.class, sentence).stream().filter(candidate -> {
            String flavor = FSUtil.getFeature(candidate, "flavor", String.class);
            return flavor == null || DependencyFlavor.BASIC.equals(flavor);
        }).collect(Collectors.toList());
        for (Dependency dependency : basicDependencies) {
            Row dependentRow = rowsByToken.get(dependency.getDependent());
            if (dependentRow.deprel != null) {
                throw new IllegalStateException("Illegal basic dependency structure - token [" + dependentRow.token.getCoveredText() + "] is dependent of more than one dependency.");
            }
            dependentRow.deprel = dependency;
        }

        // Attach semantic arguments: column p of a row holds the link to predicate p.
        int predicateIndex = 0;
        for (SemPred predicate : sentencePreds) {
            FSArray links = predicate.getArguments();
            for (SemArgLink link : select(links, SemArgLink.class)) {
                for (Token covered : tokensByArgument.get(link.getTarget())) {
                    rowsByToken.get(covered).args[predicateIndex] = link;
                }
            }
            predicateIndex++;
        }

        // Emit the rows of this sentence.
        for (Row row : rowsByToken.values()) {
            String form = row.token.getCoveredText();

            String lemma = (writeLemma && row.token.getLemma() != null) ? row.token.getLemma().getValue() : UNUSED;

            String pos = (writePos && row.token.getPos() != null) ? row.token.getPos().getPosValue() : UNUSED;

            String feat = (writeMorph && row.feats != null) ? row.feats.getValue() : UNUSED;

            int headId = UNUSED_INT;
            String deprel = UNUSED;
            if (writeDependency && row.deprel != null) {
                deprel = row.deprel.getDependencyType();
                int governorId = rowsByToken.get(row.deprel.getGovernor()).id;
                // ROOT dependencies may be modeled as a loop; those are written with head 0.
                headId = (governorId == row.id) ? 0 : governorId;
            }
            String head = (headId != UNUSED_INT) ? Integer.toString(headId) : UNUSED;

            String fillpred = UNUSED;
            String pred = UNUSED;
            StringBuilder roles = new StringBuilder();
            if (writeSemanticPredicate) {
                if (row.pred != null) {
                    fillpred = "Y";
                    pred = row.pred.getCategory();
                }
                for (SemArgLink link : row.args) {
                    // A tab is inserted only once something has already been written.
                    if (roles.length() > 0) {
                        roles.append('\t');
                    }
                    if (link != null) {
                        roles.append(link.getRole());
                    } else {
                        roles.append(UNUSED);
                    }
                }
            }

            // The predicted columns (PLEMMA, PPOS, PFEAT, PHEAD, PDEPREL) repeat the gold values.
            aOut.printf("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", row.id, form, lemma, lemma, pos, pos, feat, feat, head, head, deprel, deprel, fillpred, pred, roles);
        }
        aOut.println();
    }
}
Also used : FSArray(org.apache.uima.jcas.cas.FSArray) JCasFileWriter_ImplBase(de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase) SemArgLink(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink) HashMap(java.util.HashMap) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) FSUtil(org.apache.uima.fit.util.FSUtil) LinkedHashMap(java.util.LinkedHashMap) TypeCapability(org.apache.uima.fit.descriptor.TypeCapability) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) SemPred(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) ConfigurationParameter(org.apache.uima.fit.descriptor.ConfigurationParameter) DependencyFlavor(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor) Map(java.util.Map) OutputStreamWriter(java.io.OutputStreamWriter) SemArg(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg) PrintWriter(java.io.PrintWriter) JCas(org.apache.uima.jcas.JCas) ResourceMetaData(org.apache.uima.fit.descriptor.ResourceMetaData) MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) JCasUtil.indexCovered(org.apache.uima.fit.util.JCasUtil.indexCovered) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) ComponentParameters(de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters) IOUtils.closeQuietly(org.apache.commons.io.IOUtils.closeQuietly) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) List(java.util.List) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) JCasUtil.select(org.apache.uima.fit.util.JCasUtil.select) JCasUtil.selectCovered(org.apache.uima.fit.util.JCasUtil.selectCovered) SemArgLink(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) LinkedHashMap(java.util.LinkedHashMap) 
FSArray(org.apache.uima.jcas.cas.FSArray) SemPred(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Collection(java.util.Collection) SemArg(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg)

Example 2 with FSArray

use of org.apache.uima.jcas.cas.FSArray in project tika by apache.

The method getAnnotationProperty of the class CTAKESUtils.

/**
 * Returns the string form of a single property of the given annotation.
 *
 * <p>For {@code ONTOLOGY_CONCEPT_ARR} the result is the comma-separated list of the
 * CUIs of all {@link UmlsConcept} entries in the annotation's ontology concept array;
 * entries of any other type are skipped, and the result is the empty string when the
 * array is {@code null} or contains no {@link UmlsConcept}.
 *
 * @param annotation
 *            {@link IdentifiedAnnotation} object to read the property from.
 * @param property
 *            {@link CTAKESAnnotationProperty} enum constant that selects which
 *            property to read.
 * @return the property value as a string, or {@code null} if {@code property} is
 *         none of the constants handled here.
 */
public static String getAnnotationProperty(IdentifiedAnnotation annotation, CTAKESAnnotationProperty property) {
    String value = null;
    if (property == CTAKESAnnotationProperty.BEGIN) {
        value = Integer.toString(annotation.getBegin());
    } else if (property == CTAKESAnnotationProperty.END) {
        value = Integer.toString(annotation.getEnd());
    } else if (property == CTAKESAnnotationProperty.CONDITIONAL) {
        value = Boolean.toString(annotation.getConditional());
    } else if (property == CTAKESAnnotationProperty.CONFIDENCE) {
        value = Float.toString(annotation.getConfidence());
    } else if (property == CTAKESAnnotationProperty.DISCOVERY_TECNIQUE) {
        value = Integer.toString(annotation.getDiscoveryTechnique());
    } else if (property == CTAKESAnnotationProperty.GENERIC) {
        value = Boolean.toString(annotation.getGeneric());
    } else if (property == CTAKESAnnotationProperty.HISTORY_OF) {
        value = Integer.toString(annotation.getHistoryOf());
    } else if (property == CTAKESAnnotationProperty.ID) {
        value = Integer.toString(annotation.getId());
    } else if (property == CTAKESAnnotationProperty.ONTOLOGY_CONCEPT_ARR) {
        FSArray mentions = annotation.getOntologyConceptArr();
        StringBuilder sb = new StringBuilder();
        if (mentions != null) {
            // The separator is driven by what has actually been emitted, not by the
            // array index: skipped (non-UmlsConcept) entries at the end of the array
            // must not leave a dangling comma behind.
            boolean first = true;
            for (int i = 0; i < mentions.size(); i++) {
                Object mention = mentions.get(i);
                if (mention instanceof UmlsConcept) {
                    if (!first) {
                        sb.append(",");
                    }
                    sb.append(((UmlsConcept) mention).getCui());
                    first = false;
                }
            }
        }
        value = sb.toString();
    } else if (property == CTAKESAnnotationProperty.POLARITY) {
        value = Integer.toString(annotation.getPolarity());
    }
    return value;
}
Also used : FSArray(org.apache.uima.jcas.cas.FSArray) UmlsConcept(org.apache.ctakes.typesystem.type.refsem.UmlsConcept)

Aggregations

FSArray (org.apache.uima.jcas.cas.FSArray)2 JCasFileWriter_ImplBase (de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase)1 MorphologicalFeatures (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures)1 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)1 ComponentParameters (de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters)1 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)1 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)1 SemArg (de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg)1 SemArgLink (de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink)1 SemPred (de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred)1 Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)1 DependencyFlavor (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor)1 OutputStreamWriter (java.io.OutputStreamWriter)1 PrintWriter (java.io.PrintWriter)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Map (java.util.Map)1 Collectors (java.util.stream.Collectors)1