Search in sources :

Example 26 with NamedEntity

use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.

the class TcfWriter method writeNamedEntity.

/**
 * Writes the named entities found in the CAS into a named entities layer of the given text
 * corpus. If the CAS holds no {@code NamedEntity} annotation, only a debug message is logged and
 * no layer is created.
 *
 * @param aJCas the CAS to read the named entities, tokens and tag set descriptions from
 * @param aTextCorpus the text corpus in which the named entities layer is created
 * @param aTokensBeginPositionMap TCF tokens keyed by the begin offset of the matching CAS token
 */
private void writeNamedEntity(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) {
    boolean hasEntities = JCasUtil.exists(aJCas, NamedEntity.class);
    if (!hasEntities) {
        // Nothing to export: leave the corpus without a named entities layer
        getLogger().debug("Layer [" + TextCorpusLayerTag.NAMED_ENTITIES.getXmlName() + "]: empty");
        return;
    }

    // Take the name of the first tag set declared for the NamedEntity layer; fall back to "BART"
    String layerTagSet = "BART";
    String entityTypeName = NamedEntity.class.getName();
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (description.getLayer().equals(entityTypeName)) {
            layerTagSet = description.getName();
            break;
        }
    }

    NamedEntitiesLayer entitiesLayer = aTextCorpus.createNamedEntitiesLayer(layerTagSet);
    getLogger().debug("Layer [" + TextCorpusLayerTag.NAMED_ENTITIES.getXmlName() + "]: created");

    for (NamedEntity entity : select(aJCas, NamedEntity.class)) {
        // Translate every CAS token covered by the entity into its TCF counterpart
        List<eu.clarin.weblicht.wlfxb.tc.api.Token> tcfTokens = new ArrayList<>();
        for (Token casToken : selectCovered(aJCas, Token.class, entity.getBegin(), entity.getEnd())) {
            int beginOffset = casToken.getBegin();
            tcfTokens.add(aTokensBeginPositionMap.get(beginOffset));
        }
        entitiesLayer.addEntity(entity.getValue(), tcfTokens);
    }
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) NamedEntitiesLayer(eu.clarin.weblicht.wlfxb.tc.api.NamedEntitiesLayer) ArrayList(java.util.ArrayList) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) TagsetDescription(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription)

Example 27 with NamedEntity

use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.

the class WebannoTsv1Reader method createNamedEntity.

/**
 * Creates Named Entities from CoNLL BIO format to CAS format.
 *
 * <p>Token numbers run from 1 to {@code aTokensMap.size()}. The tag string of a token may hold
 * several {@code |}-separated tags (e.g. {@code B_LOC|O|I_PER}); the 1-based position of a tag
 * within that string identifies the stacked annotation it belongs to, so an {@code I_}/{@code I-}
 * tag extends the entity most recently opened at the same position.
 *
 * <p>Lenient handling of incomplete input: a token without a tag entry is treated like
 * {@code O}, and a continuation tag with no entity open at its position starts a new entity.
 *
 * @param aNamedEntityMap tag string per token number
 * @param aJCas the CAS to which the named entity annotations are added
 * @param aTokensMap map whose size gives the number of tokens to process
 * @param aJcasTokens CAS tokens keyed by {@code "t_" + token number}
 */
private void createNamedEntity(Map<Integer, String> aNamedEntityMap, JCas aJCas, Map<Integer, String> aTokensMap, Map<String, Token> aJcasTokens) {
    Map<Integer, NamedEntity> indexedNeAnnos = new LinkedHashMap<>();
    for (int i = 1; i <= aTokensMap.size(); i++) {
        String tags = aNamedEntityMap.get(i);
        // A missing tag entry is treated like "O" instead of failing with a NullPointerException
        if (tags == null || tags.equals("O")) {
            continue;
        }
        Token token = aJcasTokens.get("t_" + i);
        // to maintain multiple span ne annotation in the same index
        int index = 1;
        for (String ne : tags.split("\\|")) {
            boolean isBegin = ne.startsWith("B_") || ne.startsWith("B-");
            boolean isInside = ne.startsWith("I_") || ne.startsWith("I-");
            NamedEntity openEntity = isInside ? indexedNeAnnos.get(index) : null;

            if (ne.equals("O")) {
                // for annotations such as B_LOC|O|I_PER and the like: only the position advances
            } else if (openEntity != null) {
                // The end offset is an index key: take the annotation out of the indexes while
                // it is modified so that the indexes cannot be corrupted, then put it back.
                openEntity.removeFromIndexes();
                openEntity.setEnd(token.getEnd());
                openEntity.addToIndexes();
            } else {
                // B tag, an I tag without an open entity at this position, or a value that is
                // not in IOB format (one NE per token; no way to detect multiple token NE)
                NamedEntity outNamedEntity = new NamedEntity(aJCas, token.getBegin(), token.getEnd());
                outNamedEntity.setValue((isBegin || isInside) ? ne.substring(2) : ne);
                outNamedEntity.addToIndexes();
                indexedNeAnnos.put(index, outNamedEntity);
            }
            index++;
        }
    }
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) LinkedHashMap(java.util.LinkedHashMap)

Example 28 with NamedEntity

use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testMultiTokenSpanWithoutFeatureValue.

/**
 * Writes a CAS containing one named entity without a value that spans the whole document text
 * and compares the output with the expected result.
 */
@Test
public void testMultiTokenSpanWithoutFeatureValue() throws Exception {
    JCas jcas = makeJCasOneSentence();

    // One entity from offset 0 to the end of the document text; its value is never set
    int documentEnd = jcas.getDocumentText().length();
    NamedEntity wholeDocumentEntity = new NamedEntity(jcas, 0, documentEnd);
    wholeDocumentEntity.addToIndexes();

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(NamedEntity.class));
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) JCas(org.apache.uima.jcas.JCas) Test(org.junit.Test)

Example 29 with NamedEntity

use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testSingleNonMultiTokenRelationWithoutFeatureValue.

/**
 * Writes a CAS with two named entities (first two tokens, next two tokens) connected by one
 * {@code webanno.custom.Relation} annotation and compares the output with the expected result.
 */
@Test
public void testSingleNonMultiTokenRelationWithoutFeatureValue() throws Exception {
    final String relationTypeName = "webanno.custom.Relation";

    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokenList = new ArrayList<>(select(jcas, Token.class));
    Token governorFirst = tokenList.get(0);
    Token governorLast = tokenList.get(1);
    Token dependentFirst = tokenList.get(2);
    Token dependentLast = tokenList.get(3);

    NamedEntity governor = new NamedEntity(jcas, governorFirst.getBegin(), governorLast.getEnd());
    governor.addToIndexes();
    NamedEntity dependent = new NamedEntity(jcas, dependentFirst.getBegin(), dependentLast.getEnd());
    dependent.addToIndexes();

    Type relationType = cas.getTypeSystem().getType(relationTypeName);

    // One at the beginning
    // WebAnno legacy conventions would make the relation span from the smaller begin to the
    // larger end of governor and dependent. DKPro Core conventions, used here, give the
    // relation the offsets of the dependent.
    AnnotationFS relation = cas.createAnnotation(relationType, dependent.getBegin(), dependent.getEnd());
    FSUtil.setFeature(relation, "Governor", governor);
    FSUtil.setFeature(relation, "Dependent", dependent);
    cas.addFsToIndexes(relation);

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(NamedEntity.class), WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList(relationTypeName));
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Test(org.junit.Test)

Example 30 with NamedEntity

use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testTokenBoundedBioLookAlike.

@Test
public void testTokenBoundedBioLookAlike() throws Exception {
    JCas jcas = makeJCasOneSentence();
    int n = 0;
    for (Token t : select(jcas, Token.class)) {
        NamedEntity ne = new NamedEntity(jcas, t.getBegin(), t.getEnd());
        ne.setValue(((n == 0) ? "B-" : "I-") + "NOTBIO!");
        ne.addToIndexes();
        n++;
    }
    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(NamedEntity.class));
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Test(org.junit.Test)

Aggregations

NamedEntity (de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity)44 Test (org.junit.Test)40 JCas (org.apache.uima.jcas.JCas)39 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)23 ArrayList (java.util.ArrayList)15 Type (org.apache.uima.cas.Type)13 CAS (org.apache.uima.cas.CAS)12 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)12 LinkedHashMap (java.util.LinkedHashMap)2 Evaluator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator)1 PossibleValue (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue)1 ValuesGenerator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator)1 ConstraintsGrammar (de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.ConstraintsGrammar)1 Parse (de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.syntaxtree.Parse)1 ParsedConstraints (de.tudarmstadt.ukp.clarin.webanno.constraints.model.ParsedConstraints)1 ParserVisitor (de.tudarmstadt.ukp.clarin.webanno.constraints.visitor.ParserVisitor)1 AgreementResult (de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult)1 ArcDiffAdapter (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter)1 DiffResult (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult)1 FeatureType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType)1