use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.
the class TcfWriter method writeNamedEntity.
/**
 * Writes the named entities found in the CAS into a named entities layer of the TCF corpus.
 * <p>
 * No layer is created when the CAS contains no {@link NamedEntity}. Otherwise the layer is
 * created under the name of the first {@link TagsetDescription} whose layer equals the
 * {@link NamedEntity} class name, or under "BART" if there is no such description.
 *
 * @param aJCas
 *            the CAS providing named entities, tokens and tagset descriptions.
 * @param aTextCorpus
 *            the TCF corpus in which the layer is created.
 * @param aTokensBeginPositionMap
 *            TCF tokens, looked up by the begin offset of the corresponding CAS token.
 */
private void writeNamedEntity(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) {
    // Without any named entity in the CAS there is nothing to export
    if (!JCasUtil.exists(aJCas, NamedEntity.class)) {
        getLogger().debug("Layer [" + TextCorpusLayerTag.NAMED_ENTITIES.getXmlName() + "]: empty");
        return;
    }

    // Use the tagset name recorded for the named entity layer, falling back to the default
    String layerTagSet = "BART";
    String namedEntityTypeName = NamedEntity.class.getName();
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (description.getLayer().equals(namedEntityTypeName)) {
            layerTagSet = description.getName();
            break;
        }
    }

    NamedEntitiesLayer layer = aTextCorpus.createNamedEntitiesLayer(layerTagSet);
    getLogger().debug("Layer [" + TextCorpusLayerTag.NAMED_ENTITIES.getXmlName() + "]: created");

    for (NamedEntity entity : select(aJCas, NamedEntity.class)) {
        // Translate every CAS token covered by the entity into its TCF counterpart
        List<Token> coveredTokens = selectCovered(aJCas, Token.class, entity.getBegin(), entity.getEnd());
        List<eu.clarin.weblicht.wlfxb.tc.api.Token> tcfTokens = new ArrayList<>();
        for (Token coveredToken : coveredTokens) {
            tcfTokens.add(aTokensBeginPositionMap.get(coveredToken.getBegin()));
        }
        layer.addEntity(entity.getValue(), tcfTokens);
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.
the class WebannoTsv1Reader method createNamedEntity.
/**
 * Creates {@link NamedEntity} annotations in the CAS from CoNLL-style BIO tags.
 * <p>
 * Tokens are visited by their 1-based number {@code i}. The tag string of a token may hold
 * several tags separated by {@code |}; the position of a tag within that list is its slot, and
 * an entity started in a slot is continued by an {@code I_}/{@code I-} tag in the same slot of
 * a later token. A tag of {@code O} leaves its slot untouched. A tag without a BIO prefix yields
 * one entity per token with the whole tag as value.
 * <p>
 * Tokens without an entry in {@code aNamedEntityMap} are treated as untagged. An
 * {@code I_}/{@code I-} tag whose slot has no entity to continue starts a new entity instead of
 * failing.
 *
 * @param aNamedEntityMap
 *            tag string per token number.
 * @param aJCas
 *            the CAS in which the annotations are created.
 * @param aTokensMap
 *            only its size is used, as the number of tokens to visit.
 * @param aJcasTokens
 *            CAS tokens keyed by {@code "t_" + tokenNumber}.
 */
private void createNamedEntity(Map<Integer, String> aNamedEntityMap, JCas aJCas, Map<Integer, String> aTokensMap, Map<String, Token> aJcasTokens) {
    // Most recently started entity per slot of the "|"-separated tag list
    Map<Integer, NamedEntity> indexedNeAnnos = new LinkedHashMap<>();
    for (int i = 1; i <= aTokensMap.size(); i++) {
        String tags = aNamedEntityMap.get(i);
        if (tags == null || tags.equals("O")) {
            continue;
        }
        Token token = aJcasTokens.get("t_" + i);
        // to maintain multiple span ne annotation in the same index
        int index = 1;
        for (String ne : tags.split("\\|")) {
            boolean begins = ne.startsWith("B_") || ne.startsWith("B-");
            boolean continues = ne.startsWith("I_") || ne.startsWith("I-");
            NamedEntity openEntity = continues ? indexedNeAnnos.get(index) : null;
            if (ne.equals("O")) {
                // for annotations such as B_LOC|O|I_PER and the like: the slot is only skipped
            } else if (openEntity != null) {
                // The end offset is an index key, so the annotation has to leave the indexes
                // while it is being changed and be added back afterwards
                openEntity.removeFromIndexes();
                openEntity.setEnd(token.getEnd());
                openEntity.addToIndexes();
            } else {
                // Start of an entity: a B tag, an I tag with nothing to continue, or a tag that
                // is not in IOB format (one entity per token, multi-token entities cannot be
                // detected in that case)
                NamedEntity outNamedEntity = new NamedEntity(aJCas, token.getBegin(), token.getEnd());
                outNamedEntity.setValue((begins || continues) ? ne.substring(2) : ne);
                outNamedEntity.addToIndexes();
                indexedNeAnnos.put(index, outNamedEntity);
            }
            index++;
        }
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.
the class WebAnnoTsv3WriterTestBase method testMultiTokenSpanWithoutFeatureValue.
/**
 * Writes a single named entity that spans the entire document text and has no value set.
 */
@Test
public void testMultiTokenSpanWithoutFeatureValue() throws Exception {
    JCas document = makeJCasOneSentence();

    // One entity from the first to the last character of the text, value left unset
    int documentEnd = document.getDocumentText().length();
    NamedEntity wholeText = new NamedEntity(document, 0, documentEnd);
    wholeText.addToIndexes();

    writeAndAssertEquals(document, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(NamedEntity.class));
}
use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.
the class WebAnnoTsv3WriterTestBase method testSingleNonMultiTokenRelationWithoutFeatureValue.
/**
 * Writes one relation without feature values between two named entities, each of which covers
 * two tokens.
 */
@Test
public void testSingleNonMultiTokenRelationWithoutFeatureValue() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    // Governor covers tokens 1-2, dependent covers tokens 3-4
    List<Token> sentenceTokens = new ArrayList<>(select(jcas, Token.class));
    int governorBegin = sentenceTokens.get(0).getBegin();
    int governorEnd = sentenceTokens.get(1).getEnd();
    int dependentBegin = sentenceTokens.get(2).getBegin();
    int dependentEnd = sentenceTokens.get(3).getEnd();

    NamedEntity gov = new NamedEntity(jcas, governorBegin, governorEnd);
    gov.addToIndexes();
    NamedEntity dep = new NamedEntity(jcas, dependentBegin, dependentEnd);
    dep.addToIndexes();

    String relationTypeName = "webanno.custom.Relation";
    Type relationType = cas.getTypeSystem().getType(relationTypeName);

    // DKPro Core conventions: the relation takes the offsets of the dependent. (The WebAnno
    // legacy conventions would instead span from the smaller begin to the larger end of
    // governor and dependent.)
    AnnotationFS relation = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd());
    FSUtil.setFeature(relation, "Governor", gov);
    FSUtil.setFeature(relation, "Dependent", dep);
    cas.addFsToIndexes(relation);

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(NamedEntity.class), WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList(relationTypeName));
}
use of de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity in project webanno by webanno.
the class WebAnnoTsv3WriterTestBase method testTokenBoundedBioLookAlike.
/**
 * Writes one single-token named entity per token whose values look like BIO tags: the value on
 * the first token starts with "B-", the values on all following tokens start with "I-".
 */
@Test
public void testTokenBoundedBioLookAlike() throws Exception {
    JCas jcas = makeJCasOneSentence();

    // Only the very first token gets the "B-" prefix
    String prefix = "B-";
    for (Token token : select(jcas, Token.class)) {
        NamedEntity lookAlike = new NamedEntity(jcas, token.getBegin(), token.getEnd());
        lookAlike.setValue(prefix + "NOTBIO!");
        lookAlike.addToIndexes();
        prefix = "I-";
    }

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(NamedEntity.class));
}
Aggregations