use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.
the class TcfReader method storeReferencesAndTargetsInMap.
/**
 * Creates one {@link CoreferenceLink} per reference of the given TCF entity, adds it to the
 * CAS indexes and registers it in {@code aReferencesMap} under the low-level CAS reference of
 * the new link.
 *
 * @param aReferencesMap
 *            receives the created links, keyed by {@code ll_getFSRef(link)}.
 * @param entity
 *            the TCF referenced entity whose references are converted.
 * @param aCorpusData
 *            the TCF corpus; its references layer is used to resolve the tokens of a reference.
 * @param aTokens
 *            lookup handed to {@code getOffsets} together with the token IDs of the reference
 *            (presumably TCF token ID to CAS token; verify against the caller).
 * @param aJcas
 *            the JCas in which the links are created.
 */
private void storeReferencesAndTargetsInMap(Map<Integer, CoreferenceLink> aReferencesMap, eu.clarin.weblicht.wlfxb.tc.api.ReferencedEntity entity, TextCorpus aCorpusData, Map<String, Token> aTokens, JCas aJcas) {
    for (Reference reference : entity.getReferences()) {
        // Collect the IDs of all tokens covered by this reference.
        StringBuilder sbTokens = new StringBuilder();
        for (eu.clarin.weblicht.wlfxb.tc.api.Token token : aCorpusData.getReferencesLayer().getTokens(reference)) {
            sbTokens.append(token.getID()).append(" ");
        }
        String[] referenceTokens = sbTokens.toString().split(" ");

        // Resolve the span once instead of once per boundary; index 0 is used as the begin
        // offset and index 1 as the end offset.
        int[] offsets = getOffsets(referenceTokens, aTokens);

        CoreferenceLink link = new CoreferenceLink(aJcas);
        link.setBegin(offsets[0]);
        link.setEnd(offsets[1]);

        // References without an explicit type are stored with the type "nam".
        String referencesType = reference.getType() == null ? "nam" : reference.getType();
        link.setReferenceType(referencesType);

        // The relation is optional and only copied when the reference carries one.
        if (reference.getRelation() != null) {
            link.setReferenceRelation(reference.getRelation());
        }

        link.addToIndexes();
        aReferencesMap.put(aJcas.getCasImpl().ll_getFSRef(link), link);
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.
the class WebannoTsv2Writer method convertToTsv.
/**
 * Writes the annotations of {@code aJCas} to {@code aOs} as tab-separated text.
 * <p>
 * The output starts with a single header line naming every exported span type and relation
 * type together with its features. It is followed, for every sentence, by a {@code #id=} line,
 * a {@code #text=} line and one row per token. A token row holds the token id, the token text,
 * one column per span feature, one column per relation feature and one governor column per
 * relation type. Types that declare a {@code FIRST} or {@code NEXT} feature (coreference
 * chains) are not exported.
 *
 * @param aJCas
 *            the JCas to export.
 * @param aOs
 *            the stream the TSV data is written to; it is not closed by this method.
 * @param aEncoding
 *            the character encoding used for every write.
 * @throws IOException
 *             if writing to {@code aOs} fails.
 */
private void convertToTsv(JCas aJCas, OutputStream aOs, String aEncoding) throws IOException, ResourceInitializationException, CASRuntimeException, CASException {
    LowLevelCAS llCas = aJCas.getLowLevelCas();
    tokenIds = new HashMap<>();
    setTokenId(aJCas, tokenIds);
    tokenPositions = new TreeMap<>();
    setTokenPosition(aJCas, tokenPositions);
    // Filled by the helper but not read again inside this method.
    Map<Integer, Integer> getTokensPerSentence = new TreeMap<>();
    setTokenSentenceAddress(aJCas, getTokensPerSentence);

    // Collect the types of all annotations except the structural ones and coreference links.
    Set<Type> allTypes = new LinkedHashSet<>();
    for (Annotation a : select(aJCas, Annotation.class)) {
        if (!(a instanceof Token || a instanceof Sentence || a instanceof DocumentMetaData || a instanceof TagsetDescription || a instanceof CoreferenceLink)) {
            allTypes.add(a.getType());
        }
    }

    // A type that declares a GOVERNOR feature is a relation (arc) type; the rest stay span types.
    Set<Type> relationTypes = new LinkedHashSet<>();
    for (Type type : allTypes) {
        if (type.getFeatures().isEmpty()) {
            continue;
        }
        for (Feature feature : type.getFeatures()) {
            if (feature.getShortName().equals(GOVERNOR)) {
                relationTypes.add(type);
                break;
            }
        }
    }
    allTypes.removeAll(relationTypes);

    // For every relation type, determine the name of the type its governor points to.
    Map<Type, String> relationTypesMap = new HashMap<>();
    for (Type type : relationTypes) {
        // Dependency relations are always reported as attached to POS.
        if (type.getName().equals(Dependency.class.getName())) {
            relationTypesMap.put(type, POS.class.getName());
            continue;
        }
        for (AnnotationFS anno : CasUtil.select(aJCas.getCas(), type)) {
            for (Feature feature : type.getFeatures()) {
                if (feature.getShortName().equals(GOVERNOR)) {
                    relationTypesMap.put(type, anno.getFeatureValue(feature).getType().getName());
                }
            }
        }
    }

    // Header, part 1: span types and their features.
    Map<Feature, Type> spanFeatures = new LinkedHashMap<>();
    spanHeader: for (Type type : allTypes) {
        if (type.getFeatures().isEmpty()) {
            continue;
        }
        for (Feature feature : type.getFeatures()) {
            // coreference annotation not supported
            if (feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
                continue spanHeader;
            }
        }
        IOUtils.write(" # " + type.getName(), aOs, aEncoding);
        for (Feature feature : type.getFeatures()) {
            // Skip the built-in sofa/begin/end features.
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end")) {
                continue;
            }
            spanFeatures.put(feature, type);
            IOUtils.write(" | " + feature.getShortName(), aOs, aEncoding);
        }
    }

    // Header, part 2: relation types, their features and the type they attach to.
    Set<Feature> relationFeatures = new LinkedHashSet<>();
    for (Type type : relationTypes) {
        IOUtils.write(" # " + type.getName(), aOs, aEncoding);
        for (Feature feature : type.getFeatures()) {
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end") || feature.getShortName().equals(GOVERNOR) || feature.getShortName().equals(DEPENDENT)) {
                continue;
            }
            relationFeatures.add(feature);
            IOUtils.write(" | " + feature.getShortName(), aOs, aEncoding);
        }
        // Add the attach type for the relation annotation
        IOUtils.write(" | AttachTo=" + relationTypesMap.get(type), aOs, aEncoding);
    }
    IOUtils.write("\n", aOs, aEncoding);

    // Per span feature: token address -> cell text, as filled in by setTokenAnnos.
    Map<Feature, Map<Integer, String>> allAnnos = new HashMap<>();
    spanValues: for (Type type : allTypes) {
        for (Feature feature : type.getFeatures()) {
            // coreference annotation not supported
            if (feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
                continue spanValues;
            }
        }
        for (Feature feature : type.getFeatures()) {
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end")) {
                continue;
            }
            Map<Integer, String> tokenAnnoMap = new TreeMap<>();
            setTokenAnnos(aJCas.getCas(), tokenAnnoMap, type, feature);
            allAnnos.put(feature, tokenAnnoMap);
        }
    }

    // Per relation feature: token address -> cell text, as filled in by setRelationFeatureAnnos.
    Map<Feature, Map<Integer, String>> relAnnos = new HashMap<>();
    for (Type type : relationTypes) {
        for (Feature feature : type.getFeatures()) {
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end") || feature.getShortName().equals(GOVERNOR) || feature.getShortName().equals(DEPENDENT)) {
                continue;
            }
            Map<Integer, String> tokenAnnoMap = new HashMap<>();
            setRelationFeatureAnnos(aJCas.getCas(), tokenAnnoMap, type, feature);
            relAnnos.put(feature, tokenAnnoMap);
        }
    }

    // Per relation type: token address -> governor cell text, as filled in by
    // setRelationGovernorPos.
    Map<Type, Map<Integer, String>> governorAnnos = new HashMap<>();
    for (Type type : relationTypes) {
        Map<Integer, String> govAnnoMap = new HashMap<>();
        setRelationGovernorPos(aJCas.getCas(), govAnnoMap, type);
        governorAnnos.put(type, govAnnoMap);
    }

    // Body: one block per sentence, one row per token.
    int sentId = 1;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        IOUtils.write("#id=" + sentId++ + "\n", aOs, aEncoding);
        IOUtils.write("#text=" + sentence.getCoveredText().replace("\n", "") + "\n", aOs, aEncoding);
        for (Token token : selectCovered(Token.class, sentence)) {
            // Every lookup below is keyed by the same token address, so resolve it only once.
            int tokenAddress = llCas.ll_getFSRef(token);
            IOUtils.write(tokenIds.get(tokenAddress) + "\t" + token.getCoveredText() + "\t", aOs, aEncoding);

            // all span annotations on this token
            for (Feature feature : spanFeatures.keySet()) {
                String annos = allAnnos.get(feature).get(tokenAddress);
                if (annos == null) {
                    // Types listed in multipleSpans use "O" as the empty marker, all others "_".
                    if (multipleSpans.contains(spanFeatures.get(feature).getName())) {
                        IOUtils.write("O\t", aOs, aEncoding);
                    } else {
                        IOUtils.write("_\t", aOs, aEncoding);
                    }
                } else {
                    IOUtils.write(annos + "\t", aOs, aEncoding);
                }
            }

            for (Type type : relationTypes) {
                for (Feature feature : type.getFeatures()) {
                    if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end") || feature.getShortName().equals(GOVERNOR) || feature.getShortName().equals(DEPENDENT)) {
                        continue;
                    }
                    String annos = relAnnos.get(feature).get(tokenAddress);
                    if (annos == null) {
                        IOUtils.write("_\t", aOs, aEncoding);
                    } else {
                        IOUtils.write(annos + "\t", aOs, aEncoding);
                    }
                }
                // the governor positions
                String govPos = governorAnnos.get(type).get(tokenAddress);
                if (govPos == null) {
                    IOUtils.write("_\t", aOs, aEncoding);
                } else {
                    IOUtils.write(govPos + "\t", aOs, aEncoding);
                }
            }
            IOUtils.write("\n", aOs, aEncoding);
        }
        // Blank line between sentences.
        IOUtils.write("\n", aOs, aEncoding);
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.
the class TcfReader method convertCoreference.
/**
 * Converts the TCF references layer into {@link CoreferenceChain} and {@link CoreferenceLink}
 * annotations.
 * <p>
 * In TCF a chain is expressed through the <b>rel</b> and <b>target</b> attributes of the
 * references of an entity, for example:
 *
 * <pre>
 * {@literal <entity>}
 * {@literal   <reference ID="rc_0" tokenIDs="t_0" mintokIDs="t_0" type="nam"/>}
 * {@literal   <reference ID="rc_1" tokenIDs="t_6" mintokIDs="t_6" type="pro.per3"
 *              rel="anaphoric" target="rc_0"/>}
 * {@literal </entity>}
 * </pre>
 *
 * For every referenced entity the links are first created by
 * {@code storeReferencesAndTargetsInMap} and collected in a map sorted by link address. The
 * links are then connected in that order: the first one becomes the head of a new chain and
 * each following one is appended as the {@code next} of its predecessor. A predecessor without
 * a relation takes over the relation of the link appended to it.
 * <p>
 * Nothing happens when the corpus has no references layer.
 */
private void convertCoreference(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
    if (aCorpusData.getReferencesLayer() == null) {
        // No layer to read from.
        return;
    }
    for (int i = 0; i < aCorpusData.getReferencesLayer().size(); i++) {
        eu.clarin.weblicht.wlfxb.tc.api.ReferencedEntity entity = aCorpusData.getReferencesLayer().getReferencedEntity(i);

        // Links of this entity, ordered by their address in the CAS.
        Map<Integer, CoreferenceLink> referencesMap = new TreeMap<>();
        storeReferencesAndTargetsInMap(referencesMap, entity, aCorpusData, aTokens, aJCas);

        CoreferenceChain chain = new CoreferenceChain(aJCas);
        CoreferenceLink tail = null;
        for (CoreferenceLink current : referencesMap.values()) {
            // The first link opens the chain; the chain is indexed as soon as it has a head.
            if (chain.getFirst() == null) {
                chain.setFirst(current);
                tail = chain.getFirst();
                chain.addToIndexes();
                continue;
            }

            // Every further link is appended behind the current tail.
            tail.setNext(current);
            if (tail.getReferenceRelation() == null) {
                tail.setReferenceRelation(current.getReferenceRelation());
            }
            tail = tail.getNext();
            tail.addToIndexes();
        }
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.
the class TcfWriter method writeCoreference.
/**
 * Exports the coreference chains of the CAS into a newly created TCF references layer.
 * <p>
 * The layer is only created when the CAS contains at least one {@link CoreferenceChain}. Its
 * tag set name is taken from the first {@link TagsetDescription} declared for
 * {@link CoreferenceLink} and defaults to {@code "TueBaDz"}. For each chain one TCF reference
 * is created per link and consecutive references are connected with the relation stored on
 * the earlier link.
 *
 * @param aJCas
 *            the JCas to read the chains from.
 * @param aTextCorpus
 *            the TCF corpus in which the references layer is created.
 * @param aTokensBeginPositionMap
 *            TCF tokens keyed by the begin offset of the corresponding CAS token.
 */
private void writeCoreference(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) {
    if (!JCasUtil.exists(aJCas, CoreferenceChain.class)) {
        // Do nothing if there are no coreference chains in the CAS
        getLogger().debug("Layer [" + TextCorpusLayerTag.REFERENCES.getXmlName() + "]: empty");
        return;
    }

    String tagSetName = "TueBaDz";
    for (TagsetDescription tagSet : select(aJCas, TagsetDescription.class)) {
        // Constant-first comparison: a tag set description whose layer is not set must not
        // abort the export with a NullPointerException.
        if (CoreferenceLink.class.getName().equals(tagSet.getLayer())) {
            tagSetName = tagSet.getName();
            break;
        }
    }

    ReferencesLayer coreferencesLayer = aTextCorpus.createReferencesLayer(null, tagSetName, null);
    getLogger().debug("Layer [" + TextCorpusLayerTag.REFERENCES.getXmlName() + "]: created");

    for (CoreferenceChain chain : select(aJCas, CoreferenceChain.class)) {
        CoreferenceLink prevLink = null;
        Reference prevRef = null;
        List<Reference> refs = new ArrayList<>();
        for (CoreferenceLink link : chain.links()) {
            // Get covered tokens
            List<eu.clarin.weblicht.wlfxb.tc.api.Token> tokens = new ArrayList<>();
            for (Token token : selectCovered(Token.class, link)) {
                tokens.add(aTokensBeginPositionMap.get(token.getBegin()));
            }

            // Create current reference
            Reference ref = coreferencesLayer.createReference(link.getReferenceType(), tokens, null);

            // Special handling for expletive relations
            if (REL_TYPE_EXPLETIVE.equals(link.getReferenceRelation())) {
                coreferencesLayer.addRelation(ref, REL_TYPE_EXPLETIVE);
                // An expletive link is not connected to a previous reference and is not
                // added to the referent list, so we bail out here.
                // NOTE(review): presumably an expletive has no successor in the chain - confirm.
                continue;
            }

            // Create relation between previous and current reference
            if (prevLink != null) {
                coreferencesLayer.addRelation(prevRef, prevLink.getReferenceRelation(), ref);
            }

            prevLink = link;
            prevRef = ref;
            refs.add(ref);
        }
        coreferencesLayer.addReferent(refs);
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.
the class WebAnnoSemanticGraphReader method convertToCas.
/**
 * Reads text/hypothesis pairs from {@code aIs} into the CAS.
 * <p>
 * Every non-blank line is expected to contain a text and a hypothesis separated by
 * {@code TAB > TAB} (the text entails the hypothesis) or {@code TAB X TAB} (it does not). For
 * each line the method creates
 * <ul>
 * <li>one {@link Token} per whitespace-separated word of the text, one for the separator
 * symbol and one per word of the hypothesis,</li>
 * <li>a {@link CoreferenceChain} whose first link covers the text (type {@code "text"},
 * relation {@code "entails"} or {@code "do not entails"}) and whose second link covers the
 * hypothesis (type {@code "hypothesis"}),</li>
 * <li>a {@link Sentence} spanning the whole line.</li>
 * </ul>
 * The document text is rebuilt from the tokens, each followed by a single space, with one
 * line per input line. Blank input lines are skipped. A missing text or hypothesis results in
 * a zero-width link.
 *
 * @param aJCas
 *            the JCas to populate.
 * @param aIs
 *            the stream to read from; it is not closed by this method.
 * @param aEncoding
 *            the character encoding of the stream.
 * @throws IOException
 *             if the stream cannot be read.
 */
public void convertToCas(JCas aJCas, InputStream aIs, String aEncoding) throws IOException {
    StringBuilder text = new StringBuilder();
    LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding);
    int tokenBeginPosition = 0;
    while (lineIterator.hasNext()) {
        String line = lineIterator.next();
        if (line.trim().isEmpty()) {
            // A blank line has neither text nor hypothesis to annotate.
            continue;
        }

        // split() drops trailing empty segments, so either part may be absent.
        String[] contents = line.split("\t>\t|\tX\t");
        String premise = contents.length > 0 ? contents[0] : "";
        String hypothesis = contents.length > 1 ? contents[1] : "";
        boolean entails = line.contains("\t>\t");

        int sentenceBegin = tokenBeginPosition;

        // First link: the text.
        int chainBegin = tokenBeginPosition;
        tokenBeginPosition = addTokens(aJCas, premise, tokenBeginPosition, text);
        CoreferenceChain chain = new CoreferenceChain(aJCas);
        // The position points one past the trailing space, hence the "- 1"; the max() keeps
        // the span valid when the segment produced no tokens.
        CoreferenceLink link = new CoreferenceLink(aJCas, chainBegin, Math.max(chainBegin, tokenBeginPosition - 1));
        link.setReferenceType("text");
        link.setReferenceRelation(entails ? "entails" : "do not entails");
        link.addToIndexes();
        chain.setFirst(link);

        // The separator symbol is a single-character token of its own.
        Token separator = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + 1);
        separator.addToIndexes();
        tokenBeginPosition = separator.getEnd() + 1;
        text.append(entails ? "> " : "X ");

        // Second link: the hypothesis, i.e. the segment after the separator.
        chainBegin = tokenBeginPosition;
        tokenBeginPosition = addTokens(aJCas, hypothesis, tokenBeginPosition, text);
        CoreferenceLink nextLink = new CoreferenceLink(aJCas, chainBegin, Math.max(chainBegin, tokenBeginPosition - 1));
        nextLink.setReferenceType("hypothesis");
        nextLink.addToIndexes();
        link.setNext(nextLink);
        chain.addToIndexes();

        text.append("\n");
        Sentence outSentence = new Sentence(aJCas);
        outSentence.setBegin(sentenceBegin);
        outSentence.setEnd(tokenBeginPosition);
        outSentence.addToIndexes();

        // Step over the line break appended above.
        tokenBeginPosition = tokenBeginPosition + 1;
    }
    aJCas.setDocumentText(text.toString());
}

/**
 * Creates and indexes one token per whitespace-separated word of {@code aSegment}, starting at
 * {@code aBegin}, and appends each word followed by a space to {@code aText}.
 *
 * @return the offset at which the next token would start; equal to {@code aBegin} when the
 *         segment contains no words.
 */
private int addTokens(JCas aJCas, String aSegment, int aBegin, StringBuilder aText) {
    int position = aBegin;
    StringTokenizer words = new StringTokenizer(aSegment);
    while (words.hasMoreTokens()) {
        String word = words.nextToken();
        Token outToken = new Token(aJCas, position, position + word.length());
        outToken.addToIndexes();
        position = outToken.getEnd() + 1;
        aText.append(word).append(" ");
    }
    return position;
}
Aggregations