use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.
the class Tsv3XCasDocumentBuilder method scanUnitForActiveColumns.
/**
 * Walks over all columns of the schema of the document the unit belongs to. For every column
 * that has at least one annotation on the given unit, the column is activated on the document
 * unless its feature type is {@code PLACEHOLDER}. For relation reference columns, a target type
 * hint is additionally stored on the column.
 *
 * @param aUnit
 *            the unit whose annotations are inspected.
 * @throws IllegalStateException
 *             if the first relation annotation of a relation reference column has no value in
 *             its {@code FEAT_REL_SOURCE} feature.
 */
private static void scanUnitForActiveColumns(TsvUnit aUnit) {
    for (TsvColumn column : aUnit.getDocument().getSchema().getColumns()) {
        List<AnnotationFS> columnAnnotations = aUnit.getAnnotationsForColumn(column);
        if (columnAnnotations.isEmpty()) {
            // Nothing on this unit for the column - it neither gets activated nor hinted.
            continue;
        }

        if (!PLACEHOLDER.equals(column.featureType)) {
            aUnit.getDocument().activateColumn(column);
        }

        boolean relationReference = RELATION.equals(column.layerType)
                && RELATION_REF.equals(column.featureType);
        if (!relationReference) {
            continue;
        }

        // The type hint for a relation reference column is not taken from the type system
        // definition; it is derived from the end point referenced by the first annotation
        // found for the column.
        AnnotationFS firstRelation = columnAnnotations.get(0);
        FeatureStructure endpoint = FSUtil.getFeature(firstRelation, FEAT_REL_SOURCE,
                FeatureStructure.class);
        if (endpoint == null) {
            throw new IllegalStateException("Relation does not have its source feature (" + FEAT_REL_SOURCE + ") set: " + firstRelation);
        }

        boolean dkproDependency = column.uimaType.getName().equals(Dependency.class.getName());
        if (dkproDependency) {
            // COMPATIBILITY NOTE:
            // WebAnnoTsv3Writer hard-changes the target type for DKPro Core Dependency
            // annotations from Token to POS - the reason is not really clear. Probably because
            // the Dependency relations in the WebAnno UI attach to POS (Tokens are not visible
            // as annotations in the UI).
            column.setTargetTypeHint(
                    aUnit.getDocument().getJCas().getTypeSystem().getType(POS.class.getName()));
        }
        else {
            column.setTargetTypeHint(endpoint.getType());
        }
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.
the class Conll2009Reader method convert.
/**
 * Reads sentences via {@code readSentence} until it returns {@code null} and adds their text
 * and annotations to the given JCas: tokens, and - depending on the {@code read*} switches -
 * lemmas, part-of-speech tags, morphological features, dependencies and semantic
 * predicates/arguments. Every sentence is followed by a line break in the document text.
 *
 * @param aJCas
 *            the JCas to populate.
 * @param aReader
 *            the reader the sentences are read from.
 * @throws IOException
 *             if reading fails or if the part-of-speech mapping provider cannot be configured.
 */
public void convert(JCas aJCas, BufferedReader aReader) throws IOException {
    if (readPos) {
        try {
            posMappingProvider.configure(aJCas.getCas());
        }
        catch (AnalysisEngineProcessException e) {
            throw new IOException(e);
        }
    }

    JCasBuilder builder = new JCasBuilder(aJCas);

    for (List<String[]> rows = readSentence(aReader); rows != null; rows = readSentence(aReader)) {
        if (rows.isEmpty()) {
            // A sentence without any rows produces neither text nor annotations.
            continue;
        }

        int sentenceBegin = builder.getPosition();
        int sentenceEnd = sentenceBegin;

        // Tokens, Lemma, POS
        Map<Integer, Token> tokens = new HashMap<>();
        List<SemPred> preds = new ArrayList<>();

        int rowsLeft = rows.size();
        for (String[] row : rows) {
            rowsLeft--;

            // Token text goes into the document first, then the token is registered by its ID
            Token token = builder.add(row[FORM], Token.class);
            tokens.put(Integer.valueOf(row[ID]), token);

            // Tokens are separated by a single space, no space after the last one
            if (rowsLeft > 0) {
                builder.add(" ");
            }

            // Lemma
            if (!UNUSED.equals(row[LEMMA]) && readLemma) {
                Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd());
                lemma.setValue(row[LEMMA]);
                lemma.addToIndexes();
                token.setLemma(lemma);
            }

            // Part-of-speech tag
            if (!UNUSED.equals(row[POS]) && readPos) {
                Type tagType = posMappingProvider.getTagType(row[POS]);
                POS posAnno = (POS) aJCas.getCas().createAnnotation(tagType, token.getBegin(),
                        token.getEnd());
                posAnno.setPosValue(row[POS].intern());
                // WebAnno did not yet backport the coarse grained POS feature from
                // DKPro Core 1.9.0, so no coarse value is assigned here.
                posAnno.addToIndexes();
                token.setPos(posAnno);
            }

            // Morphological features
            if (!UNUSED.equals(row[FEAT]) && readMorph) {
                MorphologicalFeatures morph = new MorphologicalFeatures(aJCas, token.getBegin(),
                        token.getEnd());
                morph.setValue(row[FEAT]);
                morph.addToIndexes();
            }

            // Semantic predicate
            if (!UNUSED.equals(row[PRED]) && readSemanticPredicate) {
                SemPred pred = new SemPred(aJCas, token.getBegin(), token.getEnd());
                pred.setCategory(row[PRED]);
                pred.addToIndexes();
                preds.add(pred);
            }

            sentenceEnd = token.getEnd();
        }

        // Dependencies
        if (readDependency) {
            for (String[] row : rows) {
                if (UNUSED.equals(row[DEPREL])) {
                    continue;
                }

                int dependentId = Integer.parseInt(row[ID]);
                int headId = Integer.parseInt(row[HEAD]);

                // A head ID of 0 marks the root, which is modelled as a loop onto itself.
                // Not using ROOT here because WebAnno cannot deal with elevated types.
                Token dependent = tokens.get(dependentId);
                Token governor = headId == 0 ? dependent : tokens.get(headId);

                Dependency rel = new Dependency(aJCas);
                rel.setGovernor(governor);
                rel.setDependent(dependent);
                rel.setDependencyType(row[DEPREL]);
                rel.setBegin(dependent.getBegin());
                rel.setEnd(dependent.getEnd());
                // This is set via FSUtil because we still use the DKPro Core 1.7.0 JCas
                // classes
                FSUtil.setFeature(rel, "flavor", DependencyFlavor.BASIC);
                rel.addToIndexes();
            }
        }

        // Semantic arguments
        if (readSemanticPredicate) {
            // The arguments of the n-th predicate are found in the n-th column from APRED on
            int argColumn = APRED;
            for (SemPred pred : preds) {
                List<SemArgLink> links = new ArrayList<>();
                for (String[] row : rows) {
                    if (UNUSED.equals(row[argColumn])) {
                        continue;
                    }
                    Token token = tokens.get(Integer.valueOf(row[ID]));
                    SemArg arg = new SemArg(aJCas, token.getBegin(), token.getEnd());
                    arg.addToIndexes();

                    SemArgLink link = new SemArgLink(aJCas);
                    link.setRole(row[argColumn]);
                    link.setTarget(arg);
                    links.add(link);
                }
                pred.setArguments(FSCollectionFactory.createFSArray(aJCas, links));
                argColumn++;
            }
        }

        // Sentence
        Sentence sentence = new Sentence(aJCas, sentenceBegin, sentenceEnd);
        sentence.addToIndexes();

        // One sentence per line.
        builder.add("\n");
    }

    builder.close();
}
use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.
the class Conll2009Writer method convert.
/**
 * Writes every sentence of the JCas to the given writer, one tab-separated line with fifteen
 * fields per token and an empty line after each sentence. Which of the lemma, part-of-speech,
 * morphology, dependency and semantic predicate fields are filled is controlled by the
 * {@code write*} switches; fields that are not filled contain {@code UNUSED}.
 *
 * @param aJCas
 *            the JCas to read the annotations from.
 * @param aOut
 *            the writer receiving the output.
 * @throws IllegalStateException
 *             if a token is the dependent of more than one basic dependency.
 */
private void convert(JCas aJCas, PrintWriter aOut) {
    Map<Token, Collection<SemPred>> predIdx = indexCovered(aJCas, Token.class, SemPred.class);
    Map<SemArg, Collection<Token>> argIdx = indexCovered(aJCas, SemArg.class, Token.class);

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        // Insertion-ordered so that rows are written in token order
        Map<Token, Row> rowsByToken = new LinkedHashMap<>();

        List<Token> sentenceTokens = selectCovered(Token.class, sentence);

        // FEATS are only included if there is exactly one morphology annotation per token
        List<MorphologicalFeatures> morphology = selectCovered(MorphologicalFeatures.class,
                sentence);
        boolean useFeats = sentenceTokens.size() == morphology.size();

        List<SemPred> preds = selectCovered(SemPred.class, sentence);

        // Tokens
        int position = 0;
        for (Token token : sentenceTokens) {
            Row row = new Row();
            row.id = position + 1;
            row.token = token;
            row.args = new SemArgLink[preds.size()];
            if (useFeats) {
                row.feats = morphology.get(position);
            }

            // If there are multiple semantic predicates for the current token, then
            // we keep only the first
            Collection<SemPred> tokenPreds = predIdx.get(row.token);
            if (tokenPreds != null && !tokenPreds.isEmpty()) {
                row.pred = tokenPreds.iterator().next();
            }

            rowsByToken.put(row.token, row);
            position++;
        }

        // Dependencies - only those without a flavor or with the basic flavor are considered
        List<Dependency> basicDeps = selectCovered(Dependency.class, sentence).stream()
                .filter(dep -> {
                    String flavor = FSUtil.getFeature(dep, "flavor", String.class);
                    if (flavor == null) {
                        return true;
                    }
                    return DependencyFlavor.BASIC.equals(flavor);
                })
                .collect(Collectors.toList());

        for (Dependency dep : basicDeps) {
            Row dependentRow = rowsByToken.get(dep.getDependent());
            if (dependentRow.deprel != null) {
                throw new IllegalStateException("Illegal basic dependency structure - token [" + dependentRow.token.getCoveredText() + "] is dependent of more than one dependency.");
            }
            dependentRow.deprel = dep;
        }

        // Semantic arguments - slot n of a row's argument array belongs to the n-th predicate
        int predSlot = 0;
        for (SemPred semPred : preds) {
            FSArray argLinks = semPred.getArguments();
            for (SemArgLink link : select(argLinks, SemArgLink.class)) {
                for (Token covered : argIdx.get(link.getTarget())) {
                    rowsByToken.get(covered).args[predSlot] = link;
                }
            }
            predSlot++;
        }

        // Write sentence in CONLL 2009 format
        for (Row row : rowsByToken.values()) {
            String form = row.token.getCoveredText();

            String lemma = UNUSED;
            if (writeLemma && (row.token.getLemma() != null)) {
                lemma = row.token.getLemma().getValue();
            }

            String pos = UNUSED;
            if (writePos && (row.token.getPos() != null)) {
                pos = row.token.getPos().getPosValue();
            }

            String feat = UNUSED;
            if (writeMorph && (row.feats != null)) {
                feat = row.feats.getValue();
            }

            String deprel = UNUSED;
            int headId = UNUSED_INT;
            if (writeDependency && (row.deprel != null)) {
                deprel = row.deprel.getDependencyType();
                headId = rowsByToken.get(row.deprel.getGovernor()).id;
                if (headId == row.id) {
                    // ROOT dependencies may be modeled as a loop; these get head 0.
                    headId = 0;
                }
            }
            String head = headId != UNUSED_INT ? Integer.toString(headId) : UNUSED;

            String fillpred = UNUSED;
            String pred = UNUSED;
            StringBuilder apreds = new StringBuilder();
            if (writeSemanticPredicate) {
                if (row.pred != null) {
                    fillpred = "Y";
                    pred = row.pred.getCategory();
                }

                for (SemArgLink link : row.args) {
                    // Separator is only inserted once something has been written already
                    if (apreds.length() > 0) {
                        apreds.append('\t');
                    }
                    apreds.append(link != null ? link.getRole() : UNUSED);
                }
            }

            // The predicted columns (PLEMMA, PPOS, PFEAT, PHEAD, PDEPREL) repeat the value of
            // their non-predicted counterpart.
            aOut.printf("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", row.id, form, lemma, lemma, pos, pos, feat, feat, head, head, deprel, deprel, fillpred, pred, apreds);
        }

        aOut.println();
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.
the class TcfWriter method writeDependency.
/**
 * Adds a dependency parsing layer to the text corpus and fills it with one parse per sentence
 * that covers at least one dependency. Does nothing except logging if the JCas contains no
 * {@link Dependency} annotation at all.
 *
 * @param aJCas
 *            the JCas to read the dependencies from.
 * @param aTextCorpus
 *            the text corpus on which the layer is created.
 * @param aTokensBeginPositionMap
 *            corpus tokens, looked up by the begin offset of the dependent and of the governor
 *            of each dependency.
 */
private void writeDependency(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) {
    if (!JCasUtil.exists(aJCas, Dependency.class)) {
        // Do nothing if there are no dependencies in the CAS
        getLogger().debug("Layer [" + TextCorpusLayerTag.PARSING_DEPENDENCY.getXmlName() + "]: empty");
        return;
    }

    // Tag set name: "tiger" unless a tag set description for the dependency layer says otherwise
    String tagSetName = "tiger";
    String dependencyLayerName = Dependency.class.getName();
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (description.getLayer().equals(dependencyLayerName)) {
            tagSetName = description.getName();
            break;
        }
    }

    DependencyParsingLayer layer = aTextCorpus.createDependencyParsingLayer(tagSetName, false,
            true);
    getLogger().debug("Layer [" + TextCorpusLayerTag.PARSING_DEPENDENCY.getXmlName() + "]: created");

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        List<eu.clarin.weblicht.wlfxb.tc.api.Dependency> parse = new ArrayList<>();

        for (Dependency dep : selectCovered(Dependency.class, sentence)) {
            String function = dep.getDependencyType();
            eu.clarin.weblicht.wlfxb.tc.api.Token dependentToken = aTokensBeginPositionMap
                    .get(dep.getDependent().getBegin());
            eu.clarin.weblicht.wlfxb.tc.api.Token governorToken = aTokensBeginPositionMap
                    .get(dep.getGovernor().getBegin());
            parse.add(layer.createDependency(function, dependentToken, governorToken));
        }

        // Sentences without any dependency do not contribute a parse
        if (!parse.isEmpty()) {
            layer.addParse(parse);
        }
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.
the class WebannoTsv1Reader method createDependency.
/**
 * Adds dependency annotations to the CAS: one {@link Dependency} for every token number from 1
 * to {@code tokens.size()} that has an entry in {@code dependencyFunction}.
 *
 * @param aJCas
 *            the JCas the dependencies are created in.
 * @param tokens
 *            only its size is used, as the upper bound of the token numbers visited.
 * @param dependencyFunction
 *            dependency type per token number; token numbers without an entry are skipped.
 * @param dependencyDependent
 *            per token number, the number of the token that is set as governor; {@code 0} marks
 *            a root, in which case the token becomes its own governor.
 * @param tokensStored
 *            tokens keyed by {@code "t_" + tokenNumber}.
 */
private void createDependency(JCas aJCas, Map<Integer, String> tokens, Map<Integer, String> dependencyFunction, Map<Integer, Integer> dependencyDependent, Map<String, Token> tokensStored) {
    for (int tokenNo = 1; tokenNo <= tokens.size(); tokenNo++) {
        String function = dependencyFunction.get(tokenNo);
        if (function == null) {
            continue;
        }

        Dependency relation = new Dependency(aJCas);
        relation.setDependencyType(function);

        int governorNo = dependencyDependent.get(tokenNo);
        Token dependent = tokensStored.get("t_" + tokenNo);
        // A root (governor number 0) is attached to itself
        Token governor = governorNo == 0 ? dependent : tokensStored.get("t_" + governorNo);

        // The relation spans from the earlier begin to the later end of its two end points,
        // regardless of the direction of the arc: for A = (20, 26) and B = (30, 36) the
        // dependency gets (20, 36). For a root both end points are the same token, so the span
        // is just that token's span.
        relation.setBegin(Math.min(dependent.getBegin(), governor.getBegin()));
        relation.setEnd(Math.max(dependent.getEnd(), governor.getEnd()));

        relation.setDependent(dependent);
        relation.setGovernor(governor);
        relation.addToIndexes();
    }
}
Aggregations