Use of com.joliciel.talismane.parser.DependencyArc in the project talismane by joliciel-informatique.
Class CorpusModifier, method onNextParseConfiguration.
/**
 * Runs every configured {@link ModifyCommand} against every dependency arc of
 * the given parse configuration.
 * <p>
 * A command applies to an arc when each of its five criteria (governor
 * pos-tag, governor text, dependent pos-tag, dependent text, label) is either
 * the wildcard or equal to the corresponding value read from the arc. Governor
 * and dependent text are compared against the lower-cased original token text.
 * An arc to which a command applies is removed; for a {@code Replace} command
 * a new arc with the same head and dependent and the command's new label is
 * added in its place.
 *
 * @param parseConfiguration the configuration whose dependencies are modified in place
 * @throws CircularDependencyException declared for the dependency additions performed here
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws CircularDependencyException {
    // Work on a copy of the dependency collection: the configuration itself is
    // modified inside the loop (presumably the copy keeps iteration stable).
    List<DependencyArc> snapshot = new ArrayList<DependencyArc>(parseConfiguration.getDependencies());

    for (DependencyArc arc : snapshot) {
        for (ModifyCommand command : commands) {
            // All five criteria are evaluated, one after the other; the value is
            // only read from the arc when the criterion is not the wildcard.
            boolean govPosTagOk = command.govPosTag.equals(WILDCARD)
                    || command.govPosTag.equals(arc.getHead().getTag().getCode());
            boolean governorOk = command.governor.equals(WILDCARD)
                    || command.governor.equals(arc.getHead().getToken().getOriginalText().toLowerCase());
            boolean depPosTagOk = command.depPosTag.equals(WILDCARD)
                    || command.depPosTag.equals(arc.getDependent().getTag().getCode());
            boolean dependentOk = command.dependent.equals(WILDCARD)
                    || command.dependent.equals(arc.getDependent().getToken().getOriginalText().toLowerCase());
            boolean labelOk = command.label.equals(WILDCARD)
                    || command.label.equals(arc.getLabel());

            boolean commandApplies = govPosTagOk && governorOk && depPosTagOk && dependentOk && labelOk;
            if (!commandApplies) {
                continue;
            }

            parseConfiguration.removeDependency(arc);
            if (command.command == ModifyCommandType.Replace) {
                parseConfiguration.addDependency(arc.getHead(), arc.getDependent(), command.newLabel, null);
            }
        }
    }

    parseConfiguration.clearMemory();
}
Use of com.joliciel.talismane.parser.DependencyArc in the project talismane by joliciel-informatique.
Class CorpusProjectifier, method onNextParseConfiguration.
/**
 * Repeatedly picks a pair of arcs reported as non-projective by
 * {@code getNextPair} and re-attaches the dependent of one of them to an
 * ancestor of its current head, until no such pair remains.
 * <p>
 * For each arc of the pair, the ancestors of its head are walked upwards until
 * one is found whose span to the dependent is projective with respect to the
 * <em>other</em> arc of the pair. If both arcs have such a candidate, the
 * configured {@code strategy} decides which one is moved; ties fall through
 * from {@code LeastLinearDistance} to {@code LeastDepthDifference} to
 * {@code GreatestDepth}. The moved arc gets {@code nonProjectiveArcSuffix}
 * appended to its label (unless the suffix is empty or already present).
 * <p>
 * Before the first modification, all initially non-projective arcs are
 * recorded via {@code addManualNonProjectiveDependency}.
 *
 * @param parseConfiguration the configuration to modify in place
 * @throws TalismaneException if neither arc of a pair has a projective ancestor to attach to
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException {
    List<DependencyArc> arcs = new ArrayList<DependencyArc>(parseConfiguration.getNonProjectiveDependencies());
    NonProjectivePair pair = this.getNextPair(arcs);
    if (pair != null) {
        // set so that it stays untouched
        for (DependencyArc arc : arcs) {
            parseConfiguration.addManualNonProjectiveDependency(arc.getHead(), arc.getDependent(), arc.getLabel());
        }
    }
    while (pair != null) {
        // Candidate new head for arc1: the nearest ancestor of its head whose
        // span to arc1's dependent is projective with respect to arc2.
        PosTaggedToken newHead1 = null;
        PosTaggedToken parent1 = parseConfiguration.getHead(pair.arc1.getHead());
        int depIndex1 = pair.arc1.getDependent().getToken().getIndex();
        int depthDelta1 = 1;
        while (parent1 != null) {
            int headIndex = parent1.getToken().getIndex();
            int startIndex = Math.min(headIndex, depIndex1);
            int endIndex = Math.max(headIndex, depIndex1);
            if (isProjective(startIndex, endIndex, pair.arc2)) {
                newHead1 = parent1;
                break;
            }
            parent1 = parseConfiguration.getHead(parent1);
            depthDelta1++;
        }

        // Candidate new head for arc2: symmetric to the above, so the span must
        // be checked against arc1 (the other arc of the pair), not against arc2
        // itself.
        PosTaggedToken newHead2 = null;
        PosTaggedToken parent2 = parseConfiguration.getHead(pair.arc2.getHead());
        int depIndex2 = pair.arc2.getDependent().getToken().getIndex();
        int depthDelta2 = 1;
        while (parent2 != null) {
            int headIndex = parent2.getToken().getIndex();
            int startIndex = Math.min(headIndex, depIndex2);
            int endIndex = Math.max(headIndex, depIndex2);
            if (isProjective(startIndex, endIndex, pair.arc1)) {
                newHead2 = parent2;
                break;
            }
            parent2 = parseConfiguration.getHead(parent2);
            depthDelta2++;
        }

        if (newHead1 != null && newHead2 != null) {
            // Both arcs could be moved: keep exactly one candidate, according to
            // the strategy.
            int linearDistance1 = Math.abs(newHead1.getIndex() - depIndex1);
            int linearDistance2 = Math.abs(newHead2.getIndex() - depIndex2);

            // Number of ancestors above each candidate head.
            int rootDepthDelta1 = 0;
            PosTaggedToken parent = parseConfiguration.getHead(newHead1);
            while (parent != null) {
                rootDepthDelta1++;
                parent = parseConfiguration.getHead(parent);
            }
            int rootDepthDelta2 = 0;
            parent = parseConfiguration.getHead(newHead2);
            while (parent != null) {
                rootDepthDelta2++;
                parent = parseConfiguration.getHead(parent);
            }

            switch (strategy) {
            case LeastLinearDistance:
                if (linearDistance1 < linearDistance2) {
                    newHead2 = null;
                    break;
                } else if (linearDistance2 < linearDistance1) {
                    newHead1 = null;
                    break;
                }
                // break left out on purpose: a tie falls through to the next criterion
            case LeastDepthDifference:
                if (depthDelta1 < depthDelta2) {
                    newHead2 = null;
                    break;
                } else if (depthDelta2 < depthDelta1) {
                    newHead1 = null;
                    break;
                }
                // break left out on purpose: a tie falls through to the next criterion
            case GreatestDepth:
                // Final criterion: always discards one candidate, so exactly one
                // survives.
                if (rootDepthDelta1 < rootDepthDelta2) {
                    newHead1 = null;
                    break;
                } else {
                    newHead2 = null;
                    break;
                }
            }
        }

        if (newHead1 != null && newHead2 == null) {
            parseConfiguration.removeDependency(pair.arc1);
            String newLabel = pair.arc1.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix))
                newLabel += this.nonProjectiveArcSuffix;
            parseConfiguration.addDependency(newHead1, pair.arc1.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc2.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc2.getHead(), pair.arc2.getDependent(), pair.arc2.getLabel(), null);
        } else if (newHead1 == null && newHead2 != null) {
            parseConfiguration.removeDependency(pair.arc2);
            String newLabel = pair.arc2.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix))
                newLabel += this.nonProjectiveArcSuffix;
            parseConfiguration.addDependency(newHead2, pair.arc2.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc1.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc1.getHead(), pair.arc1.getDependent(), pair.arc1.getLabel(), null);
        } else {
            // Neither arc has an ancestor it can be attached to.
            throw new TalismaneException("Cannot deprojectify " + pair + ". Could not find projective parents.");
        }

        parseConfiguration.clearMemory();

        // Look for the next offending pair among the updated dependencies.
        arcs = new ArrayList<DependencyArc>(parseConfiguration.getDependencies());
        pair = this.getNextPair(arcs);
    }
}
Use of com.joliciel.talismane.parser.DependencyArc in the project talismane by joliciel-informatique.
Class CorpusProjectifier, method getNextPair.
/**
 * Scans the arcs in list order and returns the first pair {@code (arcs[i],
 * arcs[j])}, {@code i < j}, for which {@code isProjective} reports that the
 * second arc is not projective with respect to the span covered by the first.
 * <p>
 * Arcs whose head carries {@code PosTag.ROOT_POS_TAG} and whose label is
 * {@code null} or empty are ignored on both sides of the pair.
 *
 * @param arcs the arcs to examine
 * @return the first offending pair, or {@code null} if there is none
 */
private NonProjectivePair getNextPair(List<DependencyArc> arcs) {
    for (int first = 0; first < arcs.size(); first++) {
        DependencyArc spanArc = arcs.get(first);
        boolean spanArcIsUnlabelledRoot = spanArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (spanArc.getLabel() == null || spanArc.getLabel().length() == 0);
        if (spanArcIsUnlabelledRoot) {
            continue;
        }

        // Span covered by the first arc, as an ordered pair of token indexes.
        int governorIndex = spanArc.getHead().getToken().getIndex();
        int dependentIndex = spanArc.getDependent().getToken().getIndex();
        int spanStart = Math.min(governorIndex, dependentIndex);
        int spanEnd = Math.max(governorIndex, dependentIndex);

        for (int second = first + 1; second < arcs.size(); second++) {
            DependencyArc crossingCandidate = arcs.get(second);
            boolean candidateIsUnlabelledRoot = crossingCandidate.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                    && (crossingCandidate.getLabel() == null || crossingCandidate.getLabel().length() == 0);
            if (candidateIsUnlabelledRoot) {
                continue;
            }
            if (!isProjective(spanStart, spanEnd, crossingCandidate)) {
                return new NonProjectivePair(spanArc, crossingCandidate);
            }
        }
    }
    return null;
}
Use of com.joliciel.talismane.parser.DependencyArc in the project talismane by joliciel-informatique.
Class StandoffReader, method hasNextSentence.
/**
 * Reports whether a parse configuration is available, building one from the
 * next stored sentence if none is pending.
 * <p>
 * Nothing is built when a positive maximum sentence count has been reached,
 * when a configuration is already pending, or when all sentences have been
 * consumed. Otherwise the next sentence's standoff tokens are joined into a
 * text (spacing decided by the session's linguistic rules), annotated,
 * pos-tagged from the standoff data, and linked by the relations found in
 * {@code relationMap}. A punctuation token without a relation is attached,
 * when {@code punctuationDepLabel} is set, to the closest preceding token that
 * is not itself punctuation.
 *
 * @return {@code true} if a configuration is pending after this call
 * @throws TalismaneException if a relation refers to a token id that is unknown in the sentence
 * @throws IOException declared by the overridden method
 */
@Override
public boolean hasNextSentence() throws TalismaneException, IOException {
    boolean limitReached = this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount();

    if (!limitReached && configuration == null && sentenceIndex < sentences.size()) {
        List<StandoffToken> standoffTokens = sentences.get(sentenceIndex++);

        LinguisticRules linguisticRules = TalismaneSession.get(sessionId).getLinguisticRules();
        if (linguisticRules == null) {
            throw new RuntimeException("Linguistic rules have not been set.");
        }

        // Rebuild the sentence text, letting the rules decide where spaces go.
        String sentenceText = "";
        for (StandoffToken current : standoffTokens) {
            String nextWord = current.text;
            if (linguisticRules.shouldAddSpace(sentenceText, nextWord)) {
                sentenceText = sentenceText + " ";
            }
            sentenceText = sentenceText + nextWord;
        }

        Sentence sentence = new Sentence(sentenceText, sessionId);
        for (SentenceAnnotator sentenceAnnotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
            sentenceAnnotator.annotate(sentence);
        }

        PretokenisedSequence tokenSequence = new PretokenisedSequence(sentence, sessionId);
        PosTagSequence posTagSequence = new PosTagSequence(tokenSequence);

        // Standoff id -> pos-tagged token, used below to resolve relation ends.
        Map<String, PosTaggedToken> tokensById = new HashMap<String, PosTaggedToken>();
        for (StandoffToken current : standoffTokens) {
            Token token = tokenSequence.addToken(current.text);
            Decision tagDecision = new Decision(current.posTag.getCode());
            PosTaggedToken taggedToken = new PosTaggedToken(token, tagDecision, sessionId);
            if (LOG.isTraceEnabled()) {
                LOG.trace(taggedToken.toString());
            }
            taggedToken.setComment(current.comment);
            posTagSequence.addPosTaggedToken(taggedToken);
            tokensById.put(current.id, taggedToken);
            LOG.debug("Found token " + current.id + ", " + taggedToken);
        }

        tokenSequence.setWithRoot(true);
        configuration = new ParseConfiguration(posTagSequence);

        for (StandoffToken current : standoffTokens) {
            StandoffRelation relation = relationMap.get(current.id);
            if (relation != null) {
                PosTaggedToken governor = tokensById.get(relation.fromToken);
                PosTaggedToken governed = tokensById.get(relation.toToken);
                if (governor == null) {
                    throw new TalismaneException("No token found for head id: " + relation.fromToken);
                }
                if (governed == null) {
                    throw new TalismaneException("No token found for dependent id: " + relation.toToken);
                }
                DependencyArc newArc = configuration.addDependency(governor, governed, relation.label, null);
                newArc.setComment(relation.comment);
            } else if (current.posTag.getOpenClassIndicator() == PosTagOpenClassIndicator.PUNCTUATION
                    && punctuationDepLabel != null) {
                // Unattached punctuation: walk backwards to the nearest token
                // that is not punctuation and attach to it.
                PosTaggedToken punctuation = tokensById.get(current.id);
                for (int position = punctuation.getIndex() - 1; position >= 0; position--) {
                    PosTaggedToken candidateHead = posTagSequence.get(position);
                    if (candidateHead.getTag().getOpenClassIndicator() != PosTagOpenClassIndicator.PUNCTUATION) {
                        configuration.addDependency(candidateHead, punctuation, punctuationDepLabel, null);
                        break;
                    }
                }
            }
        }
    }

    return configuration != null;
}
Aggregations