use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class LeftArcEagerTransition method checkPreconditions.
/**
 * Checks whether this left-arc transition may be applied to the given configuration.
 * <p>
 * The transition is rejected when the buffer or the stack is empty, when the token on top of
 * the stack carries the ROOT pos-tag, or when that token already has a head in the
 * configuration. Every rejection is reported at trace level.
 *
 * @param configuration the parse configuration to inspect
 * @return {@code true} if none of the rejection conditions hold, {@code false} otherwise
 */
@Override
public boolean checkPreconditions(ParseConfiguration configuration) {
    // Both a dependent (on the stack) and a head (in the buffer) are needed.
    final boolean nothingToLink = configuration.getBuffer().isEmpty() || configuration.getStack().isEmpty();
    if (nothingToLink) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this.toString() + ": buffer or stack is empty");
        }
        return false;
    }

    // The root may never become a dependent.
    final PosTaggedToken s0 = configuration.getStack().peek();
    final boolean s0IsRoot = s0.getTag().equals(PosTag.ROOT_POS_TAG);
    if (s0IsRoot) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this.toString() + ": top-of-stack is ROOT");
        }
        return false;
    }

    // A token can only receive a single governor.
    final PosTaggedToken currentHead = configuration.getHead(s0);
    if (currentHead == null) {
        return true;
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("Cannot apply " + this.toString() + ": top of stack " + s0 + " already has governor " + currentHead);
    }
    return false;
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class LeftArcTransition method applyInternal.
/**
 * Applies the left-arc: the first buffer element becomes the head of the token popped off the
 * stack, and the resulting dependency is registered with this transition's label.
 *
 * @param configuration the parse configuration to modify
 * @throws CircularDependencyException declared for the dependency addition
 */
@Override
protected void applyInternal(ParseConfiguration configuration) throws CircularDependencyException {
    // The buffer is read before the stack is popped, exactly as the arc is directed:
    // buffer front (head) -> former top of stack (dependent).
    final PosTaggedToken bufferFront = configuration.getBuffer().getFirst();
    final PosTaggedToken poppedFromStack = configuration.getStack().pop();
    configuration.addDependency(bufferFront, poppedFromStack, label, this);
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ArcEagerTransitionSystem method predictTransitions.
/**
 * Derives, and applies to {@code configuration}, the sequence of transitions that reproduces
 * the given target dependencies.
 * <p>
 * On every iteration the top of the stack (S0) and the front of the buffer (B0) are compared
 * with the remaining target arcs:
 * <ul>
 * <li>an arc B0 -> S0 yields {@code LeftArc[label]};</li>
 * <li>an arc S0 -> B0 yields {@code RightArc[label]};</li>
 * <li>otherwise, if S0 has no remaining target arc in which it is the head, {@code Reduce} is
 * used when S0 already has a head in the configuration, and {@code ForceReduce} when S0 is the
 * dependent of an unlabelled arc whose head carries the ROOT pos-tag;</li>
 * <li>in all other cases {@code Shift} is used.</li>
 * </ul>
 * Arcs consumed by a transition are removed from {@code targetDependencies}, so the set passed
 * in is modified by this call.
 *
 * @param configuration the configuration on which the transitions are applied
 * @param targetDependencies the arcs to reproduce; emptied as arcs get consumed
 * @throws UnknownDependencyLabelException if no arc transition exists for an arc's label
 * @throws NonPredictableParseTreeException if arcs remain once the buffer is empty
 * @throws CircularDependencyException declared in the signature; not raised directly here
 */
@Override
public void predictTransitions(ParseConfiguration configuration, Set<DependencyArc> targetDependencies) throws UnknownDependencyLabelException, NonPredictableParseTreeException, CircularDependencyException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("predictTransitions");
        LOG.debug(configuration.getSentence().getText().toString());
        LOG.debug(configuration.toString());
        LOG.debug(targetDependencies.toString());
    }

    // Dependents of unlabelled arcs headed by a ROOT-tagged token, indexed by dependent.
    Map<PosTaggedToken, DependencyArc> unlabelledRootArcs = new HashMap<PosTaggedToken, DependencyArc>();
    for (DependencyArc candidate : targetDependencies) {
        boolean headedByRoot = candidate.getHead().getTag().equals(PosTag.ROOT_POS_TAG);
        if (headedByRoot && (candidate.getLabel() == null || candidate.getLabel().length() == 0)) {
            unlabelledRootArcs.put(candidate.getDependent(), candidate);
        }
    }

    while (!configuration.getBuffer().isEmpty()) {
        final PosTaggedToken s0 = configuration.getStack().peek();
        final PosTaggedToken b0 = configuration.getBuffer().peekFirst();
        if (LOG.isTraceEnabled()) {
            LOG.trace("S0: " + s0);
            LOG.trace("B0: " + b0);
        }

        Transition chosen = null;
        DependencyArc consumedArc = null;

        // 1. Look for a target arc directly linking S0 and B0.
        for (DependencyArc candidate : targetDependencies) {
            String arcCode = null;
            if (candidate.getHead().equals(b0) && candidate.getDependent().equals(s0)) {
                arcCode = "LeftArc[" + candidate.getLabel() + "]";
            } else if (candidate.getHead().equals(s0) && candidate.getDependent().equals(b0)) {
                arcCode = "RightArc[" + candidate.getLabel() + "]";
            }
            if (arcCode == null) {
                continue;
            }
            try {
                chosen = this.getTransitionForCode(arcCode);
            } catch (UnknownDependencyLabelException udle) {
                // re-thrown with the index of the offending dependent
                throw new UnknownDependencyLabelException(candidate.getDependent().getIndex(), candidate.getLabel());
            } catch (UnknownTransitionException e) {
                // should never happen
                LOG.error(e.getMessage(), e);
                throw new RuntimeException(e);
            }
            consumedArc = candidate;
            break;
        }

        // 2. No arc: decide between Reduce, ForceReduce and Shift.
        if (chosen == null) {
            String code = "Shift";
            final boolean alreadyGoverned = configuration.getHead(s0) != null;
            final boolean unlabelledUnderRoot = unlabelledRootArcs.containsKey(s0);
            if (alreadyGoverned || unlabelledUnderRoot) {
                // S0 may only leave the stack once none of its target dependents remain.
                boolean dependentsPending = false;
                for (DependencyArc candidate : targetDependencies) {
                    if (candidate.getHead().equals(s0)) {
                        dependentsPending = true;
                        break;
                    }
                }
                if (!dependentsPending) {
                    if (alreadyGoverned) {
                        code = "Reduce";
                    } else {
                        // ungoverned punctuation only
                        code = "ForceReduce";
                        consumedArc = unlabelledRootArcs.get(s0);
                    }
                }
            }
            try {
                chosen = this.getTransitionForCode(code);
            } catch (UnknownTransitionException e) {
                // should never happen
                throw new RuntimeException(e);
            }
        }

        // 3. Consume the arc (if any) and apply the transition.
        if (consumedArc != null) {
            targetDependencies.remove(consumedArc);
        }
        try {
            chosen.apply(configuration);
        } catch (InvalidTransitionException e) {
            // should never happen
            LOG.error("Should never happen", e);
            throw new RuntimeException(e);
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Transition: " + chosen);
            LOG.trace("Configuration: " + configuration);
        }
    }

    if (!targetDependencies.isEmpty()) {
        throw new NonPredictableParseTreeException("Wasn't able to predict: " + targetDependencies);
    }
    LOG.debug("Full prediction complete");
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class CorpusProjectifier method onNextParseConfiguration.
/**
 * Rewrites the dependencies of the given configuration, one pair of arcs at a time, for as
 * long as {@code getNextPair} returns a pair.
 * <p>
 * Before any change is made (and only if at least one pair exists), every arc returned by
 * {@code getNonProjectiveDependencies()} is recorded through
 * {@code addManualNonProjectiveDependency}.
 * <p>
 * For each pair, the ancestors of each arc's head are walked upwards until one is found whose
 * span with the arc's dependent passes {@code isProjective} with respect to the <em>other</em>
 * arc of the pair. If both arcs have such an ancestor, the configured {@code strategy} selects
 * which arc is re-attached (ties fall through from linear distance, to depth difference, to
 * depth from root). The selected arc is replaced by an arc from the new head, its label being
 * extended with {@code nonProjectiveArcSuffix} when that suffix is non-empty and not already
 * present.
 *
 * @param parseConfiguration the configuration whose dependencies are rewritten in place
 * @throws TalismaneException if neither arc of a pair has a suitable ancestor (or if the
 *         strategy leaves both candidates in place)
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException {
    List<DependencyArc> arcs = new ArrayList<DependencyArc>(parseConfiguration.getNonProjectiveDependencies());
    NonProjectivePair pair = this.getNextPair(arcs);
    if (pair != null) {
        // set so that it stays untouched
        for (DependencyArc arc : arcs) {
            parseConfiguration.addManualNonProjectiveDependency(arc.getHead(), arc.getDependent(), arc.getLabel());
        }
    }
    while (pair != null) {
        // Candidate new head for arc1: nearest ancestor of its head whose span to arc1's
        // dependent is projective with respect to arc2.
        PosTaggedToken newHead1 = null;
        PosTaggedToken parent1 = parseConfiguration.getHead(pair.arc1.getHead());
        int depIndex1 = pair.arc1.getDependent().getToken().getIndex();
        int depthDelta1 = 1; // number of levels climbed above arc1's original head
        while (parent1 != null) {
            int headIndex = parent1.getToken().getIndex();
            int startIndex = Math.min(headIndex, depIndex1);
            int endIndex = Math.max(headIndex, depIndex1);
            if (isProjective(startIndex, endIndex, pair.arc2)) {
                newHead1 = parent1;
                break;
            }
            parent1 = parseConfiguration.getHead(parent1);
            depthDelta1++;
        }

        // Candidate new head for arc2: same search, checked against arc1.
        // (Fix: this previously tested the candidate span against arc2 itself rather than
        // against the other arc of the pair, unlike the symmetric search above.)
        PosTaggedToken newHead2 = null;
        PosTaggedToken parent2 = parseConfiguration.getHead(pair.arc2.getHead());
        int depIndex2 = pair.arc2.getDependent().getToken().getIndex();
        int depthDelta2 = 1; // number of levels climbed above arc2's original head
        while (parent2 != null) {
            int headIndex = parent2.getToken().getIndex();
            int startIndex = Math.min(headIndex, depIndex2);
            int endIndex = Math.max(headIndex, depIndex2);
            if (isProjective(startIndex, endIndex, pair.arc1)) {
                newHead2 = parent2;
                break;
            }
            parent2 = parseConfiguration.getHead(parent2);
            depthDelta2++;
        }

        if (newHead1 != null && newHead2 != null) {
            // Both arcs could be moved: let the strategy keep exactly one candidate.
            int linearDistance1 = Math.abs(newHead1.getIndex() - depIndex1);
            int linearDistance2 = Math.abs(newHead2.getIndex() - depIndex2);

            // Number of ancestors above each candidate head.
            int rootDepthDelta1 = 0;
            PosTaggedToken parent = parseConfiguration.getHead(newHead1);
            while (parent != null) {
                rootDepthDelta1++;
                parent = parseConfiguration.getHead(parent);
            }
            int rootDepthDelta2 = 0;
            parent = parseConfiguration.getHead(newHead2);
            while (parent != null) {
                rootDepthDelta2++;
                parent = parseConfiguration.getHead(parent);
            }

            switch (strategy) {
            case LeastLinearDistance:
                if (linearDistance1 < linearDistance2) {
                    newHead2 = null;
                    break;
                } else if (linearDistance2 < linearDistance1) {
                    newHead1 = null;
                    break;
                }
                // break left out on purpose
            case LeastDepthDifference:
                if (depthDelta1 < depthDelta2) {
                    newHead2 = null;
                    break;
                } else if (depthDelta2 < depthDelta1) {
                    newHead1 = null;
                    break;
                }
                // break left out on purpose
            case GreatestDepth:
                if (rootDepthDelta1 < rootDepthDelta2) {
                    newHead1 = null;
                    break;
                } else {
                    newHead2 = null;
                    break;
                }
            }
        }

        if (newHead1 != null && newHead2 == null) {
            // Re-attach arc1's dependent to its new head.
            parseConfiguration.removeDependency(pair.arc1);
            String newLabel = pair.arc1.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix)) {
                newLabel += this.nonProjectiveArcSuffix;
            }
            parseConfiguration.addDependency(newHead1, pair.arc1.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc2.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc2.getHead(), pair.arc2.getDependent(), pair.arc2.getLabel(), null);
        } else if (newHead1 == null && newHead2 != null) {
            // Re-attach arc2's dependent to its new head.
            parseConfiguration.removeDependency(pair.arc2);
            String newLabel = pair.arc2.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix)) {
                newLabel += this.nonProjectiveArcSuffix;
            }
            parseConfiguration.addDependency(newHead2, pair.arc2.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc1.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc1.getHead(), pair.arc1.getDependent(), pair.arc1.getLabel(), null);
        } else {
            throw new TalismaneException("Cannot deprojectify " + pair + ". Could not find projective parents.");
        }

        // Re-read the dependencies after the change and look for the next pair.
        parseConfiguration.clearMemory();
        arcs = new ArrayList<DependencyArc>(parseConfiguration.getDependencies());
        pair = this.getNextPair(arcs);
    }
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class StandoffReader method hasNextSentence.
/**
 * Indicates whether another sentence is available, building its parse configuration if it has
 * not been built yet.
 * <p>
 * Nothing is built when a positive maximum sentence count has been reached, when a
 * configuration is already pending, or when all sentences have been consumed. Otherwise the
 * next list of standoff tokens is turned into a sentence text (spacing decided by the
 * session's linguistic rules), annotated, tokenised and pos-tagged, after which dependencies
 * are added from {@code relationMap}. A punctuation token without a relation is attached, when
 * {@code punctuationDepLabel} is set, to the closest preceding non-punctuation token.
 *
 * @return {@code true} if a parse configuration is pending after this call
 * @throws TalismaneException if a relation refers to a token id that is unknown in the sentence
 * @throws IOException declared in the signature
 */
@Override
public boolean hasNextSentence() throws TalismaneException, IOException {
    final boolean limitReached = this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount();
    if (!limitReached && configuration == null && sentenceIndex < sentences.size()) {
        List<StandoffToken> standoffTokens = sentences.get(sentenceIndex++);

        LinguisticRules linguisticRules = TalismaneSession.get(sessionId).getLinguisticRules();
        if (linguisticRules == null) {
            throw new RuntimeException("Linguistic rules have not been set.");
        }

        // Rebuild the raw sentence text from the individual token texts.
        String sentenceText = "";
        for (StandoffToken current : standoffTokens) {
            String word = current.text;
            sentenceText = linguisticRules.shouldAddSpace(sentenceText, word) ? sentenceText + " " + word : sentenceText + word;
        }

        Sentence sentence = new Sentence(sentenceText, sessionId);
        for (SentenceAnnotator annotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
            annotator.annotate(sentence);
        }

        PretokenisedSequence tokenSequence = new PretokenisedSequence(sentence, sessionId);
        PosTagSequence posTagSequence = new PosTagSequence(tokenSequence);

        // Pos-tagged tokens indexed by their standoff id, for resolving relations below.
        Map<String, PosTaggedToken> tokensById = new HashMap<String, PosTaggedToken>();
        for (StandoffToken current : standoffTokens) {
            Token token = tokenSequence.addToken(current.text);
            Decision tagDecision = new Decision(current.posTag.getCode());
            PosTaggedToken tagged = new PosTaggedToken(token, tagDecision, sessionId);
            if (LOG.isTraceEnabled()) {
                LOG.trace(tagged.toString());
            }
            tagged.setComment(current.comment);
            posTagSequence.addPosTaggedToken(tagged);
            tokensById.put(current.id, tagged);
            LOG.debug("Found token " + current.id + ", " + tagged);
        }
        tokenSequence.setWithRoot(true);

        configuration = new ParseConfiguration(posTagSequence);
        for (StandoffToken current : standoffTokens) {
            StandoffRelation relation = relationMap.get(current.id);
            if (relation != null) {
                // Explicit relation: both ends must be known tokens of this sentence.
                PosTaggedToken head = tokensById.get(relation.fromToken);
                if (head == null) {
                    throw new TalismaneException("No token found for head id: " + relation.fromToken);
                }
                PosTaggedToken dependent = tokensById.get(relation.toToken);
                if (dependent == null) {
                    throw new TalismaneException("No token found for dependent id: " + relation.toToken);
                }
                DependencyArc arc = configuration.addDependency(head, dependent, relation.label, null);
                arc.setComment(relation.comment);
            } else if (current.posTag.getOpenClassIndicator() == PosTagOpenClassIndicator.PUNCTUATION && punctuationDepLabel != null) {
                // Unattached punctuation: scan leftwards for the first non-punctuation token.
                PosTaggedToken dependent = tokensById.get(current.id);
                int position = dependent.getIndex() - 1;
                while (position >= 0) {
                    PosTaggedToken candidate = posTagSequence.get(position);
                    if (candidate.getTag().getOpenClassIndicator() != PosTagOpenClassIndicator.PUNCTUATION) {
                        configuration.addDependency(candidate, dependent, punctuationDepLabel, null);
                        break;
                    }
                    position--;
                }
            }
        }
    }
    return configuration != null;
}
Aggregations