use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ParseTreeNode method getGapHeads.
/**
 * Non-projectivity helper: scans every token positioned strictly between the
 * first and last token of this node's yield. For each such token that is not
 * itself part of the yield and has a governing arc, the arc is collected when
 * its head lies outside the span covered by the yield.
 *
 * @return the governing arcs of gap tokens whose head is outside the yield's
 *         span, in token order; empty if there are none
 */
public List<DependencyArc> getGapHeads() {
    NavigableSet<PosTaggedToken> nodeYield = this.getYield();
    int firstIndex = nodeYield.first().getIndex();
    int lastIndex = nodeYield.last().getIndex();

    List<DependencyArc> collectedArcs = new ArrayList<>();
    for (int position = firstIndex + 1; position < lastIndex; position++) {
        PosTaggedToken candidate = this.parseTree.getPosTaggedTokens().get(position);
        if (nodeYield.contains(candidate)) {
            // part of the yield, hence not in a gap
            continue;
        }

        DependencyArc governingArc = this.parseTree.getGoverningArc(candidate);
        if (governingArc == null) {
            // ungoverned gap token: nothing to report
            continue;
        }

        // only keep arcs whose head falls outside the [first, last] span
        int headIndex = governingArc.getHead().getIndex();
        boolean headOutsideSpan = headIndex < firstIndex || headIndex > lastIndex;
        if (headOutsideSpan) {
            collectedArcs.add(governingArc);
        }
    }
    return collectedArcs;
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ParseTreeNode method isProjective.
/**
 * Tells whether this node's yield is projective, i.e. whether the token
 * indexes encountered while iterating over the yield form one continuous
 * interval with no gaps.
 *
 * @return {@code true} if each token's index is exactly one more than the
 *         index of the token iterated before it (or the yield is empty),
 *         {@code false} as soon as a break in the sequence is found
 */
public boolean isProjective() {
    // negative value means "no token seen yet"
    int previousIndex = -1;
    for (PosTaggedToken token : this.getYield()) {
        int index = token.getIndex();
        if (previousIndex >= 0 && index != previousIndex + 1) {
            return false;
        }
        previousIndex = index;
    }
    return true;
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ParserRegexBasedCorpusReader method processSentence.
/**
 * Processes one sentence: delegates to the superclass, prepends a root token
 * to {@code posTagSequence}, builds one {@link DependencyArc} per corpus line
 * and stores the result in a new {@link ParseConfiguration} assigned to
 * {@code configuration}.
 * <p>
 * When {@code predictTransitions} is set, the transition system is asked to
 * predict the transitions for the arcs; otherwise each arc is added to the
 * configuration directly. If the corpus line reader has a
 * {@code NON_PROJ_GOVERNOR} placeholder, a second pass adds one manual
 * non-projective dependency per corpus line.
 *
 * @param sentence
 *            the sentence being processed, passed through to the superclass
 * @param corpusLines
 *            the corpus lines of this sentence, one dependency arc is built
 *            for each
 * @throws TalismaneException
 *             rethrown after {@code clearSentence()} has been called; an
 *             {@code UnknownDependencyLabelException} is raised here when the
 *             transition system declares more than one dependency label and a
 *             non-empty label of a line is not among them
 * @throws IOException
 *             propagated as-is; only {@code TalismaneException} triggers
 *             {@code clearSentence()} in this method
 */
@Override
protected void processSentence(Sentence sentence, List<CorpusLine> corpusLines) throws TalismaneException, IOException {
try {
super.processSentence(sentence, corpusLines);
// register the artificial root under id 0, which is also the default head
// index used below for lines without a governor
PosTaggedToken rootToken = posTagSequence.prependRoot();
idTokenMap.put(0, rootToken);
TransitionSystem transitionSystem = TalismaneSession.get(sessionId).getTransitionSystem();
Set<DependencyArc> dependencies = new TreeSet<>();
// first pass: one arc per corpus line, using the GOVERNOR / LABEL elements
for (CorpusLine dataLine : corpusLines) {
// lines without a GOVERNOR element keep head index 0
int headIndex = 0;
if (dataLine.hasElement(CorpusElement.GOVERNOR))
headIndex = Integer.parseInt(dataLine.getElement(CorpusElement.GOVERNOR));
PosTaggedToken head = idTokenMap.get(headIndex);
PosTaggedToken dependent = idTokenMap.get(dataLine.getIndex());
String dependencyLabel = dataLine.getElement(CorpusElement.LABEL);
// labels are only validated when the transition system declares more
// than one dependency label
if (transitionSystem.getDependencyLabels().size() > 1) {
// NOTE(review): unlike nonProjectiveLabel below, dependencyLabel is not
// null-checked before length() - confirm LABEL is always present
if (dependencyLabel.length() > 0 && !transitionSystem.getDependencyLabels().contains(dependencyLabel)) {
throw new UnknownDependencyLabelException((this.getCurrentFile() == null ? "" : this.getCurrentFile().getPath()), dataLine.getLineNumber(), dependencyLabel);
}
String nonProjectiveLabel = dataLine.getElement(CorpusElement.NON_PROJ_LABEL);
if (nonProjectiveLabel != null && nonProjectiveLabel.length() > 0 && !transitionSystem.getDependencyLabels().contains(nonProjectiveLabel)) {
throw new UnknownDependencyLabelException((this.getCurrentFile() == null ? "" : this.getCurrentFile().getPath()), dataLine.getLineNumber(), nonProjectiveLabel);
}
}
DependencyArc arc = new DependencyArc(head, dependent, dependencyLabel);
if (LOG.isTraceEnabled())
LOG.trace(arc.toString());
dependencies.add(arc);
// the comment is attached after the arc was inserted into the TreeSet;
// NOTE(review): assumes arc ordering does not depend on the comment - confirm
if (dataLine.hasElement(CorpusElement.DEP_COMMENT))
arc.setComment(dataLine.getElement(CorpusElement.DEP_COMMENT));
}
configuration = new ParseConfiguration(posTagSequence);
if (this.predictTransitions) {
// let the transition system derive the transitions for these arcs
transitionSystem.predictTransitions(configuration, dependencies);
} else {
// no transitions wanted: add each dependency directly, with a null
// last argument
for (DependencyArc arc : dependencies) {
configuration.addDependency(arc.getHead(), arc.getDependent(), arc.getLabel(), null);
}
}
// if the corpus line reader has a NON_PROJ_GOVERNOR placeholder, run a
// second pass that records the manual non-projective dependencies
if (this.getCorpusLineReader().hasPlaceholder(CorpusElement.NON_PROJ_GOVERNOR)) {
Set<DependencyArc> nonProjDeps = new TreeSet<>();
if (LOG.isTraceEnabled())
LOG.trace("Non projective dependencies: ");
for (CorpusLine dataLine : corpusLines) {
// lines without a NON_PROJ_GOVERNOR element keep head index 0
int headIndex = 0;
if (dataLine.hasElement(CorpusElement.NON_PROJ_GOVERNOR))
headIndex = Integer.parseInt(dataLine.getElement(CorpusElement.NON_PROJ_GOVERNOR));
PosTaggedToken head = idTokenMap.get(headIndex);
PosTaggedToken dependent = idTokenMap.get(dataLine.getIndex());
DependencyArc nonProjArc = new DependencyArc(head, dependent, dataLine.getElement(CorpusElement.NON_PROJ_LABEL));
if (LOG.isTraceEnabled())
LOG.trace(nonProjArc.toString());
nonProjDeps.add(nonProjArc);
// same DEP_COMMENT element as in the first pass is attached here
if (dataLine.hasElement(CorpusElement.DEP_COMMENT))
nonProjArc.setComment(dataLine.getElement(CorpusElement.DEP_COMMENT));
}
for (DependencyArc nonProjArc : nonProjDeps) {
configuration.addManualNonProjectiveDependency(nonProjArc.getHead(), nonProjArc.getDependent(), nonProjArc.getLabel());
}
}
} catch (TalismaneException e) {
// discard the sentence state before propagating the failure
this.clearSentence();
throw e;
}
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ReduceTransition method checkPreconditions.
/**
 * Checks whether this transition may be applied to the given configuration:
 * the stack must be non-empty and the token on top of the stack must already
 * have a governor. The reason for a refusal is logged at trace level.
 *
 * @param configuration
 *            the configuration to inspect
 * @return {@code true} if both conditions hold, {@code false} otherwise
 */
@Override
public boolean checkPreconditions(ParseConfiguration configuration) {
    boolean applicable = true;
    if (configuration.getStack().isEmpty()) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this.toString() + ": stack is empty");
        }
        applicable = false;
    } else {
        PosTaggedToken stackTop = configuration.getStack().peek();
        // the token on top of the stack needs a governor before it can go
        if (configuration.getHead(stackTop) == null) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Cannot apply " + this.toString() + ": top of stack " + stackTop + " doesn't yet have a governor.");
            }
            applicable = false;
        }
    }
    return applicable;
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class RightArcEagerTransition method checkPreconditions.
/**
 * Checks whether this transition may be applied to the given configuration:
 * neither the buffer nor the stack may be empty, and the first token of the
 * buffer must not yet have a governor. The reason for a refusal is logged at
 * trace level.
 *
 * @param configuration
 *            the configuration to inspect
 * @return {@code true} if all conditions hold, {@code false} otherwise
 */
@Override
public boolean checkPreconditions(ParseConfiguration configuration) {
    boolean bothPopulated = !configuration.getBuffer().isEmpty() && !configuration.getStack().isEmpty();
    boolean applicable;
    if (bothPopulated) {
        PosTaggedToken bufferHead = configuration.getBuffer().peekFirst();
        PosTaggedToken existingGovernor = configuration.getHead(bufferHead);
        // a token that is already governed cannot receive a second governor
        applicable = existingGovernor == null;
        if (!applicable && LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this.toString() + ": top of buffer " + bufferHead + " already has governor " + existingGovernor);
        }
    } else {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this.toString() + ": buffer or stack is empty");
        }
        applicable = false;
    }
    return applicable;
}
Aggregations