Search in sources :

Example 6 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class ParseTreeNode method getGapHeads.

/**
 * Non-projectivity helper: finds the arcs that attach "gap" material to the
 * rest of the sentence.
 * <p>
 * A gap token is a token positioned strictly between the first and the last
 * token of this node's yield without belonging to the yield itself. For each
 * such token that has a governing arc, the arc is kept when the index of its
 * head lies outside the span covered by the yield (before the first yield
 * token or after the last one).
 *
 * @return the qualifying arcs, in increasing order of the gap token's
 *         position; empty if there are none
 * @throws java.util.NoSuchElementException
 *             if the yield is empty (raised by {@code first()})
 */
public List<DependencyArc> getGapHeads() {
    NavigableSet<PosTaggedToken> span = this.getYield();
    int leftEdge = span.first().getIndex();
    int rightEdge = span.last().getIndex();
    List<DependencyArc> result = new ArrayList<>();
    // only positions strictly inside the span can be gaps
    for (int position = leftEdge + 1; position < rightEdge; position++) {
        PosTaggedToken candidate = this.parseTree.getPosTaggedTokens().get(position);
        if (span.contains(candidate)) {
            // part of the yield, hence not a gap token
            continue;
        }
        DependencyArc governingArc = this.parseTree.getGoverningArc(candidate);
        if (governingArc == null) {
            // no governing arc recorded for this token: nothing to report
            continue;
        }
        int headPosition = governingArc.getHead().getIndex();
        boolean headOutsideSpan = headPosition < leftEdge || headPosition > rightEdge;
        if (headOutsideSpan) {
            result.add(governingArc);
        }
    }
    return result;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken)

Example 7 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class ParseTreeNode method isProjective.

/**
 * Tells whether this node's yield is projective, i.e. whether the token
 * indexes met while iterating over the yield form an unbroken run in which
 * each index is exactly one more than the previous one.
 *
 * @return {@code true} if no gap was found (including for an empty yield),
 *         {@code false} as soon as two successive tokens are not adjacent
 */
public boolean isProjective() {
    // negative value means "no token seen yet"
    int previousIndex = -1;
    for (PosTaggedToken member : this.getYield()) {
        int index = member.getIndex();
        if (previousIndex >= 0 && index != previousIndex + 1) {
            return false;
        }
        previousIndex = index;
    }
    return true;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken)

Example 8 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class ParserRegexBasedCorpusReader method processSentence.

/**
 * Turns the corpus lines of one sentence into a {@link ParseConfiguration}.
 * <p>
 * After delegating to the superclass, a root token is prepended to the
 * pos-tag sequence and registered under id 0. One {@link DependencyArc} is
 * then built per corpus line (governor, dependent, label), the arcs are
 * loaded into a fresh configuration, and, when the corpus format declares a
 * non-projective governor column, a second set of arcs is added as manual
 * non-projective dependencies.
 *
 * @param sentence
 *            the sentence being read, handed to the superclass
 * @param corpusLines
 *            the corpus lines making up this sentence
 * @throws TalismaneException
 *             rethrown after the current sentence has been cleared
 * @throws IOException
 *             declared by the signature; nothing in this body catches it
 */
@Override
protected void processSentence(Sentence sentence, List<CorpusLine> corpusLines) throws TalismaneException, IOException {
    try {
        super.processSentence(sentence, corpusLines);
        // the artificial root is addressable as id 0, which is also the
        // default governor id used below
        PosTaggedToken rootToken = posTagSequence.prependRoot();
        idTokenMap.put(0, rootToken);
        TransitionSystem transitionSystem = TalismaneSession.get(sessionId).getTransitionSystem();
        Set<DependencyArc> dependencies = new TreeSet<>();
        for (CorpusLine dataLine : corpusLines) {
            // a line without a governor element is attached to id 0 (the root)
            int headIndex = 0;
            if (dataLine.hasElement(CorpusElement.GOVERNOR))
                headIndex = Integer.parseInt(dataLine.getElement(CorpusElement.GOVERNOR));
            PosTaggedToken head = idTokenMap.get(headIndex);
            PosTaggedToken dependent = idTokenMap.get(dataLine.getIndex());
            String dependencyLabel = dataLine.getElement(CorpusElement.LABEL);
            // labels are only validated when the transition system declares
            // more than one label; empty labels are never rejected
            if (transitionSystem.getDependencyLabels().size() > 1) {
                // NOTE(review): dependencyLabel is dereferenced without a null
                // check, unlike nonProjectiveLabel below - confirm that the
                // LABEL element is always present
                if (dependencyLabel.length() > 0 && !transitionSystem.getDependencyLabels().contains(dependencyLabel)) {
                    throw new UnknownDependencyLabelException((this.getCurrentFile() == null ? "" : this.getCurrentFile().getPath()), dataLine.getLineNumber(), dependencyLabel);
                }
                String nonProjectiveLabel = dataLine.getElement(CorpusElement.NON_PROJ_LABEL);
                if (nonProjectiveLabel != null && nonProjectiveLabel.length() > 0 && !transitionSystem.getDependencyLabels().contains(nonProjectiveLabel)) {
                    throw new UnknownDependencyLabelException((this.getCurrentFile() == null ? "" : this.getCurrentFile().getPath()), dataLine.getLineNumber(), nonProjectiveLabel);
                }
            }
            DependencyArc arc = new DependencyArc(head, dependent, dependencyLabel);
            if (LOG.isTraceEnabled())
                LOG.trace(arc.toString());
            dependencies.add(arc);
            // the comment is attached after the arc has been inserted in the set
            if (dataLine.hasElement(CorpusElement.DEP_COMMENT))
                arc.setComment(dataLine.getElement(CorpusElement.DEP_COMMENT));
        }
        configuration = new ParseConfiguration(posTagSequence);
        if (this.predictTransitions) {
            // let the transition system work on the configuration with the
            // full set of arcs
            transitionSystem.predictTransitions(configuration, dependencies);
        } else {
            // otherwise add the arcs directly, passing null as the last argument
            for (DependencyArc arc : dependencies) {
                configuration.addDependency(arc.getHead(), arc.getDependent(), arc.getLabel(), null);
            }
        }
        // add the manual non-projective dependencies, if the corpus format
        // declares a non-projective governor placeholder
        if (this.getCorpusLineReader().hasPlaceholder(CorpusElement.NON_PROJ_GOVERNOR)) {
            Set<DependencyArc> nonProjDeps = new TreeSet<>();
            if (LOG.isTraceEnabled())
                LOG.trace("Non projective dependencies: ");
            for (CorpusLine dataLine : corpusLines) {
                // every corpus line yields a non-projective arc; lines without
                // a non-projective governor element are attached to id 0
                int headIndex = 0;
                if (dataLine.hasElement(CorpusElement.NON_PROJ_GOVERNOR))
                    headIndex = Integer.parseInt(dataLine.getElement(CorpusElement.NON_PROJ_GOVERNOR));
                PosTaggedToken head = idTokenMap.get(headIndex);
                PosTaggedToken dependent = idTokenMap.get(dataLine.getIndex());
                DependencyArc nonProjArc = new DependencyArc(head, dependent, dataLine.getElement(CorpusElement.NON_PROJ_LABEL));
                if (LOG.isTraceEnabled())
                    LOG.trace(nonProjArc.toString());
                nonProjDeps.add(nonProjArc);
                if (dataLine.hasElement(CorpusElement.DEP_COMMENT))
                    nonProjArc.setComment(dataLine.getElement(CorpusElement.DEP_COMMENT));
            }
            for (DependencyArc nonProjArc : nonProjDeps) {
                configuration.addManualNonProjectiveDependency(nonProjArc.getHead(), nonProjArc.getDependent(), nonProjArc.getLabel());
            }
        }
    } catch (TalismaneException e) {
        // drop the partially built sentence before propagating the failure.
        // NOTE(review): other exception types (e.g. a NumberFormatException
        // from Integer.parseInt) bypass this cleanup - confirm this is intended
        this.clearSentence();
        throw e;
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) TalismaneException(com.joliciel.talismane.TalismaneException) TreeSet(java.util.TreeSet) CorpusLine(com.joliciel.talismane.corpus.CorpusLine)

Example 9 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class ReduceTransition method checkPreconditions.

/**
 * Checks whether this transition may be applied to the given configuration:
 * the stack must be non-empty and the token on top of the stack must
 * already have a governor. The reason for a refusal is written to the
 * trace log when trace logging is enabled.
 *
 * @param configuration
 *            the configuration to inspect
 * @return {@code true} if both conditions hold, {@code false} otherwise
 */
@Override
public boolean checkPreconditions(ParseConfiguration configuration) {
    boolean applicable;
    if (configuration.getStack().isEmpty()) {
        applicable = false;
        if (LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this + ": stack is empty");
        }
    } else {
        // the top of the stack needs a governor already
        PosTaggedToken stackTop = configuration.getStack().peek();
        PosTaggedToken stackTopHead = configuration.getHead(stackTop);
        applicable = stackTopHead != null;
        if (!applicable && LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this + ": top of stack " + stackTop + " doesn't yet have a governor.");
        }
    }
    return applicable;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken)

Example 10 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class RightArcEagerTransition method checkPreconditions.

/**
 * Checks whether this transition may be applied to the given configuration:
 * neither the buffer nor the stack may be empty, and the first token of the
 * buffer must not have a governor yet. The reason for a refusal is written
 * to the trace log when trace logging is enabled.
 *
 * @param configuration
 *            the configuration to inspect
 * @return {@code true} if all conditions hold, {@code false} otherwise
 */
@Override
public boolean checkPreconditions(ParseConfiguration configuration) {
    boolean bothPopulated = !configuration.getBuffer().isEmpty() && !configuration.getStack().isEmpty();
    if (!bothPopulated) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this + ": buffer or stack is empty");
        }
        return false;
    }
    // the first buffer token may only be attached if it is still ungoverned
    PosTaggedToken bufferFront = configuration.getBuffer().peekFirst();
    PosTaggedToken existingGovernor = configuration.getHead(bufferFront);
    boolean ungoverned = existingGovernor == null;
    if (!ungoverned && LOG.isTraceEnabled()) {
        LOG.trace("Cannot apply " + this + ": top of buffer " + bufferFront + " already has governor " + existingGovernor);
    }
    return ungoverned;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken)

Aggregations

PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken) 77; ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration) 24; PosTaggedTokenWrapper (com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) 20; PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence) 14; Token (com.joliciel.talismane.tokeniser.Token) 11; DependencyArc (com.joliciel.talismane.parser.DependencyArc) 9; TalismaneException (com.joliciel.talismane.TalismaneException) 8; Decision (com.joliciel.talismane.machineLearning.Decision) 8; RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) 8; Sentence (com.joliciel.talismane.rawText.Sentence) 8; TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence) 8; HashMap (java.util.HashMap) 7; List (java.util.List) 7; TalismaneTest (com.joliciel.talismane.TalismaneTest) 6; PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext) 6; PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl) 6; Config (com.typesafe.config.Config) 6; ArrayList (java.util.ArrayList) 6; Test (org.junit.Test) 6; StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature) 5