Search in sources :

Example 71 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class LeftArcEagerTransition method checkPreconditions.

/**
 * Tells whether this transition may be applied to the given configuration.
 * <p>
 * The transition is refused when the buffer or the stack is empty, when the
 * top-of-stack carries the ROOT pos-tag, or when the configuration already
 * reports a head for the top-of-stack. Each refusal is reported at trace level.
 *
 * @param configuration the parse configuration to inspect
 * @return {@code true} when none of the refusal conditions holds
 */
@Override
public boolean checkPreconditions(ParseConfiguration configuration) {
    final boolean bufferAndStackPopulated = !configuration.getBuffer().isEmpty() && !configuration.getStack().isEmpty();
    if (!bufferAndStackPopulated) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this.toString() + ": buffer or stack is empty");
        }
        return false;
    }

    // the root can never become a dependent
    final PosTaggedToken stackTop = configuration.getStack().peek();
    if (stackTop.getTag().equals(PosTag.ROOT_POS_TAG)) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Cannot apply " + this.toString() + ": top-of-stack is ROOT");
        }
        return false;
    }

    // a token that is already governed cannot receive a second governor
    final PosTaggedToken existingHead = configuration.getHead(stackTop);
    final boolean applicable = existingHead == null;
    if (!applicable && LOG.isTraceEnabled()) {
        LOG.trace("Cannot apply " + this.toString() + ": top of stack " + stackTop + " already has governor " + existingHead);
    }
    return applicable;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken)

Example 72 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class LeftArcTransition method applyInternal.

/**
 * Applies the left-arc: the element popped from the stack becomes a dependent
 * of the first buffer element, using this transition's label.
 *
 * @param configuration the parse configuration to modify
 * @throws CircularDependencyException declared for the dependency being added
 */
@Override
protected void applyInternal(ParseConfiguration configuration) throws CircularDependencyException {
    // the buffer element is only read; the stack element is removed
    final PosTaggedToken governor = configuration.getBuffer().getFirst();
    final PosTaggedToken governed = configuration.getStack().pop();
    configuration.addDependency(governor, governed, label, this);
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken)

Example 73 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class ArcEagerTransitionSystem method predictTransitions.

/**
 * Derives, and applies to {@code configuration}, a sequence of transitions
 * intended to reproduce {@code targetDependencies}.
 * <p>
 * On every pass of the main loop the top of the stack (S0) and the first
 * buffer element (B0) are compared against the remaining target arcs and one
 * transition is chosen, in this order of preference:
 * <ol>
 * <li>{@code LeftArc[label]} if a target arc goes from B0 to S0;</li>
 * <li>{@code RightArc[label]} if a target arc goes from S0 to B0;</li>
 * <li>{@code Reduce} if S0 already has a head in the configuration and no
 * remaining target arc is headed by S0;</li>
 * <li>{@code ForceReduce} if S0 is the dependent of a label-less arc from ROOT
 * and no remaining target arc is headed by S0;</li>
 * <li>{@code Shift} otherwise.</li>
 * </ol>
 * The loop stops when the buffer is empty.
 * <p>
 * Note that {@code targetDependencies} is modified: every arc accounted for
 * by a chosen transition is removed from it.
 *
 * @param configuration the configuration the transitions are applied to
 * @param targetDependencies the arcs to reproduce; consumed arcs are removed
 * @throws UnknownDependencyLabelException if no transition exists for the
 *         label of a target arc
 * @throws NonPredictableParseTreeException if target arcs remain once the
 *         buffer is empty
 * @throws CircularDependencyException declared by this method; presumably
 *         raised while applying a transition - TODO confirm
 */
@Override
public void predictTransitions(ParseConfiguration configuration, Set<DependencyArc> targetDependencies) throws UnknownDependencyLabelException, NonPredictableParseTreeException, CircularDependencyException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("predictTransitions");
        LOG.debug(configuration.getSentence().getText().toString());
        LOG.debug(configuration.toString());
        LOG.debug(targetDependencies.toString());
    }
    // index, by dependent, the arcs headed by ROOT that carry no label
    Map<PosTaggedToken, DependencyArc> ungovernedTokens = new HashMap<PosTaggedToken, DependencyArc>();
    for (DependencyArc arc : targetDependencies) {
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (arc.getLabel() == null || arc.getLabel().length() == 0)) {
            ungovernedTokens.put(arc.getDependent(), arc);
        }
    }
    while (!configuration.getBuffer().isEmpty()) {
        PosTaggedToken stackHead = configuration.getStack().peek();
        PosTaggedToken bufferHead = configuration.getBuffer().peekFirst();
        if (LOG.isTraceEnabled()) {
            LOG.trace("S0: " + stackHead);
            LOG.trace("B0: " + bufferHead);
        }
        Transition transition = null;
        // the target arc consumed by the chosen transition, if any
        DependencyArc currentDep = null;
        // first choice: an arc directly linking S0 and B0, in either direction
        for (DependencyArc arc : targetDependencies) {
            if (arc.getHead().equals(bufferHead) && arc.getDependent().equals(stackHead)) {
                try {
                    transition = this.getTransitionForCode("LeftArc[" + arc.getLabel() + "]");
                } catch (UnknownDependencyLabelException udle) {
                    // rethrown with the dependent's index and the offending label
                    throw new UnknownDependencyLabelException(arc.getDependent().getIndex(), arc.getLabel());
                } catch (UnknownTransitionException e) {
                    // should never happen
                    LOG.error(e.getMessage(), e);
                    throw new RuntimeException(e);
                }
                currentDep = arc;
                break;
            }
            if (arc.getHead().equals(stackHead) && arc.getDependent().equals(bufferHead)) {
                try {
                    transition = this.getTransitionForCode("RightArc[" + arc.getLabel() + "]");
                } catch (UnknownDependencyLabelException udle) {
                    // rethrown with the dependent's index and the offending label
                    throw new UnknownDependencyLabelException(arc.getDependent().getIndex(), arc.getLabel());
                } catch (UnknownTransitionException e) {
                    // should never happen
                    LOG.error(e.getMessage(), e);
                    throw new RuntimeException(e);
                }
                currentDep = arc;
                break;
            }
        }
        // second choice: take S0 off the stack, but only once no remaining
        // target arc is headed by it
        if (transition == null) {
            boolean stackHeadHasGovernor = configuration.getHead(stackHead) != null;
            boolean stackHeadUngoverned = ungovernedTokens.containsKey(stackHead);
            boolean stackHeadHasDependents = false;
            // the scan is skipped when neither reduce variant could apply anyway
            if (stackHeadHasGovernor || stackHeadUngoverned) {
                for (DependencyArc arc : targetDependencies) {
                    if (arc.getHead().equals(stackHead)) {
                        stackHeadHasDependents = true;
                        break;
                    }
                }
            }
            if (!stackHeadHasDependents) {
                if (stackHeadHasGovernor) {
                    try {
                        transition = this.getTransitionForCode("Reduce");
                    } catch (UnknownTransitionException e) {
                        // should never happen
                        throw new RuntimeException(e);
                    }
                } else if (stackHeadUngoverned) {
                    // ungoverned punctuation only
                    try {
                        transition = this.getTransitionForCode("ForceReduce");
                    } catch (UnknownTransitionException e) {
                        // should never happen
                        throw new RuntimeException(e);
                    }
                    // the label-less ROOT arc counts as consumed by the forced reduce
                    currentDep = ungovernedTokens.get(stackHead);
                }
            }
        }
        // fallback: nothing else applies, so shift
        if (transition == null) {
            try {
                transition = this.getTransitionForCode("Shift");
            } catch (UnknownTransitionException e) {
                // should never happen
                throw new RuntimeException(e);
            }
        }
        // drop the consumed arc before applying, so it is not matched again
        if (currentDep != null)
            targetDependencies.remove(currentDep);
        try {
            transition.apply(configuration);
        } catch (InvalidTransitionException e) {
            // should never happen
            LOG.error("Should never happen", e);
            throw new RuntimeException(e);
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Transition: " + transition);
            LOG.trace("Configuration: " + configuration);
        }
    }
    // any arc still present could not be produced by the transitions above
    if (targetDependencies.size() > 0) {
        throw new NonPredictableParseTreeException("Wasn't able to predict: " + targetDependencies);
    }
    LOG.debug("Full prediction complete");
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap)

Example 74 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class CorpusProjectifier method onNextParseConfiguration.

/**
 * Rewrites the dependencies of the given configuration by repeatedly taking
 * the next non-projective pair of arcs and re-attaching one of the two arcs
 * to an ancestor of its current head.
 * <p>
 * Before any change, if at least one pair exists, every arc returned by
 * {@code getNonProjectiveDependencies()} is recorded through
 * {@code addManualNonProjectiveDependency} so that the original
 * non-projective version stays available.
 * <p>
 * For each pair, the ancestors of each arc's head are climbed until one is
 * found for which the span (new head, dependent) is accepted by
 * {@code isProjective} with respect to the <em>other</em> arc of the pair.
 * When both arcs have such a candidate, {@code strategy} selects which arc
 * is moved; the cases deliberately fall through so that ties are broken by
 * the next criterion. The moved arc has {@code nonProjectiveArcSuffix}
 * appended to its label unless the label already ends with it.
 *
 * @param parseConfiguration the configuration whose dependencies are rewritten in place
 * @throws TalismaneException if a pair does not end up with exactly one arc
 *         to move, e.g. when neither arc has a suitable ancestor
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException {
    List<DependencyArc> arcs = new ArrayList<DependencyArc>(parseConfiguration.getNonProjectiveDependencies());
    NonProjectivePair pair = this.getNextPair(arcs);
    if (pair != null) {
        // set so that it stays untouched
        for (DependencyArc arc : arcs) {
            parseConfiguration.addManualNonProjectiveDependency(arc.getHead(), arc.getDependent(), arc.getLabel());
        }
    }
    while (pair != null) {
        // candidate new head for arc1: nearest ancestor of its head whose span
        // is projective with respect to arc2
        PosTaggedToken newHead1 = null;
        PosTaggedToken parent1 = parseConfiguration.getHead(pair.arc1.getHead());
        int depIndex1 = pair.arc1.getDependent().getToken().getIndex();
        // number of levels climbed above arc1's current head
        int depthDelta1 = 1;
        while (parent1 != null) {
            int headIndex = parent1.getToken().getIndex();
            int startIndex = Math.min(headIndex, depIndex1);
            int endIndex = Math.max(headIndex, depIndex1);
            if (isProjective(startIndex, endIndex, pair.arc2)) {
                newHead1 = parent1;
                break;
            }
            parent1 = parseConfiguration.getHead(parent1);
            depthDelta1++;
        }
        // candidate new head for arc2: nearest ancestor of its head whose span
        // is projective with respect to arc1 (the other arc of the pair, not
        // arc2 itself - mirroring the search for arc1 above)
        PosTaggedToken newHead2 = null;
        PosTaggedToken parent2 = parseConfiguration.getHead(pair.arc2.getHead());
        int depIndex2 = pair.arc2.getDependent().getToken().getIndex();
        // number of levels climbed above arc2's current head
        int depthDelta2 = 1;
        while (parent2 != null) {
            int headIndex = parent2.getToken().getIndex();
            int startIndex = Math.min(headIndex, depIndex2);
            int endIndex = Math.max(headIndex, depIndex2);
            if (isProjective(startIndex, endIndex, pair.arc1)) {
                newHead2 = parent2;
                break;
            }
            parent2 = parseConfiguration.getHead(parent2);
            depthDelta2++;
        }
        if (newHead1 != null && newHead2 != null) {
            // both arcs could be moved: keep only one candidate, per strategy
            int linearDistance1 = Math.abs(newHead1.getIndex() - depIndex1);
            int linearDistance2 = Math.abs(newHead2.getIndex() - depIndex2);
            // number of ancestors above each candidate head
            int rootDepthDelta1 = 0;
            PosTaggedToken parent = parseConfiguration.getHead(newHead1);
            while (parent != null) {
                rootDepthDelta1++;
                parent = parseConfiguration.getHead(parent);
            }
            int rootDepthDelta2 = 0;
            parent = parseConfiguration.getHead(newHead2);
            while (parent != null) {
                rootDepthDelta2++;
                parent = parseConfiguration.getHead(parent);
            }
            switch(strategy) {
                case LeastLinearDistance:
                    if (linearDistance1 < linearDistance2) {
                        newHead2 = null;
                        break;
                    } else if (linearDistance2 < linearDistance1) {
                        newHead1 = null;
                        break;
                    }
                // break left out on purpose: a tie falls through to the next criterion
                case LeastDepthDifference:
                    if (depthDelta1 < depthDelta2) {
                        newHead2 = null;
                        break;
                    } else if (depthDelta2 < depthDelta1) {
                        newHead1 = null;
                        break;
                    }
                // break left out on purpose: a tie falls through to the next criterion
                case GreatestDepth:
                    // final criterion always decides: a tie moves arc1
                    if (rootDepthDelta1 < rootDepthDelta2) {
                        newHead1 = null;
                        break;
                    } else {
                        newHead2 = null;
                        break;
                    }
            }
        }
        if (newHead1 != null && newHead2 == null) {
            parseConfiguration.removeDependency(pair.arc1);
            String newLabel = pair.arc1.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix))
                newLabel += this.nonProjectiveArcSuffix;
            parseConfiguration.addDependency(newHead1, pair.arc1.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc2.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc2.getHead(), pair.arc2.getDependent(), pair.arc2.getLabel(), null);
        } else if (newHead1 == null && newHead2 != null) {
            parseConfiguration.removeDependency(pair.arc2);
            String newLabel = pair.arc2.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix))
                newLabel += this.nonProjectiveArcSuffix;
            parseConfiguration.addDependency(newHead2, pair.arc2.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc1.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc1.getHead(), pair.arc1.getDependent(), pair.arc1.getLabel(), null);
        } else {
            throw new TalismaneException("Cannot deprojectify " + pair + ". Could not find projective parents.");
        }
        // recompute the remaining pairs from the updated set of dependencies
        parseConfiguration.clearMemory();
        arcs = new ArrayList<DependencyArc>(parseConfiguration.getDependencies());
        pair = this.getNextPair(arcs);
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 75 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class StandoffReader method hasNextSentence.

/**
 * Reports whether a parse configuration is available, building the next one
 * on demand.
 * <p>
 * A new configuration is built only when the maximum sentence count (if
 * positive) has not been reached, no configuration is currently pending, and
 * unread sentences remain. Building consists of: reconstructing the sentence
 * text from the standoff tokens using the session's linguistic rules,
 * running the session's sentence annotators, pos-tagging each token from its
 * standoff annotation, then adding one dependency per standoff relation.
 * Punctuation without a relation is attached to the nearest preceding
 * non-punctuation token when a punctuation dependency label is set.
 *
 * @return {@code true} if a configuration is pending after this call
 * @throws TalismaneException if a relation refers to an unknown head or dependent id
 * @throws IOException declared by the signature
 */
@Override
public boolean hasNextSentence() throws TalismaneException, IOException {
    final boolean limitReached = this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount();
    if (!limitReached && configuration == null && sentenceIndex < sentences.size()) {
        List<StandoffToken> standoffTokens = sentences.get(sentenceIndex++);
        LinguisticRules linguisticRules = TalismaneSession.get(sessionId).getLinguisticRules();
        if (linguisticRules == null) {
            throw new RuntimeException("Linguistic rules have not been set.");
        }

        // rebuild the raw sentence text, letting the rules decide on spacing
        String sentenceText = "";
        for (StandoffToken current : standoffTokens) {
            String nextWord = current.text;
            if (linguisticRules.shouldAddSpace(sentenceText, nextWord)) {
                sentenceText += " ";
            }
            sentenceText += nextWord;
        }

        Sentence sentence = new Sentence(sentenceText, sessionId);
        for (SentenceAnnotator sentenceAnnotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
            sentenceAnnotator.annotate(sentence);
        }

        // tokenise and pos-tag straight from the standoff annotations
        PretokenisedSequence tokenSequence = new PretokenisedSequence(sentence, sessionId);
        PosTagSequence posTagSequence = new PosTagSequence(tokenSequence);
        Map<String, PosTaggedToken> tokensById = new HashMap<String, PosTaggedToken>();
        for (StandoffToken current : standoffTokens) {
            Token token = tokenSequence.addToken(current.text);
            Decision tagDecision = new Decision(current.posTag.getCode());
            PosTaggedToken tagged = new PosTaggedToken(token, tagDecision, sessionId);
            if (LOG.isTraceEnabled()) {
                LOG.trace(tagged.toString());
            }
            tagged.setComment(current.comment);
            posTagSequence.addPosTaggedToken(tagged);
            tokensById.put(current.id, tagged);
            LOG.debug("Found token " + current.id + ", " + tagged);
        }
        tokenSequence.setWithRoot(true);
        configuration = new ParseConfiguration(posTagSequence);

        // add the dependencies
        for (StandoffToken current : standoffTokens) {
            StandoffRelation relation = relationMap.get(current.id);
            if (relation == null) {
                // no explicit relation: only punctuation gets a default attachment
                boolean isPunctuation = current.posTag.getOpenClassIndicator() == PosTagOpenClassIndicator.PUNCTUATION;
                if (isPunctuation && punctuationDepLabel != null) {
                    PosTaggedToken punctuation = tokensById.get(current.id);
                    // walk leftwards to the first token that is not punctuation
                    int i = punctuation.getIndex() - 1;
                    while (i >= 0) {
                        PosTaggedToken candidate = posTagSequence.get(i);
                        if (candidate.getTag().getOpenClassIndicator() != PosTagOpenClassIndicator.PUNCTUATION) {
                            configuration.addDependency(candidate, punctuation, punctuationDepLabel, null);
                            break;
                        }
                        i--;
                    }
                }
                continue;
            }

            PosTaggedToken head = tokensById.get(relation.fromToken);
            PosTaggedToken dependent = tokensById.get(relation.toToken);
            if (head == null) {
                throw new TalismaneException("No token found for head id: " + relation.fromToken);
            }
            if (dependent == null) {
                throw new TalismaneException("No token found for dependent id: " + relation.toToken);
            }
            DependencyArc arc = configuration.addDependency(head, dependent, relation.label, null);
            arc.setComment(relation.comment);
        }
    }
    // otherwise: limit reached, a configuration is already pending, or nothing is left to read
    return configuration != null;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TalismaneException(com.joliciel.talismane.TalismaneException) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) Decision(com.joliciel.talismane.machineLearning.Decision) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) PretokenisedSequence(com.joliciel.talismane.tokeniser.PretokenisedSequence) LinguisticRules(com.joliciel.talismane.LinguisticRules) SentenceAnnotator(com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc) Sentence(com.joliciel.talismane.rawText.Sentence)

Aggregations

PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)77 ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration)24 PosTaggedTokenWrapper (com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper)20 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)14 Token (com.joliciel.talismane.tokeniser.Token)11 DependencyArc (com.joliciel.talismane.parser.DependencyArc)9 TalismaneException (com.joliciel.talismane.TalismaneException)8 Decision (com.joliciel.talismane.machineLearning.Decision)8 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)8 Sentence (com.joliciel.talismane.rawText.Sentence)8 TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence)8 HashMap (java.util.HashMap)7 List (java.util.List)7 TalismaneTest (com.joliciel.talismane.TalismaneTest)6 PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext)6 PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl)6 Config (com.typesafe.config.Config)6 ArrayList (java.util.ArrayList)6 Test (org.junit.Test)6 StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature)5