Search in sources :

Example 11 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class CorpusModifier method onNextParseConfiguration.

/**
 * Runs every configured {@code ModifyCommand} against every dependency arc of
 * the given parse configuration.
 * <p>
 * A command applies to an arc when each of its five criteria (governor pos-tag,
 * governor text, dependent pos-tag, dependent text, label) is either
 * {@code WILDCARD} or equal to the corresponding value of the arc. Governor and
 * dependent texts are compared against the lower-cased original token text.
 * When a command applies, the arc is removed; if the command type is
 * {@code Replace}, an arc with the same head and dependent but the command's
 * new label is added in its place.
 *
 * @param parseConfiguration the configuration whose dependencies are modified in place
 * @throws CircularDependencyException declared by the signature; not thrown
 *         directly by this method body
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws CircularDependencyException {
    // Loop over a copy: the configuration's dependencies are changed inside the loop.
    List<DependencyArc> snapshot = new ArrayList<DependencyArc>(parseConfiguration.getDependencies());
    for (DependencyArc dependency : snapshot) {
        for (ModifyCommand modification : commands) {
            // Each criterion passes when it is the wildcard or matches the arc.
            boolean govPosTagOk = modification.govPosTag.equals(WILDCARD)
                    || modification.govPosTag.equals(dependency.getHead().getTag().getCode());
            boolean governorOk = modification.governor.equals(WILDCARD)
                    || modification.governor.equals(dependency.getHead().getToken().getOriginalText().toLowerCase());
            boolean depPosTagOk = modification.depPosTag.equals(WILDCARD)
                    || modification.depPosTag.equals(dependency.getDependent().getTag().getCode());
            boolean dependentOk = modification.dependent.equals(WILDCARD)
                    || modification.dependent.equals(dependency.getDependent().getToken().getOriginalText().toLowerCase());
            boolean labelOk = modification.label.equals(WILDCARD)
                    || modification.label.equals(dependency.getLabel());

            boolean allCriteriaMet = govPosTagOk && governorOk && depPosTagOk && dependentOk && labelOk;
            if (!allCriteriaMet) {
                continue;
            }

            parseConfiguration.removeDependency(dependency);
            if (modification.command == ModifyCommandType.Replace) {
                parseConfiguration.addDependency(dependency.getHead(), dependency.getDependent(), modification.newLabel, null);
            }
        }
    }
    parseConfiguration.clearMemory();
}
Also used : ArrayList(java.util.ArrayList) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 12 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class CorpusProjectifier method onNextParseConfiguration.

/**
 * Repeatedly picks a pair of arcs reported by {@code getNextPair} and
 * re-attaches the dependent of one of them to an ancestor of its current
 * head, until {@code getNextPair} returns no further pair.
 * <p>
 * Before any change is made, every arc returned by
 * {@code getNonProjectiveDependencies()} is registered through
 * {@code addManualNonProjectiveDependency} so that the original attachment
 * is kept.
 * <p>
 * For each pair, both arcs are examined: the ancestors of each arc's head
 * are walked upwards until one is found for which the span (ancestor,
 * dependent) is accepted by {@code isProjective} when tested against the
 * <em>other</em> arc of the pair. If both arcs have such an ancestor,
 * {@code strategy} decides which arc is moved; ties fall through from
 * {@code LeastLinearDistance} to {@code LeastDepthDifference} to
 * {@code GreatestDepth}. The moved arc receives
 * {@code nonProjectiveArcSuffix} on its label if the suffix is non-empty
 * and not already present.
 *
 * @param parseConfiguration the configuration to modify in place
 * @throws TalismaneException if neither arc of a pair has a suitable ancestor
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException {
    List<DependencyArc> arcs = new ArrayList<DependencyArc>(parseConfiguration.getNonProjectiveDependencies());
    NonProjectivePair pair = this.getNextPair(arcs);
    if (pair != null) {
        // set so that it stays untouched
        for (DependencyArc arc : arcs) {
            parseConfiguration.addManualNonProjectiveDependency(arc.getHead(), arc.getDependent(), arc.getLabel());
        }
    }
    while (pair != null) {
        // Candidate new head for arc1: closest ancestor of arc1's head whose
        // span to arc1's dependent is accepted by isProjective against arc2.
        PosTaggedToken newHead1 = null;
        PosTaggedToken parent1 = parseConfiguration.getHead(pair.arc1.getHead());
        int depIndex1 = pair.arc1.getDependent().getToken().getIndex();
        // number of levels climbed above arc1's current head
        int depthDelta1 = 1;
        while (parent1 != null) {
            int headIndex = parent1.getToken().getIndex();
            int startIndex = headIndex < depIndex1 ? headIndex : depIndex1;
            int endIndex = headIndex >= depIndex1 ? headIndex : depIndex1;
            if (isProjective(startIndex, endIndex, pair.arc2)) {
                newHead1 = parent1;
                break;
            }
            parent1 = parseConfiguration.getHead(parent1);
            depthDelta1++;
        }

        // Candidate new head for arc2: the same search, mirrored. The span
        // must be tested against arc1 (the other arc of the pair), not
        // against arc2 itself.
        PosTaggedToken newHead2 = null;
        PosTaggedToken parent2 = parseConfiguration.getHead(pair.arc2.getHead());
        int depIndex2 = pair.arc2.getDependent().getToken().getIndex();
        // number of levels climbed above arc2's current head
        int depthDelta2 = 1;
        while (parent2 != null) {
            int headIndex = parent2.getToken().getIndex();
            int startIndex = headIndex < depIndex2 ? headIndex : depIndex2;
            int endIndex = headIndex >= depIndex2 ? headIndex : depIndex2;
            if (isProjective(startIndex, endIndex, pair.arc1)) {
                newHead2 = parent2;
                break;
            }
            parent2 = parseConfiguration.getHead(parent2);
            depthDelta2++;
        }

        if (newHead1 != null && newHead2 != null) {
            // Both arcs could be moved: gather the measures used by the
            // strategies, then null out the candidate that loses.
            int linearDistance1 = Math.abs(newHead1.getIndex() - depIndex1);
            int linearDistance2 = Math.abs(newHead2.getIndex() - depIndex2);

            // number of ancestors above each candidate head
            int rootDepthDelta1 = 0;
            PosTaggedToken parent = parseConfiguration.getHead(newHead1);
            while (parent != null) {
                rootDepthDelta1++;
                parent = parseConfiguration.getHead(parent);
            }
            int rootDepthDelta2 = 0;
            parent = parseConfiguration.getHead(newHead2);
            while (parent != null) {
                rootDepthDelta2++;
                parent = parseConfiguration.getHead(parent);
            }

            switch (strategy) {
                case LeastLinearDistance:
                    if (linearDistance1 < linearDistance2) {
                        newHead2 = null;
                        break;
                    } else if (linearDistance2 < linearDistance1) {
                        newHead1 = null;
                        break;
                    }
                // break left out on purpose: a tie falls through to the next criterion
                case LeastDepthDifference:
                    if (depthDelta1 < depthDelta2) {
                        newHead2 = null;
                        break;
                    } else if (depthDelta2 < depthDelta1) {
                        newHead1 = null;
                        break;
                    }
                // break left out on purpose: a tie falls through to the next criterion
                case GreatestDepth:
                    if (rootDepthDelta1 < rootDepthDelta2) {
                        newHead1 = null;
                        break;
                    } else {
                        newHead2 = null;
                        break;
                    }
            }
        }

        if (newHead1 != null && newHead2 == null) {
            // Move arc1 to its new head, marking the label with the suffix.
            parseConfiguration.removeDependency(pair.arc1);
            String newLabel = pair.arc1.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix)) {
                newLabel += this.nonProjectiveArcSuffix;
            }
            parseConfiguration.addDependency(newHead1, pair.arc1.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc2.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc2.getHead(), pair.arc2.getDependent(), pair.arc2.getLabel(), null);
        } else if (newHead1 == null && newHead2 != null) {
            // Move arc2 to its new head, marking the label with the suffix.
            parseConfiguration.removeDependency(pair.arc2);
            String newLabel = pair.arc2.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix)) {
                newLabel += this.nonProjectiveArcSuffix;
            }
            parseConfiguration.addDependency(newHead2, pair.arc2.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc1.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc1.getHead(), pair.arc1.getDependent(), pair.arc1.getLabel(), null);
        } else {
            // Reached only when neither arc has a candidate head.
            throw new TalismaneException("Cannot deprojectify " + pair + ". Could not find projective parents.");
        }

        // Re-read the dependencies after the change and look for the next pair.
        parseConfiguration.clearMemory();
        arcs = new ArrayList<DependencyArc>(parseConfiguration.getDependencies());
        pair = this.getNextPair(arcs);
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 13 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class CorpusProjectifier method getNextPair.

/**
 * Scans the arcs in list order and returns the first pair (earlier arc,
 * later arc) for which {@code isProjective} rejects the later arc when
 * tested against the token span covered by the earlier arc.
 * <p>
 * Arcs whose head carries {@code PosTag.ROOT_POS_TAG} and whose label is
 * {@code null} or empty are ignored on both sides of the comparison.
 *
 * @param arcs the arcs to examine
 * @return the first such pair, or {@code null} if there is none
 */
private NonProjectivePair getNextPair(List<DependencyArc> arcs) {
    final int arcCount = arcs.size();
    for (int first = 0; first < arcCount; first++) {
        DependencyArc candidate = arcs.get(first);
        boolean candidateIsUnlabelledRoot = candidate.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (candidate.getLabel() == null || candidate.getLabel().length() == 0);
        if (candidateIsUnlabelledRoot) {
            continue;
        }

        // Span covered by the candidate arc, as ordered token indexes.
        int governorIndex = candidate.getHead().getToken().getIndex();
        int dependentIndex = candidate.getDependent().getToken().getIndex();
        int spanStart = Math.min(governorIndex, dependentIndex);
        int spanEnd = Math.max(governorIndex, dependentIndex);

        for (int second = first + 1; second < arcCount; second++) {
            DependencyArc rival = arcs.get(second);
            boolean rivalIsUnlabelledRoot = rival.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                    && (rival.getLabel() == null || rival.getLabel().length() == 0);
            if (rivalIsUnlabelledRoot) {
                continue;
            }
            if (!isProjective(spanStart, spanEnd, rival)) {
                return new NonProjectivePair(candidate, rival);
            }
        }
    }
    return null;
}
Also used : DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 14 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class StandoffReader method hasNextSentence.

/**
 * Reports whether a parse configuration is available, building the next one
 * from the stored standoff sentences if none is currently pending.
 * <p>
 * Nothing is built when a positive maximum sentence count has been reached,
 * when a configuration is already pending, or when all sentences have been
 * consumed. Otherwise the next sentence's tokens are joined into a text
 * (spacing decided by the session's linguistic rules), annotated, tokenised
 * and pos-tagged from the standoff data, and the dependencies are added from
 * {@code relationMap}. A punctuation token without a relation is attached,
 * when {@code punctuationDepLabel} is set, to the nearest preceding token
 * that is not punctuation.
 *
 * @return {@code true} if a configuration is pending after this call
 * @throws TalismaneException if a relation refers to an unknown head or dependent token id
 * @throws IOException declared by the signature; not thrown directly by this method body
 */
@Override
public boolean hasNextSentence() throws TalismaneException, IOException {
    boolean limitReached = this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount();
    if (!limitReached && configuration == null && sentenceIndex < sentences.size()) {
        List<StandoffToken> sentenceTokens = sentences.get(sentenceIndex++);

        LinguisticRules linguisticRules = TalismaneSession.get(sessionId).getLinguisticRules();
        if (linguisticRules == null) {
            throw new RuntimeException("Linguistic rules have not been set.");
        }

        // Rebuild the sentence text, letting the rules decide where spaces go.
        String sentenceText = "";
        for (StandoffToken current : sentenceTokens) {
            boolean needsSpace = linguisticRules.shouldAddSpace(sentenceText, current.text);
            sentenceText = needsSpace ? sentenceText + " " + current.text : sentenceText + current.text;
        }

        Sentence sentence = new Sentence(sentenceText, sessionId);
        for (SentenceAnnotator sentenceAnnotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
            sentenceAnnotator.annotate(sentence);
        }

        // Create one pos-tagged token per standoff token, indexed by standoff id.
        PretokenisedSequence pretokenised = new PretokenisedSequence(sentence, sessionId);
        PosTagSequence tagged = new PosTagSequence(pretokenised);
        Map<String, PosTaggedToken> tokensById = new HashMap<String, PosTaggedToken>();
        for (StandoffToken current : sentenceTokens) {
            Token rawToken = pretokenised.addToken(current.text);
            PosTaggedToken taggedToken = new PosTaggedToken(rawToken, new Decision(current.posTag.getCode()), sessionId);
            if (LOG.isTraceEnabled()) {
                LOG.trace(taggedToken.toString());
            }
            taggedToken.setComment(current.comment);
            tagged.addPosTaggedToken(taggedToken);
            tokensById.put(current.id, taggedToken);
            LOG.debug("Found token " + current.id + ", " + taggedToken);
        }
        pretokenised.setWithRoot(true);

        configuration = new ParseConfiguration(tagged);
        for (StandoffToken current : sentenceTokens) {
            StandoffRelation relation = relationMap.get(current.id);
            if (relation != null) {
                PosTaggedToken governor = tokensById.get(relation.fromToken);
                PosTaggedToken governed = tokensById.get(relation.toToken);
                if (governor == null) {
                    throw new TalismaneException("No token found for head id: " + relation.fromToken);
                }
                if (governed == null) {
                    throw new TalismaneException("No token found for dependent id: " + relation.toToken);
                }
                DependencyArc created = configuration.addDependency(governor, governed, relation.label, null);
                created.setComment(relation.comment);
            } else if (current.posTag.getOpenClassIndicator() == PosTagOpenClassIndicator.PUNCTUATION
                    && punctuationDepLabel != null) {
                // Walk backwards to the closest preceding non-punctuation token.
                PosTaggedToken punctuation = tokensById.get(current.id);
                int candidateIndex = punctuation.getIndex() - 1;
                while (candidateIndex >= 0) {
                    PosTaggedToken candidate = tagged.get(candidateIndex);
                    if (candidate.getTag().getOpenClassIndicator() != PosTagOpenClassIndicator.PUNCTUATION) {
                        configuration.addDependency(candidate, punctuation, punctuationDepLabel, null);
                        break;
                    }
                    candidateIndex--;
                }
            }
        }
    }
    return configuration != null;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TalismaneException(com.joliciel.talismane.TalismaneException) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) Decision(com.joliciel.talismane.machineLearning.Decision) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) PretokenisedSequence(com.joliciel.talismane.tokeniser.PretokenisedSequence) LinguisticRules(com.joliciel.talismane.LinguisticRules) SentenceAnnotator(com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc) Sentence(com.joliciel.talismane.rawText.Sentence)

Aggregations

DependencyArc (com.joliciel.talismane.parser.DependencyArc)14 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)9 ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration)7 TalismaneException (com.joliciel.talismane.TalismaneException)4 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)4 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 Token (com.joliciel.talismane.tokeniser.Token)3 HashSet (java.util.HashSet)3 Transition (com.joliciel.talismane.parser.Transition)2 LinguisticRules (com.joliciel.talismane.LinguisticRules)1 CorpusLine (com.joliciel.talismane.corpus.CorpusLine)1 CorpusElement (com.joliciel.talismane.corpus.CorpusLine.CorpusElement)1 Decision (com.joliciel.talismane.machineLearning.Decision)1 ParseTree (com.joliciel.talismane.parser.ParseTree)1 ParseTreeNode (com.joliciel.talismane.parser.ParseTreeNode)1 ParseConfigurationOutput (com.joliciel.talismane.parser.output.ParseConfigurationOutput)1 PosTaggedTokenWrapper (com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper)1 Sentence (com.joliciel.talismane.rawText.Sentence)1 SentenceAnnotator (com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator)1