Search in sources:

Example 1 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class ParseOutputRewriter method getCorpusLines.

/**
 * Converts a parse configuration into a list of corpus lines and applies the
 * split actions of any matching rewrite rules.
 * <p>
 * The method works in three phases:
 * <ol>
 * <li>every non-root token of the pos-tag sequence becomes one {@code CorpusLine};</li>
 * <li>each line is tested against every rule in {@code rewriteRules}; when all of a
 * rule's conditions match and its action is a {@code SplitAction}, that action is
 * recorded for the line (a later matching rule replaces an earlier one, since the
 * actions are stored in a map keyed by line);</li>
 * <li>if at least one split action was recorded, the lines are re-indexed and each
 * split line is replaced by one new line per entry of the action's
 * {@code elementValues}.</li>
 * </ol>
 *
 * @param parseConfiguration the parse whose tokens and governing dependencies are converted
 * @return the corpus lines; the original list if no split action applied, otherwise
 *         a new, re-indexed list
 * @throws TalismaneException if a rewrite rule has a condition on a corpus element
 *         other than POSTAG, TOKEN, LEMMA or LABEL
 */
List<CorpusLine> getCorpusLines(ParseConfiguration parseConfiguration) throws TalismaneException {
    // first convert the parse configuration to a list of corpus lines
    List<CorpusLine> corpusLines = new ArrayList<>();
    for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) {
        if (!posTaggedToken.isRoot()) {
            DependencyArc arc = parseConfiguration.getGoverningDependency(posTaggedToken);
            // NOTE(review): the second argument presumably selects the non-projective arc
            // (the result feeds the "non-proj" setters below) - confirm against ParseConfiguration.
            DependencyArc nonProjArc = parseConfiguration.getGoverningDependency(posTaggedToken, false);
            // Tab-separated rendering of the token: index, text, lemma, pos-tag (twice), morphology,
            // governor index (0 when there is no arc) and label ("_" when there is no arc).
            String line = posTaggedToken.getIndex() + "\t" + posTaggedToken.getToken().getOriginalText() + "\t" + posTaggedToken.getLemmaForCoNLL() + "\t" + posTaggedToken.getTag().getCode() + "\t" + posTaggedToken.getTag().getCode() + "\t" + posTaggedToken.getMorphologyForCoNLL() + "\t" + (arc != null ? arc.getHead().getIndex() : 0) + "\t" + (arc != null ? arc.getLabel() : "_");
            CorpusLine corpusLine = new CorpusLine(line, posTaggedToken.getToken().getLineNumber());
            corpusLine.setIndex(posTaggedToken.getIndex());
            corpusLine.setToken(posTaggedToken.getToken().getOriginalText());
            corpusLine.setLemma(posTaggedToken.getLemmaForCoNLL());
            corpusLine.setPosTag(posTaggedToken.getTag().getCode());
            String morphology = posTaggedToken.getMorphologyForCoNLL();
            // an empty morphology is stored as the placeholder "_"
            corpusLine.setMorphology(morphology.length() == 0 ? "_" : morphology);
            // a token without a governing arc gets governor index 0 and label "_"
            corpusLine.setGovernorIndex(arc != null ? arc.getHead().getIndex() : 0);
            corpusLine.setLabel(arc != null ? arc.getLabel() : "_");
            corpusLine.setNonProjGovernorIndex(nonProjArc != null ? nonProjArc.getHead().getIndex() : 0);
            corpusLine.setNonProjLabel(nonProjArc != null ? nonProjArc.getLabel() : "_");
            if (posTaggedToken.getToken().getPrecedingRawOutput() != null)
                corpusLine.setElement(CorpusElement.PRECEDING_RAW_OUTPUT, posTaggedToken.getToken().getPrecedingRawOutput());
            if (posTaggedToken.getToken().getTrailingRawOutput() != null)
                corpusLine.setElement(CorpusElement.TRAILING_RAW_OUTPUT, posTaggedToken.getToken().getTrailingRawOutput());
            corpusLine.setTokenProbability(posTaggedToken.getToken().getProbability());
            corpusLine.setPosTagProbability(posTaggedToken.getProbability());
            if (arc != null)
                corpusLine.setParseProbability(arc.getProbability());
            corpusLines.add(corpusLine);
        }
    }
    // second, find for each corpus line the split action (if any) of the rules it matches
    Map<CorpusLine, SplitAction> splitActions = new HashMap<>();
    for (CorpusLine corpusLine : corpusLines) {
        if (LOG.isDebugEnabled())
            LOG.debug(corpusLine.toString());
        for (RewriteRule rewriteRule : rewriteRules) {
            // a rule matches only if every one of its conditions fully matches the line
            boolean matches = true;
            conditionLoop: for (CorpusElement corpusElement : rewriteRule.conditions.keySet()) {
                Pattern pattern = rewriteRule.conditions.get(corpusElement);
                if (LOG.isTraceEnabled())
                    LOG.trace("For " + corpusElement.name() + ", matching " + pattern.pattern());
                switch(corpusElement) {
                    case POSTAG:
                        if (!pattern.matcher(corpusLine.getPosTag()).matches()) {
                            if (LOG.isTraceEnabled())
                                LOG.trace("Match failed for " + corpusLine.getPosTag());
                            matches = false;
                            // first failing condition rejects the rule
                            break conditionLoop;
                        }
                        break;
                    case TOKEN:
                        if (!pattern.matcher(corpusLine.getToken()).matches()) {
                            matches = false;
                            break conditionLoop;
                        }
                        break;
                    case LEMMA:
                        if (!pattern.matcher(corpusLine.getLemma()).matches()) {
                            matches = false;
                            break conditionLoop;
                        }
                        break;
                    case LABEL:
                        if (!pattern.matcher(corpusLine.getLabel()).matches()) {
                            matches = false;
                            break conditionLoop;
                        }
                        break;
                    default:
                        // conditions on any other corpus element are not supported
                        throw new TalismaneException(ParseOutputRewriter.class.getSimpleName() + " cannot match on " + corpusElement.name());
                }
            }
            if (matches) {
                // only split actions are handled here; a later matching rule overwrites an earlier one
                if (rewriteRule.action instanceof SplitAction) {
                    SplitAction splitAction = (SplitAction) rewriteRule.action;
                    splitActions.put(corpusLine, splitAction);
                }
            }
        }
    }
    // third, if anything has to be split, re-index the lines and build the new list
    if (splitActions.size() > 0) {
        List<CorpusLine> newCorpusLines = new ArrayList<>();
        // maps an index before splitting to the index of the (first) corresponding line after splitting;
        // index 0 is mapped to itself
        Map<Integer, Integer> oldToNewIndexMap = new HashMap<>();
        oldToNewIndexMap.put(0, 0);
        int currentIndex = 1;
        for (int i = 0; i < corpusLines.size(); i++) {
            CorpusLine corpusLine = corpusLines.get(i);
            // NOTE(review): the key is the list position + 1, while the lookups below use
            // corpusLine.getIndex(); this assumes both agree - confirm.
            oldToNewIndexMap.put(i + 1, currentIndex);
            if (splitActions.containsKey(corpusLine)) {
                // a split line occupies one new index per element-value group
                SplitAction splitAction = splitActions.get(corpusLine);
                currentIndex += splitAction.elementValues.size();
            } else {
                currentIndex++;
            }
        }
        for (int i = 0; i < corpusLines.size(); i++) {
            CorpusLine corpusLine = corpusLines.get(i);
            // re-indexed copy of the original line; added to the result as-is when the line is not split,
            // and otherwise only passed to setElementValues
            CorpusLine newCorpusLine = corpusLine.cloneCorpusLine();
            // NOTE(review): these lookups unbox the map value and would fail on an index missing from the map
            newCorpusLine.setIndex(oldToNewIndexMap.get(corpusLine.getIndex()));
            newCorpusLine.setGovernorIndex(oldToNewIndexMap.get(corpusLine.getGovernorIndex()));
            newCorpusLine.setNonProjGovernorIndex(oldToNewIndexMap.get(corpusLine.getNonProjGovernorIndex()));
            if (splitActions.containsKey(corpusLine)) {
                SplitAction splitAction = splitActions.get(corpusLine);
                for (int j = 0; j < splitAction.elementValues.size(); j++) {
                    // the j-th line produced by the split takes the j-th consecutive new index
                    CorpusLine splitCorpusLine = new CorpusLine(corpusLine.getLine(), corpusLine.getLineNumber());
                    splitCorpusLine.setIndex(oldToNewIndexMap.get(corpusLine.getIndex()) + j);
                    Map<CorpusElement, String> elementValues = splitAction.elementValues.get(j);
                    this.setElementValues(elementValues, oldToNewIndexMap, newCorpusLine, splitCorpusLine);
                    // The first matching element in each group will be applied
                    // The default element marks the end of each group, and will be
                    // applied if no other match has applied.
                    List<ConditionalAction> conditionalActions = splitAction.conditionalValues.get(j);
                    boolean groupHasMatch = false;
                    for (ConditionalAction conditionalAction : conditionalActions) {
                        // the line the conditions are tested against, relative to the line being split
                        // NOTE(review): i + relativeIndex is not range-checked here - confirm it is
                        // validated elsewhere.
                        CorpusLine baseLine = corpusLines.get(i + conditionalAction.relativeIndex);
                        if (conditionalAction.isDefault) {
                            if (!groupHasMatch) {
                                Map<CorpusElement, String> conditionalElementValues = conditionalAction.elementValues;
                                this.setElementValues(conditionalElementValues, oldToNewIndexMap, newCorpusLine, splitCorpusLine);
                            }
                            // The default action marks the end of each matching group.
                            groupHasMatch = false;
                        } else {
                            // NOTE(review): groupHasMatch is not consulted in this branch, so every matching
                            // conditional action is applied, not only the first one of its group - confirm intent.
                            boolean match = true;
                            for (CorpusElement corpusElement : conditionalAction.conditions.keySet()) {
                                String origValue = baseLine.getElement(corpusElement);
                                Pattern pattern = conditionalAction.conditions.get(corpusElement);
                                if (!pattern.matcher(origValue).matches()) {
                                    match = false;
                                    break;
                                }
                            }
                            if (match) {
                                Map<CorpusElement, String> conditionalElementValues = conditionalAction.elementValues;
                                this.setElementValues(conditionalElementValues, oldToNewIndexMap, newCorpusLine, splitCorpusLine);
                                groupHasMatch = true;
                            }
                        // did this action match?
                        }
                    // default action?
                    }
                    // next conditional action
                    newCorpusLines.add(splitCorpusLine);
                }
            // next split
            } else {
                newCorpusLines.add(newCorpusLine);
            }
        // should line be split?
        }
        // next corpus line
        corpusLines = newCorpusLines;
    }
    return corpusLines;
}
Also used : CorpusElement(com.joliciel.talismane.corpus.CorpusLine.CorpusElement) Pattern(java.util.regex.Pattern) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap) TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) CorpusLine(com.joliciel.talismane.corpus.CorpusLine) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 2 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class TransitionLogWriter method onParseEnd.

/**
 * Logs the best guessed parse, optionally restricted to sentences containing an
 * error on one of the configured {@code errorLabels}.
 * <p>
 * When {@code errorLabels} is null or empty, the first guessed configuration is
 * always passed to {@code onNextParseConfiguration}. Otherwise the reference
 * and the first guessed configuration are compared token by token (skipping the
 * first token of the sequence); a token is an error when its reference arc
 * carries one of the error labels and the guessed arc is missing, has a
 * different label or has a different head. If at least one error is found, an
 * annotated rendering of the reference and guessed sequences is written to
 * {@code writer} before the configuration is passed on.
 *
 * @param refConfiguration the reference parse
 * @param guessedConfigurations the guessed parses; only the first one is used
 * @throws TalismaneException declared by the overridden method and propagated from callees
 * @throws IOException if writing to {@code writer} fails
 */
@Override
public void onParseEnd(ParseConfiguration refConfiguration, List<ParseConfiguration> guessedConfigurations) throws TalismaneException, IOException {
    boolean includeMe = true;
    if (errorLabels != null && errorLabels.size() > 0) {
        includeMe = false;
        int i = 0;
        ParseConfiguration guessConfiguration = guessedConfigurations.get(0);
        Set<PosTaggedToken> refTokensToExplain = new HashSet<PosTaggedToken>();
        Set<PosTaggedToken> guessTokensToExplain = new HashSet<PosTaggedToken>();
        Set<PosTaggedToken> refTokensToHighlight = new HashSet<PosTaggedToken>();
        Set<PosTaggedToken> guessTokensToHighlight = new HashSet<PosTaggedToken>();
        for (PosTaggedToken refToken : refConfiguration.getPosTagSequence()) {
            // position 0 is skipped
            if (i != 0) {
                DependencyArc refArc = refConfiguration.getGoverningDependency(refToken);
                if (refArc != null && errorLabels.contains(refArc.getLabel())) {
                    // the guessed token is taken at the same position as the reference token
                    PosTaggedToken guessToken = guessConfiguration.getPosTagSequence().get(i);
                    DependencyArc guessArc = guessConfiguration.getGoverningDependency(guessToken);
                    PosTaggedToken refHead = refArc.getHead();
                    PosTaggedToken guessHead = guessArc == null ? null : guessArc.getHead();
                    // Heads differ when exactly one is missing, or when both exist with different indexes.
                    // Two missing heads count as identical (previously this case threw a NullPointerException).
                    boolean headsDiffer = (refHead == null) != (guessHead == null) || (refHead != null && refHead.getIndex() != guessHead.getIndex());
                    if (guessArc == null || !refArc.getLabel().equals(guessArc.getLabel()) || headsDiffer) {
                        refTokensToExplain.add(refToken);
                        if (refHead != null)
                            refTokensToHighlight.add(refHead);
                        guessTokensToExplain.add(guessToken);
                        if (guessHead != null)
                            guessTokensToHighlight.add(guessHead);
                        includeMe = true;
                    }
                }
            }
            i++;
        }
        if (includeMe) {
            // only render the two annotated lines when they are actually written
            writer.write("\n");
            writer.write(this.describeSequence(refConfiguration, refTokensToExplain, refTokensToHighlight) + "\n");
            writer.write(this.describeSequence(guessConfiguration, guessTokensToExplain, guessTokensToHighlight) + "\n");
        }
    }
    if (includeMe)
        this.onNextParseConfiguration(guessedConfigurations.get(0));
}

/**
 * Renders a pos-tag sequence on one line as space-separated {@code text|tag|index} items
 * (spaces inside the token text are replaced by underscores). Tokens to explain
 * are wrapped in {@code #...#} and extended with {@code |Gov<head index>|<label>}
 * ({@code |Gov0|null} when the token has no governing arc); tokens to highlight
 * are wrapped in {@code #...#} only.
 */
private String describeSequence(ParseConfiguration configuration, Set<PosTaggedToken> tokensToExplain, Set<PosTaggedToken> tokensToHighlight) {
    StringBuilder builder = new StringBuilder();
    for (PosTaggedToken token : configuration.getPosTagSequence()) {
        String base = token.getToken().getOriginalText().replace(' ', '_') + "|" + token.getTag().getCode() + "|" + token.getIndex();
        if (tokensToExplain.contains(token)) {
            DependencyArc arc = configuration.getGoverningDependency(token);
            if (arc == null)
                builder.append("#" + base + "|Gov0|null# ");
            else
                builder.append("#" + base + "|Gov" + (arc.getHead() == null ? 0 : arc.getHead().getIndex()) + "|" + arc.getLabel() + "# ");
        } else if (tokensToHighlight.contains(token)) {
            builder.append("#" + base + "# ");
        } else {
            builder.append(base + " ");
        }
    }
    return builder.toString();
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) HashSet(java.util.HashSet)

Example 3 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class ParserFScoreCalculator method onParseEnd.

/**
 * Compares the reference parse with the best guessed parse and records one
 * (real, guessed) outcome per non-root token in {@code fscoreCalculator}.
 * <p>
 * Guessed tokens are aligned with reference tokens by the start index of the
 * underlying token; an empty token is only aligned with another empty token.
 * A reference token with no aligned guess is logged, counted as mismatched and
 * scored against a missing guessed arc.
 * <p>
 * Outcomes use the arc label when {@code labeledEvaluation} is set and
 * {@code "head"} otherwise, with the special values {@code "noHead"},
 * {@code "noLabel"}, {@code "wrongHead"} and {@code "wrongHeadWrongLabel"}.
 *
 * @param realConfiguration the reference parse
 * @param guessedConfigurations the guessed parses; only the first one is evaluated
 * @throws TalismaneException if more than half of the tokens of the sentence could not be aligned
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws TalismaneException {
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    ParseConfiguration bestGuess = guessedConfigurations.get(0);
    int mismatchedTokens = 0;
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken, projective);
            DependencyArc guessedArc = null;
            boolean foundToken = false;
            for (PosTaggedToken guessedToken : bestGuess.getPosTagSequence()) {
                if (guessedToken.getToken().getStartIndex() == posTaggedToken.getToken().getStartIndex()) {
                    // at the same start index, empty and non-empty tokens are never aligned with each other
                    if (guessedToken.getToken().isEmpty() && !posTaggedToken.getToken().isEmpty())
                        continue;
                    if (!guessedToken.getToken().isEmpty() && posTaggedToken.getToken().isEmpty())
                        continue;
                    foundToken = true;
                    guessedArc = bestGuess.getGoverningDependency(guessedToken, projective);
                    break;
                }
            }
            if (!foundToken) {
                LOG.info("Mismatched token :" + posTaggedToken.getToken().getOriginalText() + ", index " + posTaggedToken.getToken().getIndex());
                mismatchedTokens += 1;
            }
            String realLabel = realArc == null ? "noHead" : labeledEvaluation ? realArc.getLabel() : "head";
            String guessedLabel = guessedArc == null ? "noHead" : labeledEvaluation ? guessedArc.getLabel() : "head";
            if (realLabel == null || realLabel.length() == 0)
                realLabel = "noLabel";
            if (guessedLabel == null || guessedLabel.length() == 0)
                guessedLabel = "noLabel";
            // an unlabeled attachment to the root
            // should be considered a "no head" rather than "no label"
            if (realArc != null && realArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && realLabel.equals("noLabel"))
                realLabel = "noHead";
            if (guessedArc != null && guessedArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && guessedLabel.equals("noLabel"))
                guessedLabel = "noHead";
            if (realArc == null || guessedArc == null) {
                fscoreCalculator.increment(realLabel, guessedLabel);
            } else {
                boolean sameHead = realArc.getHead().getToken().getStartIndex() == guessedArc.getHead().getToken().getStartIndex();
                if (sameHead) {
                    fscoreCalculator.increment(realLabel, guessedLabel);
                } else if (guessedLabel.equals("noHead")) {
                    fscoreCalculator.increment(realLabel, "noHead");
                } else if (java.util.Objects.equals(realArc.getLabel(), guessedArc.getLabel())) {
                    // Raw arc labels are compared (even in unlabeled evaluation) and may be null,
                    // so the comparison must be null-safe.
                    fscoreCalculator.increment(realLabel, "wrongHead");
                } else {
                    fscoreCalculator.increment(realLabel, "wrongHeadWrongLabel");
                }
            }
        // have one of the arcs
        }
    // is root tag?
    }
    if ((double) mismatchedTokens / (double) posTagSequence.size() > 0.5) {
        // more than half of the tokens mismatched?
        throw new TalismaneException("Too many mismatched tokens in sentence: " + posTagSequence.getTokenSequence().getSentence().getText());
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) TalismaneException(com.joliciel.talismane.TalismaneException) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 4 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class ParserFScoreCalculatorByDistance method onParseEnd.

/**
 * Compares the reference parse with the best guessed parse and records one
 * (real, guessed) outcome per non-root token, in a separate f-score calculator
 * for each dependency distance (absolute difference between the token indexes of
 * the reference head and dependent). Calculators are created on demand and kept
 * in {@code fscoreByDistanceMap}.
 * <p>
 * When neither {@code hasTokeniser} nor {@code hasPosTagger} is set, the guessed
 * arc is looked up with the reference token itself and heads are compared with
 * {@code equals}; otherwise tokens and heads are aligned by token start index.
 * <p>
 * Tokens without a reference governing arc have no dependency distance and are
 * skipped, as are tokens whose reference label equals {@code skipLabel}.
 *
 * @param realConfiguration the reference parse
 * @param guessedConfigurations the guessed parses; only the first one is evaluated
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) {
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    ParseConfiguration bestGuess = guessedConfigurations.get(0);
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG))
            continue;
        DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken);
        // Without a reference arc there is no distance to file this token under
        // (previously this case threw a NullPointerException).
        if (realArc == null)
            continue;
        int depDistance = Math.abs(realArc.getHead().getToken().getIndex() - realArc.getDependent().getToken().getIndex());
        FScoreCalculator<String> fscoreCalculator = fscoreByDistanceMap.get(depDistance);
        if (fscoreCalculator == null) {
            fscoreCalculator = new FScoreCalculator<String>(depDistance);
            fscoreByDistanceMap.put(depDistance, fscoreCalculator);
        }
        DependencyArc guessedArc = null;
        if (!hasTokeniser && !hasPosTagger) {
            guessedArc = bestGuess.getGoverningDependency(posTaggedToken);
        } else {
            // tokens may differ between the two parses: align on the token start index
            for (PosTaggedToken guessedToken : bestGuess.getPosTagSequence()) {
                if (guessedToken.getToken().getStartIndex() == posTaggedToken.getToken().getStartIndex()) {
                    guessedArc = bestGuess.getGoverningDependency(guessedToken);
                    break;
                }
            }
        }
        String realLabel = labeledEvaluation ? realArc.getLabel() : "head";
        String guessedLabel = guessedArc == null ? "noHead" : labeledEvaluation ? guessedArc.getLabel() : "head";
        if (realLabel == null || realLabel.length() == 0)
            realLabel = "noLabel";
        if (guessedLabel == null || guessedLabel.length() == 0)
            guessedLabel = "noLabel";
        // an unlabeled attachment to the root
        // should be considered a "no head" rather than "no label"
        if (realArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && realLabel.equals("noLabel"))
            realLabel = "noHead";
        if (guessedArc != null && guessedArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && guessedLabel.equals("noLabel"))
            guessedLabel = "noHead";
        // Skip only this token: returning here would silently drop every
        // remaining token of the sentence from the evaluation.
        if (realLabel.equals(skipLabel))
            continue;
        if (guessedArc == null) {
            fscoreCalculator.increment(realLabel, guessedLabel);
        } else {
            boolean sameHead = false;
            if (hasTokeniser || hasPosTagger)
                sameHead = realArc.getHead().getToken().getStartIndex() == guessedArc.getHead().getToken().getStartIndex();
            else
                sameHead = realArc.getHead().equals(guessedArc.getHead());
            if (sameHead) {
                fscoreCalculator.increment(realLabel, guessedLabel);
            } else if (guessedLabel.equals("noHead")) {
                fscoreCalculator.increment(realLabel, "noHead");
            } else if (java.util.Objects.equals(realArc.getLabel(), guessedArc.getLabel())) {
                // Raw arc labels are compared (even in unlabeled evaluation) and may be null,
                // so the comparison must be null-safe.
                fscoreCalculator.increment(realLabel, "wrongHead");
            } else {
                fscoreCalculator.increment(realLabel, "wrongHeadWrongLabel");
            }
        }
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 5 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class CorpusStatisticsWriter method onNextParseConfiguration.

/**
 * Accumulates corpus statistics for one parsed sentence into {@code stats}.
 * <p>
 * Three groups of figures are updated:
 * <ul>
 * <li>token figures for every non-root token: word sets, alphanumeric and
 * unknown-word counts (the latter only when {@code referenceStats} is
 * available) and pos-tag counts;</li>
 * <li>dependency figures for every arc returned by
 * {@code getNonProjectiveDependencies()}: label counts, and, for arcs that are
 * neither unattached nor labelled {@code "ponct"}, syntax depth and
 * head-dependent distance;</li>
 * <li>the number of crossing arc pairs found in the sentence.</li>
 * </ul>
 *
 * @param parseConfiguration the parsed sentence to add to the statistics
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) {
    stats.sentenceCount++;
    stats.sentenceLengthStats.addValue(parseConfiguration.getPosTagSequence().size());
    for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG))
            continue;
        Token token = posTaggedToken.getToken();
        String word = token.getOriginalText();
        stats.words.add(word);
        if (referenceStats != null) {
            if (!referenceStats.words.contains(word))
                stats.unknownTokenCount++;
        }
        if (alphanumeric.matcher(word).find()) {
            String lowercase = word.toLowerCase(TalismaneSession.get(sessionId).getLocale());
            stats.lowerCaseWords.add(lowercase);
            stats.alphanumericCount++;
            if (referenceStats != null) {
                if (!referenceStats.lowerCaseWords.contains(lowercase))
                    stats.unknownAlphanumericCount++;
            }
        }
        stats.tokenCount++;
        String posTagCode = posTaggedToken.getTag().getCode();
        Integer countObj = stats.posTagCounts.get(posTagCode);
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        stats.posTagCounts.put(posTagCode, count);
    }
    int maxDepth = 0;
    DescriptiveStatistics avgSyntaxDepthForSentenceStats = new DescriptiveStatistics();
    for (DependencyArc arc : parseConfiguration.getNonProjectiveDependencies()) {
        String label = arc.getLabel();
        Integer countObj = stats.depLabelCounts.get(label);
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        stats.depLabelCounts.put(label, count);
        stats.totalDepCount++;
        // unattached stuff (e.g. punctuation): unlabeled arc governed by the root
        boolean unattached = arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (label == null || label.length() == 0);
        // The label may be null, so compare from the constant side: a null-labelled arc
        // whose head is not the root previously threw a NullPointerException here.
        boolean punctuation = "ponct".equals(label);
        if (!unattached && !punctuation) {
            // depth = number of arcs climbed from this arc's head up to the root
            int depth = 0;
            DependencyArc theArc = arc;
            while (theArc != null && !theArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)) {
                theArc = parseConfiguration.getGoverningDependency(theArc.getHead());
                depth++;
            }
            if (depth > maxDepth)
                maxDepth = depth;
            stats.syntaxDepthStats.addValue(depth);
            avgSyntaxDepthForSentenceStats.addValue(depth);
            int distance = Math.abs(arc.getHead().getToken().getIndex() - arc.getDependent().getToken().getIndex());
            stats.syntaxDistanceStats.addValue(distance);
        }
    }
    stats.maxSyntaxDepthStats.addValue(maxDepth);
    if (avgSyntaxDepthForSentenceStats.getN() > 0)
        stats.avgSyntaxDepthStats.addValue(avgSyntaxDepthForSentenceStats.getMean());
    if (maxDepth > stats.maxDepthCorpus)
        stats.maxDepthCorpus = maxDepth;
    // we cheat a little bit by only allowing each arc to count once
    // there could be a situation where there are two independent
    // non-projective arcs
    // crossing the same mother arc, but we prefer here to underestimate,
    // as this phenomenon is quite rare.
    Set<DependencyArc> nonProjectiveArcs = new HashSet<DependencyArc>();
    int i = 0;
    for (DependencyArc arc : parseConfiguration.getNonProjectiveDependencies()) {
        i++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (arc.getLabel() == null || arc.getLabel().length() == 0))
            continue;
        if (nonProjectiveArcs.contains(arc))
            continue;
        int headIndex = arc.getHead().getToken().getIndex();
        int depIndex = arc.getDependent().getToken().getIndex();
        int startIndex = Math.min(headIndex, depIndex);
        int endIndex = Math.max(headIndex, depIndex);
        int j = 0;
        for (DependencyArc otherArc : parseConfiguration.getNonProjectiveDependencies()) {
            j++;
            // only compare with arcs that come later in the iteration, so each pair is tested once
            if (j <= i)
                continue;
            if (otherArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (otherArc.getLabel() == null || otherArc.getLabel().length() == 0))
                continue;
            if (nonProjectiveArcs.contains(otherArc))
                continue;
            int headIndex2 = otherArc.getHead().getToken().getIndex();
            int depIndex2 = otherArc.getDependent().getToken().getIndex();
            int startIndex2 = Math.min(headIndex2, depIndex2);
            int endIndex2 = Math.max(headIndex2, depIndex2);
            // the two arcs cross when exactly one end of the other arc lies strictly inside this arc's span
            boolean nonProjective = false;
            if (startIndex2 < startIndex && endIndex2 > startIndex && endIndex2 < endIndex) {
                nonProjective = true;
            } else if (startIndex2 > startIndex && startIndex2 < endIndex && endIndex2 > endIndex) {
                nonProjective = true;
            }
            if (nonProjective) {
                nonProjectiveArcs.add(arc);
                nonProjectiveArcs.add(otherArc);
                stats.nonProjectiveCount++;
                LOG.debug("Non-projective arcs in sentence: " + parseConfiguration.getSentence().getText());
                LOG.debug(arc.toString());
                LOG.debug(otherArc.toString());
                break;
            }
        }
    }
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) DependencyArc(com.joliciel.talismane.parser.DependencyArc) Token(com.joliciel.talismane.tokeniser.Token) HashSet(java.util.HashSet)

Aggregations

DependencyArc (com.joliciel.talismane.parser.DependencyArc)14 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)9 ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration)7 TalismaneException (com.joliciel.talismane.TalismaneException)4 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)4 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 Token (com.joliciel.talismane.tokeniser.Token)3 HashSet (java.util.HashSet)3 Transition (com.joliciel.talismane.parser.Transition)2 LinguisticRules (com.joliciel.talismane.LinguisticRules)1 CorpusLine (com.joliciel.talismane.corpus.CorpusLine)1 CorpusElement (com.joliciel.talismane.corpus.CorpusLine.CorpusElement)1 Decision (com.joliciel.talismane.machineLearning.Decision)1 ParseTree (com.joliciel.talismane.parser.ParseTree)1 ParseTreeNode (com.joliciel.talismane.parser.ParseTreeNode)1 ParseConfigurationOutput (com.joliciel.talismane.parser.output.ParseConfigurationOutput)1 PosTaggedTokenWrapper (com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper)1 Sentence (com.joliciel.talismane.rawText.Sentence)1 SentenceAnnotator (com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator)1