Search in sources :

Example 21 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class TransitionLogWriter method getTopOfStack.

/**
 * Renders up to five entries of the configuration's stack as a single line of
 * {@code text|posTagCode } items.
 * <p>
 * Entries are read in the stack's iteration order and each one is placed in
 * front of the text built so far, so the first entry iterated ends up
 * rightmost. If the stack still has entries after five have been rendered,
 * the result is prefixed with {@code "... "}.
 *
 * @param configuration the parse configuration whose stack is rendered
 * @return the rendered stack entries; empty if the stack has no entries
 */
private String getTopOfStack(ParseConfiguration configuration) {
    final int maxShown = 5;
    final StringBuilder rendered = new StringBuilder();
    final Iterator<PosTaggedToken> entries = configuration.getStack().iterator();
    for (int shown = 0; entries.hasNext(); shown++) {
        if (shown == maxShown) {
            // More entries remain than we display: mark the truncation on the left.
            rendered.insert(0, "... ");
            break;
        }
        final PosTaggedToken stackEntry = entries.next();
        // Spaces inside a token are replaced so each item stays a single "word".
        final String word = stackEntry.getToken().getOriginalText().replace(' ', '_');
        final String item = word + "|" + stackEntry.getTag().getCode() + " ";
        rendered.insert(0, item);
    }
    return rendered.toString();
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken)

Example 22 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class TransitionLogWriter method getTopOfBuffer.

/**
 * Renders up to five entries of the configuration's buffer as a single line of
 * {@code  text|posTagCode} items (each item preceded by a space).
 * <p>
 * Entries appear in the buffer's iteration order. If the buffer still has
 * entries after five have been rendered, {@code " ..."} is appended.
 *
 * @param configuration the parse configuration whose buffer is rendered
 * @return the rendered buffer entries; empty if the buffer has no entries
 */
private String getTopOfBuffer(ParseConfiguration configuration) {
    final int maxShown = 5;
    final StringBuilder rendered = new StringBuilder();
    final Iterator<PosTaggedToken> entries = configuration.getBuffer().iterator();
    for (int shown = 0; entries.hasNext(); shown++) {
        if (shown == maxShown) {
            // More entries remain than we display: mark the truncation on the right.
            rendered.append(" ...");
            break;
        }
        final PosTaggedToken bufferEntry = entries.next();
        // Spaces inside a token are replaced so each item stays a single "word".
        final String word = bufferEntry.getToken().getOriginalText().replace(' ', '_');
        final String item = " " + word + "|" + bufferEntry.getTag().getCode();
        rendered.append(item);
    }
    return rendered.toString();
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken)

Example 23 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class ParserFScoreCalculator method onParseEnd.

/**
 * Compares the real (reference) parse of a sentence with the first guessed
 * parse and records one outcome per non-root token in {@code fscoreCalculator}.
 * <p>
 * For each reference token, the guessed token is the one with the same start
 * index and the same "empty" status. A reference token with no such guessed
 * token is logged and counted as mismatched; its guessed arc is then treated
 * as missing.
 *
 * @param realConfiguration     the reference configuration for the sentence
 * @param guessedConfigurations the guessed configurations; only the first one
 *                              is evaluated
 * @throws TalismaneException if more than half of the tokens in the reference
 *                            sequence could not be matched to a guessed token
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws TalismaneException {
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    ParseConfiguration bestGuess = guessedConfigurations.get(0);
    int mismatchedTokens = 0;
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        // The artificial root token has no governor to evaluate.
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            continue;
        }
        DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken, projective);
        DependencyArc guessedArc = null;
        boolean foundToken = false;
        for (PosTaggedToken guessedToken : bestGuess.getPosTagSequence()) {
            if (guessedToken.getToken().getStartIndex() == posTaggedToken.getToken().getStartIndex()) {
                // An empty token and a non-empty token can share a start index:
                // only pair tokens whose "empty" status agrees.
                if (guessedToken.getToken().isEmpty() != posTaggedToken.getToken().isEmpty()) {
                    continue;
                }
                foundToken = true;
                guessedArc = bestGuess.getGoverningDependency(guessedToken, projective);
                break;
            }
        }
        if (!foundToken) {
            LOG.info("Mismatched token :" + posTaggedToken.getToken().getOriginalText() + ", index " + posTaggedToken.getToken().getIndex());
            mismatchedTokens += 1;
        }
        String realLabel = realArc == null ? "noHead" : labeledEvaluation ? realArc.getLabel() : "head";
        String guessedLabel = guessedArc == null ? "noHead" : labeledEvaluation ? guessedArc.getLabel() : "head";
        if (realLabel == null || realLabel.length() == 0) {
            realLabel = "noLabel";
        }
        if (guessedLabel == null || guessedLabel.length() == 0) {
            guessedLabel = "noLabel";
        }
        // An unlabeled arc governed by the root should be considered a
        // "no head" rather than a "no label".
        if (realArc != null && realArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && realLabel.equals("noLabel")) {
            realLabel = "noHead";
        }
        if (guessedArc != null && guessedArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && guessedLabel.equals("noLabel")) {
            guessedLabel = "noHead";
        }
        if (realArc == null || guessedArc == null) {
            // At most one of the arcs exists: the labels computed above already say so.
            fscoreCalculator.increment(realLabel, guessedLabel);
        } else {
            boolean sameHead = realArc.getHead().getToken().getStartIndex() == guessedArc.getHead().getToken().getStartIndex();
            if (sameHead) {
                fscoreCalculator.increment(realLabel, guessedLabel);
            } else if (guessedLabel.equals("noHead")) {
                fscoreCalculator.increment(realLabel, "noHead");
            } else {
                // Arc labels may be null (see the null checks above), so compare
                // them null-safely instead of dereferencing realArc.getLabel().
                String realArcLabel = realArc.getLabel();
                String guessedArcLabel = guessedArc.getLabel();
                boolean sameArcLabel = realArcLabel == null ? guessedArcLabel == null : realArcLabel.equals(guessedArcLabel);
                if (sameArcLabel) {
                    fscoreCalculator.increment(realLabel, "wrongHead");
                } else {
                    fscoreCalculator.increment(realLabel, "wrongHeadWrongLabel");
                }
            }
        }
    }
    // More than half of the tokens mismatched?
    if ((double) mismatchedTokens / (double) posTagSequence.size() > 0.5) {
        throw new TalismaneException("Too many mismatched tokens in sentence: " + posTagSequence.getTokenSequence().getSentence().getText());
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) TalismaneException(com.joliciel.talismane.TalismaneException) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 24 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class ParserFScoreCalculatorByDistance method onParseEnd.

/**
 * Compares the real (reference) parse of a sentence with the first guessed
 * parse and records one outcome per non-root token, in the f-score calculator
 * associated with the absolute index distance between the token and its real
 * head. Calculators are created on demand and stored in
 * {@code fscoreByDistanceMap}.
 * <p>
 * Tokens whose real label equals {@code skipLabel} are not scored.
 *
 * @param realConfiguration     the reference configuration for the sentence
 * @param guessedConfigurations the guessed configurations; only the first one
 *                              is evaluated
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) {
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    ParseConfiguration bestGuess = guessedConfigurations.get(0);
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        // The artificial root token has no governor to evaluate.
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            continue;
        }
        DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken);
        // NOTE(review): realArc is dereferenced here although it is null-checked
        // further down - confirm the reference configuration always provides a
        // governing arc for every non-root token.
        int depDistance = realArc.getHead().getToken().getIndex() - realArc.getDependent().getToken().getIndex();
        if (depDistance < 0) {
            depDistance = 0 - depDistance;
        }
        FScoreCalculator<String> fscoreCalculator = fscoreByDistanceMap.get(depDistance);
        if (fscoreCalculator == null) {
            fscoreCalculator = new FScoreCalculator<String>(depDistance);
            fscoreByDistanceMap.put(depDistance, fscoreCalculator);
        }
        DependencyArc guessedArc = null;
        if (!hasTokeniser && !hasPosTagger) {
            // Neither flag is set: the reference token itself is looked up in the guess.
            guessedArc = bestGuess.getGoverningDependency(posTaggedToken);
        } else {
            // Otherwise pair reference and guessed tokens by start index.
            for (PosTaggedToken guessedToken : bestGuess.getPosTagSequence()) {
                if (guessedToken.getToken().getStartIndex() == posTaggedToken.getToken().getStartIndex()) {
                    guessedArc = bestGuess.getGoverningDependency(guessedToken);
                    break;
                }
            }
        }
        String realLabel = realArc == null ? "noHead" : labeledEvaluation ? realArc.getLabel() : "head";
        String guessedLabel = guessedArc == null ? "noHead" : labeledEvaluation ? guessedArc.getLabel() : "head";
        if (realLabel == null || realLabel.length() == 0) {
            realLabel = "noLabel";
        }
        if (guessedLabel == null || guessedLabel.length() == 0) {
            guessedLabel = "noLabel";
        }
        // An unlabeled arc governed by the root should be considered a
        // "no head" rather than a "no label".
        if (realArc != null && realArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && realLabel.equals("noLabel")) {
            realLabel = "noHead";
        }
        if (guessedArc != null && guessedArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && guessedLabel.equals("noLabel")) {
            guessedLabel = "noHead";
        }
        // Skip only this token: returning here would silently drop every
        // remaining token of the sentence from the evaluation.
        if (realLabel.equals(skipLabel)) {
            continue;
        }
        if (realArc == null || guessedArc == null) {
            fscoreCalculator.increment(realLabel, guessedLabel);
        } else {
            boolean sameHead;
            if (hasTokeniser || hasPosTagger) {
                sameHead = realArc.getHead().getToken().getStartIndex() == guessedArc.getHead().getToken().getStartIndex();
            } else {
                sameHead = realArc.getHead().equals(guessedArc.getHead());
            }
            if (sameHead) {
                fscoreCalculator.increment(realLabel, guessedLabel);
            } else if (guessedLabel.equals("noHead")) {
                fscoreCalculator.increment(realLabel, "noHead");
            } else {
                // Arc labels may be null (see the null checks above), so compare
                // them null-safely instead of dereferencing realArc.getLabel().
                String realArcLabel = realArc.getLabel();
                String guessedArcLabel = guessedArc.getLabel();
                boolean sameArcLabel = realArcLabel == null ? guessedArcLabel == null : realArcLabel.equals(guessedArcLabel);
                if (sameArcLabel) {
                    fscoreCalculator.increment(realLabel, "wrongHead");
                } else {
                    fscoreCalculator.increment(realLabel, "wrongHeadWrongLabel");
                }
            }
        }
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 25 with PosTaggedToken

use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.

the class PosTaggerEvaluator method evaluate.

/**
 * Evaluate a given pos tagger.
 * <p>
 * For every sentence supplied by the corpus reader, the sentence is
 * re-tokenised if a tokeniser is set (otherwise the reference token sequence
 * is used), then tagged, and every observer is notified with the reference
 * sequence and the list of guessed sequences. Once the corpus is exhausted,
 * every observer is told that the evaluation is complete.
 * <p>
 * When debug logging is enabled, the best guessed sequence is logged with the
 * lemma and morphology of each token's lexical entries.
 *
 * @throws TalismaneException
 * @throws IOException
 */
public void evaluate() throws TalismaneException, IOException {
    while (corpusReader.hasNextSentence()) {
        PosTagSequence realPosTagSequence = corpusReader.nextPosTagSequence();
        List<TokenSequence> tokenSequences = null;
        List<PosTagSequence> guessedSequences = null;
        TokenSequence tokenSequence = realPosTagSequence.getTokenSequence();
        PosTagSequence guessedSequence = null;
        if (this.tokeniser != null) {
            Sentence sentence = tokenSequence.getSentence();
            tokenSequences = tokeniser.tokenise(sentence);
            tokenSequence = tokenSequences.get(0);
        } else {
            tokenSequences = new ArrayList<TokenSequence>();
            tokenSequences.add(tokenSequence);
        }
        if (posTagger instanceof NonDeterministicPosTagger) {
            NonDeterministicPosTagger nonDeterministicPosTagger = (NonDeterministicPosTagger) posTagger;
            guessedSequences = nonDeterministicPosTagger.tagSentence(tokenSequences);
            guessedSequence = guessedSequences.get(0);
        } else {
            guessedSequence = posTagger.tagSentence(tokenSequence);
            // Observers receive the list of guesses: wrap the single guess so
            // they are never handed a null list.
            guessedSequences = new ArrayList<PosTagSequence>();
            guessedSequences.add(guessedSequence);
        }
        if (LOG.isDebugEnabled()) {
            StringBuilder stringBuilder = new StringBuilder();
            for (PosTaggedToken posTaggedToken : guessedSequence) {
                // Lemmas already written for this token, to avoid repeating them.
                Set<String> lemmas = new TreeSet<String>();
                stringBuilder.append(posTaggedToken.getToken().getOriginalText());
                stringBuilder.append("[" + posTaggedToken.getTag());
                List<LexicalEntry> entries = posTaggedToken.getLexicalEntries();
                // With several entries, the first not-yet-written entry whose
                // lemma equals the token text is left out of the log line.
                boolean dropCurrentWord = entries.size() > 1;
                for (LexicalEntry entry : entries) {
                    if (!lemmas.contains(entry.getLemma())) {
                        if (dropCurrentWord && posTaggedToken.getToken().getText().equals(entry.getLemma())) {
                            dropCurrentWord = false;
                            continue;
                        }
                        stringBuilder.append("|" + entry.getLemma());
                        stringBuilder.append("/" + entry.getMorphology());
                        lemmas.add(entry.getLemma());
                    }
                }
                stringBuilder.append("] ");
            }
            LOG.debug(stringBuilder.toString());
        }
        for (PosTagEvaluationObserver observer : this.observers) {
            observer.onNextPosTagSequence(realPosTagSequence, guessedSequences);
        }
    }
    for (PosTagEvaluationObserver observer : this.observers) {
        observer.onEvaluationComplete();
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) NonDeterministicPosTagger(com.joliciel.talismane.posTagger.NonDeterministicPosTagger) TreeSet(java.util.TreeSet) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) LexicalEntry(com.joliciel.talismane.lexicon.LexicalEntry) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) Sentence(com.joliciel.talismane.rawText.Sentence)

Aggregations

PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)77 ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration)24 PosTaggedTokenWrapper (com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper)20 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)14 Token (com.joliciel.talismane.tokeniser.Token)11 DependencyArc (com.joliciel.talismane.parser.DependencyArc)9 TalismaneException (com.joliciel.talismane.TalismaneException)8 Decision (com.joliciel.talismane.machineLearning.Decision)8 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)8 Sentence (com.joliciel.talismane.rawText.Sentence)8 TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence)8 HashMap (java.util.HashMap)7 List (java.util.List)7 TalismaneTest (com.joliciel.talismane.TalismaneTest)6 PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext)6 PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl)6 Config (com.typesafe.config.Config)6 ArrayList (java.util.ArrayList)6 Test (org.junit.Test)6 StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature)5