use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class TransitionLogWriter method getTopOfStack.
/**
 * Builds a one-line summary of at most five tokens taken from the
 * configuration's stack, in the order its iterator yields them.
 * <p>
 * Each token is rendered as {@code text|tagCode} followed by a space, with
 * spaces inside the token text replaced by underscores. Every rendered token
 * is inserted at the front of the result, so the first token yielded by the
 * iterator ends up right-most. If the stack holds more than five tokens, the
 * result is prefixed with {@code "... "}.
 *
 * @param configuration the parse configuration whose stack is summarised
 * @return the rendered summary (empty if the stack has no tokens)
 */
private String getTopOfStack(ParseConfiguration configuration) {
    StringBuilder rendered = new StringBuilder();
    int shown = 0;
    for (Iterator<PosTaggedToken> it = configuration.getStack().iterator(); it.hasNext(); shown++) {
        if (shown == 5) {
            // More tokens remain: mark the truncation on the left-hand side.
            rendered.insert(0, "... ");
            break;
        }
        PosTaggedToken stackToken = it.next();
        String text = stackToken.getToken().getOriginalText().replace(' ', '_');
        String code = stackToken.getTag().getCode();
        rendered.insert(0, text + "|" + code + " ");
    }
    return rendered.toString();
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class TransitionLogWriter method getTopOfBuffer.
/**
 * Builds a one-line summary of at most five tokens taken from the
 * configuration's buffer, in the order its iterator yields them.
 * <p>
 * Each token is rendered as a space followed by {@code text|tagCode}, with
 * spaces inside the token text replaced by underscores. Rendered tokens are
 * appended left to right. If the buffer holds more than five tokens, the
 * result is suffixed with {@code " ..."}.
 *
 * @param configuration the parse configuration whose buffer is summarised
 * @return the rendered summary (empty if the buffer has no tokens)
 */
private String getTopOfBuffer(ParseConfiguration configuration) {
    StringBuilder rendered = new StringBuilder();
    int shown = 0;
    for (Iterator<PosTaggedToken> it = configuration.getBuffer().iterator(); it.hasNext(); shown++) {
        if (shown == 5) {
            // More tokens remain: mark the truncation on the right-hand side.
            rendered.append(" ...");
            break;
        }
        PosTaggedToken bufferToken = it.next();
        String text = bufferToken.getToken().getOriginalText().replace(' ', '_');
        String code = bufferToken.getTag().getCode();
        rendered.append(" " + text + "|" + code);
    }
    return rendered.toString();
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ParserFScoreCalculator method onParseEnd.
/**
 * Compares the governing dependency of every non-root token in the real
 * configuration with that of the matching token in the best guessed
 * configuration (the first element of {@code guessedConfigurations}) and
 * records the outcome in {@code fscoreCalculator}.
 * <p>
 * Tokens are matched by start index; an empty token is only matched with an
 * empty token. A real token with no match is logged, counted as mismatched
 * and scored with a guessed outcome of {@code "noHead"}.
 *
 * @param realConfiguration the reference configuration
 * @param guessedConfigurations the guessed configurations; only the first one is evaluated
 * @throws TalismaneException if more than half of the tokens in the real
 *         sequence could not be matched to a guessed token
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws TalismaneException {
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    ParseConfiguration bestGuess = guessedConfigurations.get(0);
    int mismatchedTokens = 0;
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            // The artificial root has no governor to evaluate.
            continue;
        }

        DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken, projective);
        DependencyArc guessedArc = null;
        boolean foundToken = false;
        for (PosTaggedToken guessedToken : bestGuess.getPosTagSequence()) {
            if (guessedToken.getToken().getStartIndex() == posTaggedToken.getToken().getStartIndex()) {
                // Same start index, but never pair an empty token with a non-empty one.
                if (guessedToken.getToken().isEmpty() && !posTaggedToken.getToken().isEmpty())
                    continue;
                if (!guessedToken.getToken().isEmpty() && posTaggedToken.getToken().isEmpty())
                    continue;
                foundToken = true;
                guessedArc = bestGuess.getGoverningDependency(guessedToken, projective);
                break;
            }
        }
        if (!foundToken) {
            LOG.info("Mismatched token :" + posTaggedToken.getToken().getOriginalText() + ", index " + posTaggedToken.getToken().getIndex());
            mismatchedTokens += 1;
        }

        String realLabel = realArc == null ? "noHead" : labeledEvaluation ? realArc.getLabel() : "head";
        String guessedLabel = guessedArc == null ? "noHead" : labeledEvaluation ? guessedArc.getLabel() : "head";
        if (realLabel == null || realLabel.length() == 0)
            realLabel = "noLabel";
        if (guessedLabel == null || guessedLabel.length() == 0)
            guessedLabel = "noLabel";

        // An unlabeled arc to the root should be considered a "no head"
        // rather than a "no label".
        if (realArc != null && realArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && realLabel.equals("noLabel"))
            realLabel = "noHead";
        if (guessedArc != null && guessedArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && guessedLabel.equals("noLabel"))
            guessedLabel = "noHead";

        if (realArc == null || guessedArc == null) {
            // At most one of the arcs exists: the labels already encode the outcome.
            fscoreCalculator.increment(realLabel, guessedLabel);
        } else {
            boolean sameHead = realArc.getHead().getToken().getStartIndex() == guessedArc.getHead().getToken().getStartIndex();
            if (sameHead) {
                fscoreCalculator.increment(realLabel, guessedLabel);
            } else if (guessedLabel.equals("noHead")) {
                fscoreCalculator.increment(realLabel, "noHead");
            } else {
                // Arc labels may be null (see the normalisation above), so the
                // raw labels are compared null-safely instead of calling
                // equals() on a possibly-null reference.
                String realArcLabel = realArc.getLabel();
                String guessedArcLabel = guessedArc.getLabel();
                boolean sameLabel = realArcLabel == null ? guessedArcLabel == null : realArcLabel.equals(guessedArcLabel);
                if (sameLabel) {
                    fscoreCalculator.increment(realLabel, "wrongHead");
                } else {
                    fscoreCalculator.increment(realLabel, "wrongHeadWrongLabel");
                }
            }
        }
    }

    if ((double) mismatchedTokens / (double) posTagSequence.size() > 0.5) {
        // More than half of the tokens mismatched: the two tokenisations are
        // too different for the evaluation to be meaningful.
        throw new TalismaneException("Too many mismatched tokens in sentence: " + posTagSequence.getTokenSequence().getSentence().getText());
    }
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ParserFScoreCalculatorByDistance method onParseEnd.
/**
 * Compares the governing dependency of every non-root token in the real
 * configuration with that of the corresponding token in the best guessed
 * configuration (the first element of {@code guessedConfigurations}), and
 * records the outcome in the f-score calculator associated with the absolute
 * distance (difference of token indexes) between the real head and its
 * dependent. Calculators are created on demand and stored in
 * {@code fscoreByDistanceMap}.
 * <p>
 * Tokens whose real label equals {@code skipLabel} are not scored.
 *
 * @param realConfiguration the reference configuration
 * @param guessedConfigurations the guessed configurations; only the first one is evaluated
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) {
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    ParseConfiguration bestGuess = guessedConfigurations.get(0);
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG))
            continue;

        DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken);

        // The label logic below explicitly allows for a missing real arc, so
        // guard the dereference here as well. A token without a governor has
        // no meaningful distance; it is filed under 0, which no actual arc can
        // produce when head and dependent are distinct tokens.
        int depDistance = 0;
        if (realArc != null) {
            depDistance = Math.abs(realArc.getHead().getToken().getIndex() - realArc.getDependent().getToken().getIndex());
        }

        FScoreCalculator<String> fscoreCalculator = fscoreByDistanceMap.get(depDistance);
        if (fscoreCalculator == null) {
            fscoreCalculator = new FScoreCalculator<String>(depDistance);
            fscoreByDistanceMap.put(depDistance, fscoreCalculator);
        }

        DependencyArc guessedArc = null;
        if (!hasTokeniser && !hasPosTagger) {
            // Neither flag is set: the real token is looked up directly in the guess.
            guessedArc = bestGuess.getGoverningDependency(posTaggedToken);
        } else {
            // Otherwise locate the guessed token by start index.
            for (PosTaggedToken guessedToken : bestGuess.getPosTagSequence()) {
                if (guessedToken.getToken().getStartIndex() == posTaggedToken.getToken().getStartIndex()) {
                    guessedArc = bestGuess.getGoverningDependency(guessedToken);
                    break;
                }
            }
        }

        String realLabel = realArc == null ? "noHead" : labeledEvaluation ? realArc.getLabel() : "head";
        String guessedLabel = guessedArc == null ? "noHead" : labeledEvaluation ? guessedArc.getLabel() : "head";
        if (realLabel == null || realLabel.length() == 0)
            realLabel = "noLabel";
        if (guessedLabel == null || guessedLabel.length() == 0)
            guessedLabel = "noLabel";

        // An unlabeled arc to the root should be considered a "no head"
        // rather than a "no label".
        if (realArc != null && realArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && realLabel.equals("noLabel"))
            realLabel = "noHead";
        if (guessedArc != null && guessedArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && guessedLabel.equals("noLabel"))
            guessedLabel = "noHead";

        // Skip only this token; the remaining tokens of the sentence must
        // still be evaluated.
        if (realLabel.equals(skipLabel))
            continue;

        if (realArc == null || guessedArc == null) {
            // At most one of the arcs exists: the labels already encode the outcome.
            fscoreCalculator.increment(realLabel, guessedLabel);
        } else {
            boolean sameHead;
            if (hasTokeniser || hasPosTagger)
                sameHead = realArc.getHead().getToken().getStartIndex() == guessedArc.getHead().getToken().getStartIndex();
            else
                sameHead = realArc.getHead().equals(guessedArc.getHead());

            if (sameHead) {
                fscoreCalculator.increment(realLabel, guessedLabel);
            } else if (guessedLabel.equals("noHead")) {
                fscoreCalculator.increment(realLabel, "noHead");
            } else {
                // Arc labels may be null (see the normalisation above), so the
                // raw labels are compared null-safely instead of calling
                // equals() on a possibly-null reference.
                String realArcLabel = realArc.getLabel();
                String guessedArcLabel = guessedArc.getLabel();
                boolean sameLabel = realArcLabel == null ? guessedArcLabel == null : realArcLabel.equals(guessedArcLabel);
                if (sameLabel) {
                    fscoreCalculator.increment(realLabel, "wrongHead");
                } else {
                    fscoreCalculator.increment(realLabel, "wrongHeadWrongLabel");
                }
            }
        }
    }
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class PosTaggerEvaluator method evaluate.
/**
 * Evaluate a given pos tagger.
 * <p>
 * For every sentence supplied by {@code corpusReader}, the reference
 * pos-tag sequence is read, the sentence is optionally re-tokenised (when a
 * tokeniser is set, its first token sequence is used), the pos-tagger is run,
 * and every registered observer is notified with the reference sequence and
 * the list of guessed sequences. Once the corpus is exhausted, every observer
 * is told that the evaluation is complete.
 * <p>
 * Observers always receive a non-null list of guesses: a
 * {@code NonDeterministicPosTagger} supplies its own list, while the single
 * sequence produced by any other tagger is wrapped in a one-element list.
 *
 * @throws TalismaneException
 * @throws IOException
 */
public void evaluate() throws TalismaneException, IOException {
    while (corpusReader.hasNextSentence()) {
        PosTagSequence realPosTagSequence = corpusReader.nextPosTagSequence();
        List<TokenSequence> tokenSequences = null;
        List<PosTagSequence> guessedSequences = null;
        TokenSequence tokenSequence = realPosTagSequence.getTokenSequence();
        PosTagSequence guessedSequence = null;

        if (this.tokeniser != null) {
            // Re-tokenise the sentence and continue with the first tokenisation.
            Sentence sentence = tokenSequence.getSentence();
            tokenSequences = tokeniser.tokenise(sentence);
            tokenSequence = tokenSequences.get(0);
        } else {
            tokenSequences = new ArrayList<TokenSequence>();
            tokenSequences.add(tokenSequence);
        }

        if (posTagger instanceof NonDeterministicPosTagger) {
            NonDeterministicPosTagger nonDeterministicPosTagger = (NonDeterministicPosTagger) posTagger;
            guessedSequences = nonDeterministicPosTagger.tagSentence(tokenSequences);
            guessedSequence = guessedSequences.get(0);
        } else {
            guessedSequence = posTagger.tagSentence(tokenSequence);
            // Observers are handed the list of guesses, so the single guess
            // must be wrapped; otherwise they would receive null.
            guessedSequences = new ArrayList<PosTagSequence>();
            guessedSequences.add(guessedSequence);
        }

        if (LOG.isDebugEnabled()) {
            // Log each guessed token as: text[tag|lemma/morphology|...]
            StringBuilder stringBuilder = new StringBuilder();
            for (PosTaggedToken posTaggedToken : guessedSequence) {
                Set<String> lemmas = new TreeSet<String>();
                stringBuilder.append(posTaggedToken.getToken().getOriginalText());
                stringBuilder.append("[" + posTaggedToken.getTag());

                List<LexicalEntry> entries = posTaggedToken.getLexicalEntries();
                // With several entries, the first entry whose lemma equals the
                // token text is left out of the log line.
                boolean dropCurrentWord = entries.size() > 1;
                for (LexicalEntry entry : entries) {
                    if (!lemmas.contains(entry.getLemma())) {
                        if (dropCurrentWord && posTaggedToken.getToken().getText().equals(entry.getLemma())) {
                            dropCurrentWord = false;
                            continue;
                        }
                        stringBuilder.append("|" + entry.getLemma());
                        stringBuilder.append("/" + entry.getMorphology());
                        lemmas.add(entry.getLemma());
                    }
                }
                stringBuilder.append("] ");
            }
            LOG.debug(stringBuilder.toString());
        }

        for (PosTagEvaluationObserver observer : this.observers) {
            observer.onNextPosTagSequence(realPosTagSequence, guessedSequences);
        }
    }

    for (PosTagEvaluationObserver observer : this.observers) {
        observer.onEvaluationComplete();
    }
}
Aggregations