Use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.
Class ParseComparator, method evaluate.
/**
 * Walks the reference corpus and the evaluation corpus in lock-step, one
 * sentence at a time, and hands each (reference, guess) pair to every
 * registered observer. Once the reference corpus is exhausted, every observer
 * is told that the evaluation is complete.
 *
 * @throws TalismaneException
 *             if sentences mismatched in the two corpora, i.e. the shorter
 *             sentence text is less than 90% of the length of the longer one
 * @throws IOException
 *             if an I/O error is propagated by one of the calls made here
 */
public void evaluate() throws TalismaneException, IOException {
    while (referenceCorpusReader.hasNextSentence()) {
        ParseConfiguration reference = referenceCorpusReader.nextConfiguration();
        ParseConfiguration guess = evaluationCorpusReader.nextConfiguration();

        // Observers expect a list of guesses; here there is always exactly one.
        List<ParseConfiguration> guesses = new ArrayList<ParseConfiguration>();
        guesses.add(guess);

        // Sanity check on alignment: compare the lengths of the two sentence
        // texts as shorter / longer.
        double referenceLength = reference.getPosTagSequence().getTokenSequence().getSentence().getText().length();
        double guessLength = guess.getPosTagSequence().getTokenSequence().getSentence().getText().length();
        double ratio;
        if (referenceLength > guessLength) {
            ratio = guessLength / referenceLength;
        } else {
            ratio = referenceLength / guessLength;
        }

        if (ratio < 0.9) {
            LOG.info("Mismatched sentences");
            LOG.info(reference.getPosTagSequence().getTokenSequence().getSentence().getText().toString());
            LOG.info(guess.getPosTagSequence().getTokenSequence().getSentence().getText().toString());
            throw new TalismaneException("Mismatched sentences");
        }

        for (ParseEvaluationObserver observer : this.observers) {
            observer.onParseEnd(reference, guesses);
        }
    }

    for (ParseEvaluationObserver observer : this.observers) {
        observer.onEvaluationComplete();
    }
}
Use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.
Class ParseFeatureTester, method onNextParseConfiguration.
/**
 * Replays the transitions of the supplied configuration, starting from a
 * fresh configuration built on the same pos-tag sequence. Before each
 * transition is applied, every feature in {@code parseFeatures} is checked
 * against the current state, and a one-line rendering of that state is filed
 * in {@code featureResultMap} under the feature result's string form and then
 * under the code of the transition about to be applied.
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException {
    ParseConfiguration replay = new ParseConfiguration(parseConfiguration.getPosTagSequence());

    for (Transition transition : parseConfiguration.getTransitions()) {
        // Render the sentence, marking the tokens on top of the stack and at
        // the head of the buffer with #[...]#.
        StringBuilder rendering = new StringBuilder();
        for (PosTaggedToken token : replay.getPosTagSequence()) {
            boolean inFocus = token.equals(replay.getStack().getFirst()) || token.equals(replay.getBuffer().getFirst());
            String wordAndTag = token.getToken().getOriginalText().replace(' ', '_') + "/" + token.getTag().toString();
            if (inFocus) {
                rendering.append(" #[").append(wordAndTag).append("]#");
            } else {
                rendering.append(" ").append(wordAndTag);
            }
        }
        rendering.append(" ## Line: " + parseConfiguration.getSentence().getStartLineNumber());
        String renderedState = rendering.toString();

        if (LOG.isTraceEnabled()) {
            LOG.trace(renderedState);
        }

        // Collect the non-null results of every feature for the current state.
        List<FeatureResult<?>> results = new ArrayList<FeatureResult<?>>();
        for (ParseConfigurationFeature<?> feature : parseFeatures) {
            FeatureResult<?> result = feature.check(replay, new RuntimeEnvironment());
            if (result == null) {
                continue;
            }
            results.add(result);
            if (LOG.isTraceEnabled()) {
                LOG.trace(result.toString());
            }
        }

        // File the rendering under (feature result, transition code).
        String transitionCode = transition.getCode();
        for (FeatureResult<?> result : results) {
            String resultKey = result.toString();

            Map<String, List<String>> byTransition = featureResultMap.get(resultKey);
            if (byTransition == null) {
                byTransition = new TreeMap<String, List<String>>();
                featureResultMap.put(resultKey, byTransition);
            }

            List<String> renderings = byTransition.get(transitionCode);
            if (renderings == null) {
                renderings = new ArrayList<String>();
                byTransition.put(transitionCode, renderings);
            }

            renderings.add(renderedState);
        }

        // Move on: apply the transition to a copy of the current state.
        replay = new ParseConfiguration(replay);
        transition.apply(replay);
    }
}
Use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.
Class TransitionLogWriter, method onParseEnd.
/**
 * Decides whether the transitions of the best guess should be logged for this
 * sentence and, if so, delegates to {@code onNextParseConfiguration}.
 * <p>
 * When {@code errorLabels} is null or empty, every sentence is logged. When it
 * is non-empty, a sentence is logged only if at least one token whose
 * reference arc label is in {@code errorLabels} was guessed with a missing
 * arc, a different label or a different head index. In that case a blank line,
 * the annotated reference sentence and the annotated guessed sentence are
 * written to {@code writer} first.
 *
 * @param refConfiguration
 *            the reference parse
 * @param guessedConfigurations
 *            the guessed parses; only the first one is examined
 * @throws TalismaneException
 *             if propagated by {@code onNextParseConfiguration}
 * @throws IOException
 *             if writing to {@code writer} fails
 */
@Override
public void onParseEnd(ParseConfiguration refConfiguration, List<ParseConfiguration> guessedConfigurations) throws TalismaneException, IOException {
    boolean includeMe = true;
    if (errorLabels != null && errorLabels.size() > 0) {
        includeMe = false;
        ParseConfiguration guessConfiguration = guessedConfigurations.get(0);
        Set<PosTaggedToken> refTokensToExplain = new HashSet<PosTaggedToken>();
        Set<PosTaggedToken> guessTokensToExplain = new HashSet<PosTaggedToken>();
        Set<PosTaggedToken> refTokensToHighlight = new HashSet<PosTaggedToken>();
        Set<PosTaggedToken> guessTokensToHighlight = new HashSet<PosTaggedToken>();

        int i = 0;
        for (PosTaggedToken refToken : refConfiguration.getPosTagSequence()) {
            // The token at position 0 is never examined (presumably the
            // artificial root - TODO confirm).
            if (i != 0) {
                DependencyArc refArc = refConfiguration.getGoverningDependency(refToken);
                if (refArc != null) {
                    // NOTE(review): assumes the guessed sequence has a token at
                    // the same position as the reference sequence.
                    PosTaggedToken guessToken = guessConfiguration.getPosTagSequence().get(i);
                    if (errorLabels.contains(refArc.getLabel())) {
                        DependencyArc guessArc = guessConfiguration.getGoverningDependency(guessToken);
                        if (!this.isSameDependency(refArc, guessArc)) {
                            refTokensToExplain.add(refToken);
                            if (refArc.getHead() != null)
                                refTokensToHighlight.add(refArc.getHead());
                            guessTokensToExplain.add(guessToken);
                            if (guessArc != null && guessArc.getHead() != null)
                                guessTokensToHighlight.add(guessArc.getHead());
                            includeMe = true;
                        }
                    }
                }
            }
            i++;
        }

        if (includeMe) {
            // The renderings are only needed when the sentence is written.
            String refLine = this.renderAnnotatedSentence(refConfiguration, refTokensToExplain, refTokensToHighlight);
            String guessLine = this.renderAnnotatedSentence(guessConfiguration, guessTokensToExplain, guessTokensToHighlight);
            writer.write("\n");
            writer.write(refLine + "\n");
            writer.write(guessLine + "\n");
        }
    }
    if (includeMe)
        this.onNextParseConfiguration(guessedConfigurations.get(0));
}

/**
 * Tells whether the guessed arc has the same label and the same head index as
 * the reference arc. Two null heads count as the same head; previously that
 * case fell through to {@code getHead().getIndex()} and threw a
 * NullPointerException.
 */
private boolean isSameDependency(DependencyArc refArc, DependencyArc guessArc) {
    if (guessArc == null || !refArc.getLabel().equals(guessArc.getLabel()))
        return false;
    PosTaggedToken refHead = refArc.getHead();
    PosTaggedToken guessHead = guessArc.getHead();
    if (refHead == null || guessHead == null)
        return refHead == null && guessHead == null;
    return refHead.getIndex() == guessHead.getIndex();
}

/**
 * Renders every token of the configuration as {@code text|tag|index}. Tokens
 * to explain are wrapped in {@code #...#} with their governor index and label
 * appended; tokens to highlight are wrapped in {@code #...#} only.
 */
private String renderAnnotatedSentence(ParseConfiguration configuration, Set<PosTaggedToken> tokensToExplain, Set<PosTaggedToken> tokensToHighlight) {
    StringBuilder builder = new StringBuilder();
    for (PosTaggedToken token : configuration.getPosTagSequence()) {
        String base = token.getToken().getOriginalText().replace(' ', '_') + "|" + token.getTag().getCode() + "|" + token.getIndex();
        if (tokensToExplain.contains(token)) {
            DependencyArc arc = configuration.getGoverningDependency(token);
            if (arc == null)
                builder.append("#" + base + "|Gov0|null# ");
            else
                builder.append("#" + base + "|Gov" + (arc.getHead() == null ? 0 : arc.getHead().getIndex()) + "|" + arc.getLabel() + "# ");
        } else if (tokensToHighlight.contains(token)) {
            builder.append("#" + base + "# ");
        } else {
            builder.append(base + " ");
        }
    }
    return builder.toString();
}
Use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.
Class ParserEvaluator, method evaluate.
/**
 * Runs the evaluation over every sentence supplied by {@code corpusReader}.
 * For each sentence the input is prepared in three stages - tokenisation,
 * pos-tagging, parsing - where the first two fall back on the reference
 * annotation when no tokeniser or pos-tagger is configured. Observers are
 * notified before and after each parse, and once more when the corpus is
 * exhausted.
 *
 * @throws TalismaneException
 *             if an attempt is made to evaluate with a tokeniser but no
 *             pos-tagger
 * @throws IOException
 *             if an I/O error is propagated by one of the calls made here
 */
public void evaluate() throws TalismaneException, IOException {
    while (corpusReader.hasNextSentence()) {
        ParseConfiguration gold = corpusReader.nextConfiguration();

        // Stage 1: token sequences, either guessed or taken from the reference.
        List<TokenSequence> tokenised;
        if (tokeniser == null) {
            tokenised = new ArrayList<TokenSequence>();
            // Work on a clone so that removing the root leaves the reference
            // sequence untouched.
            PosTagSequence rootless = gold.getPosTagSequence().clonePosTagSequence();
            rootless.removeRoot();
            tokenised.add(rootless.getTokenSequence());
        } else {
            if (posTagger == null) {
                throw new TalismaneException("Cannot evaluate with tokeniser but no pos-tagger");
            }
            Sentence sentence = gold.getPosTagSequence().getTokenSequence().getSentence();
            // annotate the sentence for pre token filters
            for (SentenceAnnotator annotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
                annotator.annotate(sentence);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("TokenFilter: " + annotator);
                    LOG.trace("annotations: " + sentence.getAnnotations());
                }
            }
            tokenised = tokeniser.tokenise(sentence);
        }

        // Stage 2: pos-tag sequences, either guessed or taken from the reference.
        List<PosTagSequence> tagged;
        if (posTagger == null) {
            tagged = new ArrayList<PosTagSequence>();
            tagged.add(gold.getPosTagSequence());
        } else if (posTagger instanceof NonDeterministicPosTagger) {
            NonDeterministicPosTagger beamTagger = (NonDeterministicPosTagger) posTagger;
            tagged = beamTagger.tagSentence(tokenised);
        } else {
            // A plain tagger only handles the first token sequence.
            tagged = new ArrayList<PosTagSequence>();
            tagged.add(posTagger.tagSentence(tokenised.get(0)));
        }

        for (ParseEvaluationObserver observer : this.observers) {
            observer.onParseStart(gold, tagged);
        }

        // Stage 3: parse.
        List<ParseConfiguration> guesses;
        if (parser instanceof NonDeterministicParser) {
            NonDeterministicParser beamParser = (NonDeterministicParser) parser;
            guesses = beamParser.parseSentence(tagged);
        } else {
            // A plain parser only handles the first pos-tag sequence.
            ParseConfiguration single = parser.parseSentence(tagged.get(0));
            guesses = new ArrayList<ParseConfiguration>();
            guesses.add(single);
        }

        for (ParseEvaluationObserver observer : this.observers) {
            observer.onParseEnd(gold, guesses);
        }
    }

    for (ParseEvaluationObserver observer : this.observers) {
        observer.onEvaluationComplete();
    }
}
Use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.
Class ParserFScoreCalculator, method onParseEnd.
/**
 * Compares the governing dependency of every non-root token in the reference
 * parse with that of the corresponding token in the best guess, and records
 * the outcome in {@code fscoreCalculator}.
 * <p>
 * Tokens are aligned by start index, with empty tokens only matching empty
 * tokens. The recorded real and guessed labels are the arc labels when
 * {@code labeledEvaluation} is set and {@code "head"} otherwise, with
 * {@code "noHead"} for a missing arc and {@code "noLabel"} for a null or empty
 * label. When both arcs exist but their heads start at different positions,
 * the guess is recorded as {@code "noHead"}, {@code "wrongHead"} or
 * {@code "wrongHeadWrongLabel"}.
 *
 * @param realConfiguration
 *            the reference parse
 * @param guessedConfigurations
 *            the guessed parses; only the first one is scored
 * @throws TalismaneException
 *             if more than half of the tokens in the reference sentence could
 *             not be aligned with a guessed token
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws TalismaneException {
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    ParseConfiguration bestGuess = guessedConfigurations.get(0);
    int mismatchedTokens = 0;
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        // The root token has no governor to evaluate.
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG))
            continue;

        DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken, projective);
        DependencyArc guessedArc = null;
        boolean foundToken = false;
        for (PosTaggedToken guessedToken : bestGuess.getPosTagSequence()) {
            boolean sameStart = guessedToken.getToken().getStartIndex() == posTaggedToken.getToken().getStartIndex();
            // An empty token and a non-empty token can share a start index;
            // they must not be aligned with one another.
            if (sameStart && guessedToken.getToken().isEmpty() == posTaggedToken.getToken().isEmpty()) {
                foundToken = true;
                guessedArc = bestGuess.getGoverningDependency(guessedToken, projective);
                break;
            }
        }
        if (!foundToken) {
            LOG.info("Mismatched token :" + posTaggedToken.getToken().getOriginalText() + ", index " + posTaggedToken.getToken().getIndex());
            mismatchedTokens += 1;
        }

        String realLabel = realArc == null ? "noHead" : labeledEvaluation ? realArc.getLabel() : "head";
        String guessedLabel = guessedArc == null ? "noHead" : labeledEvaluation ? guessedArc.getLabel() : "head";
        if (realLabel == null || realLabel.length() == 0)
            realLabel = "noLabel";
        if (guessedLabel == null || guessedLabel.length() == 0)
            guessedLabel = "noLabel";

        // An unlabeled arc governed by the root
        // should be considered a "no head" rather than "no label"
        if (realArc != null && realArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && realLabel.equals("noLabel"))
            realLabel = "noHead";
        if (guessedArc != null && guessedArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && guessedLabel.equals("noLabel"))
            guessedLabel = "noHead";

        if (realArc == null || guessedArc == null) {
            fscoreCalculator.increment(realLabel, guessedLabel);
        } else {
            boolean sameHead = realArc.getHead().getToken().getStartIndex() == guessedArc.getHead().getToken().getStartIndex();
            if (sameHead) {
                fscoreCalculator.increment(realLabel, guessedLabel);
            } else if (guessedLabel.equals("noHead")) {
                fscoreCalculator.increment(realLabel, "noHead");
            } else {
                // Null-safe comparison: arc labels may be null (handled above
                // as "noLabel"), so calling equals directly on the real label
                // could throw a NullPointerException.
                String realArcLabel = realArc.getLabel();
                String guessedArcLabel = guessedArc.getLabel();
                boolean sameLabel = realArcLabel == null ? guessedArcLabel == null : realArcLabel.equals(guessedArcLabel);
                if (sameLabel) {
                    fscoreCalculator.increment(realLabel, "wrongHead");
                } else {
                    fscoreCalculator.increment(realLabel, "wrongHeadWrongLabel");
                }
            }
        }
    }
    if ((double) mismatchedTokens / (double) posTagSequence.size() > 0.5) {
        // more than half of the tokens mismatched?
        throw new TalismaneException("Too many mismatched tokens in sentence: " + posTagSequence.getTokenSequence().getSentence().getText());
    }
}
Aggregations