Use of com.joliciel.talismane.posTagger.PosTaggedToken in the talismane project by joliciel-informatique.
From class PosTaggerStatisticsWriter, method onNextPosTagSequence.
/**
 * Folds one pos-tagged sentence into the running statistics.
 *
 * <p>The sentence counter and sentence-length statistics are updated once per
 * call. Every token except the artificial root is then counted: overall, by
 * open/closed class, by alphanumeric content, and by pos-tag code. A token
 * is treated as known in the lexicon when it has at least one possible
 * pos-tag, and as known in the reference corpus when reference statistics
 * are available and already contain its original text.
 *
 * @param posTagSequence the pos-tagged sentence to add to the statistics
 * @throws TalismaneException declared by the overridden method
 */
@Override
public void onNextPosTagSequence(PosTagSequence posTagSequence) throws TalismaneException {
    stats.sentenceCount++;
    stats.sentenceLengthStats.addValue(posTagSequence.size());

    for (PosTaggedToken tagged : posTagSequence) {
        PosTag tag = tagged.getTag();
        if (tag.equals(PosTag.ROOT_POS_TAG)) {
            // The root is not a real token of the sentence.
            continue;
        }

        Token token = tagged.getToken();

        // Lexicon lookup comes first, before the word is recorded.
        boolean inLexicon = token.getPossiblePosTags().size() > 0;

        String text = token.getOriginalText();
        stats.words.add(text);

        boolean inRefCorpus = referenceStats != null && referenceStats.words.contains(text);

        if (!inLexicon) {
            stats.unknownInLexiconCount++;
        }

        // Tags that are neither CLOSED nor OPEN only contribute to the global counters.
        PosTagOpenClassIndicator indicator = tag.getOpenClassIndicator();
        if (indicator == PosTagOpenClassIndicator.CLOSED) {
            stats.closedClassCount++;
            if (!inRefCorpus) {
                stats.closedClassUnknownInRefCorpus++;
            }
            if (!inLexicon) {
                stats.closedClassUnknownInLexicon++;
            }
        } else if (indicator == PosTagOpenClassIndicator.OPEN) {
            stats.openClassCount++;
            if (!inRefCorpus) {
                stats.openClassUnknownInRefCorpus++;
            }
            if (!inLexicon) {
                stats.openClassUnknownInLexicon++;
            }
        }

        if (!inRefCorpus) {
            stats.unknownTokenCount++;
        }

        if (alphanumeric.matcher(text).find()) {
            // Lower-casing uses the locale of the current session.
            stats.lowerCaseWords.add(text.toLowerCase(TalismaneSession.get(sessionId).getLocale()));
            stats.alphanumericCount++;
            if (!inRefCorpus) {
                stats.unknownAlphanumericCount++;
            }
            if (!inLexicon) {
                stats.unknownAlphaInLexiconCount++;
            }
        }

        stats.tokenCount++;

        // Increment the occurrence count for this pos-tag code, starting from zero.
        String code = tag.getCode();
        Integer previous = stats.posTagCounts.get(code);
        int updated = (previous == null) ? 1 : previous.intValue() + 1;
        stats.posTagCounts.put(code, updated);
    }
}
Use of com.joliciel.talismane.posTagger.PosTaggedToken in the talismane project by joliciel-informatique.
From class SerializationTest, method testSerialize.
/**
 * Checks that a sentence, its token sequence, its pos-tag sequence, a parse
 * configuration and the resulting parse tree all survive a round trip
 * through Java serialization and compare equal to the originals.
 *
 * <p>The sentence "Il aime les pommes" is tokenised and tagged by hand, then
 * parsed by applying a fixed series of arc-eager transitions.
 *
 * @throws Exception if configuration loading, parsing or (de)serialization fails
 */
@Test
public void testSerialize() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    String sessionId = "test";

    Sentence sentence = new Sentence("Il aime les pommes", sessionId);
    TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);
    // Token boundaries are expressed as prefix lengths of the sentence text.
    tokenSequence.addToken("".length(), "Il".length());
    tokenSequence.addToken("Il ".length(), "Il aime".length());
    tokenSequence.addToken("Il aime ".length(), "Il aime les".length());
    tokenSequence.addToken("Il aime les ".length(), "Il aime les pommes".length());

    PosTagSequence posTagSequence = new PosTagSequence(tokenSequence);
    posTagSequence.addPosTaggedToken(new PosTaggedToken(posTagSequence.getNextToken(), new Decision("CLS", 0.90), sessionId));
    posTagSequence.addPosTaggedToken(new PosTaggedToken(posTagSequence.getNextToken(), new Decision("V", 0.70), sessionId));
    posTagSequence.addPosTaggedToken(new PosTaggedToken(posTagSequence.getNextToken(), new Decision("DET", 0.60), sessionId));
    posTagSequence.addPosTaggedToken(new PosTaggedToken(posTagSequence.getNextToken(), new Decision("NC", 0.80), sessionId));
    posTagSequence.prependRoot();

    ParseConfiguration configuration = new ParseConfiguration(posTagSequence);
    LOG.debug(configuration.toString());
    // ROOT ... il
    new ShiftTransition().apply(configuration);
    LOG.debug("Shift -> " + configuration.toString());
    // ROOT il <- aime
    new LeftArcEagerTransition("suj").apply(configuration);
    LOG.debug("Left -> " + configuration.toString());
    // ROOT -> aime
    new RightArcEagerTransition("root").apply(configuration);
    LOG.debug("Right -> " + configuration.toString());
    // ROOT aime ... les
    new ShiftTransition().apply(configuration);
    LOG.debug("Shift -> " + configuration.toString());
    // ROOT aime les <- pommes
    new LeftArcEagerTransition("det").apply(configuration);
    LOG.debug("Left -> " + configuration.toString());
    // ROOT aime -> pommes
    new RightArcEagerTransition("obj").apply(configuration);
    LOG.debug("Right -> " + configuration.toString());

    ParseTree parseTree = new ParseTree(configuration, true);
    LOG.debug(parseTree.toString());

    // Serialize everything into memory. The object stream is flushed explicitly
    // before the bytes are read so that no buffered data is lost, and both
    // streams are closed deterministically.
    byte[] bytes;
    try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
            ObjectOutputStream oos = new ObjectOutputStream(bos)) {
        oos.writeObject(sentence);
        oos.writeObject(tokenSequence);
        oos.writeObject(posTagSequence);
        oos.writeObject(configuration);
        oos.writeObject(parseTree);
        oos.flush();
        bytes = bos.toByteArray();
    }

    // Read the objects back in the order they were written and compare.
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
        Sentence sentence2 = (Sentence) ois.readObject();
        TokenSequence tokenSequence2 = (TokenSequence) ois.readObject();
        PosTagSequence posTagSequence2 = (PosTagSequence) ois.readObject();
        ParseConfiguration configuration2 = (ParseConfiguration) ois.readObject();
        ParseTree parseTree2 = (ParseTree) ois.readObject();

        assertEquals(sentence, sentence2);
        assertEquals(tokenSequence, tokenSequence2);
        assertEquals(posTagSequence, posTagSequence2);
        assertEquals(configuration, configuration2);
        assertEquals(parseTree, parseTree2);
    }
}
Aggregations