Use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
Class PosTagEvaluationSentenceWriter, method onNextPosTagSequence.
/**
 * Writes one evaluated sentence to {@code writer} as a group of CSV rows.
 *
 * <p>Rows written, in order:
 * <ol>
 * <li>the analysis text of each token in {@code realSequence};</li>
 * <li>the pos-tag code of each token in {@code realSequence};</li>
 * <li>for each of the {@code guessCount} guess slots, two rows: the guessed
 * pos-tag codes (or {@code BAD_TOKEN} where the guessed token boundaries do
 * not coincide with the reference token), then the probabilities of the
 * corresponding decisions.</li>
 * </ol>
 * A guess slot with no usable sequence (fewer guesses than {@code guessCount},
 * or a null/empty guessed sequence) is written as two empty rows so that every
 * sentence occupies the same number of rows.
 *
 * @param realSequence the reference sequence
 * @param guessedSequences the guessed sequences, in the order they are written
 * @throws IOException if writing to the underlying writer fails
 */
@Override
public void onNextPosTagSequence(PosTagSequence realSequence, List<PosTagSequence> guessedSequences) throws IOException {
    // Row 1: reference tokens.
    for (int i = 0; i < realSequence.size(); i++) {
        String token = realSequence.get(i).getToken().getAnalyisText();
        writer.write(CSV.format(token));
    }
    writer.write("\n");

    // Row 2: reference pos-tag codes.
    for (int i = 0; i < realSequence.size(); i++) {
        writer.write(CSV.format(realSequence.get(i).getTag().getCode()));
    }
    writer.write("\n");

    for (int k = 0; k < guessCount; k++) {
        PosTagSequence posTagSequence = k < guessedSequences.size() ? guessedSequences.get(k) : null;
        if (posTagSequence == null || posTagSequence.size() == 0) {
            // Nothing to align against: keep the row layout with two empty rows
            // instead of failing on posTagSequence.get(0) below.
            writer.write("\n");
            writer.write("\n");
            continue;
        }

        // Cursor into the guessed sequence; always kept within [0, size - 1].
        int j = 0;
        // Probabilities are buffered because their row comes after the tag row.
        StringBuilder probs = new StringBuilder();
        for (int i = 0; i < realSequence.size(); i++) {
            TaggedToken<PosTag> realToken = realSequence.get(i);
            TaggedToken<PosTag> testToken = posTagSequence.get(j);
            boolean tokenError = false;
            if (realToken.getToken().getStartIndex() == testToken.getToken().getStartIndex()
                    && realToken.getToken().getEndIndex() == testToken.getToken().getEndIndex()) {
                // Same boundaries: no token error, move to the next guessed token
                // (staying on the last one if the guessed sequence is exhausted).
                j++;
                if (j == posTagSequence.size()) {
                    j--;
                }
            } else {
                tokenError = true;
                // Skip guessed tokens until one ends after the current reference token,
                // or the guessed sequence is exhausted.
                while (realToken.getToken().getEndIndex() >= testToken.getToken().getEndIndex()) {
                    j++;
                    if (j == posTagSequence.size()) {
                        j--;
                        break;
                    }
                    testToken = posTagSequence.get(j);
                }
            }

            if (tokenError) {
                writer.write(CSV.format("BAD_TOKEN"));
            } else {
                writer.write(CSV.format(testToken.getTag().getCode()));
            }
            // On a token error, testToken is the token the skip loop stopped on.
            probs.append(CSV.format(testToken.getDecision().getProbability()));
        }
        writer.write("\n");
        probs.append("\n");
        writer.write(probs.toString());
    }
    writer.flush();
}
Use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
Class DefaultPosTagMapper, method getPosTags.
/**
 * Returns the pos-tags associated with the category of a lexical entry.
 *
 * <p>Results are remembered per category in {@code posTagsPerCategory}. A
 * category that {@code posTagSet} does not know is remembered as an empty set.
 *
 * @param lexicalEntry the entry whose category is looked up
 * @return an empty set if the entry has no category or the category is
 *         unknown, otherwise a set containing the matching pos-tag
 */
@Override
public Set<PosTag> getPosTags(LexicalEntry lexicalEntry) {
    if (lexicalEntry.getCategory() == null) {
        return Collections.emptySet();
    }

    Set<PosTag> cached = posTagsPerCategory.get(lexicalEntry.getCategory());
    if (cached != null) {
        return cached;
    }

    PosTag resolved;
    try {
        resolved = posTagSet.getPosTag(lexicalEntry.getCategory());
    } catch (UnknownPosTagException e) {
        // Unknown category: deliberately treated as "no pos-tag".
        resolved = null;
    }

    Set<PosTag> computed;
    if (resolved != null) {
        computed = new HashSet<>();
        computed.add(resolved);
    } else {
        computed = Collections.emptySet();
    }
    posTagsPerCategory.put(lexicalEntry.getCategory(), computed);
    return computed;
}
Use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
Class LexiconReaderTest, method testReadLexicons.
/**
 * Loads the lexicons configured in {@code testWithLex.conf} and checks the
 * entries and possible pos-tags found for the word "dame".
 *
 * <p>The {@code config.file} system property is set for the duration of the
 * test only: it is cleared, and the configuration caches invalidated, in a
 * {@code finally} block so that a failing assertion cannot leak the property
 * into other tests.
 */
@Test
public void testReadLexicons() throws Exception {
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    try {
        // The returned Config is not used directly here; the call is kept because it
        // reloads the configuration after the cache invalidation above.
        // NOTE(review): presumably TalismaneSession reads the same cached config — confirm.
        ConfigFactory.load();
        final String sessionId = "test";

        PosTaggerLexicon lexicon = TalismaneSession.get(sessionId).getMergedLexicon();
        List<LexicalEntry> entries = lexicon.getEntries("dame");
        for (LexicalEntry entry : entries) {
            System.out.println(entry);
        }
        assertEquals(9, entries.size());

        PosTagSet posTagSet = TalismaneSession.get(sessionId).getPosTagSet();
        entries = lexicon.findLexicalEntries("dame", posTagSet.getPosTag("NC"));
        for (LexicalEntry entry : entries) {
            System.out.println(entry);
        }
        assertEquals(2, entries.size());

        Set<PosTag> posTags = lexicon.findPossiblePosTags("dame");
        System.out.println(posTags);
        assertEquals(4, posTags.size());
    } finally {
        // Always restore global state, even when an assertion above fails.
        System.clearProperty("config.file");
        ConfigFactory.invalidateCaches();
    }
}
Use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
Class LexiconPosTagFeature, method checkInternal.
/**
 * Checks whether the token's possible pos-tags contain at least one of the
 * pos-tags named by {@code posTagFeatures}.
 *
 * <p>Each feature is evaluated in turn; features that give no result are
 * skipped, and evaluation stops at the first pos-tag found among the token's
 * possible pos-tags.
 *
 * @param tokenWrapper the wrapper from which the token to test is obtained
 * @param env the runtime environment passed on to the nested features
 * @return {@code null} if no token could be obtained, otherwise the result
 *         generated for {@code true} (a match was found) or {@code false}
 * @throws TalismaneException if thrown by one of the calls made here
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper wrapper = this.getToken(tokenWrapper, env);
    if (wrapper == null) {
        return null;
    }
    Token token = wrapper.getToken();

    boolean found = false;
    for (StringFeature<TokenWrapper> feature : posTagFeatures) {
        FeatureResult<String> codeResult = feature.check(wrapper, env);
        if (codeResult == null) {
            // This feature yielded nothing: try the next one.
            continue;
        }
        PosTag candidate = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(codeResult.getOutcome());
        if (token.getPossiblePosTags().contains(candidate)) {
            found = true;
            break;
        }
    }
    return this.generateResult(found);
}
Use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
Class LexiconPosTagForStringFeature, method checkInternal.
/**
 * Checks whether the merged lexicon lists a given pos-tag among the possible
 * pos-tags of a given word.
 *
 * <p>The word comes from {@code wordToCheckFeature} and the pos-tag code from
 * {@code posTagFeature}; the word feature is evaluated first, and the pos-tag
 * feature only if the word feature gave a result.
 *
 * @param tokenWrapper the wrapper from which the token to test is obtained
 * @param env the runtime environment passed on to the nested features
 * @return {@code null} if no token could be obtained or either nested feature
 *         gave no result, otherwise the result generated for whether the
 *         lexicon's possible pos-tags for the word contain the pos-tag
 * @throws TalismaneException if thrown by one of the calls made here
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper wrapper = this.getToken(tokenWrapper, env);
    if (wrapper == null) {
        return null;
    }

    FeatureResult<String> wordResult = wordToCheckFeature.check(wrapper, env);
    if (wordResult == null) {
        return null;
    }

    FeatureResult<String> codeResult = posTagFeature.check(wrapper, env);
    if (codeResult == null) {
        return null;
    }

    PosTag expected = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(codeResult.getOutcome());
    String word = wordResult.getOutcome();
    Set<PosTag> candidates = TalismaneSession.get(sessionId).getMergedLexicon().findPossiblePosTags(word);
    return this.generateResult(candidates.contains(expected));
}
Aggregations