Search in sources :

Example 11 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class PosTagEvaluationSentenceWriter method onNextPosTagSequence.

/**
 * Writes one evaluated sentence to {@code writer} as a group of CSV rows and
 * flushes the writer.
 * <p>
 * Rows written, in order:
 * <ol>
 * <li>the analysis text of each token in {@code realSequence};</li>
 * <li>the pos-tag code of each token in {@code realSequence};</li>
 * <li>for each of the first {@code guessCount} guesses: a row of guessed
 * pos-tag codes (or {@code BAD_TOKEN} where the guessed token boundaries do
 * not match the real token), followed by a row of the corresponding decision
 * probabilities. If fewer than {@code guessCount} guesses are available, two
 * empty rows are written for each missing guess.</li>
 * </ol>
 *
 * @param realSequence the reference sequence
 * @param guessedSequences the guessed sequences, compared in list order
 * @throws IOException if writing to the underlying writer fails
 */
@Override
public void onNextPosTagSequence(PosTagSequence realSequence, List<PosTagSequence> guessedSequences) throws IOException {
    // Row 1: token text of the reference sequence.
    for (int i = 0; i < realSequence.size(); i++) {
        String token = realSequence.get(i).getToken().getAnalyisText();
        writer.write(CSV.format(token));
    }
    writer.write("\n");
    // Row 2: pos-tag codes of the reference sequence.
    for (int i = 0; i < realSequence.size(); i++) {
        writer.write(CSV.format(realSequence.get(i).getTag().getCode()));
    }
    writer.write("\n");
    for (int k = 0; k < guessCount; k++) {
        if (k >= guessedSequences.size()) {
            // No k-th guess: keep the row layout stable with two empty rows.
            writer.write("\n");
            writer.write("\n");
            continue;
        }
        PosTagSequence posTagSequence = guessedSequences.get(k);
        // j walks the guessed sequence in parallel with i over the real one.
        int j = 0;
        // Accumulated in a builder rather than with String += to avoid
        // re-copying the whole row on every token.
        StringBuilder probs = new StringBuilder();
        for (int i = 0; i < realSequence.size(); i++) {
            TaggedToken<PosTag> realToken = realSequence.get(i);
            TaggedToken<PosTag> testToken = posTagSequence.get(j);
            boolean tokenError = false;
            if (realToken.getToken().getStartIndex() == testToken.getToken().getStartIndex() && realToken.getToken().getEndIndex() == testToken.getToken().getEndIndex()) {
                // no token error: advance, but never past the last guessed token
                j++;
                if (j == posTagSequence.size()) {
                    j--;
                }
            } else {
                tokenError = true;
                // Boundaries differ: skip guessed tokens until one ends after
                // the real token, stopping at the last guessed token.
                while (realToken.getToken().getEndIndex() >= testToken.getToken().getEndIndex()) {
                    j++;
                    if (j == posTagSequence.size()) {
                        j--;
                        break;
                    }
                    testToken = posTagSequence.get(j);
                }
            }
            if (tokenError) {
                writer.write(CSV.format("BAD_TOKEN"));
            } else {
                writer.write(CSV.format(testToken.getTag().getCode()));
            }
            probs.append(CSV.format(testToken.getDecision().getProbability()));
        }
        writer.write("\n");
        writer.write(probs.append("\n").toString());
    }
    writer.flush();
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence)

Example 12 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class DefaultPosTagMapper method getPosTags.

/**
 * Returns the pos-tags associated with the category of the given lexical
 * entry, consulting and populating the {@code posTagsPerCategory} cache.
 * <p>
 * An entry with a {@code null} category yields an empty set and is not
 * cached. A category for which {@code posTagSet} throws
 * {@link UnknownPosTagException}, or returns {@code null}, is cached as an
 * empty set.
 *
 * @param lexicalEntry the entry whose category is looked up
 * @return the cached or newly computed set of pos-tags, never {@code null}
 */
@Override
public Set<PosTag> getPosTags(LexicalEntry lexicalEntry) {
    if (lexicalEntry.getCategory() == null) {
        return Collections.emptySet();
    }

    Set<PosTag> cached = posTagsPerCategory.get(lexicalEntry.getCategory());
    if (cached != null) {
        return cached;
    }

    PosTag resolved;
    try {
        resolved = posTagSet.getPosTag(lexicalEntry.getCategory());
    } catch (UnknownPosTagException ignored) {
        // unknown posTag: treated the same as "no tag for this category"
        resolved = null;
    }

    Set<PosTag> computed;
    if (resolved != null) {
        computed = new HashSet<>();
        computed.add(resolved);
    } else {
        computed = Collections.emptySet();
    }
    posTagsPerCategory.put(lexicalEntry.getCategory(), computed);
    return computed;
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) UnknownPosTagException(com.joliciel.talismane.posTagger.UnknownPosTagException)

Example 13 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class LexiconReaderTest method testReadLexicons.

/**
 * Loads the lexicons configured in {@code testWithLex.conf} and checks the
 * number of entries and possible pos-tags found for the word "dame".
 * <p>
 * The {@code config.file} system property is set for the duration of the
 * test and is always cleared afterwards, even when an assertion fails, so
 * that the test configuration does not leak into other tests.
 */
@Test
public void testReadLexicons() throws Exception {
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    try {
        ConfigFactory.invalidateCaches();
        // NOTE(review): config is not used below; the load() call is kept in
        // case the session relies on the configuration being loaded here.
        final Config config = ConfigFactory.load();
        final String sessionId = "test";
        PosTaggerLexicon lexicon = TalismaneSession.get(sessionId).getMergedLexicon();

        List<LexicalEntry> entries = lexicon.getEntries("dame");
        for (LexicalEntry entry : entries) {
            System.out.println(entry);
        }
        assertEquals(9, entries.size());

        PosTagSet posTagSet = TalismaneSession.get(sessionId).getPosTagSet();
        entries = lexicon.findLexicalEntries("dame", posTagSet.getPosTag("NC"));
        for (LexicalEntry entry : entries) {
            System.out.println(entry);
        }
        assertEquals(2, entries.size());

        Set<PosTag> posTags = lexicon.findPossiblePosTags("dame");
        System.out.println(posTags);
        assertEquals(4, posTags.size());
    } finally {
        // Restore global state regardless of the test outcome.
        System.clearProperty("config.file");
        ConfigFactory.invalidateCaches();
    }
}
Also used : PosTagSet(com.joliciel.talismane.posTagger.PosTagSet) PosTag(com.joliciel.talismane.posTagger.PosTag) Config(com.typesafe.config.Config) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Example 14 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class LexiconPosTagFeature method checkInternal.

/**
 * Checks whether the token selected by this feature lists, among its
 * possible pos-tags, at least one of the pos-tags named by
 * {@code posTagFeatures}.
 *
 * @param tokenWrapper the wrapper from which the token to test is derived
 * @param env the runtime environment passed through to nested features
 * @return {@code null} if no token could be resolved; otherwise the result
 *         generated for {@code true} when a match was found and for
 *         {@code false} when none was
 * @throws TalismaneException if a nested feature check or lookup fails
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper innerWrapper = this.getToken(tokenWrapper, env);
    if (innerWrapper == null) {
        return null;
    }
    Token token = innerWrapper.getToken();

    boolean found = false;
    for (StringFeature<TokenWrapper> candidateFeature : posTagFeatures) {
        FeatureResult<String> candidate = candidateFeature.check(innerWrapper, env);
        if (candidate == null) {
            // this feature produced no pos-tag code: try the next one
            continue;
        }
        PosTag posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(candidate.getOutcome());
        found = token.getPossiblePosTags().contains(posTag);
        if (found) {
            break;
        }
    }
    return this.generateResult(found);
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) Token(com.joliciel.talismane.tokeniser.Token)

Example 15 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class LexiconPosTagForStringFeature method checkInternal.

/**
 * Checks whether the merged lexicon lists a given pos-tag as possible for a
 * given word, where both the word and the pos-tag code are produced by
 * nested string features.
 *
 * @param tokenWrapper the wrapper from which the token to test is derived
 * @param env the runtime environment passed through to nested features
 * @return {@code null} if no token could be resolved or if either nested
 *         feature returned no result; otherwise the result generated for
 *         whether the lexicon's possible pos-tags for the word contain the
 *         requested pos-tag
 * @throws TalismaneException if a nested feature check or lookup fails
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper innerWrapper = this.getToken(tokenWrapper, env);
    if (innerWrapper == null) {
        return null;
    }

    FeatureResult<String> wordResult = wordToCheckFeature.check(innerWrapper, env);
    if (wordResult == null) {
        return null;
    }

    FeatureResult<String> tagResult = posTagFeature.check(innerWrapper, env);
    if (tagResult == null) {
        return null;
    }

    PosTag posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(tagResult.getOutcome());
    String word = wordResult.getOutcome();
    Set<PosTag> candidates = TalismaneSession.get(sessionId).getMergedLexicon().findPossiblePosTags(word);
    return this.generateResult(candidates.contains(posTag));
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) PosTaggerLexicon(com.joliciel.talismane.lexicon.PosTaggerLexicon)

Aggregations

PosTag (com.joliciel.talismane.posTagger.PosTag): 17; ArrayList (java.util.ArrayList): 6; Token (com.joliciel.talismane.tokeniser.Token): 5; PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence): 3; PosTagSet (com.joliciel.talismane.posTagger.PosTagSet): 3; WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome): 3; List (java.util.List): 3; LexicalEntry (com.joliciel.talismane.lexicon.LexicalEntry): 2; PosTaggerLexicon (com.joliciel.talismane.lexicon.PosTaggerLexicon): 2; TalismaneException (com.joliciel.talismane.TalismaneException): 1; TalismaneTest (com.joliciel.talismane.TalismaneTest): 1; BooleanFeature (com.joliciel.talismane.machineLearning.features.BooleanFeature): 1; FunctionDescriptor (com.joliciel.talismane.machineLearning.features.FunctionDescriptor): 1; FunctionDescriptorParser (com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser): 1; PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext): 1; UnknownPosTagException (com.joliciel.talismane.posTagger.UnknownPosTagException): 1; Config (com.typesafe.config.Config): 1; FileOutputStream (java.io.FileOutputStream): 1; ObjectOutputStream (java.io.ObjectOutputStream): 1; ZipEntry (java.util.zip.ZipEntry): 1