Search in sources :

Example 6 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class ParseConfigurationAddress method getToken.

/**
 * Returns the token wrapped by this address's pos-tagged token.
 *
 * @return the result of {@code getToken()} on the pos-tagged token, or
 *         {@code null} when {@link #getPosTaggedToken()} yields {@code null}
 * @throws TalismaneException
 *             declared by the signature; presumably propagated from
 *             {@code getPosTaggedToken()} - confirm against that method
 */
@Override
public Token getToken() throws TalismaneException {
    PosTaggedToken tagged = this.getPosTaggedToken();
    return tagged == null ? null : tagged.getToken();
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token)

Example 7 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class PatternMatchWordFormFeature method checkInternal.

/**
 * Builds a single string from the analysis text of the tokens in a pattern
 * match.
 * <p>
 * The first and last positions of the match's token sequence are skipped
 * when the token pattern reports that position as a separator class. Null
 * tokens are skipped as well.
 *
 * @param tokenPatternMatch
 *            the match whose token sequence is concatenated
 * @param env
 *            the runtime environment (not read by this method)
 * @return the result generated from the concatenated text; the text is the
 *         empty string when no token contributes
 */
@Override
public FeatureResult<String> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) {
    // Size is read once: the loop does not modify the sequence.
    int tokenCount = tokenPatternMatch.getSequence().getTokenSequence().size();
    int lastIndex = tokenCount - 1;
    // StringBuilder avoids re-copying the accumulated text on every append.
    StringBuilder wordForm = new StringBuilder();
    for (int i = 0; i < tokenCount; i++) {
        // Only the two edges of the match may be dropped as separators.
        boolean atEdge = (i == 0 || i == lastIndex);
        if (atEdge && tokenPatternMatch.getSequence().getTokenPattern().isSeparatorClass(i)) {
            continue;
        }
        Token aToken = tokenPatternMatch.getSequence().getTokenSequence().get(i);
        if (aToken != null) {
            wordForm.append(aToken.getAnalyisText());
        }
    }
    return this.generateResult(wordForm.toString());
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Example 8 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class PatternOffsetAddressFunction method checkInternal.

/**
 * Resolves the token located at a given offset from a matched token
 * pattern.
 * <p>
 * The wrapped token is treated as the first tested token of the pattern
 * named by {@code tokenPatternFeature}. A base index is computed from the
 * non-whitespace tokens covered by the pattern: the first one when the
 * offset is negative, the last one when it is positive. The offset is then
 * added to that base index.
 *
 * @param tokenWrapper
 *            wrapper around the token being tested
 * @param env
 *            the runtime environment passed on to the nested features
 * @return a result wrapping the offset token, or {@code null} when either
 *         nested feature yields no result, the offset index falls outside
 *         the token sequence, or the token found there is {@code null}
 * @throws TalismaneException
 *             if the offset feature evaluates to 0
 */
@Override
public FeatureResult<TokenWrapper> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();

    FeatureResult<String> patternOutcome = tokenPatternFeature.check(tokenWrapper, env);
    if (patternOutcome == null) {
        // no token pattern applies to this token
        return null;
    }

    // The current token is the first token to be tested in that pattern.
    TokenPattern matchedPattern = this.patternMap.get(patternOutcome.getOutcome());
    int testIndex = matchedPattern.getIndexesToTest().get(0);

    FeatureResult<Integer> offsetOutcome = offsetFeature.check(tokenWrapper, env);
    if (offsetOutcome == null) {
        // no offset available
        return null;
    }

    int offset = offsetOutcome.getOutcome();
    if (offset == 0) {
        throw new TalismaneException("Cannot do a pattern offset with offset of 0");
    }

    // baseIndex ends up as the last non-whitespace word in the pattern when
    // offset > 0, or the first non-whitespace word when offset < 0.
    int baseIndex = 0;
    int patternStart = token.getIndexWithWhiteSpace() - testIndex;
    for (int step = 0, position = patternStart; step < matchedPattern.getTokenCount(); step++, position++) {
        if (position < 0 || position >= token.getTokenSequence().listWithWhiteSpace().size()) {
            // this part of the pattern lies outside the sequence
            continue;
        }
        Token candidate = token.getTokenSequence().listWithWhiteSpace().get(position);
        if (candidate.isWhiteSpace()) {
            continue;
        }
        baseIndex = candidate.getIndex();
        if (offset < 0) {
            // first non-whitespace token found; no need to scan further
            break;
        }
    }

    int offsetIndex = baseIndex + offset;
    if (offsetIndex < 0 || offsetIndex >= token.getTokenSequence().size()) {
        return null;
    }

    Token offsetToken = token.getTokenSequence().get(offsetIndex);
    if (offsetToken == null) {
        return null;
    }
    return this.generateResult(offsetToken);
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) Token(com.joliciel.talismane.tokeniser.Token) TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 9 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class LemmaForPosTagFeature method checkInternal.

/**
 * Returns the lemma of the first lexical entry found for the token under
 * any of the candidate pos-tag codes.
 * <p>
 * All pos-tag code features are evaluated first; only then are the
 * resulting codes looked up, in order, until one yields a lexical entry.
 *
 * @param tokenWrapper
 *            wrapper from which the token to examine is resolved
 * @param env
 *            the runtime environment passed on to the nested features
 * @return a result carrying the lemma, or {@code null} when no token is
 *         resolved or no candidate code has a lexical entry
 * @throws TalismaneException
 *             declared by the signature; presumably propagated from the
 *             nested calls - confirm against those methods
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper innerWrapper = this.getToken(tokenWrapper, env);
    if (innerWrapper == null) {
        return null;
    }
    Token token = innerWrapper.getToken();

    // Phase 1: gather every pos-tag code the features can supply.
    List<String> candidateCodes = new ArrayList<String>();
    for (StringFeature<TokenWrapper> codeFeature : posTagCodeFeatures) {
        FeatureResult<String> codeResult = codeFeature.check(innerWrapper, env);
        if (codeResult == null) {
            continue;
        }
        candidateCodes.add(codeResult.getOutcome());
    }

    // Phase 2: the first code with a lexical entry wins.
    for (String candidateCode : candidateCodes) {
        PosTag posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(candidateCode);
        LexicalEntry entry = token.getLexicalEntry(posTag);
        if (entry != null) {
            return this.generateResult(entry.getLemma());
        }
    }
    return null;
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) ArrayList(java.util.ArrayList) Token(com.joliciel.talismane.tokeniser.Token) LexicalEntry(com.joliciel.talismane.lexicon.LexicalEntry)

Example 10 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class LexiconAllPosTagsFeature method checkInternal.

/**
 * Produces a comma-separated list of the codes of all pos-tags the token
 * may take, in the iteration order of {@code getPossiblePosTags()}.
 *
 * @param tokenWrapper
 *            wrapper from which the token to examine is resolved
 * @param env
 *            the runtime environment passed on when resolving the token
 * @return a result carrying the joined codes, or {@code null} when no token
 *         is resolved or the token has no possible pos-tags
 * @throws TalismaneException
 *             declared by the signature; presumably propagated from the
 *             nested calls - confirm against those methods
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper innerWrapper = this.getToken(tokenWrapper, env);
    if (innerWrapper == null) {
        return null;
    }
    Token token = innerWrapper.getToken();
    if (token.getPossiblePosTags().size() <= 0) {
        // nothing to list
        return null;
    }

    StringBuilder joinedCodes = new StringBuilder();
    // Empty before the first code, a comma before every later one.
    String separator = "";
    for (PosTag posTag : token.getPossiblePosTags()) {
        joinedCodes.append(separator);
        joinedCodes.append(posTag.getCode());
        separator = ",";
    }
    return this.generateResult(joinedCodes.toString());
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) Token(com.joliciel.talismane.tokeniser.Token)

Aggregations

Token (com.joliciel.talismane.tokeniser.Token)69 TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence)16 ArrayList (java.util.ArrayList)15 Sentence (com.joliciel.talismane.rawText.Sentence)14 Decision (com.joliciel.talismane.machineLearning.Decision)12 Config (com.typesafe.config.Config)12 TalismaneTest (com.joliciel.talismane.TalismaneTest)11 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)11 Test (org.junit.Test)11 TalismaneException (com.joliciel.talismane.TalismaneException)7 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)7 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)7 TokeniserOutcome (com.joliciel.talismane.tokeniser.TokeniserOutcome)7 List (java.util.List)7 WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome)6 HashMap (java.util.HashMap)6 StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature)5 PosTag (com.joliciel.talismane.posTagger.PosTag)5 PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext)5 PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl)5