Search in sources :

Example 51 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class PatternMatchOffsetAddressFunction method checkInternal.

/**
 * Resolves the token addressed by an offset relative to a token pattern match.
 *
 * <p>The offset is obtained from {@code offsetFeature}. An offset of zero addresses the
 * matched token itself. Otherwise the tokens covered by the pattern are scanned (whitespace
 * included) to find an anchor: the first non-whitespace token when the offset is negative,
 * the last one when it is positive. The result wraps the token found at
 * {@code anchor index + offset} in the token sequence.
 *
 * @param tokenPatternMatch the pattern match supplying the token and its pattern
 * @param env the runtime environment handed to the offset feature
 * @return the generated result for the addressed token, or {@code null} when the offset
 *         feature yields no result or no token is found at the computed position
 * @throws TalismaneException declared by this method's signature
 */
@Override
public FeatureResult<TokenWrapper> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) throws TalismaneException {
    final Token matchedToken = tokenPatternMatch.getToken();
    final TokenPattern pattern = tokenPatternMatch.getPattern();
    final int firstTestIndex = pattern.getIndexesToTest().get(0);

    final FeatureResult<Integer> offsetResult = offsetFeature.check(tokenPatternMatch, env);
    if (offsetResult == null) {
        return null;
    }

    final int offset = offsetResult.getOutcome();
    Token target = null;
    if (offset == 0) {
        target = matchedToken;
    } else {
        // The anchor ends up as the last non-whitespace token of the pattern
        // when offset > 0, or the first one when offset < 0 (early exit below).
        int anchorIndex = 0;
        int position = matchedToken.getIndexWithWhiteSpace() - firstTestIndex;
        for (int step = 0; step < pattern.getTokenCount(); step++, position++) {
            if (position < 0 || position >= matchedToken.getTokenSequence().listWithWhiteSpace().size()) {
                // this part of the pattern falls outside the sequence
                continue;
            }
            final Token candidate = matchedToken.getTokenSequence().listWithWhiteSpace().get(position);
            if (candidate.isWhiteSpace()) {
                continue;
            }
            anchorIndex = candidate.getIndex();
            if (offset < 0) {
                break;
            }
        }

        final int targetIndex = anchorIndex + offset;
        if (targetIndex >= 0 && targetIndex < matchedToken.getTokenSequence().size()) {
            target = matchedToken.getTokenSequence().get(targetIndex);
        }
    }

    // only generate a result when we actually have an offset token
    return target == null ? null : this.generateResult(target);
}
Also used : Token(com.joliciel.talismane.tokeniser.Token) TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 52 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class PatternWordFormFeature method checkInternal.

/**
 * Builds the concatenated analysis text of all tokens covered by a token pattern, provided
 * the wrapped token matches that pattern at the pattern's first test index.
 *
 * <p>The pattern is looked up in {@code patternMap} using the outcome of
 * {@code tokenPatternFeature}. The text of every token in the pattern (whitespace tokens
 * included) is appended in sequence order.
 *
 * @param tokenWrapper wrapper around the token being tested
 * @param env the runtime environment handed to the token pattern feature
 * @return the generated result for the concatenated text, or {@code null} when the token
 *         pattern feature yields no result or the token has no match at the first test index
 * @throws TalismaneException declared by this method's signature
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    FeatureResult<String> result = null;
    FeatureResult<String> tokenPatternResult = tokenPatternFeature.check(tokenWrapper, env);
    if (tokenPatternResult != null) {
        // If we have a token pattern, then this is the first token to be
        // tested in that pattern
        TokenPattern tokenPattern = this.patternMap.get(tokenPatternResult.getOutcome());
        TokenPatternMatch theMatch = null;
        for (TokenPatternMatch tokenMatch : token.getMatches(tokenPattern)) {
            if (tokenMatch.getPattern().equals(tokenPattern) && tokenMatch.getIndex() == tokenPattern.getIndexesToTest().get(0)) {
                theMatch = tokenMatch;
                break;
            }
        }
        if (theMatch != null) {
            // the current token matches the pattern at its first test index
            // Position (whitespace included) of the pattern's first token in the sequence;
            // loop-invariant, so computed once.
            int startIndex = token.getIndexWithWhiteSpace() - theMatch.getIndex();
            // StringBuilder avoids re-copying the accumulated text on every iteration.
            StringBuilder unigram = new StringBuilder();
            for (int i = 0; i < tokenPattern.getTokenCount(); i++) {
                Token aToken = token.getTokenSequence().listWithWhiteSpace().get(startIndex + i);
                unigram.append(aToken.getAnalyisText());
            }
            result = this.generateResult(unigram.toString());
        }
    }
    return result;
}
Also used : Token(com.joliciel.talismane.tokeniser.Token) TokenPatternMatch(com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch) TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 53 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class RegexFeature method checkInternal.

/**
 * Tests whether the analysis text of the addressed token fully matches a regular expression.
 *
 * <p>The regex comes from {@code regexFeature} and is compiled with
 * {@link Pattern#UNICODE_CHARACTER_CLASS}.
 *
 * @param tokenWrapper wrapper used to resolve the token to test
 * @param env the runtime environment handed to the inner features
 * @return the generated boolean result, or {@code null} when no token can be resolved or the
 *         regex feature yields no result
 * @throws TalismaneException declared by this method's signature
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper innerWrapper = this.getToken(tokenWrapper, env);
    if (innerWrapper == null) {
        return null;
    }
    Token token = innerWrapper.getToken();
    FeatureResult<String> regexResult = regexFeature.check(innerWrapper, env);
    if (regexResult == null) {
        return null;
    }
    String regex = regexResult.getOutcome();
    // Match against a local reference rather than re-reading the mutable field:
    // if another call reassigns this.pattern between compile and match, this
    // invocation still uses the pattern compiled from its own regex.
    Pattern compiled = Pattern.compile(regex, Pattern.UNICODE_CHARACTER_CLASS);
    // The field is still updated, as before, for anything else that reads it.
    this.pattern = compiled;
    boolean matches = compiled.matcher(token.getAnalyisText()).matches();
    return this.generateResult(matches);
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Example 54 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class PatternTokeniser method applyDecision.

/**
 * Creates a new tokenised sequence from {@code history}, extended with {@code token} tagged
 * by {@code decision}.
 *
 * <p>When a match sequence is supplied, every other token it lists to check is tagged with
 * the same decision and appended as well. The decision itself is recorded on the sequence
 * only if it is statistical.
 *
 * @param token the token the decision applies to
 * @param decision the decision whose outcome names the {@code TokeniserOutcome} to assign
 * @param history the sequence to copy before extending
 * @param matchSequence optional pattern match sequence; may be {@code null}
 * @param defaultDecision not used by this method
 * @return the newly created, extended sequence
 */
TokenisedAtomicTokenSequence applyDecision(Token token, Decision decision, TokenisedAtomicTokenSequence history, TokenPatternMatchSequence matchSequence, Decision defaultDecision) {
    TokeniserOutcome outcome = TokeniserOutcome.valueOf(decision.getOutcome());
    TaggedToken<TokeniserOutcome> primary = new TaggedToken<>(token, decision, outcome);

    TokenisedAtomicTokenSequence extended = new TokenisedAtomicTokenSequence(history);
    extended.add(primary);
    if (decision.isStatistical()) {
        extended.addDecision(decision);
    }

    if (matchSequence == null) {
        return extended;
    }

    for (Token candidate : matchSequence.getTokensToCheck()) {
        // the token itself has already been added above
        if (!candidate.equals(token)) {
            extended.add(new TaggedToken<>(candidate, decision, TokeniserOutcome.valueOf(decision.getOutcome())));
        }
    }
    return extended;
}
Also used : TaggedToken(com.joliciel.talismane.tokeniser.TaggedToken) Token(com.joliciel.talismane.tokeniser.Token) TokeniserOutcome(com.joliciel.talismane.tokeniser.TokeniserOutcome) TokenisedAtomicTokenSequence(com.joliciel.talismane.tokeniser.TokenisedAtomicTokenSequence)

Example 55 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class TokenWordFormFeature method checkInternal.

/**
 * Returns the analysis text of the token addressed by this feature.
 *
 * @param tokenWrapper wrapper used to resolve the token to read
 * @param env the runtime environment handed to the token resolution
 * @return the generated result for the token's analysis text, or {@code null} when no token
 *         can be resolved
 * @throws TalismaneException declared by this method's signature
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    final TokenWrapper resolved = this.getToken(tokenWrapper, env);
    if (resolved == null) {
        return null;
    }
    final String analysisText = resolved.getToken().getAnalyisText();
    return this.generateResult(analysisText);
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Aggregations

Token (com.joliciel.talismane.tokeniser.Token)69 TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence)16 ArrayList (java.util.ArrayList)15 Sentence (com.joliciel.talismane.rawText.Sentence)14 Decision (com.joliciel.talismane.machineLearning.Decision)12 Config (com.typesafe.config.Config)12 TalismaneTest (com.joliciel.talismane.TalismaneTest)11 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)11 Test (org.junit.Test)11 TalismaneException (com.joliciel.talismane.TalismaneException)7 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)7 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)7 TokeniserOutcome (com.joliciel.talismane.tokeniser.TokeniserOutcome)7 List (java.util.List)7 WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome)6 HashMap (java.util.HashMap)6 StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature)5 PosTag (com.joliciel.talismane.posTagger.PosTag)5 PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext)5 PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl)5