Search in sources :

Example 1 with TokenPatternMatch

use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.

the class TokeniserPatternsAndIndexesFeature method checkInternal.

/**
 * Lists the pattern matches of the wrapped token that are NOT located at the
 * first entry of their pattern's indexes-to-test.
 * <p>
 * Every retained match contributes one outcome with weight 1.0 whose name is
 * the pattern name, a separator and the match index.
 *
 * @param tokenWrapper wrapper providing the token whose matches are inspected
 * @param env          runtime environment (not read by this method)
 * @return the result generated from the collected outcomes
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token currentToken = tokenWrapper.getToken();
    List<WeightedOutcome<String>> outcomes = new ArrayList<WeightedOutcome<String>>();
    for (TokenPatternMatch match : currentToken.getMatches()) {
        if (match.getIndex() == match.getPattern().getIndexesToTest().get(0)) {
            // Matches sitting on the pattern's first test index are left out here.
            continue;
        }
        // NOTE(review): the separator literal looks like a mis-encoded character;
        // it is part of the emitted outcome name, so it is kept as-is - confirm
        // against the upstream source before changing it.
        String outcomeName = match.getPattern().getName() + "ยค" + match.getIndex();
        outcomes.add(new WeightedOutcome<String>(outcomeName, 1.0));
    }
    return this.generateResult(outcomes);
}
Also used : ArrayList(java.util.ArrayList) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) Token(com.joliciel.talismane.tokeniser.Token) TokenPatternMatch(com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch)

Example 2 with TokenPatternMatch

use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.

the class TokeniserPatternsFeature method checkInternal.

/**
 * Lists the names of the patterns for which the wrapped token is located at
 * the first entry of the pattern's indexes-to-test.
 * <p>
 * Every such match contributes one outcome, named after the pattern, with
 * weight 1.0.
 *
 * @param tokenWrapper wrapper providing the token whose matches are inspected
 * @param env          runtime environment (not read by this method)
 * @return the result generated from the collected outcomes
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token currentToken = tokenWrapper.getToken();
    List<WeightedOutcome<String>> outcomes = new ArrayList<WeightedOutcome<String>>();
    for (TokenPatternMatch match : currentToken.getMatches()) {
        if (match.getIndex() != match.getPattern().getIndexesToTest().get(0)) {
            // Only matches sitting on the pattern's first test index are reported.
            continue;
        }
        String patternName = match.getPattern().getName();
        outcomes.add(new WeightedOutcome<String>(patternName, 1.0));
    }
    return this.generateResult(outcomes);
}
Also used : ArrayList(java.util.ArrayList) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) Token(com.joliciel.talismane.tokeniser.Token) TokenPatternMatch(com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch)

Example 3 with TokenPatternMatch

use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.

the class PatternIndexInSentenceFeature method checkInternal.

/**
 * Returns the index of the first token of the pattern that the wrapped token
 * matches at the pattern's first test index.
 * <p>
 * The pattern is looked up in {@code patternMap} using the outcome of
 * {@code tokenPatternFeature}.
 *
 * @param tokenWrapper wrapper providing the token being tested
 * @param env          runtime environment, handed to {@code tokenPatternFeature}
 * @return the result generated from the first pattern token's index, or
 *         {@code null} when the pattern feature yields no result or the token
 *         has no match at the pattern's first test index
 * @throws TalismaneException declared by the signature; presumably propagated
 *         from the nested feature check
 */
@Override
public FeatureResult<Integer> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    FeatureResult<Integer> result = null;
    FeatureResult<String> tokenPatternResult = tokenPatternFeature.check(tokenWrapper, env);
    if (tokenPatternResult != null) {
        // If we have a token pattern, then this is the first token to be
        // tested in that pattern
        TokenPattern tokenPattern = this.patternMap.get(tokenPatternResult.getOutcome());
        // Token.getMatches(TokenPattern) returns null when the token has no
        // match for the pattern, so guard before iterating to avoid a
        // NullPointerException.
        Iterable<TokenPatternMatch> candidates = token.getMatches(tokenPattern);
        TokenPatternMatch theMatch = null;
        if (candidates != null) {
            for (TokenPatternMatch tokenMatch : candidates) {
                // keep only the match where the current token sits at the
                // pattern's first test index
                if (tokenMatch.getPattern().equals(tokenPattern) && tokenMatch.getIndex() == tokenPattern.getIndexesToTest().get(0)) {
                    theMatch = tokenMatch;
                    break;
                }
            }
        }
        if (theMatch != null) {
            // note - if a match is found, this is actually the second token
            // in the pattern
            // therefore, we want the index of the first token in the
            // pattern: step back by the match index within the
            // whitespace-inclusive token list.
            int indexWithWhiteSpace = token.getIndexWithWhiteSpace() - theMatch.getIndex();
            Token firstToken = token.getTokenSequence().listWithWhiteSpace().get(indexWithWhiteSpace);
            int patternIndex = firstToken.getIndex();
            result = this.generateResult(patternIndex);
        }
    }
    return result;
}
Also used : Token(com.joliciel.talismane.tokeniser.Token) TokenPatternMatch(com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch) TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 4 with TokenPatternMatch

use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.

the class PatternWordFormFeature method checkInternal.

/**
 * Returns the concatenated analysis text of all tokens covered by the pattern
 * that the wrapped token matches at the pattern's first test index.
 * <p>
 * The pattern is looked up in {@code patternMap} using the outcome of
 * {@code tokenPatternFeature}.
 *
 * @param tokenWrapper wrapper providing the token being tested
 * @param env          runtime environment, handed to {@code tokenPatternFeature}
 * @return the result generated from the concatenated text, or {@code null}
 *         when the pattern feature yields no result or the token has no match
 *         at the pattern's first test index
 * @throws TalismaneException declared by the signature; presumably propagated
 *         from the nested feature check
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    FeatureResult<String> result = null;
    FeatureResult<String> tokenPatternResult = tokenPatternFeature.check(tokenWrapper, env);
    if (tokenPatternResult != null) {
        // If we have a token pattern, then this is the first token to be
        // tested in that pattern
        TokenPattern tokenPattern = this.patternMap.get(tokenPatternResult.getOutcome());
        // Token.getMatches(TokenPattern) returns null when the token has no
        // match for the pattern, so guard before iterating to avoid a
        // NullPointerException.
        Iterable<TokenPatternMatch> candidates = token.getMatches(tokenPattern);
        TokenPatternMatch theMatch = null;
        if (candidates != null) {
            for (TokenPatternMatch tokenMatch : candidates) {
                // keep only the match where the current token sits at the
                // pattern's first test index
                if (tokenMatch.getPattern().equals(tokenPattern) && tokenMatch.getIndex() == tokenPattern.getIndexesToTest().get(0)) {
                    theMatch = tokenMatch;
                    break;
                }
            }
        }
        if (theMatch != null) {
            // Position of the pattern's first token in the
            // whitespace-inclusive token list.
            int firstIndex = token.getIndexWithWhiteSpace() - theMatch.getIndex();
            // StringBuilder avoids re-copying the text on every iteration.
            StringBuilder unigram = new StringBuilder();
            for (int i = 0; i < tokenPattern.getTokenCount(); i++) {
                Token aToken = token.getTokenSequence().listWithWhiteSpace().get(firstIndex + i);
                unigram.append(aToken.getAnalyisText());
            }
            result = this.generateResult(unigram.toString());
        }
    }
    return result;
}
Also used : Token(com.joliciel.talismane.tokeniser.Token) TokenPatternMatch(com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch) TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 5 with TokenPatternMatch

use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.

the class Token method getMatches.

/**
 * Get all matches for a given pattern.
 * <p>
 * On the first call (while {@code matchesPerPattern} is still {@code null})
 * every match returned by {@link #getMatches()} is grouped by the name of its
 * pattern; later calls reuse that grouping.
 *
 * @param pattern the pattern whose matches are requested; its name is the
 *                lookup key
 * @return the matches recorded under the pattern's name, or {@code null} if
 *         none were recorded for that name
 */
public List<TokenPatternMatch> getMatches(TokenPattern pattern) {
    if (matchesPerPattern != null) {
        // Grouping already built: plain lookup.
        return matchesPerPattern.get(pattern.getName());
    }

    matchesPerPattern = new HashMap<String, List<TokenPatternMatch>>();
    for (TokenPatternMatch candidate : this.getMatches()) {
        String patternName = candidate.getPattern().getName();
        List<TokenPatternMatch> bucket = matchesPerPattern.get(patternName);
        if (bucket == null) {
            // First match seen for this pattern name: open a new bucket.
            bucket = new ArrayList<TokenPatternMatch>();
            matchesPerPattern.put(patternName, bucket);
        }
        bucket.add(candidate);
    }
    return matchesPerPattern.get(pattern.getName());
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) TokenPatternMatch(com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch)

Aggregations

TokenPatternMatch (com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch): 5, Token (com.joliciel.talismane.tokeniser.Token): 4, ArrayList (java.util.ArrayList): 3, TokenPattern (com.joliciel.talismane.tokeniser.patterns.TokenPattern): 2, WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome): 2, List (java.util.List): 1