Search in sources :

Example 1 with TokenPattern

use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.

The method checkInternal of the class PatternNameFeature.

/**
 * Produces a feature result carrying the name of the pattern behind the
 * given match.
 *
 * @param tokenPatternMatch the match whose pattern name is read
 * @param env the runtime environment; not consulted by this method
 * @return whatever {@code generateResult} returns for the pattern's name
 */
@Override
public FeatureResult<String> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) {
    return this.generateResult(tokenPatternMatch.getPattern().getName());
}
Also used : TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 2 with TokenPattern

use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.

The method checkInternal of the class PatternOffsetAddressFunction.

/**
 * Resolves the token lying a given number of positions before or after the
 * pattern in which the wrapped token is tested.
 * <p>
 * The pattern name comes from {@code tokenPatternFeature} and is looked up in
 * {@code patternMap}; the offset comes from {@code offsetFeature}. For a
 * positive offset the count starts from the last non-whitespace token of the
 * pattern, for a negative offset from the first one.
 *
 * @param tokenWrapper wrapper around the token currently being tested
 * @param env the runtime environment, handed on to the nested features
 * @return a result for the token found at the offset position, or
 *         {@code null} when either nested feature yields no result or the
 *         offset position falls outside the token sequence
 * @throws TalismaneException if the offset is 0, or if the pattern name is
 *         not a key of {@code patternMap}
 */
@Override
public FeatureResult<TokenWrapper> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    FeatureResult<String> tokenPatternResult = tokenPatternFeature.check(tokenWrapper, env);
    if (tokenPatternResult == null) {
        // no token pattern result for this token
        return null;
    }

    // If we have a token pattern, then this is the first token to be
    // tested in that pattern
    String patternName = tokenPatternResult.getOutcome();
    TokenPattern tokenPattern = this.patternMap.get(patternName);
    if (tokenPattern == null) {
        // fail with the offending name rather than a bare
        // NullPointerException on the next line
        throw new TalismaneException("Unknown token pattern: " + patternName);
    }
    int testIndex = tokenPattern.getIndexesToTest().get(0);

    FeatureResult<Integer> offsetResult = offsetFeature.check(tokenWrapper, env);
    if (offsetResult == null) {
        // no offset result
        return null;
    }
    int offset = offsetResult.getOutcome();
    if (offset == 0) {
        throw new TalismaneException("Cannot do a pattern offset with offset of 0");
    }

    // baseIndex should be the last non-whitespace word in the pattern if
    // offset > 0, or the first non-whitespace word in the pattern if
    // offset < 0
    int baseIndex = 0;
    // position (whitespace included) where the pattern starts
    int j = token.getIndexWithWhiteSpace() - testIndex;
    for (int i = 0; i < tokenPattern.getTokenCount(); i++, j++) {
        if (j < 0 || j >= token.getTokenSequence().listWithWhiteSpace().size()) {
            // this part of the pattern lies outside the sequence
            continue;
        }
        Token tokenInPattern = token.getTokenSequence().listWithWhiteSpace().get(j);
        if (tokenInPattern.isWhiteSpace()) {
            continue;
        }
        baseIndex = tokenInPattern.getIndex();
        if (offset < 0) {
            // first non-whitespace token found: nothing more to scan
            break;
        }
    }

    int offsetIndex = baseIndex + offset;
    if (offsetIndex < 0 || offsetIndex >= token.getTokenSequence().size()) {
        // the offset points outside the token sequence
        return null;
    }
    Token offsetToken = token.getTokenSequence().get(offsetIndex);
    if (offsetToken == null) {
        return null;
    }
    // we have an offset token
    return this.generateResult(offsetToken);
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) Token(com.joliciel.talismane.tokeniser.Token) TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 3 with TokenPattern

use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.

The method checkInternal of the class PatternGroupNameFeature.

/**
 * Produces a feature result carrying the group name of the pattern behind
 * the given match, when the pattern has one.
 *
 * @param tokenPatternMatch the match whose pattern's group name is read
 * @param env the runtime environment; not consulted by this method
 * @return whatever {@code generateResult} returns for the group name, or
 *         {@code null} when the pattern's group name is {@code null}
 */
@Override
public FeatureResult<String> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) {
    TokenPattern matchedPattern = tokenPatternMatch.getPattern();
    if (matchedPattern.getGroupName() == null) {
        // pattern has no group name: no result
        return null;
    }
    return this.generateResult(matchedPattern.getGroupName());
}
Also used : TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 4 with TokenPattern

use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.

The method checkInternal of the class PatternIndexInSentenceFeature.

/**
 * Reports the index of the first token of the pattern in which the wrapped
 * token is tested.
 * <p>
 * The pattern name comes from {@code tokenPatternFeature} and is looked up in
 * {@code patternMap}. A result is produced only when the token has a match
 * for that pattern whose index equals the pattern's first index to test.
 *
 * @param tokenWrapper wrapper around the token currently being tested
 * @param env the runtime environment, handed on to the nested feature
 * @return a result holding the index of the pattern's first token, or
 *         {@code null} when the nested feature yields no result or no
 *         suitable match exists
 * @throws TalismaneException declared by the signature; presumably raised by
 *         the nested feature check
 */
@Override
public FeatureResult<Integer> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    FeatureResult<String> tokenPatternResult = tokenPatternFeature.check(tokenWrapper, env);
    if (tokenPatternResult == null) {
        return null;
    }

    // If we have a token pattern, then this is the first token to be
    // tested in that pattern
    TokenPattern tokenPattern = this.patternMap.get(tokenPatternResult.getOutcome());
    for (TokenPatternMatch candidate : token.getMatches(tokenPattern)) {
        if (!candidate.getPattern().equals(tokenPattern) || candidate.getIndex() != tokenPattern.getIndexesToTest().get(0)) {
            continue;
        }
        // The current token matches the pattern at its first test index.
        // Per the original note, this is actually the second token in the
        // pattern, so step back by the match index to reach the first
        // token of the pattern.
        int firstPosition = token.getIndexWithWhiteSpace() - candidate.getIndex();
        Token firstToken = token.getTokenSequence().listWithWhiteSpace().get(firstPosition);
        int patternIndex = firstToken.getIndex();
        return this.generateResult(patternIndex);
    }
    // no match at the pattern's first test index
    return null;
}
Also used : Token(com.joliciel.talismane.tokeniser.Token) TokenPatternMatch(com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch) TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Example 5 with TokenPattern

use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.

The method checkInternal of the class PatternMatchOffsetAddressFunction.

/**
 * Resolves the token lying a given number of positions before or after the
 * pattern of the supplied match.
 * <p>
 * The offset comes from {@code offsetFeature}. An offset of 0 designates the
 * match's own token. For a positive offset the count starts from the last
 * non-whitespace token of the pattern, for a negative offset from the first
 * one.
 *
 * @param tokenPatternMatch the match supplying both the token and the pattern
 * @param env the runtime environment, handed on to the offset feature
 * @return a result for the token found, or {@code null} when the offset
 *         feature yields no result or no token is found at the offset
 * @throws TalismaneException declared by the signature; presumably raised by
 *         the nested feature check
 */
@Override
public FeatureResult<TokenWrapper> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenPatternMatch.getToken();
    TokenPattern tokenPattern = tokenPatternMatch.getPattern();
    int testIndex = tokenPattern.getIndexesToTest().get(0);

    FeatureResult<Integer> offsetResult = offsetFeature.check(tokenPatternMatch, env);
    if (offsetResult == null) {
        // no offset result
        return null;
    }
    int offset = offsetResult.getOutcome();

    Token target = null;
    if (offset != 0) {
        // anchorIndex ends up as the last non-whitespace word in the
        // pattern if offset > 0, or the first non-whitespace word in the
        // pattern if offset < 0
        int anchorIndex = 0;
        // position (whitespace included) where the pattern starts
        int position = token.getIndexWithWhiteSpace() - testIndex;
        for (int step = 0; step < tokenPattern.getTokenCount(); step++, position++) {
            if (position < 0 || position >= token.getTokenSequence().listWithWhiteSpace().size()) {
                // this part of the pattern lies outside the sequence
                continue;
            }
            Token candidate = token.getTokenSequence().listWithWhiteSpace().get(position);
            if (candidate.isWhiteSpace()) {
                continue;
            }
            anchorIndex = candidate.getIndex();
            if (offset < 0) {
                // first non-whitespace token found: nothing more to scan
                break;
            }
        }
        int targetIndex = anchorIndex + offset;
        if (targetIndex >= 0 && targetIndex < token.getTokenSequence().size()) {
            target = token.getTokenSequence().get(targetIndex);
        }
    } else {
        // an offset of 0 designates the match's own token
        target = token;
    }

    if (target == null) {
        return null;
    }
    // we have an offset token
    return this.generateResult(target);
}
Also used : Token(com.joliciel.talismane.tokeniser.Token) TokenPattern(com.joliciel.talismane.tokeniser.patterns.TokenPattern)

Aggregations

TokenPattern (com.joliciel.talismane.tokeniser.patterns.TokenPattern): 7, Token (com.joliciel.talismane.tokeniser.Token): 5, TalismaneException (com.joliciel.talismane.TalismaneException): 2, TokenPatternMatch (com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch): 2, Decision (com.joliciel.talismane.machineLearning.Decision): 1, Sentence (com.joliciel.talismane.rawText.Sentence): 1, TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence): 1, TokenisedAtomicTokenSequence (com.joliciel.talismane.tokeniser.TokenisedAtomicTokenSequence): 1, TokeniserOutcome (com.joliciel.talismane.tokeniser.TokeniserOutcome): 1, TokenPatternMatchSequence (com.joliciel.talismane.tokeniser.patterns.TokenPatternMatchSequence): 1, ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, Set (java.util.Set): 1, TreeSet (java.util.TreeSet): 1