Use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.
From the class TokeniserPatternsAndIndexesFeature, method checkInternal.
/**
 * Collects one weighted outcome for every pattern match on the wrapped token
 * whose index differs from the first entry of its pattern's indexes-to-test.
 *
 * <p>Each outcome is named with the pattern name, the separator character
 * U+00A4 (currency sign) and the match index, and carries a weight of 1.0.
 *
 * @param tokenWrapper wrapper providing the token to inspect
 * @param env runtime environment (not used by this method)
 * @return whatever {@code generateResult} produces for the collected outcomes
 * @throws TalismaneException declared by the overridden method
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    List<WeightedOutcome<String>> resultList = new ArrayList<WeightedOutcome<String>>();
    for (TokenPatternMatch tokenMatch : token.getMatches()) {
        // Only matches that are NOT at the pattern's first test index are reported here.
        if (tokenMatch.getIndex() != tokenMatch.getPattern().getIndexesToTest().get(0)) {
            // The separator is written as a Unicode escape so that it survives
            // any mis-decoding of the source file (it had been corrupted into
            // two Thai letters by a UTF-8 / Thai code page mix-up).
            resultList.add(new WeightedOutcome<String>(tokenMatch.getPattern().getName() + "\u00A4" + tokenMatch.getIndex(), 1.0));
        }
    }
    return this.generateResult(resultList);
}
Use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.
From the class TokeniserPatternsFeature, method checkInternal.
/**
 * Lists the names of the patterns for which the wrapped token's match sits at
 * the first entry of the pattern's indexes-to-test. Every name is given a
 * weight of 1.0.
 *
 * @param tokenWrapper wrapper providing the token to inspect
 * @param env runtime environment (not used by this method)
 * @return whatever {@code generateResult} produces for the collected outcomes
 * @throws TalismaneException declared by the overridden method
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    List<WeightedOutcome<String>> outcomes = new ArrayList<WeightedOutcome<String>>();
    for (TokenPatternMatch match : tokenWrapper.getToken().getMatches()) {
        // Skip every match that is not at its pattern's first test index.
        if (match.getIndex() != match.getPattern().getIndexesToTest().get(0)) {
            continue;
        }
        String patternName = match.getPattern().getName();
        WeightedOutcome<String> outcome = new WeightedOutcome<String>(patternName, 1.0);
        outcomes.add(outcome);
    }
    return this.generateResult(outcomes);
}
Use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.
From the class PatternIndexInSentenceFeature, method checkInternal.
/**
 * Returns the index of the first token of the pattern that the wrapped token
 * matches at the pattern's first test index.
 *
 * <p>The pattern is the one named by the outcome of {@code tokenPatternFeature}.
 * The result is {@code null} when that feature yields no result, or when the
 * token has no match for the pattern at its first index-to-test.
 *
 * @param tokenWrapper wrapper providing the token to inspect
 * @param env runtime environment, passed on to {@code tokenPatternFeature}
 * @return the generated result holding the first pattern token's index, or {@code null}
 * @throws TalismaneException declared by the overridden method
 */
@Override
public FeatureResult<Integer> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    FeatureResult<String> patternNameResult = tokenPatternFeature.check(tokenWrapper, env);
    if (patternNameResult == null) {
        // No token pattern applies to this token.
        return null;
    }

    TokenPattern pattern = this.patternMap.get(patternNameResult.getOutcome());

    // Look for this token's match on the pattern at the pattern's first test index.
    TokenPatternMatch firstTestMatch = null;
    for (TokenPatternMatch candidate : token.getMatches(pattern)) {
        if (!candidate.getPattern().equals(pattern)) {
            continue;
        }
        if (candidate.getIndex() != pattern.getIndexesToTest().get(0)) {
            continue;
        }
        firstTestMatch = candidate;
        break;
    }
    if (firstTestMatch == null) {
        return null;
    }

    // The matched token is not the pattern's first token, so step back by the
    // match index (in the whitespace-inclusive numbering) to reach the token
    // at which the pattern starts.
    int startWithWhiteSpace = token.getIndexWithWhiteSpace() - firstTestMatch.getIndex();
    Token patternStart = token.getTokenSequence().listWithWhiteSpace().get(startWithWhiteSpace);
    int startIndex = patternStart.getIndex();
    return this.generateResult(startIndex);
}
Use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.
From the class PatternWordFormFeature, method checkInternal.
/**
 * Returns the concatenated analysis text of all tokens covered by the pattern
 * that the wrapped token matches at the pattern's first test index.
 *
 * <p>The pattern is the one named by the outcome of {@code tokenPatternFeature}.
 * The result is {@code null} when that feature yields no result, or when the
 * token has no match for the pattern at its first index-to-test.
 *
 * @param tokenWrapper wrapper providing the token to inspect
 * @param env runtime environment, passed on to {@code tokenPatternFeature}
 * @return the generated result holding the concatenated text, or {@code null}
 * @throws TalismaneException declared by the overridden method
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    FeatureResult<String> result = null;
    FeatureResult<String> tokenPatternResult = tokenPatternFeature.check(tokenWrapper, env);
    if (tokenPatternResult != null) {
        // If we have a token pattern, the current token matches it at the
        // pattern's first test index.
        TokenPattern tokenPattern = this.patternMap.get(tokenPatternResult.getOutcome());
        TokenPatternMatch theMatch = null;
        for (TokenPatternMatch tokenMatch : token.getMatches(tokenPattern)) {
            if (tokenMatch.getPattern().equals(tokenPattern) && tokenMatch.getIndex() == tokenPattern.getIndexesToTest().get(0)) {
                theMatch = tokenMatch;
                break;
            }
        }
        if (theMatch != null) {
            // Position (whitespace-inclusive) of the pattern's first token;
            // it does not change while walking the pattern, so compute it once.
            int patternStart = token.getIndexWithWhiteSpace() - theMatch.getIndex();
            // StringBuilder avoids re-copying the accumulated text on every token.
            StringBuilder wordForm = new StringBuilder();
            for (int i = 0; i < tokenPattern.getTokenCount(); i++) {
                Token aToken = token.getTokenSequence().listWithWhiteSpace().get(patternStart + i);
                wordForm.append(aToken.getAnalyisText());
            }
            result = this.generateResult(wordForm.toString());
        }
    }
    return result;
}
Use of com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch in project talismane by joliciel-informatique.
From the class Token, method getMatches.
/**
 * Get all matches for a given pattern.
 *
 * <p>Matches are looked up by pattern name in an index that is built on the
 * first call from {@link #getMatches()} and then reused.
 * NOTE(review): matches added after the first call would not show up unless
 * the cached index is reset elsewhere; confirm against the rest of the class.
 *
 * @param pattern the pattern whose matches are requested; only its name is used
 * @return the matches recorded under the pattern's name, or an empty list when
 *         there are none (never {@code null}, so callers can iterate directly)
 */
public List<TokenPatternMatch> getMatches(TokenPattern pattern) {
    if (matchesPerPattern == null) {
        // Build the index locally and publish it only once complete, so a
        // failure part-way through cannot leave a half-filled cache behind.
        HashMap<String, List<TokenPatternMatch>> index = new HashMap<String, List<TokenPatternMatch>>();
        for (TokenPatternMatch match : this.getMatches()) {
            String patternName = match.getPattern().getName();
            List<TokenPatternMatch> matchesForPattern = index.get(patternName);
            if (matchesForPattern == null) {
                matchesForPattern = new ArrayList<TokenPatternMatch>();
                index.put(patternName, matchesForPattern);
            }
            matchesForPattern.add(match);
        }
        matchesPerPattern = index;
    }
    List<TokenPatternMatch> matches = matchesPerPattern.get(pattern.getName());
    if (matches == null) {
        // No match was recorded for this pattern: hand back a fresh empty list
        // instead of null. The list is not stored in the index.
        return new ArrayList<TokenPatternMatch>();
    }
    return matches;
}
Aggregations