use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class PatternMatchOffsetAddressFunction method checkInternal.
/**
 * Resolves the token located at a given offset from a pattern match and
 * wraps it in a feature result.
 * <p>
 * The offset is obtained from {@code offsetFeature}. An offset of 0 selects
 * the matched token itself. Otherwise the offset is applied to a base index
 * taken from the non-whitespace tokens covered by the pattern: the first
 * one when the offset is negative, the last one when it is positive.
 *
 * @param tokenPatternMatch the pattern match providing the token and its pattern
 * @param env the runtime environment handed on to the offset feature
 * @return the result generated for the offset token, or {@code null} when
 *         the offset feature yields no result or no token exists at the
 *         computed index
 * @throws TalismaneException declared by the contract; propagated from the offset feature
 */
@Override
public FeatureResult<TokenWrapper> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) throws TalismaneException {
    final Token anchor = tokenPatternMatch.getToken();
    final TokenPattern pattern = tokenPatternMatch.getPattern();
    final int firstTestIndex = pattern.getIndexesToTest().get(0);

    final FeatureResult<Integer> offsetOutcome = offsetFeature.check(tokenPatternMatch, env);
    if (offsetOutcome == null) {
        return null;
    }

    final int offset = offsetOutcome.getOutcome();
    Token target = null;
    if (offset != 0) {
        // Walk the whitespace-inclusive positions covered by the pattern,
        // starting where the pattern begins relative to the matched token.
        // The base index ends up being that of the first non-whitespace
        // token (negative offset) or of the last one (positive offset).
        int baseIndex = 0;
        int position = anchor.getIndexWithWhiteSpace() - firstTestIndex;
        for (int step = 0; step < pattern.getTokenCount(); step++, position++) {
            if (position < 0 || position >= anchor.getTokenSequence().listWithWhiteSpace().size()) {
                // this part of the pattern falls outside the sequence
                continue;
            }
            Token candidate = anchor.getTokenSequence().listWithWhiteSpace().get(position);
            if (candidate.isWhiteSpace()) {
                continue;
            }
            baseIndex = candidate.getIndex();
            if (offset < 0) {
                // looking backwards: the first non-whitespace token is the base
                break;
            }
        }

        final int targetIndex = baseIndex + offset;
        if (targetIndex >= 0 && targetIndex < anchor.getTokenSequence().size()) {
            target = anchor.getTokenSequence().get(targetIndex);
        }
    } else {
        target = anchor;
    }

    // only generate a result when an offset token was actually found
    return target == null ? null : this.generateResult(target);
}
use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class PatternWordFormFeature method checkInternal.
/**
 * Returns the concatenated analysis text of all tokens covered by a token
 * pattern, when the wrapped token is the first token to be tested in that
 * pattern.
 * <p>
 * The pattern is looked up in {@code patternMap} using the outcome of
 * {@code tokenPatternFeature}. Among the token's matches for that pattern,
 * the one whose index equals the pattern's first index-to-test is used.
 *
 * @param tokenWrapper wrapper around the token being examined
 * @param env the runtime environment handed on to the token pattern feature
 * @return the result generated for the concatenated text, or {@code null}
 *         when the token pattern feature yields no result or the token has
 *         no match at the pattern's first test index
 * @throws TalismaneException declared by the contract; propagated from the token pattern feature
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();
    FeatureResult<String> result = null;
    FeatureResult<String> tokenPatternResult = tokenPatternFeature.check(tokenWrapper, env);
    if (tokenPatternResult != null) {
        // If we have a token pattern, then this is the first token to be
        // tested in that pattern
        TokenPattern tokenPattern = this.patternMap.get(tokenPatternResult.getOutcome());
        TokenPatternMatch theMatch = null;
        for (TokenPatternMatch tokenMatch : token.getMatches(tokenPattern)) {
            if (tokenMatch.getPattern().equals(tokenPattern) && tokenMatch.getIndex() == tokenPattern.getIndexesToTest().get(0)) {
                theMatch = tokenMatch;
                break;
            }
        }
        if (theMatch != null) {
            // the current token matches the token pattern at its first
            // test index: collect the text of every token in the pattern,
            // whitespace tokens included
            int firstIndex = token.getIndexWithWhiteSpace() - theMatch.getIndex();
            // StringBuilder avoids re-copying the accumulated text on each iteration
            StringBuilder unigram = new StringBuilder();
            for (int i = 0; i < tokenPattern.getTokenCount(); i++) {
                Token aToken = token.getTokenSequence().listWithWhiteSpace().get(firstIndex + i);
                unigram.append(aToken.getAnalyisText());
            }
            result = this.generateResult(unigram.toString());
        }
    }
    return result;
}
use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class RegexFeature method checkInternal.
/**
 * Tests whether the analysis text of the addressed token fully matches a
 * regular expression.
 * <p>
 * The regular expression is the outcome of {@code regexFeature}. It is
 * compiled with {@link Pattern#UNICODE_CHARACTER_CLASS} on every call and
 * stored in the {@code pattern} field.
 *
 * @param tokenWrapper wrapper used to resolve the token to test
 * @param env the runtime environment handed on to the nested features
 * @return the result generated for the boolean match, or {@code null} when
 *         no token can be resolved or the regex feature yields no result
 * @throws TalismaneException declared by the contract; propagated from the nested features
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    final TokenWrapper target = this.getToken(tokenWrapper, env);
    if (target == null) {
        return null;
    }

    final Token targetToken = target.getToken();
    final FeatureResult<String> regexOutcome = regexFeature.check(target, env);
    if (regexOutcome == null) {
        return null;
    }

    // the compiled pattern is kept in the instance field
    this.pattern = Pattern.compile(regexOutcome.getOutcome(), Pattern.UNICODE_CHARACTER_CLASS);
    // matches() requires the whole analysis text to match, not just a part of it
    final boolean isFullMatch = this.pattern.matcher(targetToken.getAnalyisText()).matches();
    return this.generateResult(isFullMatch);
}
use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class PatternTokeniser method applyDecision.
/**
 * Builds a new tokenised sequence that extends {@code history} with the
 * given decision applied to {@code token} and, when a match sequence is
 * supplied, to every other token that match sequence lists for checking.
 *
 * @param token the token the decision was made for
 * @param decision the decision to apply; recorded on the new sequence only
 *        when it is statistical
 * @param history the sequence built so far, used as the starting point of
 *        the new sequence
 * @param matchSequence the match sequence whose remaining tokens receive
 *        the same decision, or {@code null} if there is none
 * @param defaultDecision not used by this method
 * @return the new sequence
 */
TokenisedAtomicTokenSequence applyDecision(Token token, Decision decision, TokenisedAtomicTokenSequence history, TokenPatternMatchSequence matchSequence, Decision defaultDecision) {
    // the same outcome is attached to every token tagged below
    final TokeniserOutcome outcome = TokeniserOutcome.valueOf(decision.getOutcome());

    final TokenisedAtomicTokenSequence extended = new TokenisedAtomicTokenSequence(history);
    extended.add(new TaggedToken<>(token, decision, outcome));
    if (decision.isStatistical()) {
        extended.addDecision(decision);
    }

    if (matchSequence == null) {
        return extended;
    }

    for (Token candidate : matchSequence.getTokensToCheck()) {
        // the decided token itself has already been added above
        if (!candidate.equals(token)) {
            extended.add(new TaggedToken<>(candidate, decision, outcome));
        }
    }
    return extended;
}
use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class TokenWordFormFeature method checkInternal.
/**
 * Returns the analysis text of the addressed token.
 *
 * @param tokenWrapper wrapper used to resolve the token
 * @param env the runtime environment handed on when resolving the token
 * @return the result generated for the token's analysis text, or
 *         {@code null} when no token can be resolved
 * @throws TalismaneException declared by the contract; propagated from token resolution
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    final TokenWrapper target = this.getToken(tokenWrapper, env);
    if (target == null) {
        return null;
    }
    final String wordForm = target.getToken().getAnalyisText();
    return this.generateResult(wordForm);
}
Aggregations