Use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.
Class PatternNameFeature, method checkInternal.
/**
 * Produces a feature result holding the name of the pattern attached to
 * the given match.
 *
 * @param tokenPatternMatch the match whose pattern is inspected
 * @param env the runtime environment (not used by this feature)
 * @return whatever {@code generateResult} builds for the pattern's name
 */
@Override
public FeatureResult<String> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) {
    TokenPattern matchedPattern = tokenPatternMatch.getPattern();
    return this.generateResult(matchedPattern.getName());
}
Use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.
Class PatternOffsetAddressFunction, method checkInternal.
@Override
public FeatureResult<TokenWrapper> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
Token token = tokenWrapper.getToken();
FeatureResult<TokenWrapper> result = null;
FeatureResult<String> tokenPatternResult = tokenPatternFeature.check(tokenWrapper, env);
if (tokenPatternResult != null) {
// If we have a token pattern, then this is the first token to be
// tested in that pattern
TokenPattern tokenPattern = this.patternMap.get(tokenPatternResult.getOutcome());
int testIndex = tokenPattern.getIndexesToTest().get(0);
FeatureResult<Integer> offsetResult = offsetFeature.check(tokenWrapper, env);
if (offsetResult != null) {
int offset = offsetResult.getOutcome();
if (offset == 0) {
throw new TalismaneException("Cannot do a pattern offset with offset of 0");
}
Token offsetToken = null;
// baseIndex should be the last non-whitespace word in the
// pattern if offset > 0
// or the first non-whitespace word in the pattern if offset < 0
int baseIndex = 0;
int j = token.getIndexWithWhiteSpace() - testIndex;
for (int i = 0; i < tokenPattern.getTokenCount(); i++) {
if (j >= 0 && j < token.getTokenSequence().listWithWhiteSpace().size()) {
Token tokenInPattern = token.getTokenSequence().listWithWhiteSpace().get(j);
if (!tokenInPattern.isWhiteSpace()) {
baseIndex = tokenInPattern.getIndex();
if (offset < 0) {
break;
}
}
}
j++;
}
int offsetIndex = baseIndex + offset;
if (offsetIndex >= 0 && offsetIndex < token.getTokenSequence().size()) {
offsetToken = token.getTokenSequence().get(offsetIndex);
}
if (offsetToken != null) {
result = this.generateResult(offsetToken);
}
// we have an offset token
}
// we have an offset result
}
return result;
}
Use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.
Class PatternGroupNameFeature, method checkInternal.
/**
 * Produces a feature result holding the group name of the pattern attached
 * to the given match.
 *
 * @param tokenPatternMatch the match whose pattern is inspected
 * @param env the runtime environment (not used by this feature)
 * @return a result for the pattern's group name, or {@code null} when the
 *         pattern has no group name
 */
@Override
public FeatureResult<String> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) {
    String groupName = tokenPatternMatch.getPattern().getGroupName();
    if (groupName == null) {
        return null;
    }
    return this.generateResult(groupName);
}
Use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.
Class PatternIndexInSentenceFeature, method checkInternal.
/**
 * Reports the index of the first token of the pattern named by
 * {@code tokenPatternFeature}, provided the wrapped token matches that
 * pattern at the pattern's first test index.
 *
 * @param tokenWrapper wrapper around the token being tested
 * @param env the runtime environment handed on to the nested feature
 * @return a result holding the index of the pattern's first token, or
 *         {@code null} when no pattern name is produced or the token has no
 *         match for that pattern at its first test index
 * @throws TalismaneException declared by the signature; presumably
 *         propagated from the nested feature check
 */
@Override
public FeatureResult<Integer> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    Token token = tokenWrapper.getToken();

    FeatureResult<String> patternNameResult = tokenPatternFeature.check(tokenWrapper, env);
    if (patternNameResult == null) {
        return null;
    }

    TokenPattern tokenPattern = this.patternMap.get(patternNameResult.getOutcome());
    for (TokenPatternMatch candidate : token.getMatches(tokenPattern)) {
        // Only a match sitting at the pattern's first test index is relevant.
        if (candidate.getPattern().equals(tokenPattern) && candidate.getIndex() == tokenPattern.getIndexesToTest().get(0)) {
            // The match index is this token's position inside the pattern,
            // so stepping back by it lands on the pattern's first token in
            // the whitespace-inclusive list.
            int firstPosition = token.getIndexWithWhiteSpace() - candidate.getIndex();
            Token firstToken = token.getTokenSequence().listWithWhiteSpace().get(firstPosition);
            return this.generateResult(firstToken.getIndex());
        }
    }

    // The current token does not match the pattern at its first test index.
    return null;
}
Use of com.joliciel.talismane.tokeniser.patterns.TokenPattern in project talismane by joliciel-informatique.
Class PatternMatchOffsetAddressFunction, method checkInternal.
@Override
public FeatureResult<TokenWrapper> checkInternal(TokenPatternMatch tokenPatternMatch, RuntimeEnvironment env) throws TalismaneException {
Token token = tokenPatternMatch.getToken();
FeatureResult<TokenWrapper> result = null;
TokenPattern tokenPattern = tokenPatternMatch.getPattern();
int testIndex = tokenPattern.getIndexesToTest().get(0);
FeatureResult<Integer> offsetResult = offsetFeature.check(tokenPatternMatch, env);
if (offsetResult != null) {
int offset = offsetResult.getOutcome();
Token offsetToken = null;
if (offset == 0)
offsetToken = token;
else {
// baseIndex should be the last non-whitespace word in the
// pattern if offset > 0
// or the first non-whitespace word in the pattern if offset < 0
int baseIndex = 0;
int j = token.getIndexWithWhiteSpace() - testIndex;
for (int i = 0; i < tokenPattern.getTokenCount(); i++) {
if (j >= 0 && j < token.getTokenSequence().listWithWhiteSpace().size()) {
Token tokenInPattern = token.getTokenSequence().listWithWhiteSpace().get(j);
if (!tokenInPattern.isWhiteSpace()) {
baseIndex = tokenInPattern.getIndex();
if (offset < 0) {
break;
}
}
}
j++;
}
int offsetIndex = baseIndex + offset;
if (offsetIndex >= 0 && offsetIndex < token.getTokenSequence().size()) {
offsetToken = token.getTokenSequence().get(offsetIndex);
}
}
if (offsetToken != null) {
result = this.generateResult(offsetToken);
}
// we have an offset token
}
return result;
}
Aggregations