use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class LastWordInSentenceFeature method checkInternal.
/**
 * Tests whether the resolved token sits at the last position of its token sequence.
 *
 * @param tokenWrapper the wrapper handed to {@code getToken} to resolve the token to test
 * @param env the runtime environment handed to {@code getToken}
 * @return {@code null} if {@code getToken} yields no wrapper; otherwise the result generated
 *         for {@code true} when the token's index equals the sequence size minus one, and
 *         for {@code false} in every other case
 * @throws TalismaneException declared by the signature (presumably raised while resolving the token)
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper resolved = this.getToken(tokenWrapper, env);
    if (resolved == null) {
        return null;
    }

    Token current = resolved.getToken();
    int position = current.getIndex();
    int lastPosition = current.getTokenSequence().size() - 1;
    boolean isLast = (position == lastPosition);
    return this.generateResult(isLast);
}
use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class NextTokensFeature method checkInternal.
/**
 * Concatenates the original text of the token at the boundary position and of the tokens
 * that follow it, reading from the whitespace-inclusive token list.
 *
 * <p>Offsets {@code 0..n} (inclusive) from the boundary's token index are visited, where
 * {@code n} is the outcome of {@code nFeature}. Every visited position lying at or beyond the
 * end of the list contributes the marker {@code [[END]]} instead of token text.
 *
 * @param context the possible sentence boundary supplying the token index and token sequence
 * @param env the runtime environment passed to {@code nFeature}
 * @return {@code null} if {@code nFeature} produces no result; otherwise the result generated
 *         for the concatenated string
 * @throws TalismaneException declared by the signature (presumably raised by {@code nFeature.check})
 */
@Override
public FeatureResult<String> checkInternal(PossibleSentenceBoundary context, RuntimeEnvironment env) throws TalismaneException {
    FeatureResult<Integer> nResult = nFeature.check(context, env);
    if (nResult == null) {
        return null;
    }

    int n = nResult.getOutcome();
    int startIndex = context.getTokenIndexWithWhitespace();
    StringBuilder joined = new StringBuilder();
    for (int offset = 0; offset <= n; offset++) {
        int position = startIndex + offset;
        if (position >= context.getTokenSequence().listWithWhiteSpace().size()) {
            // Ran past the last token: record an end marker for this position.
            joined.append("[[END]]");
            continue;
        }
        Token token = context.getTokenSequence().listWithWhiteSpace().get(position);
        joined.append(token.getOriginalText());
    }
    return this.generateResult(joined.toString());
}
use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class PreviousTokensFeature method checkInternal.
/**
 * Concatenates the original text of the token at the boundary position and of the tokens
 * that precede it, reading from the whitespace-inclusive token list.
 *
 * <p>Offsets {@code 0..n} (inclusive) backwards from the boundary's token index are visited,
 * where {@code n} is the outcome of {@code nFeature}. Each piece is prepended, so the final
 * string reads in list order. Every visited position below zero contributes the marker
 * {@code [[START]]} instead of token text.
 *
 * @param context the possible sentence boundary supplying the token index and token sequence
 * @param env the runtime environment passed to {@code nFeature}
 * @return {@code null} if {@code nFeature} produces no result; otherwise the result generated
 *         for the concatenated string
 * @throws TalismaneException declared by the signature (presumably raised by {@code nFeature.check})
 */
@Override
public FeatureResult<String> checkInternal(PossibleSentenceBoundary context, RuntimeEnvironment env) throws TalismaneException {
    FeatureResult<Integer> nResult = nFeature.check(context, env);
    if (nResult == null) {
        return null;
    }

    int n = nResult.getOutcome();
    int startIndex = context.getTokenIndexWithWhitespace();
    StringBuilder joined = new StringBuilder();
    for (int offset = 0; offset <= n; offset++) {
        int position = startIndex - offset;
        if (position < 0) {
            // Ran past the first token: record a start marker for this position.
            joined.insert(0, "[[START]]");
            continue;
        }
        Token token = context.getTokenSequence().listWithWhiteSpace().get(position);
        joined.insert(0, token.getOriginalText());
    }
    return this.generateResult(joined.toString());
}
use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
the class NextLetterCapitalFeature method checkInternal.
/**
 * Classifies whether the first word after a possible sentence boundary begins with an
 * upper-case letter, and in which surrounding context.
 *
 * <p>The generated outcome is one of the following strings, checked in this order:
 * <ul>
 * <li>{@code "CapitalAfterInitial"} – a capital follows, the boundary string is {@code "."}
 * and the token just before the boundary is a single upper-case character;</li>
 * <li>{@code "CapitalAfterQuote"} – a capital follows and a double-quote token was seen
 * between the boundary and that capital;</li>
 * <li>{@code "CapitalAfterDash"} – a capital follows and a {@code "-"} token was seen
 * between the boundary and that capital;</li>
 * <li>{@code "true"} – a capital follows with none of the above;</li>
 * <li>{@code "false"} – otherwise.</li>
 * </ul>
 *
 * <p>A capital only counts when at least one whitespace token was seen during the scan.
 * Tokens whose original text is empty have no first character to inspect: such a preceding
 * token is never an initial, and such a following token is skipped.
 *
 * @param context the possible sentence boundary supplying the boundary string, the token
 *        index and the whitespace-inclusive token list
 * @param env the runtime environment (not used by this method)
 * @return the result generated for one of the outcome strings listed above
 */
@Override
public FeatureResult<String> checkInternal(PossibleSentenceBoundary context, RuntimeEnvironment env) {
    int tokenIndex = context.getTokenIndexWithWhitespace();

    // A single upper-case character right before a "." is treated as an initial.
    boolean isInitial = false;
    if (context.getBoundaryString().equals(".") && tokenIndex > 0) {
        Token previousToken = context.getTokenSequence().listWithWhiteSpace().get(tokenIndex - 1);
        if (previousToken != null) {
            String previousText = previousToken.getOriginalText();
            // The length check comes first so that an empty text cannot reach charAt(0).
            isInitial = previousText.length() == 1 && Character.isUpperCase(previousText.charAt(0));
        }
    }

    boolean hasWhiteSpace = false;
    boolean hasQuote = false;
    boolean hasDash = false;
    boolean nextLetterCapital = false;
    if (tokenIndex >= 0) {
        for (int i = tokenIndex + 1; i < context.getTokenSequence().listWithWhiteSpace().size(); i++) {
            Token token = context.getTokenSequence().listWithWhiteSpace().get(i);
            if (token.isWhiteSpace()) {
                hasWhiteSpace = true;
            } else if (token.getText().equals("\"") || token.getText().equals("“") || token.getText().equals("„") || token.getText().equals("‟") || token.getText().equals("″")) {
                hasQuote = true;
                // Once both a quote and a dash have been seen, stop scanning.
                if (hasDash) {
                    break;
                }
            } else if (token.getText().equals("-")) {
                hasDash = true;
                if (hasQuote) {
                    break;
                }
            } else if (token.isSeparator()) {
                // Any other separator ends the scan without a capital.
                nextLetterCapital = false;
                break;
            } else {
                String originalText = token.getOriginalText();
                if (originalText.isEmpty()) {
                    // Nothing to inspect in an empty token; keep looking for the next word.
                    continue;
                }
                nextLetterCapital = Character.isUpperCase(originalText.charAt(0));
                break;
            }
        }
    }

    nextLetterCapital = nextLetterCapital && hasWhiteSpace;

    String outcome;
    if (!nextLetterCapital) {
        outcome = "false";
    } else if (isInitial) {
        outcome = "CapitalAfterInitial";
    } else if (hasQuote) {
        outcome = "CapitalAfterQuote";
    } else if (hasDash) {
        outcome = "CapitalAfterDash";
    } else {
        outcome = "true";
    }
    return this.generateResult(outcome);
}
Aggregations