Use of com.joliciel.talismane.tokeniser.SeparatorDecision in project talismane by joliciel-informatique.
From the class TokeniserPatternManager, method getDefaultOutcomes.
/**
 * Takes a sequence of atomic tokens and applies default decisions for each
 * separator.
 * <p>
 * Exactly one outcome is appended per token returned by
 * {@code tokenSequence.listWithWhiteSpace()}, in iteration order. A token
 * whose analysis text matches the session's token-separator pattern receives
 * the outcome dictated by the first matching entry of
 * {@link #getSeparatorDefaultPatterns()} (or {@code SEPARATE} if no entry
 * matches). Any other token receives the "next" outcome set by the most
 * recent separator, initially {@code SEPARATE}.
 *
 * @param tokenSequence
 *            the sequence of atomic tokens to evaluate
 * @return the default outcomes, one per token (including white space)
 */
public List<TokeniserOutcome> getDefaultOutcomes(TokenSequence tokenSequence) {
  List<TokeniserOutcome> defaultOutcomes = new ArrayList<TokeniserOutcome>();
  // Outcome applied to non-separator tokens; updated by each separator seen
  TokeniserOutcome nextOutcome = TokeniserOutcome.SEPARATE;
  Pattern tokenSeparators = Tokeniser.getTokenSeparators(sessionId);
  for (Token token : tokenSequence.listWithWhiteSpace()) {
    String analysisText = token.getAnalyisText();
    if (!tokenSeparators.matcher(analysisText).matches()) {
      // Not a separator: inherit whatever the last separator decided
      defaultOutcomes.add(nextOutcome);
      continue;
    }

    TokeniserOutcome outcome = null;
    boolean defaultValueFound = false;
    for (Entry<SeparatorDecision, Pattern> entry : this.getSeparatorDefaultPatterns().entrySet()) {
      if (!entry.getValue().matcher(analysisText).matches()) {
        continue;
      }
      defaultValueFound = true;
      switch (entry.getKey()) {
      case IS_SEPARATOR:
        outcome = TokeniserOutcome.SEPARATE;
        nextOutcome = TokeniserOutcome.SEPARATE;
        break;
      case IS_NOT_SEPARATOR:
        outcome = TokeniserOutcome.JOIN;
        nextOutcome = TokeniserOutcome.JOIN;
        break;
      case IS_SEPARATOR_BEFORE:
        // Split before the separator, keep it attached to what follows.
        // The break is required: without it this case falls through and
        // is overwritten by IS_SEPARATOR_AFTER.
        outcome = TokeniserOutcome.SEPARATE;
        nextOutcome = TokeniserOutcome.JOIN;
        break;
      case IS_SEPARATOR_AFTER:
        // Keep the separator attached to what precedes, split after it.
        outcome = TokeniserOutcome.JOIN;
        nextOutcome = TokeniserOutcome.SEPARATE;
        break;
      case NOT_APPLICABLE:
        // NOTE(review): outcome stays null here, so a null entry is
        // appended for this token - confirm this is intended.
        break;
      default:
        break;
      }
      // Only the first matching default pattern is applied
      break;
    }

    if (!defaultValueFound) {
      // Separator without a configured default: treat as a full separator
      outcome = TokeniserOutcome.SEPARATE;
      nextOutcome = TokeniserOutcome.SEPARATE;
    }
    defaultOutcomes.add(outcome);
  }
  return defaultOutcomes;
}
Use of com.joliciel.talismane.tokeniser.SeparatorDecision in project talismane by joliciel-informatique.
From the class TokeniserPatternManager, method getSeparatorDefaultPatterns.
/**
 * Returns the map from each separator decision to a regex character class
 * built from that decision's default separator characters, creating and
 * caching the map on first use.
 * <p>
 * Every character of the configured separator string is prefixed with a
 * backslash before being placed inside {@code [...]}, and the pattern is
 * compiled with {@link Pattern#UNICODE_CHARACTER_CLASS}.
 *
 * @return the cached map of separator decisions to their compiled patterns
 */
protected Map<SeparatorDecision, Pattern> getSeparatorDefaultPatterns() {
  if (this.separatorDefaultPatterns != null) {
    // Already built on a previous call
    return separatorDefaultPatterns;
  }

  this.separatorDefaultPatterns = new HashMap<SeparatorDecision, Pattern>();
  for (Entry<SeparatorDecision, String> defaultEntry : this.getSeparatorDefaults().entrySet()) {
    StringBuilder escaped = new StringBuilder();
    for (char separatorChar : defaultEntry.getValue().toCharArray()) {
      // Backslash-prefix each character before it goes into the class
      escaped.append('\\').append(separatorChar);
    }
    String characterClass = "[" + escaped + "]";
    Pattern compiled = Pattern.compile(characterClass, Pattern.UNICODE_CHARACTER_CLASS);
    this.separatorDefaultPatterns.put(defaultEntry.getKey(), compiled);
  }
  return separatorDefaultPatterns;
}
Aggregations