Use of com.joliciel.talismane.Annotation in project talismane by joliciel-informatique.
The method annotate of the class AbstractRegexAnnotator.
/**
 * Runs this annotator's pattern over the text of the given sentence and attaches the resulting
 * annotations to that sentence.
 *
 * <p>A match is only processed when the start offset of its target group (the group selected by
 * {@code groupIndex}) is strictly greater than the group start offset of the immediately
 * preceding match. For each processed match, a {@link TokenPlaceholder} annotation is created
 * if {@code singleToken} is set, and one annotation is created per entry in {@code attributes}.
 * Every annotation spans the group's start and end offsets and carries the supplied labels.
 * The collected placeholder annotations are added to the sentence first, followed by the
 * attribute annotations.
 *
 * @param annotatedText the sentence whose text is searched and which receives the annotations
 * @param labels the labels passed to every annotation created by this call
 */
@Override
public void annotate(Sentence annotatedText, String... labels) {
    List<Annotation<TokenPlaceholder>> placeholderAnnotations = new ArrayList<>();
    List<Annotation<TokenAttribute<?>>> attributeAnnotations = new ArrayList<>();
    Matcher regexMatcher = this.getPattern().matcher(annotatedText.getText());
    int previousGroupStart = -1;

    while (regexMatcher.find()) {
        int groupStart = regexMatcher.start(groupIndex);

        // Remember this match's group start for the next iteration before deciding whether
        // to skip it: the comparison always runs against the immediately preceding match.
        boolean startsLater = groupStart > previousGroupStart;
        previousGroupStart = groupStart;
        if (!startsLater) {
            continue;
        }

        int groupEnd = regexMatcher.end(groupIndex);

        if (LOG.isTraceEnabled()) {
            LOG.trace("Regex: " + this.regex);
            String wholeMatch = annotatedText.getText().subSequence(regexMatcher.start(), regexMatcher.end()).toString();
            LOG.trace("Next match: " + wholeMatch.replace('\n', '¶').replace('\r', '¶'));

            // Only log the group separately when it differs from the overall match.
            boolean groupCoversWholeMatch = regexMatcher.start() == groupStart && regexMatcher.end() == groupEnd;
            if (!groupCoversWholeMatch) {
                String groupText = annotatedText.getText().subSequence(groupStart, groupEnd).toString();
                LOG.trace("But matching group: " + groupText.replace('\n', '¶').replace('\r', '¶'));
            }
        }

        if (this.singleToken) {
            String replacementText = this.findReplacement(annotatedText.getText(), regexMatcher);
            TokenPlaceholder tokenPlaceholder = new TokenPlaceholder(replacementText, regex);
            Annotation<TokenPlaceholder> wrappedPlaceholder = new Annotation<>(groupStart, groupEnd, tokenPlaceholder, labels);
            placeholderAnnotations.add(wrappedPlaceholder);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Added placeholder: " + tokenPlaceholder.toString());
            }
        }

        // Every configured attribute is attached over the same span as the matched group.
        for (String attributeKey : attributes.keySet()) {
            TokenAttribute<?> tokenAttribute = attributes.get(attributeKey);
            Annotation<TokenAttribute<?>> wrappedAttribute = new Annotation<>(groupStart, groupEnd, tokenAttribute, labels);
            attributeAnnotations.add(wrappedAttribute);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Added attribute: " + tokenAttribute.toString());
            }
        }
    }

    annotatedText.addAnnotations(placeholderAnnotations);
    annotatedText.addAnnotations(attributeAnnotations);
}
Aggregations