Use of edu.cmu.lti.javelin.util.DeltaRangeMap in project lucida by claritylab.
The class StanfordParser, method createMapping.
/**
 * Builds the table used to translate character offsets in a
 * whitespace-stripped copy of {@code sentence} back into offsets in
 * {@code sentence} itself.
 *
 * <p>One {@code DeltaRangeMap} is emitted for every run matched by
 * {@code whitespace_pattern}. Each map starts at the stripped-text offset
 * where that run was removed, ends where the next run was removed (or at the
 * end of the stripped text for the last one), and carries as its delta the
 * total number of characters matched by the pattern so far.
 *
 * @param sentence the original text, whitespace included
 * @return the range maps in order of occurrence; an empty list when the
 *         pattern matches nothing in {@code sentence}
 */
protected static List<RangeMap> createMapping(String sentence) {
    List<RangeMap> ranges = new LinkedList<RangeMap>();
    Matcher gaps = whitespace_pattern.matcher(sentence);
    // The range that has a start but whose end is not known yet.
    DeltaRangeMap open = null;

    while (gaps.find()) {
        int gapStart = gaps.start();
        int gapLength = gaps.end() - gapStart;

        if (open != null) {
            // A further whitespace run closes the pending range: its end is
            // the run's start expressed in stripped-text coordinates.
            open.end = gapStart - open.delta;
            ranges.add(open);
            // The follow-up range starts where the previous one stopped and
            // accumulates the length of the run just seen.
            open = new DeltaRangeMap(open.end, 0, open.delta + gapLength);
        } else {
            // First run: nothing has been removed before it, so the stripped
            // and original offsets still coincide. The end (0) is a
            // placeholder that is filled in later.
            open = new DeltaRangeMap(gapStart, 0, gapLength);
        }
    }

    if (open == null) {
        // No whitespace matched: no offsets need adjusting.
        return ranges;
    }

    // Close the last pending range at the end of the stripped text.
    open.end = sentence.length() - open.delta;
    ranges.add(open);
    return ranges;
}
Aggregations