use of java.text.BreakIterator in project elasticsearch by elastic.
the class CustomUnifiedHighlighter method getFieldHighlighter.
@Override
protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
Set<HighlightFlag> highlightFlags = getFlags(field);
PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
BreakIterator breakIterator = new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR);
FieldOffsetStrategy strategy = getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags);
return new CustomFieldHighlighter(field, strategy, breakIteratorLocale, breakIterator, getScorer(field), maxPassages, (noMatchSize > 0 ? 1 : 0), getFormatter(field), noMatchSize, fieldValue);
}
use of java.text.BreakIterator in project elasticsearch by elastic.
the class CustomSeparatorBreakIteratorTests method testBreakOnCustomSeparator.
public void testBreakOnCustomSeparator() throws Exception {
Character separator = randomSeparator();
BreakIterator bi = new CustomSeparatorBreakIterator(separator);
String source = "this" + separator + "is" + separator + "the" + separator + "first" + separator + "sentence";
bi.setText(source);
assertThat(bi.current(), equalTo(0));
assertThat(bi.first(), equalTo(0));
assertThat(source.substring(bi.current(), bi.next()), equalTo("this" + separator));
assertThat(source.substring(bi.current(), bi.next()), equalTo("is" + separator));
assertThat(source.substring(bi.current(), bi.next()), equalTo("the" + separator));
assertThat(source.substring(bi.current(), bi.next()), equalTo("first" + separator));
assertThat(source.substring(bi.current(), bi.next()), equalTo("sentence"));
assertThat(bi.next(), equalTo(BreakIterator.DONE));
assertThat(bi.last(), equalTo(source.length()));
int current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("sentence"));
current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("first" + separator));
current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("the" + separator));
current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("is" + separator));
current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("this" + separator));
assertThat(bi.previous(), equalTo(BreakIterator.DONE));
assertThat(bi.current(), equalTo(0));
assertThat(source.substring(0, bi.following(9)), equalTo("this" + separator + "is" + separator + "the" + separator));
assertThat(source.substring(0, bi.preceding(9)), equalTo("this" + separator + "is" + separator));
assertThat(bi.first(), equalTo(0));
assertThat(source.substring(0, bi.next(3)), equalTo("this" + separator + "is" + separator + "the" + separator));
}
use of java.text.BreakIterator in project elasticsearch by elastic.
the class CustomSeparatorBreakIteratorTests method testSingleSentences.
public void testSingleSentences() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("a", expected, actual);
assertSameBreaks("ab", expected, actual);
assertSameBreaks("abc", expected, actual);
assertSameBreaks("", expected, actual);
}
use of java.text.BreakIterator in project Openfire by igniterealtime.
the class StringUtils method wordWrap.
/**
* Reformats a string where lines that are longer than <tt>width</tt>
* are split apart at the earliest wordbreak or at maxLength, whichever is
* sooner. If the width specified is less than 5 or greater than the input
* Strings length the string will be returned as is.
* <p>
* Please note that this method can be lossy - trailing spaces on wrapped
* lines may be trimmed.</p>
*
* @param input the String to reformat.
* @param width the maximum length of any one line.
* @return a new String with reformatted as needed.
*/
public static String wordWrap(String input, int width, Locale locale) {
// protect ourselves
if (input == null) {
return "";
} else if (width < 5) {
return input;
} else if (width >= input.length()) {
return input;
}
// default locale
if (locale == null) {
locale = JiveGlobals.getLocale();
}
StringBuilder buf = new StringBuilder(input);
boolean endOfLine = false;
int lineStart = 0;
for (int i = 0; i < buf.length(); i++) {
if (buf.charAt(i) == '\n') {
lineStart = i + 1;
endOfLine = true;
}
// handle splitting at width character
if (i > lineStart + width - 1) {
if (!endOfLine) {
int limit = i - lineStart - 1;
BreakIterator breaks = BreakIterator.getLineInstance(locale);
breaks.setText(buf.substring(lineStart, i));
int end = breaks.last();
// break character
if (end == limit + 1) {
if (!Character.isWhitespace(buf.charAt(lineStart + end))) {
end = breaks.preceding(end - 1);
}
}
// if the last character is a space, replace it with a \n
if (end != BreakIterator.DONE && end == limit + 1) {
buf.replace(lineStart + end, lineStart + end + 1, "\n");
lineStart = lineStart + end;
} else // otherwise, just insert a \n
if (end != BreakIterator.DONE && end != 0) {
buf.insert(lineStart + end, '\n');
lineStart = lineStart + end + 1;
} else {
buf.insert(i, '\n');
lineStart = i + 1;
}
} else {
buf.insert(i, '\n');
lineStart = i + 1;
endOfLine = false;
}
}
}
return buf.toString();
}
use of java.text.BreakIterator in project Openfire by igniterealtime.
the class StringUtils method toLowerCaseWordArray.
/**
* Converts a line of text into an array of lower case words using a
* BreakIterator.wordInstance().<p>
*
* This method is under the Jive Open Source Software License and was
* written by Mark Imbriaco.
*
* @param text a String of text to convert into an array of words
* @return text broken up into an array of words.
*/
public static String[] toLowerCaseWordArray(String text) {
if (text == null || text.length() == 0) {
return new String[0];
}
List<String> wordList = new ArrayList<>();
BreakIterator boundary = BreakIterator.getWordInstance();
boundary.setText(text);
int start = 0;
for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
String tmp = text.substring(start, end).trim();
// Remove characters that are not needed.
tmp = replace(tmp, "+", "");
tmp = replace(tmp, "/", "");
tmp = replace(tmp, "\\", "");
tmp = replace(tmp, "#", "");
tmp = replace(tmp, "*", "");
tmp = replace(tmp, ")", "");
tmp = replace(tmp, "(", "");
tmp = replace(tmp, "&", "");
if (tmp.length() > 0) {
wordList.add(tmp);
}
}
return wordList.toArray(new String[wordList.size()]);
}
Aggregations