Use of java.text.BreakIterator in project groovy-core by groovy.
Class SimpleGroovyDoc, method calculateFirstSentence.
/**
 * Reduces raw doc-comment text to its first sentence, for use as a summary.
 *
 * <p>Leading {@code *} decorations are stripped from every line. The text is
 * then cut at the first {@code <p>} tag, the first blank line, and the first
 * recognised {@code @tag} that starts a later line. Finally the first
 * sentence of what remains is selected with a sentence {@link BreakIterator}
 * for the default locale.
 *
 * @param raw the raw comment text
 * @return the first sentence of the cleaned-up text, or the whole cleaned-up
 *         text when the iterator reports no sentence boundary
 */
public static String calculateFirstSentence(String raw) {
    // Drop the "*" gutter that doc comments carry at the start of each line.
    String summary = raw.replaceAll("(?m)^\\s*\\*", "").trim();

    // Each pattern marks something that is assumed to end the first sentence;
    // everything from the marker onwards is discarded. Applied in this order:
    // a <p> paragraph tag, a completely blank line, a block @tag on a new line.
    final String[] sentenceTerminators = {
        "(?ms)<p>.*",
        "(?ms)\\n\\s*\\n.*",
        "(?ms)\\n\\s*@(see|param|throws|return|author|since|exception|version|deprecated|todo)\\s.*"
    };
    for (String terminator : sentenceTerminators) {
        summary = summary.replaceFirst(terminator, "").trim();
    }

    // Locale-sensitive sentence detection.
    // todo - allow locale to be passed in
    BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.getDefault());
    sentences.setText(summary);
    int from = sentences.first();
    int to = sentences.next();
    if (from < 0 || to < 0) {
        // No usable boundary (e.g. empty text): nothing to abbreviate.
        return summary;
    }
    return summary.substring(from, to);
}
Use of java.text.BreakIterator in project VocabHunter by VocabHunter.
Class FileStreamer, method splitToList.
/**
 * Splits text into sentences using a sentence {@link BreakIterator} for
 * {@code LOCALE}.
 *
 * <p>Segments rejected by {@code StringUtils.isNoneBlank} are skipped. In
 * every kept segment each run of whitespace is collapsed to a single space
 * and the result is trimmed.
 *
 * @param text the text to split
 * @return the normalised sentences, in order of appearance
 */
private List<String> splitToList(final String text) {
    BreakIterator sentences = BreakIterator.getSentenceInstance(LOCALE);
    sentences.setText(text);

    List<String> result = new ArrayList<>();
    int from = sentences.first();
    for (int to = sentences.next(); to != BreakIterator.DONE; from = to, to = sentences.next()) {
        String sentence = text.substring(from, to);
        if (!StringUtils.isNoneBlank(sentence)) {
            continue;
        }
        result.add(sentence.replaceAll("\\s+", " ").trim());
    }
    return result;
}
Use of java.text.BreakIterator in project geode by apache.
Class UnitTestDoclet, method indent.
/**
 * Prints a block of text indented by a given number of spaces.
 *
 * <p>The text is split with a word {@link BreakIterator}. Whenever the next
 * token would push the running column count past 72, a new line is started
 * and the indentation is printed again. A token that ends in a newline is
 * written without that newline, and a token that begins with whitespace
 * loses its first character when it is the first token on a line. A final
 * line terminator is always printed.
 *
 * @param text   the text to print
 * @param indent the number of spaces placed before each line
 * @param pw     the writer that receives the output
 */
private static void indent(String text, final int indent, PrintWriter pw) {
    StringBuilder padding = new StringBuilder();
    for (int remaining = indent; remaining > 0; remaining--) {
        padding.append(" ");
    }
    final String margin = padding.toString();
    pw.print(margin);

    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(text);

    int column = indent;
    boolean atLineStart = true;
    int from = words.first();
    int to = words.next();
    while (to != BreakIterator.DONE) {
        String token = text.substring(from, to);

        // Wrap before printing a token that would cross the 72-column limit.
        if (column + token.length() > 72) {
            pw.println("");
            pw.print(margin);
            column = indent;
            atLineStart = true;
        }

        int lastIndex = token.length() - 1;
        if (token.charAt(lastIndex) == '\n') {
            // Emit everything except the trailing newline.
            pw.write(token, 0, lastIndex);
        } else if (atLineStart && Character.isWhitespace(token.charAt(0))) {
            // Skip the leading whitespace character at the start of a line.
            pw.write(token, 1, lastIndex);
        } else {
            pw.print(token);
        }

        // The column advances by the full token width, even when part of the
        // token was not written.
        column += to - from;
        atLineStart = false;

        from = to;
        to = words.next();
    }
    pw.println("");
}
Use of java.text.BreakIterator in project lucene-solr by apache.
Class TestCharArrayIterator, method testConsumeSentenceInstance.
/**
 * Runs {@code consume} 10000 times, each time after loading a fresh random
 * Unicode string into a sentence-instance {@code CharArrayIterator}.
 * NOTE(review): {@code consume} presumably drives the JDK sentence iterator
 * over the char-array iterator; confirm against the helper.
 */
public void testConsumeSentenceInstance() {
    // we use the default locale, as it's randomized by LuceneTestCase
    BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.getDefault());
    CharArrayIterator charIterator = CharArrayIterator.newSentenceInstance();

    int remaining = 10000;
    while (remaining-- > 0) {
        char[] chars = TestUtil.randomUnicodeString(random()).toCharArray();
        charIterator.setText(chars, 0, chars.length);
        consume(sentenceBreaks, charIterator);
    }
}
Use of java.text.BreakIterator in project lucene-solr by apache.
Class TestCustomSeparatorBreakIterator, method testSliceMiddle.
/**
 * Passes a JDK sentence iterator ({@link Locale#ROOT}) and a
 * {@code CustomSeparatorBreakIterator} built from a random separator to
 * {@code assertSameBreaks} for four inputs.
 * NOTE(review): the two int arguments look like the offset (always 3) and
 * length of the slice under test; confirm against {@code assertSameBreaks}.
 */
public void testSliceMiddle() throws Exception {
    BreakIterator reference = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator candidate = new CustomSeparatorBreakIterator(randomSeparator());

    // Parallel arrays: inputs[i] is checked with third argument lengths[i].
    final String[] inputs = {"000a000", "000ab000", "000abc000", "000000"};
    final int[] lengths = {1, 2, 3, 0};
    for (int i = 0; i < inputs.length; i++) {
        assertSameBreaks(inputs[i], 3, lengths[i], reference, candidate);
    }
}
Aggregations