use of java.text.BreakIterator in project lucene-solr by apache.
the class TestWholeBreakIterator method testSliceEnd.
public void testSliceEnd() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("a000", 0, 1, expected, actual);
assertSameBreaks("ab000", 0, 1, expected, actual);
assertSameBreaks("abc000", 0, 1, expected, actual);
assertSameBreaks("000", 0, 0, expected, actual);
}
use of java.text.BreakIterator in project lucene-solr by apache.
the class TestWholeBreakIterator method testSliceMiddle.
public void testSliceMiddle() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("000a000", 3, 1, expected, actual);
assertSameBreaks("000ab000", 3, 2, expected, actual);
assertSameBreaks("000abc000", 3, 3, expected, actual);
assertSameBreaks("000000", 3, 0, expected, actual);
}
use of java.text.BreakIterator in project lucene-solr by apache.
the class TestWholeBreakIterator method testSingleSentences.
/** For single sentences, we know WholeBreakIterator should break the same as a sentence iterator */
public void testSingleSentences() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("a", expected, actual);
assertSameBreaks("ab", expected, actual);
assertSameBreaks("abc", expected, actual);
assertSameBreaks("", expected, actual);
}
use of java.text.BreakIterator in project lucene-solr by apache.
the class TestCharArrayIterator method testConsumeWordInstance.
public void testConsumeWordInstance() {
// we use the default locale, as it's randomized by LuceneTestCase
BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
CharArrayIterator ci = CharArrayIterator.newWordInstance();
for (int i = 0; i < 10000; i++) {
char[] text = TestUtil.randomUnicodeString(random()).toCharArray();
ci.setText(text, 0, text.length);
consume(bi, ci);
}
}
use of java.text.BreakIterator in project jdk8u_jdk by JetBrains.
the class ConditionalSpecialCasing method isFinalCased.
/**
* Implements the "Final_Cased" condition
*
* Specification: Within the closest word boundaries containing C, there is a cased
* letter before C, and there is no cased letter after C.
*
* Regular Expression:
* Before C: [{cased==true}][{wordBoundary!=true}]*
* After C: !([{wordBoundary!=true}]*[{cased}])
*/
private static boolean isFinalCased(String src, int index, Locale locale) {
BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
wordBoundary.setText(src);
int ch;
// Look for a preceding 'cased' letter
for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i); i -= Character.charCount(ch)) {
ch = src.codePointBefore(i);
if (isCased(ch)) {
int len = src.length();
// Check that there is no 'cased' letter after the index
for (i = index + Character.charCount(src.codePointAt(index)); (i < len) && !wordBoundary.isBoundary(i); i += Character.charCount(ch)) {
ch = src.codePointAt(i);
if (isCased(ch)) {
return false;
}
}
return true;
}
}
return false;
}
Aggregations