Use of java.text.BreakIterator in the lucene-solr project by Apache.
Class TestCustomSeparatorBreakIterator, method testBreakOnCustomSeparator.
/**
 * Joins five words with a randomly chosen separator character and checks that the
 * iterator yields each segment when walked forward and then backward, and that
 * {@code following}, {@code preceding} and {@code next(int)} land on the expected
 * boundaries.
 */
public void testBreakOnCustomSeparator() throws Exception {
  Character sep = randomSeparator();
  BreakIterator iterator = new CustomSeparatorBreakIterator(sep);

  // Segments in text order; every segment except the last keeps its trailing separator.
  String[] segments = {"this" + sep, "is" + sep, "the" + sep, "first" + sep, "sentence"};
  String source = segments[0] + segments[1] + segments[2] + segments[3] + segments[4];
  String firstTwo = segments[0] + segments[1];
  String firstThree = firstTwo + segments[2];

  iterator.setText(source);
  assertThat(iterator.current(), equalTo(0));
  assertThat(iterator.first(), equalTo(0));

  // Forward walk: read the current position first, then advance to the next boundary.
  for (String segment : segments) {
    int from = iterator.current();
    int to = iterator.next();
    assertThat(source.substring(from, to), equalTo(segment));
  }
  assertThat(iterator.next(), equalTo(BreakIterator.DONE));

  // Backward walk from the end of the text, visiting the same segments in reverse.
  assertThat(iterator.last(), equalTo(source.length()));
  for (int i = segments.length - 1; i >= 0; i--) {
    int end = iterator.current();
    int begin = iterator.previous();
    assertThat(source.substring(begin, end), equalTo(segments[i]));
  }
  assertThat(iterator.previous(), equalTo(BreakIterator.DONE));
  assertThat(iterator.current(), equalTo(0));

  // Offset 9 lies inside "the" (the separator is a single character).
  assertThat(source.substring(0, iterator.following(9)), equalTo(firstThree));
  assertThat(source.substring(0, iterator.preceding(9)), equalTo(firstTwo));

  // Stepping three boundaries from the start ends just after the third segment.
  assertThat(iterator.first(), equalTo(0));
  assertThat(source.substring(0, iterator.next(3)), equalTo(firstThree));
}
Use of java.text.BreakIterator in the lucene-solr project by Apache.
Class TestCustomSeparatorBreakIterator, method testSingleSentences.
/**
 * Passes a handful of very short inputs, including the empty string, to
 * {@code assertSameBreaks} together with the JDK sentence iterator (as the expected
 * side) and a custom-separator iterator built with a random separator.
 */
public void testSingleSentences() throws Exception {
  BreakIterator reference = BreakIterator.getSentenceInstance(Locale.ROOT);
  BreakIterator custom = new CustomSeparatorBreakIterator(randomSeparator());

  for (String input : new String[] {"a", "ab", "abc", ""}) {
    assertSameBreaks(input, reference, custom);
  }
}
Use of java.text.BreakIterator in the lucene-solr project by Apache.
Class BreakIteratorBoundaryScannerTest, method testOutOfRange.
/**
 * Checks that the scanner hands back the requested offset unchanged for the probes
 * below: one past the end of the text (start and end lookup), zero (start lookup)
 * and a negative offset (end lookup).
 */
public void testOutOfRange() throws Exception {
  StringBuilder buffer = new StringBuilder(TEXT);
  BoundaryScanner scanner =
      new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(Locale.ROOT));

  // Beyond the last character of the text.
  int pastEnd = TEXT.length() + 1;
  assertEquals(pastEnd, scanner.findStartOffset(buffer, pastEnd));
  assertEquals(pastEnd, scanner.findEndOffset(buffer, pastEnd));

  // Very beginning of the text.
  int atStart = 0;
  assertEquals(atStart, scanner.findStartOffset(buffer, atStart));

  // Before the beginning of the text.
  int negative = -1;
  assertEquals(negative, scanner.findEndOffset(buffer, negative));
}
Use of java.text.BreakIterator in the lucene-solr project by Apache.
Class BreakIteratorBoundaryScannerTest, method testSentenceBoundary.
/**
 * Runs the start/end offset helpers with a sentence break iterator, starting from
 * the position of "any application" in {@code TEXT}. The expected start is the
 * position of "It is a"; the expected end is the position of
 * "Apache Lucene is an open source".
 */
public void testSentenceBoundary() throws Exception {
  StringBuilder buffer = new StringBuilder(TEXT);
  // Deliberately uses the default locale, which LuceneTestCase randomizes.
  BoundaryScanner scanner =
      new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(Locale.getDefault()));
  int offset = TEXT.indexOf("any application");

  int expectedStart = TEXT.indexOf("It is a");
  testFindStartOffset(buffer, offset, expectedStart, scanner);

  int expectedEnd = TEXT.indexOf("Apache Lucene is an open source");
  testFindEndOffset(buffer, offset, expectedEnd, scanner);
}
Use of java.text.BreakIterator in the lucene-solr project by Apache.
Class BreakIteratorBoundaryScannerTest, method testLineBoundary.
/**
 * Runs the start/end offset helpers with a line break iterator, starting from the
 * position of "any application" in {@code TEXT}. The expected start is the position
 * of "nearly"; the expected end is the position of "application that requires".
 */
public void testLineBoundary() throws Exception {
  StringBuilder buffer = new StringBuilder(TEXT);
  // Deliberately uses the default locale, which LuceneTestCase randomizes.
  BoundaryScanner scanner =
      new BreakIteratorBoundaryScanner(BreakIterator.getLineInstance(Locale.getDefault()));
  int offset = TEXT.indexOf("any application");

  int expectedStart = TEXT.indexOf("nearly");
  testFindStartOffset(buffer, offset, expectedStart, scanner);

  int expectedEnd = TEXT.indexOf("application that requires");
  testFindEndOffset(buffer, offset, expectedEnd, scanner);
}
Aggregations