Search in sources :

Example 71 with BreakIterator

use of java.text.BreakIterator in project lucene-solr by apache.

the class TestWholeBreakIterator method testSliceEnd.

/**
 * Compares WholeBreakIterator against a ROOT-locale sentence iterator on slices that begin at
 * offset 0 and are followed by trailing "000" padding.
 *
 * NOTE(review): the two int arguments are presumed to be (offset, length) of the slice, as in
 * the sibling slice tests — confirm against assertSameBreaks.
 */
public void testSliceEnd() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new WholeBreakIterator();
    // The slice length follows the letter prefix ("a", "ab", "abc") so each case covers the
    // whole prefix; previously every case used length 1 and only re-tested a one-char slice.
    assertSameBreaks("a000", 0, 1, expected, actual);
    assertSameBreaks("ab000", 0, 2, expected, actual);
    assertSameBreaks("abc000", 0, 3, expected, actual);
    // Empty slice at the start of the text.
    assertSameBreaks("000", 0, 0, expected, actual);
}
Also used : BreakIterator(java.text.BreakIterator)

Example 72 with BreakIterator

use of java.text.BreakIterator in project lucene-solr by apache.

the class TestWholeBreakIterator method testSliceMiddle.

/**
 * Compares WholeBreakIterator against a ROOT-locale sentence iterator on slices that start at
 * offset 3, surrounded by "000" padding on both sides.
 */
public void testSliceMiddle() throws Exception {
    final BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.ROOT);
    final BreakIterator wholeBreaks = new WholeBreakIterator();
    // Each entry is the letter run placed between the two "000" pads; the empty entry
    // yields the "000000" case with a zero-length slice.
    final String[] middles = {"a", "ab", "abc", ""};
    for (String middle : middles) {
        String padded = "000" + middle + "000";
        assertSameBreaks(padded, 3, middle.length(), sentenceBreaks, wholeBreaks);
    }
}
Also used : BreakIterator(java.text.BreakIterator)

Example 73 with BreakIterator

use of java.text.BreakIterator in project lucene-solr by apache.

the class TestWholeBreakIterator method testSingleSentences.

/**
 * A text made of a single sentence should be split by WholeBreakIterator exactly as a
 * sentence iterator splits it; this is checked for "a", "ab", "abc" and the empty string.
 */
public void testSingleSentences() throws Exception {
    final BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.ROOT);
    final BreakIterator wholeBreaks = new WholeBreakIterator();
    final String[] samples = {"a", "ab", "abc", ""};
    for (String sample : samples) {
        assertSameBreaks(sample, sentenceBreaks, wholeBreaks);
    }
}
Also used : BreakIterator(java.text.BreakIterator)

Example 74 with BreakIterator

use of java.text.BreakIterator in project lucene-solr by apache.

the class TestCharArrayIterator method testConsumeWordInstance.

/**
 * Feeds 10000 random Unicode strings through a word-instance CharArrayIterator and hands
 * each one, together with a JDK word BreakIterator, to consume(...).
 */
public void testConsumeWordInstance() {
    // LuceneTestCase randomizes the default locale, so the default locale is used on purpose
    final BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.getDefault());
    final CharArrayIterator charIterator = CharArrayIterator.newWordInstance();
    int remaining = 10000;
    while (remaining-- > 0) {
        final String randomText = TestUtil.randomUnicodeString(random());
        final char[] chars = randomText.toCharArray();
        charIterator.setText(chars, 0, chars.length);
        consume(wordBreaks, charIterator);
    }
}
Also used : BreakIterator(java.text.BreakIterator)

Example 75 with BreakIterator

use of java.text.BreakIterator in project jdk8u_jdk by JetBrains.

the class ConditionalSpecialCasing method isFinalCased.

/**
 * Evaluates the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: within the closest word boundaries containing C, a cased letter
 * precedes C and no cased letter follows C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text being examined
 * @param index  position of C in {@code src}
 * @param locale locale used to obtain the word BreakIterator
 * @return {@code true} if a cased letter is found before C and none after it, both scans
 *         stopping at the nearest word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Phase 1: walk backwards, one code point at a time, until a cased letter is seen
    // or a word boundary (or the start of the text) stops the scan.
    boolean casedBefore = false;
    int pos = index;
    while (pos >= 0 && !words.isBoundary(pos)) {
        int cp = src.codePointBefore(pos);
        if (isCased(cp)) {
            casedBefore = true;
            break;
        }
        pos -= Character.charCount(cp);
    }
    if (!casedBefore) {
        return false;
    }

    // Phase 2: walk forwards from the code point after C; any cased letter found before
    // the next word boundary (or the end of the text) defeats the condition.
    int end = src.length();
    pos = index + Character.charCount(src.codePointAt(index));
    while (pos < end && !words.isBoundary(pos)) {
        int cp = src.codePointAt(pos);
        if (isCased(cp)) {
            return false;
        }
        pos += Character.charCount(cp);
    }
    return true;
}
Also used : BreakIterator(java.text.BreakIterator)

Aggregations

BreakIterator (java.text.BreakIterator): 120, ArrayList (java.util.ArrayList): 17, Locale (java.util.Locale): 9, Paint (android.graphics.Paint): 4, IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair): 3, BytesRef (org.apache.lucene.util.BytesRef): 3, SuppressLint (android.annotation.SuppressLint): 2, Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 2, Collection (java.util.Collection): 2, BadLocationException (javax.swing.text.BadLocationException): 2, Document (javax.swing.text.Document): 2, Element (javax.swing.text.Element): 2, Segment (javax.swing.text.Segment): 2, Snippet (org.apache.lucene.search.highlight.Snippet): 2, Intent (android.content.Intent): 1, RectF (android.graphics.RectF): 1, TextPaint (android.text.TextPaint): 1, TagElement (com.google.devtools.j2objc.ast.TagElement): 1, Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 1, AbstractNLPDecoder (edu.emory.mathcs.nlp.decode.AbstractNLPDecoder): 1