Search in sources :

Example 16 with BreakIterator

use of java.text.BreakIterator in project groovy-core by groovy.

the class SimpleGroovyDoc method calculateFirstSentence.

/**
 * Extracts the first sentence of a raw doc comment, for use as a summary.
 *
 * The comment text is cut at the earliest of: a {@code <p>} tag, a completely
 * blank line, a line starting with a known block tag (such as {@code @param}),
 * or the first sentence boundary reported by a {@link BreakIterator} for the
 * default locale.
 *
 * @param raw the raw comment text, possibly with leading {@code *} on each line;
 *            must not be {@code null}
 * @return the first sentence with surrounding whitespace removed; the whole
 *         cleaned text when no sentence boundary is found
 */
public static String calculateFirstSentence(String raw) {
    // remove all the * from beginning of lines
    String text = raw.replaceAll("(?m)^\\s*\\*", "").trim();
    // assume a <p> paragraph tag signifies end of sentence
    text = text.replaceFirst("(?ms)<p>.*", "").trim();
    // assume completely blank line signifies end of sentence
    text = text.replaceFirst("(?ms)\\n\\s*\\n.*", "").trim();
    // assume @tag signifies end of sentence
    text = text.replaceFirst("(?ms)\\n\\s*@(see|param|throws|return|author|since|exception|version|deprecated|todo)\\s.*", "").trim();
    // Comment Summary using first sentence (Locale sensitive)
    // todo - allow locale to be passed in
    BreakIterator boundary = BreakIterator.getSentenceInstance(Locale.getDefault());
    boundary.setText(text);
    int start = boundary.first();
    int end = boundary.next();
    if (start != BreakIterator.DONE && end != BreakIterator.DONE) {
        // The sentence boundary falls after the whitespace that follows the
        // terminator, so trim to keep the summary free of trailing blanks,
        // consistent with every other step above.
        text = text.substring(start, end).trim();
    }
    return text;
}
Also used : BreakIterator(java.text.BreakIterator)

Example 17 with BreakIterator

use of java.text.BreakIterator in project VocabHunter by VocabHunter.

the class FileStreamer method splitToList.

/**
 * Splits {@code text} into sentences using a sentence {@link BreakIterator}.
 *
 * Blank segments are skipped; in every kept sentence each run of whitespace
 * is collapsed to a single space and the ends are trimmed.
 *
 * @param text the text to split
 * @return the non-blank sentences in the order they occur in {@code text}
 */
private List<String> splitToList(final String text) {
    List<String> list = new ArrayList<>();
    // LOCALE is declared elsewhere in the enclosing class
    BreakIterator iterator = BreakIterator.getSentenceInstance(LOCALE);
    iterator.setText(text);
    int start = iterator.first();
    for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
        String line = text.substring(start, end);
        // isNotBlank is the single-value check; isNoneBlank is the varargs
        // form meant for testing several values at once.
        if (StringUtils.isNotBlank(line)) {
            list.add(line.replaceAll("\\s+", " ").trim());
        }
    }
    return list;
}
Also used : ArrayList(java.util.ArrayList) BreakIterator(java.text.BreakIterator)

Example 18 with BreakIterator

use of java.text.BreakIterator in project geode by apache.

the class UnitTestDoclet method indent.

/**
 * Writes {@code text} to {@code pw} behind a margin of {@code indent} spaces.
 *
 * The text is emitted word by word (as segmented by a word {@link BreakIterator});
 * whenever the running column count plus the next word would exceed 72, a new
 * line with the same margin is started. A segment ending in a newline is written
 * without that newline, and a segment that begins with whitespace loses its first
 * character when it is the first segment on a line. The output always ends with
 * a line terminator.
 *
 * @param text   the text to write
 * @param indent the number of spaces in the margin
 * @param pw     the writer that receives the output
 */
private static void indent(String text, final int indent, PrintWriter pw) {
    final int maxColumn = 72;

    // Build the margin once; it is reused for every wrapped line.
    StringBuilder padding = new StringBuilder();
    while (padding.length() < indent) {
        padding.append(' ');
    }
    final String margin = padding.toString();

    pw.print(margin);
    int column = indent;
    boolean atLineStart = true;

    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(text);
    int from = words.first();
    int to = words.next();
    while (to != BreakIterator.DONE) {
        final String word = text.substring(from, to);
        final int length = word.length();

        // Wrap before writing a word that would cross the column limit.
        if (column + length > maxColumn) {
            pw.println("");
            pw.print(margin);
            column = indent;
            atLineStart = true;
        }

        if (word.charAt(length - 1) == '\n') {
            // Drop the trailing newline of the segment.
            pw.write(word, 0, length - 1);
        } else if (atLineStart && Character.isWhitespace(word.charAt(0))) {
            // Skip the first (whitespace) character at the start of a line.
            pw.write(word, 1, length - 1);
        } else {
            pw.print(word);
        }

        // The full segment length is counted even when part of it was skipped.
        column += length;
        atLineStart = false;
        from = to;
        to = words.next();
    }
    pw.println("");
}
Also used : BreakIterator(java.text.BreakIterator)

Example 19 with BreakIterator

use of java.text.BreakIterator in project lucene-solr by apache.

the class TestCharArrayIterator method testConsumeSentenceInstance.

/**
 * Runs 10000 rounds in which a random Unicode string is loaded into a sentence
 * {@code CharArrayIterator} and passed, together with a JDK sentence
 * {@link BreakIterator}, to {@code consume}.
 */
public void testConsumeSentenceInstance() {
    // the default locale is used on purpose: LuceneTestCase randomizes it
    final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.getDefault());
    final CharArrayIterator charIterator = CharArrayIterator.newSentenceInstance();
    int remaining = 10000;
    while (remaining-- > 0) {
        final char[] chars = TestUtil.randomUnicodeString(random()).toCharArray();
        charIterator.setText(chars, 0, chars.length);
        consume(sentences, charIterator);
    }
}
Also used : BreakIterator(java.text.BreakIterator)

Example 20 with BreakIterator

use of java.text.BreakIterator in project lucene-solr by apache.

the class TestCustomSeparatorBreakIterator method testSliceMiddle.

/**
 * Compares a {@code CustomSeparatorBreakIterator} built with a random separator
 * against the JDK root-locale sentence iterator on slices taken from the middle
 * of several short strings.
 */
public void testSliceMiddle() throws Exception {
    final BreakIterator reference = BreakIterator.getSentenceInstance(Locale.ROOT);
    final BreakIterator custom = new CustomSeparatorBreakIterator(randomSeparator());

    // Each text is checked with the same second argument (3) and its own third
    // argument — presumably slice offset and slice length; confirm against
    // assertSameBreaks.
    final String[] texts = {"000a000", "000ab000", "000abc000", "000000"};
    final int[] lengths = {1, 2, 3, 0};
    for (int i = 0; i < texts.length; i++) {
        assertSameBreaks(texts[i], 3, lengths[i], reference, custom);
    }
}
Also used : BreakIterator(java.text.BreakIterator)

Aggregations

BreakIterator (java.text.BreakIterator) 59, ArrayList (java.util.ArrayList) 10, Locale (java.util.Locale) 6, IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair) 3, BytesRef (org.apache.lucene.util.BytesRef) 3, Snippet (org.apache.lucene.search.highlight.Snippet) 2, Intent (android.content.Intent) 1, TagElement (com.google.devtools.j2objc.ast.TagElement) 1, Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair) 1, TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) 1, IOException (java.io.IOException) 1, Iterator (java.util.Iterator) 1, PriorityQueue (java.util.PriorityQueue) 1, JComponent (javax.swing.JComponent) 1, Text (org.apache.hadoop.io.Text) 1, Analyzer (org.apache.lucene.analysis.Analyzer) 1, IndexSearcher (org.apache.lucene.search.IndexSearcher) 1, Encoder (org.apache.lucene.search.highlight.Encoder) 1, CustomSeparatorBreakIterator (org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator) 1, CustomPassageFormatter (org.apache.lucene.search.uhighlight.CustomPassageFormatter) 1