Search in sources :

Example 56 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestCharArraySet method testNonZeroOffset.

/**
 * Looks up a word that sits in the middle of a larger char buffer, using the
 * (buffer, offset, length) form of {@code contains} as well as the equivalent
 * {@code String}, first on a regular set and then on its unmodifiable wrapper.
 */
public void testNonZeroOffset() {
    // "this" is embedded between two padding characters, so the match starts at index 1.
    final char[] buffer = "xthisy".toCharArray();
    final int offset = 1;
    final int length = 4;
    final String slice = new String(buffer, offset, length);

    CharArraySet set = new CharArraySet(10, true);
    set.addAll(Arrays.asList("Hello", "World", "this", "is", "a", "test"));
    assertTrue(set.contains(buffer, offset, length));
    assertTrue(set.contains(slice));

    // The same lookups must succeed through the unmodifiable wrapper.
    set = CharArraySet.unmodifiableSet(set);
    assertTrue(set.contains(buffer, offset, length));
    assertTrue(set.contains(slice));
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 57 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestCharArraySet method testClear.

/**
 * Fills a set with the stop words, clears it, and checks that it is empty and
 * no longer reports any of the words; then refills it and checks that every
 * word is found again.
 */
public void testClear() {
    CharArraySet set = new CharArraySet(10, true);

    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());

    set.clear();
    assertEquals("not empty", 0, set.size());
    for (String stopWord : TEST_STOP_WORDS) {
        assertFalse(set.contains(stopWord));
    }

    // The set must remain usable after clear().
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
    for (String stopWord : TEST_STOP_WORDS) {
        assertTrue(set.contains(stopWord));
    }
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 58 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestCharArraySet method testUnmodifiableSet.

/**
 * Wraps a populated set with {@code CharArraySet.unmodifiableSet} and checks
 * that the size is unchanged and that every original entry, including the
 * Integer entry queried as an Integer, as the string "1" and as a char array,
 * is still reported as present. Also checks that wrapping {@code null} throws
 * a {@link NullPointerException}.
 */
public void testUnmodifiableSet() {
    final Integer one = Integer.valueOf(1);

    CharArraySet set = new CharArraySet(10, true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    set.add(one);
    final int expectedSize = set.size();

    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());
    for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
        assertTrue(set.contains(TEST_STOP_WORDS[i]));
    }

    // The Integer entry is queried in three different forms.
    assertTrue(set.contains(one));
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[] { '1' }));

    expectThrows(NullPointerException.class, () -> CharArraySet.unmodifiableSet(null));
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 59 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestCharArraySet method testToString.

/**
 * Checks the string form of a one-element set exactly, then adds a second
 * element and checks only that the rendering contains the ", " separator.
 */
public void testToString() {
    final CharArraySet words = CharArraySet.copy(Collections.singleton("test"));
    final String single = words.toString();
    assertEquals("[test]", single);

    words.add("test2");
    final String pair = words.toString();
    assertTrue(pair.contains(", "));
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 60 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestCharArraySet method testSingleHighSurrogate.

/**
 * Exercises terms that contain an unpaired high surrogate (U+D800), alone and
 * next to a supplementary code point (U+1041C / U+10444).
 *
 * <p>With the set built using {@code true} as the second constructor argument,
 * both the upper-case and the lower-case spelling of every term must be found.
 * With {@code false}, only the spelling that was added must be found.
 */
public void testSingleHighSurrogate() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    // The surrogates are written as Unicode escapes: a lone surrogate cannot be
    // stored in a UTF-8 source file and would otherwise degrade to U+FFFD,
    // which silently defeats the purpose of this test.
    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\uD801\uDC1CB" };
    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\uD801\uDC44b" };

    // Second constructor argument true: both spellings are expected to match.
    CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
        assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
        assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // Second constructor argument false: only the added spelling may match.
    set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false);
    for (String upper : upperArr) {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
        assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
        // Report the term that was actually looked up (the lower-case form).
        assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Aggregations

CharArraySet (org.apache.lucene.analysis.CharArraySet): 137
Analyzer (org.apache.lucene.analysis.Analyzer): 54
MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 46
Tokenizer (org.apache.lucene.analysis.Tokenizer): 43
TokenStream (org.apache.lucene.analysis.TokenStream): 37
KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer): 34
SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter): 26
StringReader (java.io.StringReader): 23
StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 9
StopFilter (org.apache.lucene.analysis.StopFilter): 7
TokenFilter (org.apache.lucene.analysis.TokenFilter): 6
WordDelimiterFilter (org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter): 5
WordDelimiterGraphFilter (org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter): 5
ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader): 5
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 4
HyphenationTree (org.apache.lucene.analysis.compound.hyphenation.HyphenationTree): 4
ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader): 4
InputSource (org.xml.sax.InputSource): 4
Reader (java.io.Reader): 3
ArrayList (java.util.ArrayList): 3