Search in sources :

Example 26 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

In class TestStopFilter, method testExactCase:

// other StopFilter functionality is already tested by TestStopAnalyzer
public void testExactCase() throws IOException {
    StringReader reader = new StringReader("Now is The Time");
    CharArraySet stopWords = new CharArraySet(asSet("is", "the", "Time"), false);
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = new StopFilter(in, stopWords);
    assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader)

Example 27 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

In class TestCharArraySet, method testNonZeroOffset:

public void testNonZeroOffset() {
    String[] words = { "Hello", "World", "this", "is", "a", "test" };
    char[] findme = "xthisy".toCharArray();
    CharArraySet set = new CharArraySet(10, true);
    set.addAll(Arrays.asList(words));
    assertTrue(set.contains(findme, 1, 4));
    assertTrue(set.contains(new String(findme, 1, 4)));
    // test unmodifiable
    set = CharArraySet.unmodifiableSet(set);
    assertTrue(set.contains(findme, 1, 4));
    assertTrue(set.contains(new String(findme, 1, 4)));
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 28 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

In class TestCharArraySet, method testClear:

public void testClear() {
    CharArraySet set = new CharArraySet(10, true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
    set.clear();
    assertEquals("not empty", 0, set.size());
    for (int i = 0; i < TEST_STOP_WORDS.length; i++) assertFalse(set.contains(TEST_STOP_WORDS[i]));
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
    for (int i = 0; i < TEST_STOP_WORDS.length; i++) assertTrue(set.contains(TEST_STOP_WORDS[i]));
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 29 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

In class TestCharArraySet, method testUnmodifiableSet:

public void testUnmodifiableSet() {
    CharArraySet set = new CharArraySet(10, true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    set.add(Integer.valueOf(1));
    final int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
    for (String stopword : TEST_STOP_WORDS) {
        assertTrue(set.contains(stopword));
    }
    assertTrue(set.contains(Integer.valueOf(1)));
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[] { '1' }));
    expectThrows(NullPointerException.class, () -> {
        CharArraySet.unmodifiableSet(null);
    });
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 30 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

In class TestCharArraySet, method testToString:

public void testToString() {
    CharArraySet set = CharArraySet.copy(Collections.singleton("test"));
    assertEquals("[test]", set.toString());
    set.add("test2");
    assertTrue(set.toString().contains(", "));
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Aggregations

CharArraySet (org.apache.lucene.analysis.CharArraySet): 137; Analyzer (org.apache.lucene.analysis.Analyzer): 54; MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 46; Tokenizer (org.apache.lucene.analysis.Tokenizer): 43; TokenStream (org.apache.lucene.analysis.TokenStream): 37; KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer): 34; SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter): 26; StringReader (java.io.StringReader): 23; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 9; StopFilter (org.apache.lucene.analysis.StopFilter): 7; TokenFilter (org.apache.lucene.analysis.TokenFilter): 6; WordDelimiterFilter (org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter): 5; WordDelimiterGraphFilter (org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter): 5; ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader): 5; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 4; HyphenationTree (org.apache.lucene.analysis.compound.hyphenation.HyphenationTree): 4; ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader): 4; InputSource (org.xml.sax.InputSource): 4; Reader (java.io.Reader): 3; ArrayList (java.util.ArrayList): 3