Search in sources :

Example 26 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

From class SpellingQueryConverterTest, method test().

@Test
public void test() throws Exception {
    // A converter with default config and a whitespace analyzer should treat
    // a fielded query term as a single token.
    SpellingQueryConverter converter = new SpellingQueryConverter();
    // Diamond avoids the raw-type unchecked warning from new NamedList().
    converter.init(new NamedList<>());
    converter.setAnalyzer(new WhitespaceAnalyzer());
    Collection<Token> tokens = converter.convert("field:foo");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    // "field:foo" must yield exactly one token ("foo"); the field prefix is not a token.
    assertTrue("tokens Size: " + tokens.size() + " is not: " + 1, tokens.size() == 1);
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) Token(org.apache.lucene.analysis.Token) Test(org.junit.Test)

Example 27 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

From class SpellingQueryConverterTest, method testRequiredOrProhibitedFlags().

@Test
public void testRequiredOrProhibitedFlags() {
    // Verifies that the converter marks tokens with the correct flags:
    // required (+), prohibited (-), "precedes a negative boolean operator" (NBO),
    // and "appears inside a boolean query".
    SpellingQueryConverter converter = new SpellingQueryConverter();
    // Diamond avoids the raw-type unchecked warning from new NamedList().
    converter.init(new NamedList<>());
    converter.setAnalyzer(new WhitespaceAnalyzer());
    {
        // Plain terms: no required/prohibited flags anywhere.
        List<Token> tokens = new ArrayList<>(converter.convert("aaa bbb ccc"));
        assertTrue("Should have 3 tokens", tokens != null && tokens.size() == 3);
        assertTrue("token 1 should be optional", !hasRequiredFlag(tokens.get(0)) && !hasProhibitedFlag(tokens.get(0)));
        assertTrue("token 2 should be optional", !hasRequiredFlag(tokens.get(1)) && !hasProhibitedFlag(tokens.get(1)));
        assertTrue("token 3 should be optional", !hasRequiredFlag(tokens.get(2)) && !hasProhibitedFlag(tokens.get(2)));
    }
    {
        // "+" marks required, "-" marks prohibited; the bare middle term stays optional.
        List<Token> tokens = new ArrayList<>(converter.convert("+aaa bbb -ccc"));
        assertTrue("Should have 3 tokens", tokens != null && tokens.size() == 3);
        assertTrue("token 1 should be required", hasRequiredFlag(tokens.get(0)) && !hasProhibitedFlag(tokens.get(0)));
        assertTrue("token 2 should be optional", !hasRequiredFlag(tokens.get(1)) && !hasProhibitedFlag(tokens.get(1)));
        assertTrue("token 3 should be prohibited", !hasRequiredFlag(tokens.get(2)) && hasProhibitedFlag(tokens.get(2)));
    }
    {
        // AND query: every token is inside a boolean query; none precedes NOT.
        // BUG FIX: the in-boolean check previously re-tested tokens.get(0) for all
        // three assertions; each token must be checked individually.
        List<Token> tokens = new ArrayList<>(converter.convert("aaa AND bbb ccc"));
        assertTrue("Should have 3 tokens", tokens != null && tokens.size() == 3);
        assertTrue("token 1 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
        assertTrue("token 2 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
        assertTrue("token 3 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
    }
    {
        // OR query: same as above, still no NOT operators.
        List<Token> tokens = new ArrayList<>(converter.convert("aaa OR bbb OR ccc"));
        assertTrue("Should have 3 tokens", tokens != null && tokens.size() == 3);
        assertTrue("token 1 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
        assertTrue("token 2 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
        assertTrue("token 3 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
    }
    {
        // "bbb" precedes NOT, so it carries the NBO flag; the others do not.
        List<Token> tokens = new ArrayList<>(converter.convert("aaa AND bbb NOT ccc"));
        assertTrue("Should have 3 tokens", tokens != null && tokens.size() == 3);
        assertTrue("token 1 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
        assertTrue("token 2 precedes n.b.o.", hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
        assertTrue("token 3 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
    }
    {
        // "aaa" precedes NOT; "bbb" is the NOT-ed term and also flagged; "ccc" is not.
        List<Token> tokens = new ArrayList<>(converter.convert("aaa NOT bbb AND ccc"));
        assertTrue("Should have 3 tokens", tokens != null && tokens.size() == 3);
        assertTrue("token 1 precedes n.b.o.", hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
        assertTrue("token 2 precedes n.b.o.", hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
        assertTrue("token 3 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
    }
    {
        // "AND NOT" behaves like NOT for flag purposes.
        List<Token> tokens = new ArrayList<>(converter.convert("aaa AND NOT bbb AND ccc"));
        assertTrue("Should have 3 tokens", tokens != null && tokens.size() == 3);
        assertTrue("token 1 precedes n.b.o.", hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
        assertTrue("token 2 precedes n.b.o.", hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
        assertTrue("token 3 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) List(java.util.List) NamedList(org.apache.solr.common.util.NamedList) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 28 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

From class SpellingQueryConverterTest, method testUnicode().

@Test
public void testUnicode() {
    // Non-ASCII (CJK) content in field names and values must still convert
    // to a single token per fielded query term.
    SpellingQueryConverter converter = new SpellingQueryConverter();
    // Diamond avoids the raw-type unchecked warning from new NamedList().
    converter.init(new NamedList<>());
    converter.setAnalyzer(new WhitespaceAnalyzer());
    // chinese text value
    Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
    // chinese character inside the field name
    tokens = converter.convert("text_购field:我购买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
    // mixed ASCII and chinese characters in the value
    tokens = converter.convert("text_field:我购xyz买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) Token(org.apache.lucene.analysis.Token) Test(org.junit.Test)

Example 29 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

From class HighlighterTest, method testOffsetWindowTokenFilter().

@Test
public void testOffsetWindowTokenFilter() throws Exception {
    // The multivalued entries, joined by single spaces, reproduce the full text
    // tokenized below, so each window should contain exactly that value's tokens.
    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
    Analyzer a1 = new WhitespaceAnalyzer();
    TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
    OffsetWindowTokenFilter tots = new OffsetWindowTokenFilter(tokenStream);
    for (String v : multivalued) {
        // NOTE(review): window size is the value's CHARACTER length, which
        // presumably maps to an offset span in the combined stream — TODO confirm
        // against OffsetWindowTokenFilter's contract.
        TokenStream ts1 = tots.advanceToNextWindowOfLength(v.length());
        // reset() must be called before the first incrementToken() per the
        // TokenStream workflow contract.
        ts1.reset();
        // Reference stream: tokenize just this value on its own.
        Analyzer a2 = new WhitespaceAnalyzer();
        TokenStream ts2 = a2.tokenStream("", v);
        ts2.reset();
        while (ts1.incrementToken()) {
            // Windowed stream and reference stream must agree token-for-token;
            // assumes assertEquals compares the streams' attribute state — TODO confirm.
            assertTrue(ts2.incrementToken());
            assertEquals(ts1, ts2);
        }
        // The reference stream must be exhausted too: the window may not drop tokens.
        assertFalse(ts2.incrementToken());
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) TokenStream(org.apache.lucene.analysis.TokenStream) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Test(org.junit.Test)

Example 30 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

From class RangeFacetsExample, method index().

/** Build the example index. */
public void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
    // "now", 2000 sec before "now", ...:
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        long then = nowSec - i * 1000;
        // Add as doc values field, so we can compute range facets:
        doc.add(new NumericDocValuesField("timestamp", then));
        // Add as numeric field so we can drill-down:
        doc.add(new LongPoint("timestamp", then));
        indexWriter.addDocument(doc);
    }
    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
    indexWriter.close();
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer)37 IndexWriter (org.apache.lucene.index.IndexWriter)17 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)17 Document (org.apache.lucene.document.Document)16 Analyzer (org.apache.lucene.analysis.Analyzer)9 Test (org.junit.Test)9 NamedList (org.apache.solr.common.util.NamedList)8 ArrayList (java.util.ArrayList)7 Token (org.apache.lucene.analysis.Token)7 TextField (org.apache.lucene.document.TextField)7 IndexSearcher (org.apache.lucene.search.IndexSearcher)6 IOException (java.io.IOException)5 HashMap (java.util.HashMap)5 Field (org.apache.lucene.document.Field)5 DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)5 DirectoryReader (org.apache.lucene.index.DirectoryReader)5 TokenStream (org.apache.lucene.analysis.TokenStream)4 PerFieldAnalyzerWrapper (org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper)4 LongPoint (org.apache.lucene.document.LongPoint)4 BooleanQuery (org.apache.lucene.search.BooleanQuery)4