Search in sources :

Example 31 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

the class HighlighterTest method testOffsetWindowTokenFilter.

/**
 * Checks that an {@code OffsetWindowTokenFilter} over the space-joined text yields,
 * window by window, token streams that compare equal to the stream obtained by
 * analyzing each individual value on its own.
 */
@Test
public void testOffsetWindowTokenFilter() throws Exception {
    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
    // Analyzer is Closeable: try-with-resources releases it even if an assertion fails.
    try (Analyzer a1 = new WhitespaceAnalyzer()) {
        // One stream over all values joined with single spaces.
        TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
        OffsetWindowTokenFilter tots = new OffsetWindowTokenFilter(tokenStream);
        for (String v : multivalued) {
            // Window sized to the current value's character length.
            TokenStream ts1 = tots.advanceToNextWindowOfLength(v.length());
            ts1.reset();
            // A fresh analyzer per value; closed at the end of each iteration.
            try (Analyzer a2 = new WhitespaceAnalyzer()) {
                TokenStream ts2 = a2.tokenStream("", v);
                ts2.reset();
                while (ts1.incrementToken()) {
                    assertTrue(ts2.incrementToken());
                    assertEquals(ts1, ts2);
                }
                // The window must not contain fewer tokens than the stand-alone value.
                assertFalse(ts2.incrementToken());
            }
        }
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) TokenStream(org.apache.lucene.analysis.TokenStream) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Test(org.junit.Test)

Example 32 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

the class SpellCheckComponent method inform.

/**
 * Registers every "spellchecker" entry found in {@code initParams} and then
 * resolves the query converter together with the analyzer it should use.
 * Does nothing when {@code initParams} is {@code null}.
 *
 * @param core core handed to {@code addSpellChecker} and queried for plugins and the latest schema
 */
@Override
public void inform(SolrCore core) {
    if (initParams == null) {
        return;
    }
    LOG.info("Initializing spell checkers");
    // NOTE(review): this flag is never reassigned in this method, so every
    // addSpellChecker call below receives false - confirm that is intended.
    boolean hasDefault = false;
    for (int idx = 0; idx < initParams.size(); idx++) {
        if (!initParams.getName(idx).equals("spellchecker")) {
            continue;
        }
        // A spellchecker config may be a NamedList, a Map, or a List of Maps.
        Object config = initParams.getVal(idx);
        if (config instanceof NamedList) {
            addSpellChecker(core, hasDefault, (NamedList) config);
        } else if (config instanceof Map) {
            addSpellChecker(core, hasDefault, new NamedList((Map) config));
        } else if (config instanceof List) {
            for (Object entry : (List) config) {
                if (entry instanceof Map) {
                    addSpellChecker(core, hasDefault, new NamedList((Map) entry));
                }
            }
        }
    }

    Map<String, QueryConverter> converters = new HashMap<>();
    core.initPlugins(converters, QueryConverter.class);
    // Fall back to the stock converter when none was configured.
    if (converters.isEmpty()) {
        LOG.trace("No queryConverter defined, using default converter");
        converters.put("queryConverter", new SpellingQueryConverter());
    }
    // Only a single converter is wired up; any other count leaves queryConverter untouched.
    if (converters.size() != 1) {
        return;
    }
    queryConverter = converters.values().iterator().next();
    IndexSchema latestSchema = core.getLatestSchema();
    String analyzerTypeName = (String) initParams.get("queryAnalyzerFieldType");
    FieldType analyzerType = latestSchema.getFieldTypes().get(analyzerTypeName);
    // Use the field type's query analyzer when the type exists, else plain whitespace splitting.
    Analyzer queryAnalyzer;
    if (analyzerType == null) {
        queryAnalyzer = new WhitespaceAnalyzer();
    } else {
        queryAnalyzer = analyzerType.getQueryAnalyzer();
    }
    //TODO: There's got to be a better way!  Where's Spring when you need it?
    queryConverter.setAnalyzer(queryAnalyzer);
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) NamedList(org.apache.solr.common.util.NamedList) SpellingQueryConverter(org.apache.solr.spelling.SpellingQueryConverter) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) FieldType(org.apache.solr.schema.FieldType) QueryConverter(org.apache.solr.spelling.QueryConverter) SpellingQueryConverter(org.apache.solr.spelling.SpellingQueryConverter) List(java.util.List) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) IndexSchema(org.apache.solr.schema.IndexSchema) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 33 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

the class TestPerFieldAnalyzerWrapper method testPerField.

/**
 * Checks that the wrapper analyzes an unmapped field with the default
 * (whitespace) analyzer and the "special" field with its mapped SimpleAnalyzer,
 * each producing exactly one token for the input.
 */
public void testPerField() throws Exception {
    final String input = "Qwerty";
    final Map<String, Analyzer> perFieldAnalyzers = Collections.<String, Analyzer>singletonMap("special", new SimpleAnalyzer());
    final Analyzer fallback = new WhitespaceAnalyzer();
    final PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(fallback, perFieldAnalyzers);

    // Each row: field name, assertion message, expected single term.
    final String[][] expectations = {
        { "field", "WhitespaceAnalyzer does not lowercase", "Qwerty" },
        { "special", "SimpleAnalyzer lowercases", "qwerty" }
    };
    for (String[] expectation : expectations) {
        try (TokenStream stream = wrapper.tokenStream(expectation[0], input)) {
            CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
            stream.reset();
            assertTrue(stream.incrementToken());
            assertEquals(expectation[1], expectation[2], term.toString());
            assertFalse(stream.incrementToken());
            stream.end();
        }
    }

    // TODO: fix this about PFAW, this is crazy
    // The wrapper, the default analyzer and the per-field analyzers are each closed explicitly.
    wrapper.close();
    fallback.close();
    IOUtils.close(perFieldAnalyzers.values());
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer)

Example 34 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

the class TestPerFieldAnalyzerWrapper method testReuseWrapped.

/**
 * Exercises TokenStream instance identity across nested analyzer wrappers:
 * a PerFieldAnalyzerWrapper is expected to hand back the very same TokenStream
 * instance as the analyzer it delegates to, while a plain AnalyzerWrapper that
 * wraps components is expected to return a different (filter) instance.
 * The assertions rely on the exact order of the tokenStream calls below.
 */
public void testReuseWrapped() throws Exception {
    final String text = "Qwerty";
    final Analyzer specialAnalyzer = new SimpleAnalyzer();
    final Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
    TokenStream ts1, ts2, ts3, ts4;
    // "special" maps to specialAnalyzer; every other field falls through to defaultAnalyzer.
    final PerFieldAnalyzerWrapper wrapper1 = new PerFieldAnalyzerWrapper(defaultAnalyzer, Collections.<String, Analyzer>singletonMap("special", specialAnalyzer));
    // test that the PerFieldWrapper returns the same instance as original Analyzer:
    ts1 = defaultAnalyzer.tokenStream("something", text);
    ts2 = wrapper1.tokenStream("something", text);
    assertSame(ts1, ts2);
    ts1 = specialAnalyzer.tokenStream("special", text);
    ts2 = wrapper1.tokenStream("special", text);
    assertSame(ts1, ts2);
    // Wrap with another wrapper, which does *not* extend DelegatingAnalyzerWrapper:
    // (it is constructed with wrapper1's reuse strategy)
    final AnalyzerWrapper wrapper2 = new AnalyzerWrapper(wrapper1.getReuseStrategy()) {

        // Every field is delegated to wrapper1.
        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            return wrapper1;
        }

        // Adds an ASCIIFoldingFilter on top of the delegate's stream, so the result is a new instance.
        @Override
        protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
            // The components handed in here must not be the ones specialAnalyzer itself serves.
            assertNotSame(specialAnalyzer.tokenStream("special", text), components.getTokenStream());
            TokenFilter filter = new ASCIIFoldingFilter(components.getTokenStream());
            return new TokenStreamComponents(components.getTokenizer(), filter);
        }
    };
    ts3 = wrapper2.tokenStream("special", text);
    // ts1 still refers to specialAnalyzer's "special" stream obtained above.
    assertNotSame(ts1, ts3);
    assertTrue(ts3 instanceof ASCIIFoldingFilter);
    // check that cache did not get corrupted:
    ts2 = wrapper1.tokenStream("special", text);
    assertSame(ts1, ts2);
    // Wrap PerField with another PerField. In that case all TokenStreams returned must be the same:
    final PerFieldAnalyzerWrapper wrapper3 = new PerFieldAnalyzerWrapper(wrapper1, Collections.<String, Analyzer>singletonMap("moreSpecial", specialAnalyzer));
    ts1 = specialAnalyzer.tokenStream("special", text);
    ts2 = wrapper3.tokenStream("special", text);
    assertSame(ts1, ts2);
    ts3 = specialAnalyzer.tokenStream("moreSpecial", text);
    ts4 = wrapper3.tokenStream("moreSpecial", text);
    assertSame(ts3, ts4);
    // Both field names resolve to specialAnalyzer, so the two streams are the same instance too.
    assertSame(ts2, ts3);
    // Close the wrappers first, then the analyzers they delegate to.
    IOUtils.close(wrapper3, wrapper2, wrapper1, specialAnalyzer, defaultAnalyzer);
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) TokenStream(org.apache.lucene.analysis.TokenStream) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) AnalyzerWrapper(org.apache.lucene.analysis.AnalyzerWrapper) Analyzer(org.apache.lucene.analysis.Analyzer) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) TokenFilter(org.apache.lucene.analysis.TokenFilter)

Example 35 with WhitespaceAnalyzer

use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr by apache.

the class RangeFacetsExample method index.

/**
 * Build the example index: 100 documents, each carrying a "timestamp" value
 * stored both as a doc values field and as a point field, then open a
 * near-real-time searcher on the writer and assign it to {@code searcher}.
 *
 * @throws IOException propagated from the index writer or reader calls
 */
public void index() throws IOException {
    // try-with-resources so the writer is closed even when indexing or opening the reader fails.
    try (IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE))) {
        // Add documents with a fake timestamp: "now", 1000 sec before "now",
        // 2000 sec before "now", ...:
        for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            long then = nowSec - i * 1000;
            // Add as doc values field, so we can compute range facets:
            doc.add(new NumericDocValuesField("timestamp", then));
            // Add as numeric field so we can drill-down:
            doc.add(new LongPoint("timestamp", then));
            indexWriter.addDocument(doc);
        }
        // Open near-real-time searcher (must happen before the writer is closed)
        searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer)37 IndexWriter (org.apache.lucene.index.IndexWriter)17 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)17 Document (org.apache.lucene.document.Document)16 Analyzer (org.apache.lucene.analysis.Analyzer)9 Test (org.junit.Test)9 NamedList (org.apache.solr.common.util.NamedList)8 ArrayList (java.util.ArrayList)7 Token (org.apache.lucene.analysis.Token)7 TextField (org.apache.lucene.document.TextField)7 IndexSearcher (org.apache.lucene.search.IndexSearcher)6 IOException (java.io.IOException)5 HashMap (java.util.HashMap)5 Field (org.apache.lucene.document.Field)5 DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)5 DirectoryReader (org.apache.lucene.index.DirectoryReader)5 TokenStream (org.apache.lucene.analysis.TokenStream)4 PerFieldAnalyzerWrapper (org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper)4 LongPoint (org.apache.lucene.document.LongPoint)4 BooleanQuery (org.apache.lucene.search.BooleanQuery)4