
Example 6 with WhitespaceAnalyzer

Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project elasticsearch (by elastic).

From class PercolateQueryTests, method testPercolateQuery:
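Before the test itself, here is a minimal sketch of what WhitespaceAnalyzer does (the class name WhitespaceAnalyzerSketch is made up for illustration and is not part of the Elasticsearch sources): it splits input on whitespace only, with no lowercasing and no stop-word removal, which is why the percolated document in the test keeps terms such as "fox" and "jumps" exactly as written.

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceAnalyzerSketch {
    public static void main(String[] args) throws IOException {
        try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
             TokenStream ts = analyzer.tokenStream("field", "the quick brown Fox")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                // prints: the, quick, brown, Fox (case preserved, split on whitespace only)
                System.out.println(term.toString());
            }
            ts.end();
        }
    }
}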

public void testPercolateQuery() throws Exception {
    List<Iterable<? extends IndexableField>> docs = new ArrayList<>();
    List<Query> queries = new ArrayList<>();
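    // QueryStore maps a leaf reader context to a function from Lucene doc id to the stored query;
    // here it simply looks the query up by doc id in the local queries list.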
    PercolateQuery.QueryStore queryStore = ctx -> queries::get;
    queries.add(new TermQuery(new Term("field", "fox")));
    docs.add(Collections.singleton(new StringField("select", "a", Field.Store.NO)));
    SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
    snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
    snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
    snp.addClause(new SpanTermQuery(new Term("field", "dog")));
    snp.setSlop(2);
    queries.add(snp.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
    pq1.add(new Term("field", "quick"));
    pq1.add(new Term("field", "brown"));
    pq1.add(new Term("field", "jumps"));
    pq1.setSlop(1);
    queries.add(pq1.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
    bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    queries.add(bq1.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    indexWriter.addDocuments(docs);
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);
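    // The document being percolated is held in a single-document, in-memory index; WhitespaceAnalyzer
    // keeps its terms exactly as written (no lowercasing), matching the term/phrase/span queries registered above.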
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();
    // scoring is not needed here, so wrap the percolate query in a ConstantScoreQuery:
    Query query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("a"), new TermQuery(new Term("select", "a")), percolateSearcher, new MatchNoDocsQuery("")));
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(1));
    assertThat(topDocs.scoreDocs.length, equalTo(1));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(0));
    Explanation explanation = shardSearcher.explain(query, 0);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("b"), new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery("")));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs.length, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("c"), new MatchAllDocsQuery(), percolateSearcher, new MatchAllDocsQuery()));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(4));
    query = new PercolateQuery("type", queryStore, new BytesArray("{}"), new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery(""));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs.length, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(3));
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
}
Also used : Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) NoMergePolicy(org.apache.lucene.index.NoMergePolicy) Matchers.arrayWithSize(org.hamcrest.Matchers.arrayWithSize) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) StringField(org.apache.lucene.document.StringField) IndexableField(org.apache.lucene.index.IndexableField) Term(org.apache.lucene.index.Term) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) PhraseQuery(org.apache.lucene.search.PhraseQuery) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) ArrayList(java.util.ArrayList) BytesArray(org.elasticsearch.common.bytes.BytesArray) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) Directory(org.apache.lucene.store.Directory) After(org.junit.After) ESTestCase(org.elasticsearch.test.ESTestCase) Before(org.junit.Before) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TopDocs(org.apache.lucene.search.TopDocs) Explanation(org.apache.lucene.search.Explanation) DirectoryReader(org.apache.lucene.index.DirectoryReader) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BooleanClause(org.apache.lucene.search.BooleanClause) IndexWriter(org.apache.lucene.index.IndexWriter) TermQuery(org.apache.lucene.search.TermQuery) List(java.util.List) BooleanQuery(org.apache.lucene.search.BooleanQuery) Field(org.apache.lucene.document.Field) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Collections(java.util.Collections) IndexSearcher(org.apache.lucene.search.IndexSearcher)

Example 7 with WhitespaceAnalyzer

Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project elasticsearch (by elastic).

From class PercolatorFieldMapperTests, method testCreateCandidateQuery:

public void testCreateCandidateQuery() throws Exception {
    addQueryMapping();
    MemoryIndex memoryIndex = new MemoryIndex(false);
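    // single-document, in-memory index; the boolean constructor flag disables storing token offsets here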
    memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
    memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
    memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());
    IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
    BooleanQuery candidateQuery = (BooleanQuery) fieldType.createCandidateQuery(indexReader);
    assertEquals(2, candidateQuery.clauses().size());
    assertEquals(Occur.SHOULD, candidateQuery.clauses().get(0).getOccur());
    TermInSetQuery termsQuery = (TermInSetQuery) candidateQuery.clauses().get(0).getQuery();
    PrefixCodedTerms terms = termsQuery.getTermData();
    assertThat(terms.size(), equalTo(14L));
    PrefixCodedTerms.TermIterator termIterator = terms.iterator();
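    // Each expected value below is the source field name concatenated with the extracted term
    // (e.g. "field1" + "brown"); assertTermIterator checks the encoded term against the mapping's query-terms field.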
    assertTermIterator(termIterator, "_field3me", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "_field3unhide", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1brown", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1dog", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1fox", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1jumps", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1lazy", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1over", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1quick", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1the", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2more", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2some", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2text", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field4123", fieldType.queryTermsField.name());
    assertEquals(Occur.SHOULD, candidateQuery.clauses().get(1).getOccur());
    assertEquals(new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED)), candidateQuery.clauses().get(1).getQuery());
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) PrefixCodedTerms(org.apache.lucene.index.PrefixCodedTerms) IndexReader(org.apache.lucene.index.IndexReader) LongPoint(org.apache.lucene.document.LongPoint) Term(org.apache.lucene.index.Term)

Example 8 with WhitespaceAnalyzer

Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr (by apache).

From class IndexBasedSpellCheckerTest, method testAlternateLocation:

@Test
public void testAlternateLocation() throws Exception {
    String[] ALT_DOCS = new String[] { "jumpin jack flash", "Sargent Peppers Lonely Hearts Club Band", "Born to Run", "Thunder Road", "Londons Burning", "A Horse with No Name", "Sweet Caroline" };
    IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
    NamedList spellchecker = new NamedList();
    spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
    File tmpDir = createTempDir().toFile();
    File indexDir = new File(tmpDir, "spellingIdx");
    //create a standalone index
    File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
    Directory dir = newFSDirectory(altIndexDir.toPath());
    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));
    for (int i = 0; i < ALT_DOCS.length; i++) {
        Document doc = new Document();
        doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
        iw.addDocument(doc);
    }
    iw.forceMerge(1);
    iw.close();
    dir.close();
    indexDir.mkdirs();
    spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
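    // LOCATION points the spell checker at the standalone whitespace-analyzed index built above
    // (the "alternate location"), rather than the index under INDEX_DIR.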
    spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
    spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
    spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
    SolrCore core = h.getCore();
    String dictName = checker.init(spellchecker, core);
    assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
    RefCounted<SolrIndexSearcher> holder = core.getSearcher();
    SolrIndexSearcher searcher = holder.get();
    try {
        checker.build(core, searcher);
        IndexReader reader = searcher.getIndexReader();
        Collection<Token> tokens = queryConverter.convert("flesh");
        SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
        SpellingResult result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        //should be lowercased, b/c we are using a lowercasing analyzer
        Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("flesh is null and it shouldn't be", suggestions != null);
        assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
        Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
        assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
        assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
        //test something not in the spell checker
        spellOpts.tokens = queryConverter.convert("super");
        result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("suggestions size should be 0", suggestions.size() == 0);
        spellOpts.tokens = queryConverter.convert("Caroline");
        result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("suggestions is not null and it should be", suggestions == null);
    } finally {
        holder.decref();
    }
}
Also used : SolrCore(org.apache.solr.core.SolrCore) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Date(java.util.Date) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) File(java.io.File) Map(java.util.Map) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 9 with WhitespaceAnalyzer

Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr (by apache).

From class SpellingQueryConverterTest, method testMultipleClauses:

@Test
public void testMultipleClauses() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());
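    // Whitespace tokenization keeps each CJK phrase (roughly "I bought props and costumes") as a single token,
    // so every field:value pair or bare term below contributes exactly one token.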
    // two field:value pairs should give two tokens
    Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
    // a field:value pair and a search term should give two tokens
    tokens = converter.convert("text_field:我购买了道具和服装。 bar");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) Token(org.apache.lucene.analysis.Token) Test(org.junit.Test)

Example 10 with WhitespaceAnalyzer

Use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in project lucene-solr (by apache).

From class SpellingQueryConverterTest, method testNumeric:

@Test
public void testNumeric() throws Exception {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());
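    // With whitespace analysis each number or word becomes one token; field prefixes, grouping parentheses,
    // and +/- operators add no extra tokens, as the expected counts below show.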
    String[] queries = { "12345", "foo:12345", "12345 67890", "foo:(12345 67890)", "foo:(life 67890)", "12345 life", "+12345 +life", "-12345 life" };
    int[] tokensToExpect = { 1, 1, 2, 2, 2, 2, 2, 2 };
    for (int i = 0; i < queries.length; i++) {
        Collection<Token> tokens = converter.convert(queries[i]);
        assertTrue("tokens Size: " + tokens.size() + " is not: " + tokensToExpect[i], tokens.size() == tokensToExpect[i]);
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) Token(org.apache.lucene.analysis.Token) Test(org.junit.Test)

Aggregations

WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer): 37 usages
IndexWriter (org.apache.lucene.index.IndexWriter): 17 usages
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 17 usages
Document (org.apache.lucene.document.Document): 16 usages
Analyzer (org.apache.lucene.analysis.Analyzer): 9 usages
Test (org.junit.Test): 9 usages
NamedList (org.apache.solr.common.util.NamedList): 8 usages
ArrayList (java.util.ArrayList): 7 usages
Token (org.apache.lucene.analysis.Token): 7 usages
TextField (org.apache.lucene.document.TextField): 7 usages
IndexSearcher (org.apache.lucene.search.IndexSearcher): 6 usages
IOException (java.io.IOException): 5 usages
HashMap (java.util.HashMap): 5 usages
Field (org.apache.lucene.document.Field): 5 usages
DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter): 5 usages
DirectoryReader (org.apache.lucene.index.DirectoryReader): 5 usages
TokenStream (org.apache.lucene.analysis.TokenStream): 4 usages
PerFieldAnalyzerWrapper (org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper): 4 usages
LongPoint (org.apache.lucene.document.LongPoint): 4 usages
BooleanQuery (org.apache.lucene.search.BooleanQuery): 4 usages