Search in sources :

Example 31 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project elasticsearch by elastic.

the class BlendedTermQueryTests method testDismaxQuery.

public void testDismaxQuery() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    String[] username = new String[] { "foo fighters", "some cool fan", "cover band" };
    String[] song = new String[] { "generator", "foo fighers - generator", "foo fighters generator" };
    final boolean omitNorms = random().nextBoolean();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS);
    ft.setOmitNorms(omitNorms);
    ft.freeze();
    FieldType ft1 = new FieldType(TextField.TYPE_NOT_STORED);
    ft1.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS);
    ft1.setOmitNorms(omitNorms);
    ft1.freeze();
    for (int i = 0; i < username.length; i++) {
        Document d = new Document();
        d.add(new TextField("id", Integer.toString(i), Field.Store.YES));
        d.add(new Field("username", username[i], ft));
        d.add(new Field("song", song[i], ft));
        w.addDocument(d);
    }
    int iters = scaledRandomIntBetween(25, 100);
    for (int j = 0; j < iters; j++) {
        Document d = new Document();
        d.add(new TextField("id", Integer.toString(username.length + j), Field.Store.YES));
        d.add(new Field("username", "foo fighters", ft1));
        d.add(new Field("song", "some bogus text to bump up IDF", ft1));
        w.addDocument(d);
    }
    w.commit();
    DirectoryReader reader = DirectoryReader.open(w);
    IndexSearcher searcher = setSimilarity(newSearcher(reader));
    {
        String[] fields = new String[] { "username", "song" };
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.setDisableCoord(true);
        query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 0.1f), BooleanClause.Occur.SHOULD);
        query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "fighters"), 0.1f), BooleanClause.Occur.SHOULD);
        query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "generator"), 0.1f), BooleanClause.Occur.SHOULD);
        TopDocs search = searcher.search(query.build(), 10);
        ScoreDoc[] scoreDocs = search.scoreDocs;
        assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue());
    }
    {
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.setDisableCoord(true);
        DisjunctionMaxQuery uname = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "foo")), new TermQuery(new Term("song", "foo"))), 0.0f);
        DisjunctionMaxQuery s = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "fighers")), new TermQuery(new Term("song", "fighers"))), 0.0f);
        DisjunctionMaxQuery gen = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "generator")), new TermQuery(new Term("song", "generator"))), 0f);
        query.add(uname, BooleanClause.Occur.SHOULD);
        query.add(s, BooleanClause.Occur.SHOULD);
        query.add(gen, BooleanClause.Occur.SHOULD);
        TopDocs search = searcher.search(query.build(), 4);
        ScoreDoc[] scoreDocs = search.scoreDocs;
        assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue());
    }
    reader.close();
    w.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 32 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project elasticsearch by elastic.

the class MinDocQueryTests method testRandom.

public void testRandom() throws IOException {
    final int numDocs = randomIntBetween(10, 200);
    final Document doc = new Document();
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; ++i) {
        w.addDocument(doc);
    }
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    for (int i = 0; i <= numDocs; ++i) {
        assertEquals(numDocs - i, searcher.count(new MinDocQuery(i)));
    }
    w.close();
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 33 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project elasticsearch by elastic.

the class CustomPostingsHighlighterTests method testCustomPostingsHighlighter.

public void testCustomPostingsHighlighter() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    //good position but only one match
    final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue(firstValue);
    //two matches, not the best snippet due to its length though
    final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
    Field body2 = new Field("body", "", offsetsType);
    doc.add(body2);
    body2.setStringValue(secondValue);
    //two matches and short, will be scored highest
    final String thirdValue = "This is highlighting the third short highlighting value.";
    Field body3 = new Field("body", "", offsetsType);
    doc.add(body3);
    body3.setStringValue(thirdValue);
    //one match, same as first but at the end, will be scored lower due to its position
    final String fourthValue = "Just a test4 highlighting from postings highlighter.";
    Field body4 = new Field("body", "", offsetsType);
    doc.add(body4);
    body4.setStringValue(fourthValue);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
    String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
    String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
    String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";
    IndexSearcher searcher = newSearcher(ir);
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
    CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
    Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
    assertThat(snippets.length, equalTo(4));
    assertThat(snippets[0].getText(), equalTo(firstHlValue));
    assertThat(snippets[1].getText(), equalTo(secondHlValue));
    assertThat(snippets[2].getText(), equalTo(thirdHlValue));
    assertThat(snippets[3].getText(), equalTo(fourthHlValue));
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Snippet(org.apache.lucene.search.highlight.Snippet) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 34 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project elasticsearch by elastic.

the class PercolateQueryBuilder method doToQuery.

@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    // Call nowInMillis() so that this query becomes un-cacheable since we
    // can't be sure that it doesn't use now or scripts
    context.nowInMillis();
    if (indexedDocumentIndex != null || indexedDocumentType != null || indexedDocumentId != null) {
        throw new IllegalStateException("query builder must be rewritten first");
    }
    if (document == null) {
        throw new IllegalStateException("no document to percolate");
    }
    MapperService mapperService = context.getMapperService();
    DocumentMapperForType docMapperForType = mapperService.documentMapperWithAutoCreate(documentType);
    DocumentMapper docMapper = docMapperForType.getDocumentMapper();
    ParsedDocument doc = docMapper.parse(source(context.index().getName(), documentType, "_temp_id", document, documentXContentType));
    FieldNameAnalyzer fieldNameAnalyzer = (FieldNameAnalyzer) docMapper.mappers().indexAnalyzer();
    // Need to this custom impl because FieldNameAnalyzer is strict and the percolator sometimes isn't when
    // 'index.percolator.map_unmapped_fields_as_string' is enabled:
    Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {

        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            Analyzer analyzer = fieldNameAnalyzer.analyzers().get(fieldName);
            if (analyzer != null) {
                return analyzer;
            } else {
                return context.getIndexAnalyzers().getDefaultIndexAnalyzer();
            }
        }
    };
    final IndexSearcher docSearcher;
    if (doc.docs().size() > 1) {
        assert docMapper.hasNestedObjects();
        docSearcher = createMultiDocumentSearcher(analyzer, doc);
    } else {
        MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc.rootDoc(), analyzer, true, false);
        docSearcher = memoryIndex.createSearcher();
        docSearcher.setQueryCache(null);
    }
    Version indexVersionCreated = context.getIndexSettings().getIndexVersionCreated();
    boolean mapUnmappedFieldsAsString = context.getIndexSettings().getValue(PercolatorFieldMapper.INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING);
    // We have to make a copy of the QueryShardContext here so we can have a unfrozen version for parsing the legacy
    // percolator queries
    QueryShardContext percolateShardContext = new QueryShardContext(context);
    MappedFieldType fieldType = context.fieldMapper(field);
    if (fieldType == null) {
        throw new QueryShardException(context, "field [" + field + "] does not exist");
    }
    if (!(fieldType instanceof PercolatorFieldMapper.FieldType)) {
        throw new QueryShardException(context, "expected field [" + field + "] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
    }
    PercolatorFieldMapper.FieldType pft = (PercolatorFieldMapper.FieldType) fieldType;
    PercolateQuery.QueryStore queryStore = createStore(pft, percolateShardContext, mapUnmappedFieldsAsString);
    return pft.percolateQuery(documentType, queryStore, document, docSearcher);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FieldNameAnalyzer(org.elasticsearch.index.analysis.FieldNameAnalyzer) DocumentMapper(org.elasticsearch.index.mapper.DocumentMapper) Analyzer(org.apache.lucene.analysis.Analyzer) FieldNameAnalyzer(org.elasticsearch.index.analysis.FieldNameAnalyzer) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) ParsedDocument(org.elasticsearch.index.mapper.ParsedDocument) DelegatingAnalyzerWrapper(org.apache.lucene.analysis.DelegatingAnalyzerWrapper) Version(org.elasticsearch.Version) DocumentMapperForType(org.elasticsearch.index.mapper.DocumentMapperForType) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) QueryShardContext(org.elasticsearch.index.query.QueryShardContext) QueryShardException(org.elasticsearch.index.query.QueryShardException) MapperService(org.elasticsearch.index.mapper.MapperService)

Example 35 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project elasticsearch by elastic.

the class PercolateQueryBuilder method createMultiDocumentSearcher.

static IndexSearcher createMultiDocumentSearcher(Analyzer analyzer, ParsedDocument doc) {
    RAMDirectory ramDirectory = new RAMDirectory();
    try (IndexWriter indexWriter = new IndexWriter(ramDirectory, new IndexWriterConfig(analyzer))) {
        indexWriter.addDocuments(doc.docs());
        indexWriter.commit();
        DirectoryReader directoryReader = DirectoryReader.open(ramDirectory);
        assert directoryReader.leaves().size() == 1 : "Expected single leaf, but got [" + directoryReader.leaves().size() + "]";
        final IndexSearcher slowSearcher = new IndexSearcher(directoryReader) {

            @Override
            public Weight createNormalizedWeight(Query query, boolean needsScores) throws IOException {
                BooleanQuery.Builder bq = new BooleanQuery.Builder();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(Queries.newNestedFilter(), BooleanClause.Occur.MUST_NOT);
                return super.createNormalizedWeight(bq.build(), needsScores);
            }
        };
        slowSearcher.setQueryCache(null);
        return slowSearcher;
    } catch (IOException e) {
        throw new ElasticsearchException("Failed to create index for percolator with nested document ", e);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PercolatorFieldMapper.parseQuery(org.elasticsearch.percolator.PercolatorFieldMapper.parseQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexWriter(org.apache.lucene.index.IndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) AbstractQueryBuilder(org.elasticsearch.index.query.AbstractQueryBuilder) IOException(java.io.IOException) ElasticsearchException(org.elasticsearch.ElasticsearchException) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexSearcher (org.apache.lucene.search.IndexSearcher)777 Document (org.apache.lucene.document.Document)478 IndexReader (org.apache.lucene.index.IndexReader)390 Directory (org.apache.lucene.store.Directory)385 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)311 TopDocs (org.apache.lucene.search.TopDocs)303 TermQuery (org.apache.lucene.search.TermQuery)291 Term (org.apache.lucene.index.Term)250 Query (org.apache.lucene.search.Query)232 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)209 BooleanQuery (org.apache.lucene.search.BooleanQuery)146 Field (org.apache.lucene.document.Field)137 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)122 IndexWriter (org.apache.lucene.index.IndexWriter)113 Sort (org.apache.lucene.search.Sort)110 IOException (java.io.IOException)105 ScoreDoc (org.apache.lucene.search.ScoreDoc)101 SortField (org.apache.lucene.search.SortField)95 TextField (org.apache.lucene.document.TextField)94 DirectoryReader (org.apache.lucene.index.DirectoryReader)91