Search in sources :

Example 6 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class MinDocQueryTests method testRandom.

public void testRandom() throws IOException {
    final int numDocs = randomIntBetween(10, 200);
    final Document doc = new Document();
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; ++i) {
        w.addDocument(doc);
    }
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    for (int i = 0; i <= numDocs; ++i) {
        assertEquals(numDocs - i, searcher.count(new MinDocQuery(i)));
    }
    w.close();
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 7 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class CustomPostingsHighlighterTests method testCustomPostingsHighlighter.

public void testCustomPostingsHighlighter() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    //good position but only one match
    final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue(firstValue);
    //two matches, not the best snippet due to its length though
    final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
    Field body2 = new Field("body", "", offsetsType);
    doc.add(body2);
    body2.setStringValue(secondValue);
    //two matches and short, will be scored highest
    final String thirdValue = "This is highlighting the third short highlighting value.";
    Field body3 = new Field("body", "", offsetsType);
    doc.add(body3);
    body3.setStringValue(thirdValue);
    //one match, same as first but at the end, will be scored lower due to its position
    final String fourthValue = "Just a test4 highlighting from postings highlighter.";
    Field body4 = new Field("body", "", offsetsType);
    doc.add(body4);
    body4.setStringValue(fourthValue);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
    String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
    String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
    String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";
    IndexSearcher searcher = newSearcher(ir);
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
    CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
    Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
    assertThat(snippets.length, equalTo(4));
    assertThat(snippets[0].getText(), equalTo(firstHlValue));
    assertThat(snippets[1].getText(), equalTo(secondHlValue));
    assertThat(snippets[2].getText(), equalTo(thirdHlValue));
    assertThat(snippets[3].getText(), equalTo(fourthHlValue));
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Snippet(org.apache.lucene.search.highlight.Snippet) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 8 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class PercolatorFieldMapperTests method testCreateCandidateQuery.

public void testCreateCandidateQuery() throws Exception {
    addQueryMapping();
    MemoryIndex memoryIndex = new MemoryIndex(false);
    memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
    memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
    memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());
    IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
    BooleanQuery candidateQuery = (BooleanQuery) fieldType.createCandidateQuery(indexReader);
    assertEquals(2, candidateQuery.clauses().size());
    assertEquals(Occur.SHOULD, candidateQuery.clauses().get(0).getOccur());
    TermInSetQuery termsQuery = (TermInSetQuery) candidateQuery.clauses().get(0).getQuery();
    PrefixCodedTerms terms = termsQuery.getTermData();
    assertThat(terms.size(), equalTo(14L));
    PrefixCodedTerms.TermIterator termIterator = terms.iterator();
    assertTermIterator(termIterator, "_field3me", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "_field3unhide", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1brown", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1dog", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1fox", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1jumps", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1lazy", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1over", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1quick", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1the", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2more", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2some", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2text", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field4123", fieldType.queryTermsField.name());
    assertEquals(Occur.SHOULD, candidateQuery.clauses().get(1).getOccur());
    assertEquals(new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED)), candidateQuery.clauses().get(1).getQuery());
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) PrefixCodedTerms(org.apache.lucene.index.PrefixCodedTerms) IndexReader(org.apache.lucene.index.IndexReader) LongPoint(org.apache.lucene.document.LongPoint) Term(org.apache.lucene.index.Term)

Example 9 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class SimpleLuceneTests method testBoost.

public void testBoost() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    for (int i = 0; i < 100; i++) {
        // TODO (just setting the boost value does not seem to work...)
        StringBuilder value = new StringBuilder().append("value");
        for (int j = 0; j < i; j++) {
            value.append(" ").append("value");
        }
        Document document = new Document();
        TextField textField = new TextField("_id", Integer.toString(i), Field.Store.YES);
        textField.setBoost(i);
        document.add(textField);
        textField = new TextField("value", value.toString(), Field.Store.YES);
        textField.setBoost(i);
        document.add(textField);
        indexWriter.addDocument(document);
    }
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TermQuery query = new TermQuery(new Term("value", "value"));
    TopDocs topDocs = searcher.search(query, 100);
    assertThat(100, equalTo(topDocs.totalHits));
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
        //            System.out.println(doc.get("id") + ": " + searcher.explain(query, topDocs.scoreDocs[i].doc));
        assertThat(doc.get("_id"), equalTo(Integer.toString(100 - i - 1)));
    }
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) TopDocs(org.apache.lucene.search.TopDocs) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 10 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class SimpleLuceneTests method testOrdering.

/**
     * Here, we verify that the order that we add fields to a document counts, and not the lexi order
     * of the field. This means that heavily accessed fields that use field selector should be added
     * first (with load and break).
     */
public void testOrdering() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("#id", "1", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    final ArrayList<String> fieldsOrder = new ArrayList<>();
    searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            fieldsOrder.add(fieldInfo.name);
            return Status.YES;
        }
    });
    assertThat(fieldsOrder.size(), equalTo(2));
    assertThat(fieldsOrder.get(0), equalTo("_id"));
    assertThat(fieldsOrder.get(1), equalTo("#id"));
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) TopDocs(org.apache.lucene.search.TopDocs) IndexWriter(org.apache.lucene.index.IndexWriter) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) FieldInfo(org.apache.lucene.index.FieldInfo) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexReader (org.apache.lucene.index.IndexReader)962 Document (org.apache.lucene.document.Document)610 Directory (org.apache.lucene.store.Directory)603 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)549 IndexSearcher (org.apache.lucene.search.IndexSearcher)410 Term (org.apache.lucene.index.Term)332 TopDocs (org.apache.lucene.search.TopDocs)204 TermQuery (org.apache.lucene.search.TermQuery)160 Query (org.apache.lucene.search.Query)158 IndexWriter (org.apache.lucene.index.IndexWriter)150 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)143 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)142 Field (org.apache.lucene.document.Field)135 BytesRef (org.apache.lucene.util.BytesRef)134 IOException (java.io.IOException)133 BooleanQuery (org.apache.lucene.search.BooleanQuery)122 ArrayList (java.util.ArrayList)108 TextField (org.apache.lucene.document.TextField)81 Test (org.junit.Test)81