Search in sources :

Example 36 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class PostingsWithTermVectorsOffsetStrategy method getOffsetsEnums.

/**
 * Builds the offsets enums for one document, restricting the segment reader to the terms
 * found in that document's term vector for {@code field}.
 *
 * <p>If {@code reader} is not itself a {@link LeafReader}, the leaf containing {@code docId}
 * is looked up and the doc id is rebased to be relative to that leaf.
 *
 * @param reader  reader holding the document; either a leaf reader or a composite one
 * @param docId   document id relative to {@code reader}
 * @param content not used by this method
 * @return the enums produced by {@code createOffsetsEnumsFromReader}, or an empty list when
 *         the document has no term vector for {@code field}
 * @throws IOException if the underlying reader fails
 */
@Override
public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
    final LeafReader segmentReader;
    final int segmentDocId;
    if (reader instanceof LeafReader) {
        segmentReader = (LeafReader) reader;
        segmentDocId = docId;
    } else {
        final List<LeafReaderContext> leafContexts = reader.leaves();
        final LeafReaderContext owningContext = leafContexts.get(ReaderUtil.subIndex(docId, leafContexts));
        segmentReader = owningContext.reader();
        // Rebase the id so it is relative to the leaf that owns the document.
        segmentDocId = docId - owningContext.docBase;
    }

    final Terms termVector = segmentReader.getTermVector(segmentDocId, field);
    if (termVector == null) {
        return Collections.emptyList();
    }

    final LeafReader filteredReader = new TermVectorFilteredLeafReader(segmentReader, termVector);
    return createOffsetsEnumsFromReader(filteredReader, segmentDocId);
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 37 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TermVectorOffsetStrategy method getOffsetsEnums.

/**
 * Builds the offsets enums for one document from its term vector for {@code field}.
 *
 * @param reader  reader the term vector is fetched from
 * @param docId   id of the document whose term vector is read
 * @param content not used by this method
 * @return the enums produced by {@code createOffsetsEnumsFromReader}, or an empty list when
 *         the document has no term vector for {@code field}
 * @throws IOException if the underlying reader fails
 */
@Override
public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
    final Terms termVector = reader.getTermVector(docId, field);
    if (termVector != null) {
        // The reader wrapped around the term vector is addressed with doc id 0,
        // not with the caller's docId.
        return createOffsetsEnumsFromReader(new TermVectorLeafReader(field, termVector), 0);
    }
    return Collections.emptyList();
}
Also used : TermVectorLeafReader(org.apache.lucene.search.highlight.TermVectorLeafReader) LeafReader(org.apache.lucene.index.LeafReader) TermVectorLeafReader(org.apache.lucene.search.highlight.TermVectorLeafReader) Terms(org.apache.lucene.index.Terms)

Example 38 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TestJoinUtil method testSimpleOrdinalsJoin.

/**
 * Indexes two "product" documents and four "price" documents that share a sorted doc values
 * join field, builds an ordinal map over the per-leaf doc values, and checks that
 * {@code JoinUtil.createJoinQuery} joins products to prices and prices back to products.
 */
public void testSimpleOrdinalsJoin() throws Exception {
    final String idField = "id";
    final String productIdField = "productId";
    // Records which type a document belongs to; used to distinguish between documents during joining.
    final String typeField = "type";
    // One sorted doc values field carries the join value for every document type.
    final String joinField = idField + productIdField;

    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random(),
        directory,
        newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    // doc 0: product "1"
    Document document = new Document();
    document.add(new TextField(idField, "1", Field.Store.NO));
    document.add(new TextField(typeField, "product", Field.Store.NO));
    document.add(new TextField("description", "random text", Field.Store.NO));
    document.add(new TextField("name", "name1", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(document);

    // doc 1: price 10.0 for product "1"
    document = new Document();
    document.add(new TextField(productIdField, "1", Field.Store.NO));
    document.add(new TextField(typeField, "price", Field.Store.NO));
    document.add(new TextField("price", "10.0", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(document);

    // doc 2: price 20.0 for product "1"
    document = new Document();
    document.add(new TextField(productIdField, "1", Field.Store.NO));
    document.add(new TextField(typeField, "price", Field.Store.NO));
    document.add(new TextField("price", "20.0", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(document);

    // doc 3: product "2"
    document = new Document();
    document.add(new TextField(idField, "2", Field.Store.NO));
    document.add(new TextField(typeField, "product", Field.Store.NO));
    document.add(new TextField("description", "more random text", Field.Store.NO));
    document.add(new TextField("name", "name2", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(document);

    // A commit is issued after the first four documents, before the remaining prices are added.
    writer.commit();

    // doc 4: price 10.0 for product "2"
    document = new Document();
    document.add(new TextField(productIdField, "2", Field.Store.NO));
    document.add(new TextField(typeField, "price", Field.Store.NO));
    document.add(new TextField("price", "10.0", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(document);

    // doc 5: price 20.0 for product "2"
    document = new Document();
    document.add(new TextField(productIdField, "2", Field.Store.NO));
    document.add(new TextField(typeField, "price", Field.Store.NO));
    document.add(new TextField("price", "20.0", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(document);

    IndexSearcher indexSearcher = new IndexSearcher(writer.getReader());
    writer.close();

    // Collect the join field's sorted doc values from each leaf and build the ordinal map over them.
    IndexReader topReader = indexSearcher.getIndexReader();
    SortedDocValues[] perLeafValues = new SortedDocValues[topReader.leaves().size()];
    for (int leaf = 0; leaf < perLeafValues.length; leaf++) {
        perLeafValues[leaf] = DocValues.getSorted(topReader.leaves().get(leaf).reader(), joinField);
    }
    MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, perLeafValues, PackedInts.DEFAULT);

    // Product -> prices: start from product "name2" and expect its two price documents.
    Query toQuery = new TermQuery(new Term(typeField, "price"));
    Query fromQuery = new TermQuery(new Term("name", "name2"));
    Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    TopDocs result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(4, result.scoreDocs[0].doc);
    assertEquals(5, result.scoreDocs[1].doc);

    // Product -> prices: start from product "name1" and expect its two price documents.
    fromQuery = new TermQuery(new Term("name", "name1"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(1, result.scoreDocs[0].doc);
    assertEquals(2, result.scoreDocs[1].doc);

    // Prices -> products: start from the 20.0 prices and expect both product documents.
    fromQuery = new TermQuery(new Term("price", "20.0"));
    toQuery = new TermQuery(new Term(typeField, "product"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(0, result.scoreDocs[0].doc);
    assertEquals(3, result.scoreDocs[1].doc);

    indexSearcher.getIndexReader().close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) LeafReader(org.apache.lucene.index.LeafReader) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MultiDocValues(org.apache.lucene.index.MultiDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap)

Example 39 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TestMemoryIndex method testSimilarities.

/**
 * Checks that the norm value reported for field {@code f1} changes once a different
 * {@link Similarity} is set on the {@link MemoryIndex}.
 *
 * <p>The replacement similarity returns the constant 74 from {@code computeNorm}; its scoring
 * methods throw because this test only reads norms.
 *
 * @throws IOException if reading the norms fails
 */
@Test
public void testSimilarities() throws IOException {
    MemoryIndex mi = new MemoryIndex();
    mi.addField("f1", "a long text field that contains many many terms", analyzer);
    IndexSearcher searcher = mi.createSearcher();
    LeafReader reader = (LeafReader) searcher.getIndexReader();
    NumericDocValues norms = reader.getNormValues("f1");
    assertEquals(0, norms.nextDoc());
    // Keep the norm as a long: widening long -> float is lossy, so two distinct
    // norm values could otherwise compare as equal.
    long n1 = norms.longValue();
    // Norms are re-computed when we change the Similarity
    mi.setSimilarity(new Similarity() {

        @Override
        public long computeNorm(FieldInvertState state) {
            return 74;
        }

        @Override
        public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
            throw new UnsupportedOperationException();
        }

        @Override
        public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
            throw new UnsupportedOperationException();
        }
    });
    norms = reader.getNormValues("f1");
    assertEquals(0, norms.nextDoc());
    long n2 = norms.longValue();
    assertTrue(n1 != n2);
    TestUtil.checkReader(reader);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) BM25Similarity(org.apache.lucene.search.similarities.BM25Similarity) Similarity(org.apache.lucene.search.similarities.Similarity) IOException(java.io.IOException) TermStatistics(org.apache.lucene.search.TermStatistics) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) FieldInvertState(org.apache.lucene.index.FieldInvertState) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Test(org.junit.Test)

Example 40 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TestMemoryIndex method testDocValuesDoNotAffectBoostPositionsOrOffset.

/**
 * Adds both a binary doc values field and a text field under the same name ("text"), then
 * checks term order, frequency, position and offsets of the text field's postings, and
 * finally that the binary doc value is still readable.
 */
public void testDocValuesDoNotAffectBoostPositionsOrOffset() throws Exception {
    Document document = new Document();
    document.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
    document.add(new TextField("text", "quick brown fox", Field.Store.NO));
    MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, analyzer, true, true);
    LeafReader reader = memoryIndex.createSearcher().getIndexReader().leaves().get(0).reader();

    // Expected postings, listed in the order the terms enum is asserted to return them.
    final String[] expectedTerms = { "brown", "fox", "quick" };
    final int[] expectedPositions = { 1, 2, 0 };
    final int[] expectedStartOffsets = { 6, 12, 0 };
    final int[] expectedEndOffsets = { 11, 15, 5 };

    TermsEnum termsEnum = reader.terms("text").iterator();
    // Starts as null for the first term; afterwards the previous enum is handed back for reuse.
    PostingsEnum postings = null;
    for (int i = 0; i < expectedTerms.length; i++) {
        assertEquals(expectedTerms[i], termsEnum.next().utf8ToString());
        postings = termsEnum.postings(postings, PostingsEnum.OFFSETS);
        assertEquals(0, postings.nextDoc());
        assertEquals(1, postings.freq());
        assertEquals(expectedPositions[i], postings.nextPosition());
        assertEquals(expectedStartOffsets[i], postings.startOffset());
        assertEquals(expectedEndOffsets[i], postings.endOffset());
    }

    BinaryDocValues docValues = reader.getBinaryDocValues("text");
    assertEquals(0, docValues.nextDoc());
    assertEquals("quick brown fox", docValues.binaryValue().utf8ToString());
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) PostingsEnum(org.apache.lucene.index.PostingsEnum) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

LeafReader (org.apache.lucene.index.LeafReader): 187, BytesRef (org.apache.lucene.util.BytesRef): 69, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 68, Document (org.apache.lucene.document.Document): 65, Directory (org.apache.lucene.store.Directory): 62, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 55, DirectoryReader (org.apache.lucene.index.DirectoryReader): 49, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 42, Test (org.junit.Test): 41, IndexWriter (org.apache.lucene.index.IndexWriter): 35, Terms (org.apache.lucene.index.Terms): 34, NumericDocValues (org.apache.lucene.index.NumericDocValues): 33, TermsEnum (org.apache.lucene.index.TermsEnum): 32, IndexReader (org.apache.lucene.index.IndexReader): 26, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 24, SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 24, Term (org.apache.lucene.index.Term): 24, SortedDocValues (org.apache.lucene.index.SortedDocValues): 22, Bits (org.apache.lucene.util.Bits): 21, IOException (java.io.IOException): 20