Example 1 with SequentialStoredFieldsLeafReader

Use of org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader in project OpenSearch by opensearch-project.

Class FetchPhase, method execute:

public void execute(SearchContext context) {
    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace("{}", new SearchContextSourcePrinter(context));
    }
    if (context.isCancelled()) {
        throw new TaskCancelledException("cancelled task with reason: " + context.getTask().getReasonCancelled());
    }
    if (context.docIdsToLoadSize() == 0) {
        // no individual hits to process, so we shortcut
        context.fetchResult().hits(new SearchHits(new SearchHit[0], context.queryResult().getTotalHits(), context.queryResult().getMaxScore()));
        return;
    }
    DocIdToIndex[] docs = new DocIdToIndex[context.docIdsToLoadSize()];
    for (int index = 0; index < context.docIdsToLoadSize(); index++) {
        docs[index] = new DocIdToIndex(context.docIdsToLoad()[context.docIdsToLoadFrom() + index], index);
    }
    // make sure that we iterate in doc id order
    Arrays.sort(docs);
    Map<String, Set<String>> storedToRequestedFields = new HashMap<>();
    FieldsVisitor fieldsVisitor = createStoredFieldsVisitor(context, storedToRequestedFields);
    FetchContext fetchContext = new FetchContext(context);
    SearchHit[] hits = new SearchHit[context.docIdsToLoadSize()];
    List<FetchSubPhaseProcessor> processors = getProcessors(context.shardTarget(), fetchContext);
    int currentReaderIndex = -1;
    LeafReaderContext currentReaderContext = null;
    CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader = null;
    boolean hasSequentialDocs = hasSequentialDocs(docs);
    for (int index = 0; index < context.docIdsToLoadSize(); index++) {
        if (context.isCancelled()) {
            throw new TaskCancelledException("cancelled task with reason: " + context.getTask().getReasonCancelled());
        }
        int docId = docs[index].docId;
        try {
            int readerIndex = ReaderUtil.subIndex(docId, context.searcher().getIndexReader().leaves());
            if (currentReaderIndex != readerIndex) {
                currentReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex);
                currentReaderIndex = readerIndex;
                if (currentReaderContext.reader() instanceof SequentialStoredFieldsLeafReader && hasSequentialDocs && docs.length >= 10) {
                    // All the docs to fetch are adjacent but Lucene stored fields are optimized
                    // for random access and don't optimize for sequential access - except for merging.
                    // So we do a little hack here and pretend we're going to do merges in order to
                    // get better sequential access.
                    SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) currentReaderContext.reader();
                    fieldReader = lf.getSequentialStoredFieldsReader()::visitDocument;
                } else {
                    fieldReader = currentReaderContext.reader()::document;
                }
                for (FetchSubPhaseProcessor processor : processors) {
                    processor.setNextReader(currentReaderContext);
                }
            }
            assert currentReaderContext != null;
            HitContext hit = prepareHitContext(context, fetchContext.searchLookup(), fieldsVisitor, docId, storedToRequestedFields, currentReaderContext, fieldReader);
            for (FetchSubPhaseProcessor processor : processors) {
                processor.process(hit);
            }
            hits[docs[index].index] = hit.hit();
        } catch (Exception e) {
            throw new FetchPhaseExecutionException(context.shardTarget(), "Error running fetch phase for doc [" + docId + "]", e);
        }
    }
    if (context.isCancelled()) {
        throw new TaskCancelledException("cancelled task with reason: " + context.getTask().getReasonCancelled());
    }
    TotalHits totalHits = context.queryResult().getTotalHits();
    context.fetchResult().hits(new SearchHits(hits, totalHits, context.queryResult().getMaxScore()));
}
Also used: TotalHits (org.apache.lucene.search.TotalHits), FieldsVisitor (org.opensearch.index.fieldvisitor.FieldsVisitor), CustomFieldsVisitor (org.opensearch.index.fieldvisitor.CustomFieldsVisitor), BitSet (org.apache.lucene.util.BitSet), Set (java.util.Set), HashSet (java.util.HashSet), SearchHit (org.opensearch.search.SearchHit), HashMap (java.util.HashMap), HitContext (org.opensearch.search.fetch.FetchSubPhase.HitContext), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), SearchHits (org.opensearch.search.SearchHits), SequentialStoredFieldsLeafReader (org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader), IOException (java.io.IOException), SearchContextSourcePrinter (org.opensearch.search.SearchContextSourcePrinter), TaskCancelledException (org.opensearch.tasks.TaskCancelledException)
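The excerpt relies on two helpers that are not shown: DocIdToIndex, which pairs a Lucene doc ID with the hit's position in the original result order, and hasSequentialDocs, which decides whether the merge-style reader is worth engaging. A minimal sketch of what they could look like (illustrative, not necessarily the project's exact code), assuming docs has already been sorted by doc ID:

static class DocIdToIndex implements Comparable<DocIdToIndex> {
    final int docId;  // Lucene doc ID of the hit
    final int index;  // position of the hit in the original result order

    DocIdToIndex(int docId, int index) {
        this.docId = docId;
        this.index = index;
    }

    @Override
    public int compareTo(DocIdToIndex o) {
        // sorting by doc ID lets the loop walk segments front to back
        return Integer.compare(docId, o.docId);
    }
}

// With docs sorted by doc ID, the IDs are contiguous exactly when the
// distance between the first and last equals the number of docs minus one.
static boolean hasSequentialDocs(DocIdToIndex[] docs) {
    return docs.length > 0 && docs[docs.length - 1].docId - docs[0].docId == docs.length - 1;
}

The docs.length >= 10 guard in the excerpt keeps the ordinary random-access path for small batches, where the sequential reader's setup cost would not pay off.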

Example 2 with SequentialStoredFieldsLeafReader

Use of org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader in project OpenSearch by opensearch-project.

Class SourceLookup, method setSegmentAndDocument:

public void setSegmentAndDocument(LeafReaderContext context, int docId) {
    if (this.reader == context.reader() && this.docId == docId) {
        // if we are called with the same document, don't invalidate source
        return;
    }
    if (this.reader != context.reader()) {
        this.reader = context.reader();
        // only reset reader and fieldReader when reader changes
        try {
            if (context.reader() instanceof SequentialStoredFieldsLeafReader) {
                // All the docs to fetch are adjacent but Lucene stored fields are optimized
                // for random access and don't optimize for sequential access - except for merging.
                // So we do a little hack here and pretend we're going to do merges in order to
                // get better sequential access.
                SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) context.reader();
                fieldReader = lf.getSequentialStoredFieldsReader()::visitDocument;
            } else {
                fieldReader = context.reader()::document;
            }
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
    this.source = null;
    this.sourceAsBytes = null;
    this.docId = docId;
}
Also used: SequentialStoredFieldsLeafReader (org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader), IOException (java.io.IOException), UncheckedIOException (java.io.UncheckedIOException)
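Because setSegmentAndDocument swaps fieldReader only when the leaf reader actually changes, one SourceLookup can be driven across many documents cheaply. A hedged usage sketch (the searcher and sortedDocIds names here are assumptions, not part of the excerpt):

SourceLookup lookup = new SourceLookup();
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
    int maxDoc = leaf.docBase + leaf.reader().maxDoc();
    for (int docId : sortedDocIds) {
        if (docId < leaf.docBase || docId >= maxDoc) {
            continue; // this hit lives in another segment
        }
        // same leaf as last time => the cached fieldReader is reused
        lookup.setSegmentAndDocument(leaf, docId - leaf.docBase);
        Map<String, Object> source = lookup.source();
    }
}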

Example 3 with SequentialStoredFieldsLeafReader

Use of org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader in project OpenSearch by opensearch-project.

Class ContextIndexSearcherTests, method doTestContextIndexSearcher:

public void doTestContextIndexSearcher(boolean sparse, boolean deletions) throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null));
    Document doc = new Document();
    StringField allowedField = new StringField("allowed", "yes", Field.Store.NO);
    doc.add(allowedField);
    StringField fooField = new StringField("foo", "bar", Field.Store.NO);
    doc.add(fooField);
    StringField deleteField = new StringField("delete", "no", Field.Store.NO);
    doc.add(deleteField);
    IntPoint pointField = new IntPoint("point", 1, 2);
    doc.add(pointField);
    w.addDocument(doc);
    if (deletions) {
        // add a document that matches foo:bar but will be deleted
        deleteField.setStringValue("yes");
        w.addDocument(doc);
        deleteField.setStringValue("no");
    }
    allowedField.setStringValue("no");
    w.addDocument(doc);
    if (sparse) {
        for (int i = 0; i < 1000; ++i) {
            w.addDocument(doc);
        }
        w.forceMerge(1);
    }
    w.deleteDocuments(new Term("delete", "yes"));
    IndexSettings settings = IndexSettingsModule.newIndexSettings("_index", Settings.EMPTY);
    BitsetFilterCache.Listener listener = new BitsetFilterCache.Listener() {

        @Override
        public void onCache(ShardId shardId, Accountable accountable) {
        }

        @Override
        public void onRemoval(ShardId shardId, Accountable accountable) {
        }
    };
    DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(w), new ShardId(settings.getIndex(), 0));
    BitsetFilterCache cache = new BitsetFilterCache(settings, listener);
    Query roleQuery = new TermQuery(new Term("allowed", "yes"));
    BitSet bitSet = cache.getBitSetProducer(roleQuery).getBitSet(reader.leaves().get(0));
    if (sparse) {
        assertThat(bitSet, instanceOf(SparseFixedBitSet.class));
    } else {
        assertThat(bitSet, instanceOf(FixedBitSet.class));
    }
    DocumentSubsetDirectoryReader filteredReader = new DocumentSubsetDirectoryReader(reader, cache, roleQuery);
    ContextIndexSearcher searcher = new ContextIndexSearcher(filteredReader, IndexSearcher.getDefaultSimilarity(), IndexSearcher.getDefaultQueryCache(), IndexSearcher.getDefaultQueryCachingPolicy(), true);
    for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
        assertThat(context.reader(), instanceOf(SequentialStoredFieldsLeafReader.class));
        SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) context.reader();
        assertNotNull(lf.getSequentialStoredFieldsReader());
    }
    // Assert wrapping
    assertEquals(ExitableDirectoryReader.class, searcher.getIndexReader().getClass());
    for (LeafReaderContext lrc : searcher.getIndexReader().leaves()) {
        assertEquals(ExitableLeafReader.class, lrc.reader().getClass());
        assertNotEquals(ExitableTerms.class, lrc.reader().terms("foo").getClass());
        assertNotEquals(ExitablePointValues.class, lrc.reader().getPointValues("point").getClass());
    }
    searcher.addQueryCancellation(() -> {
    });
    for (LeafReaderContext lrc : searcher.getIndexReader().leaves()) {
        assertEquals(ExitableTerms.class, lrc.reader().terms("foo").getClass());
        assertEquals(ExitablePointValues.class, lrc.reader().getPointValues("point").getClass());
    }
    // Searching a non-existing term will trigger a null scorer
    assertEquals(0, searcher.count(new TermQuery(new Term("non_existing_field", "non_existing_value"))));
    assertEquals(1, searcher.count(new TermQuery(new Term("foo", "bar"))));
    // make sure scorers are created only once, see #1725
    assertEquals(1, searcher.count(new CreateScorerOnceQuery(new MatchAllDocsQuery())));
    TopDocs topDocs = searcher.search(new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "bar"))), 3f), 1);
    assertEquals(1, topDocs.totalHits.value);
    assertEquals(1, topDocs.scoreDocs.length);
    assertEquals(3f, topDocs.scoreDocs[0].score, 0);
    IOUtils.close(reader, w, dir);
}
Also used: Query (org.apache.lucene.search.Query), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery), TermQuery (org.apache.lucene.search.TermQuery), BoostQuery (org.apache.lucene.search.BoostQuery), IndexSettings (org.opensearch.index.IndexSettings), Document (org.apache.lucene.document.Document), ShardId (org.opensearch.index.shard.ShardId), TopDocs (org.apache.lucene.search.TopDocs), SparseFixedBitSet (org.apache.lucene.util.SparseFixedBitSet), FixedBitSet (org.apache.lucene.util.FixedBitSet), CombinedBitSet (org.apache.lucene.util.CombinedBitSet), BitSet (org.apache.lucene.util.BitSet), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), Directory (org.apache.lucene.store.Directory), SequentialStoredFieldsLeafReader (org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader), DirectoryReader (org.apache.lucene.index.DirectoryReader), FilterDirectoryReader (org.apache.lucene.index.FilterDirectoryReader), OpenSearchDirectoryReader (org.opensearch.common.lucene.index.OpenSearchDirectoryReader), Accountable (org.apache.lucene.util.Accountable), ContextIndexSearcher.intersectScorerAndBitSet (org.opensearch.search.internal.ContextIndexSearcher.intersectScorerAndBitSet), Term (org.apache.lucene.index.Term), IntPoint (org.apache.lucene.document.IntPoint), IndexWriter (org.apache.lucene.index.IndexWriter), StringField (org.apache.lucene.document.StringField), BitsetFilterCache (org.opensearch.index.cache.bitset.BitsetFilterCache)
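The assertion that every leaf is a SequentialStoredFieldsLeafReader works because the trick behind getSequentialStoredFieldsReader is Lucene's StoredFieldsReader.getMergeInstance(), which returns a reader tuned for the front-to-back access pattern used while merging. A minimal sketch of a wrapper exposing it, assuming the delegate is a CodecReader (illustrative only; the project's class carries more delegation logic):

import java.io.IOException;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;

class SequentialFieldsLeafReader extends FilterLeafReader {

    SequentialFieldsLeafReader(LeafReader in) {
        super(in);
    }

    StoredFieldsReader getSequentialStoredFieldsReader() throws IOException {
        // getMergeInstance() is how Lucene itself reads stored fields
        // sequentially during segment merges.
        return ((CodecReader) in).getFieldsReader().getMergeInstance();
    }

    @Override
    public CacheHelper getCoreCacheHelper() {
        return in.getCoreCacheHelper();
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
        // the wrapper does not change visible content, so delegating is safe
        return in.getReaderCacheHelper();
    }
}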

Example 4 with SequentialStoredFieldsLeafReader

Use of org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader in project OpenSearch by opensearch-project.

Class InternalEngineTests, method testProducesStoredFieldsReader:

public void testProducesStoredFieldsReader() throws Exception {
    // Make sure that the engine produces a SequentialStoredFieldsLeafReader.
    // This is required for optimizations on SourceLookup to work, which is in-turn useful for runtime fields.
    ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField("test"), new BytesArray("{}".getBytes(Charset.defaultCharset())), null);
    Engine.Index operation = randomBoolean() ? appendOnlyPrimary(doc, false, 1) : appendOnlyReplica(doc, false, 1, randomIntBetween(0, 5));
    engine.index(operation);
    engine.refresh("test");
    try (Engine.Searcher searcher = engine.acquireSearcher("test")) {
        IndexReader reader = searcher.getIndexReader();
        assertThat(reader.leaves().size(), Matchers.greaterThanOrEqualTo(1));
        for (LeafReaderContext context : reader.leaves()) {
            assertThat(context.reader(), Matchers.instanceOf(SequentialStoredFieldsLeafReader.class));
            SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) context.reader();
            assertNotNull(lf.getSequentialStoredFieldsReader());
        }
    }
}
Also used: BytesArray (org.opensearch.common.bytes.BytesArray), SequentialStoredFieldsLeafReader (org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader), ParsedDocument (org.opensearch.index.mapper.ParsedDocument), IndexReader (org.apache.lucene.index.IndexReader), LeafReaderContext (org.apache.lucene.index.LeafReaderContext)
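Once such a reader is in hand, stored fields can be pulled through a StoredFieldVisitor the same way the fetch phase does. An illustrative fragment reusing the lf variable from the loop above (hedged sketch; error handling omitted):

StoredFieldsReader sequential = lf.getSequentialStoredFieldsReader();
FieldsVisitor visitor = new FieldsVisitor(true); // true: also load _source
for (int docId = 0; docId < lf.maxDoc(); docId++) {
    visitor.reset();                          // clear state from the previous doc
    sequential.visitDocument(docId, visitor); // sequential, merge-style access
    BytesReference source = visitor.source(); // raw _source bytes of this doc
}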

Aggregations

SequentialStoredFieldsLeafReader (org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader): 4
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 3
IOException (java.io.IOException): 2
BitSet (org.apache.lucene.util.BitSet): 2
UncheckedIOException (java.io.UncheckedIOException): 1
HashMap (java.util.HashMap): 1
HashSet (java.util.HashSet): 1
Set (java.util.Set): 1
Document (org.apache.lucene.document.Document): 1
IntPoint (org.apache.lucene.document.IntPoint): 1
StringField (org.apache.lucene.document.StringField): 1
DirectoryReader (org.apache.lucene.index.DirectoryReader): 1
FilterDirectoryReader (org.apache.lucene.index.FilterDirectoryReader): 1
IndexReader (org.apache.lucene.index.IndexReader): 1
IndexWriter (org.apache.lucene.index.IndexWriter): 1
Term (org.apache.lucene.index.Term): 1
BoostQuery (org.apache.lucene.search.BoostQuery): 1
ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery): 1
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 1
Query (org.apache.lucene.search.Query): 1