Search in sources :

Example 26 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

Method getPrefixTerms of the class MultiPhrasePrefixQuery.

/**
 * Adds to {@code terms} every term of {@code field} that starts with the bytes of {@code prefix},
 * visiting the reader's leaves one at a time. Returns early as soon as {@code terms} holds at
 * least {@code maxExpansions} entries.
 *
 * @param terms  set receiving the expanded terms (each added term owns a deep copy of its bytes)
 * @param prefix term whose bytes are used as the prefix to match
 * @param reader reader whose leaves are scanned
 * @throws IOException if reading the terms of a leaf fails
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // Each leaf is walked on its own: a SlowCompositeReaderWrapper would first merge the terms of
    // every segment into a single instance, which is very expensive.
    for (LeafReaderContext segment : reader.leaves()) {
        final Terms segmentTerms = segment.reader().terms(field);
        if (segmentTerms == null) {
            continue;
        }
        final TermsEnum cursor = segmentTerms.iterator();
        // Position on the first term >= prefix; END means this leaf has no such term.
        if (cursor.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
            continue;
        }
        BytesRef candidate = cursor.term();
        // Stop at the first term that no longer carries the prefix.
        while (candidate != null && StringHelper.startsWith(candidate, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(candidate)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            candidate = cursor.next();
        }
    }
}
Also used : Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Term(org.apache.lucene.index.Term) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 27 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

Method loadPrimaryTerm of the class Versions.

/**
 * Looks up the primary term stored for the document identified by the given uid term.
 *
 * @param reader reader whose leaves are searched, from the last leaf to the first
 * @param term   uid term to look up; its field must equal {@code UidFieldMapper.NAME}
 * @return the primary term of the last live document matching {@code term} in the first
 *         leaf (in scan order) that has one, or {@code 0} if no leaf has a live match
 * @throws IOException if reading from a leaf fails
 */
public static long loadPrimaryTerm(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _primary_term by uid";
    final List<LeafReaderContext> segments = reader.leaves();
    if (segments.isEmpty()) {
        return 0;
    }
    // Scan from the last leaf backwards.
    // NOTE(review): presumably because the documents being looked up are likely to live in the
    // last segments - confirm against the upstream source.
    for (int ord = segments.size() - 1; ord >= 0; ord--) {
        final LeafReader segment = segments.get(ord).reader();
        final Bits liveDocs = segment.getLiveDocs();
        final Fields fields = segment.fields();
        if (fields == null) {
            continue;
        }
        final Terms uidTerms = fields.terms(UidFieldMapper.NAME);
        if (uidTerms == null) {
            continue;
        }
        final TermsEnum uidCursor = uidTerms.iterator();
        assert uidCursor != null;
        final NumericDocValues primaryTerms = segment.getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME);
        assert primaryTerms != null;
        if (uidCursor.seekExact(term.bytes()) == false) {
            continue;
        }
        // Several doc IDs may match (nested docs), so keep the last live one.
        final PostingsEnum postings = uidCursor.postings(null, 0);
        int lastLiveDoc = DocIdSetIterator.NO_MORE_DOCS;
        int doc;
        while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (liveDocs == null || liveDocs.get(doc)) {
                lastLiveDoc = doc;
            }
        }
        if (lastLiveDoc != DocIdSetIterator.NO_MORE_DOCS) {
            return primaryTerms.get(lastLiveDoc);
        }
    }
    return 0;
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Fields(org.apache.lucene.index.Fields) LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 28 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

Method exists of the class Lucene.

/**
 * Tells whether at least one live document matches the provided query.
 *
 * @param searcher searcher used to build the weight and whose reader's leaves are scanned
 * @param query    query to test
 * @return {@code true} as soon as a matching document that is not deleted is found,
 *         {@code false} if every leaf is exhausted without one
 * @throws IOException if creating the weight or iterating a scorer fails
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
    final Weight weight = searcher.createNormalizedWeight(query, false);
    // The per-leaf scorer is iterated directly so the scan can stop at the very first live match.
    for (LeafReaderContext segment : searcher.getIndexReader().leaves()) {
        final Scorer scorer = weight.scorer(segment);
        if (scorer == null) {
            // No scorer for this leaf: move on to the next one.
            continue;
        }
        final Bits liveDocs = segment.reader().getLiveDocs();
        final DocIdSetIterator matches = scorer.iterator();
        int doc;
        while ((doc = matches.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // A null liveDocs is treated as "every document is live".
            final boolean deleted = liveDocs != null && liveDocs.get(doc) == false;
            if (deleted == false) {
                return true;
            }
        }
    }
    return false;
}
Also used : LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Scorer(org.apache.lucene.search.Scorer) Bits(org.apache.lucene.util.Bits) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Weight(org.apache.lucene.search.Weight)

Example 29 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

Method rewrite of the class AllTermQuery.

/**
 * Rewrites this query: defers to the superclass rewrite first, and otherwise keeps this query
 * only when at least one leaf has payloads on the term's field.
 *
 * @param reader reader whose leaves are inspected for payloads
 * @return the superclass rewrite if it differs from this query; this query if some leaf's terms
 *         for the field have payloads; otherwise a plain {@link TermQuery} on the same term
 * @throws IOException if reading the terms of a leaf fails
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    final Query fromSuper = super.rewrite(reader);
    if (fromSuper != this) {
        return fromSuper;
    }
    for (LeafReaderContext segment : reader.leaves()) {
        final Terms fieldTerms = segment.reader().terms(term.field());
        if (fieldTerms != null && fieldTerms.hasPayloads()) {
            // One leaf with payloads is enough to keep this query as is.
            return this;
        }
    }
    // No leaf has payloads for the field. This is also the path taken when the reader has no
    // leaves or the field is missing everywhere, e.g. a rewrite against an empty reader.
    return new TermQuery(term);
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 30 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

Method getShardDocUIDs of the class IndexShardTestCase.

/**
 * Refreshes the shard and collects the uid of every live document visible to a freshly
 * acquired searcher.
 *
 * @param shard shard to refresh and read from
 * @return the set of uids built from the {@code UidFieldMapper.NAME} stored field of each live document
 * @throws IOException if refreshing the shard or loading a document fails
 */
protected Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
    shard.refresh("get_uids");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        final Set<Uid> collected = new HashSet<>();
        for (LeafReaderContext segment : searcher.reader().leaves()) {
            final LeafReader leaf = segment.reader();
            final Bits liveDocs = leaf.getLiveDocs();
            final int docCount = leaf.maxDoc();
            for (int docId = 0; docId < docCount; docId++) {
                // Skip deleted documents; a null liveDocs is treated as "all live".
                if (liveDocs != null && liveDocs.get(docId) == false) {
                    continue;
                }
                // Load only the uid stored field of the document.
                final Document stored = leaf.document(docId, Collections.singleton(UidFieldMapper.NAME));
                collected.add(Uid.createUid(stored.get(UidFieldMapper.NAME)));
            }
        }
        return collected;
    }
}
Also used : Uid(org.elasticsearch.index.mapper.Uid) LeafReader(org.apache.lucene.index.LeafReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) Engine(org.elasticsearch.index.engine.Engine) HashSet(java.util.HashSet)

Aggregations

LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 335, LeafReader (org.apache.lucene.index.LeafReader): 73, Document (org.apache.lucene.document.Document): 71, IOException (java.io.IOException): 69, BytesRef (org.apache.lucene.util.BytesRef): 67, Directory (org.apache.lucene.store.Directory): 61, Term (org.apache.lucene.index.Term): 52, IndexSearcher (org.apache.lucene.search.IndexSearcher): 49, IndexReader (org.apache.lucene.index.IndexReader): 48, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 45, DirectoryReader (org.apache.lucene.index.DirectoryReader): 44, Bits (org.apache.lucene.util.Bits): 44, NumericDocValues (org.apache.lucene.index.NumericDocValues): 43, ArrayList (java.util.ArrayList): 41, Weight (org.apache.lucene.search.Weight): 37, Terms (org.apache.lucene.index.Terms): 36, DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 36, Scorer (org.apache.lucene.search.Scorer): 36, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 34, Query (org.apache.lucene.search.Query): 34