Search in sources :

Example 51 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.

the class PagedBytesIndexFieldData method loadDirect.

@Override
public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
    LeafReader reader = context.reader();
    AtomicOrdinalsFieldData data = null;
    PagedBytesEstimator estimator = new PagedBytesEstimator(context, breakerService.getBreaker(CircuitBreaker.FIELDDATA), getFieldName());
    Terms terms = reader.terms(getFieldName());
    if (terms == null) {
        data = AbstractAtomicOrdinalsFieldData.empty();
        estimator.afterLoad(null, data.ramBytesUsed());
        return data;
    }
    final PagedBytes bytes = new PagedBytes(15);
    final PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    final float acceptableTransientOverheadRatio = OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO;
    // Wrap the context in an estimator and use it to either estimate
    // the entire set, or wrap the TermsEnum so it can be calculated
    // per-term
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
        PostingsEnum docsEnum = null;
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            final long termOrd = builder.nextOrdinal();
            assert termOrd == termOrdToBytesOffset.size();
            termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
            docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
            for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                builder.addDoc(docId);
            }
        }
        PagedBytes.Reader bytesReader = bytes.freeze(true);
        final Ordinals ordinals = builder.build();
        data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset.build(), ordinals);
        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Call .afterLoad() to adjust the breaker now that we have an exact size
            estimator.afterLoad(termsEnum, data.ramBytesUsed());
        }
    }
}
Also used : Ordinals(org.elasticsearch.index.fielddata.ordinals.Ordinals) PackedLongValues(org.apache.lucene.util.packed.PackedLongValues) LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) RamAccountingTermsEnum(org.elasticsearch.index.fielddata.RamAccountingTermsEnum) TermsEnum(org.apache.lucene.index.TermsEnum) AtomicOrdinalsFieldData(org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData) PagedBytes(org.apache.lucene.util.PagedBytes) OrdinalsBuilder(org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef)

Example 52 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.

the class TextFieldMapperTests method testPositionIncrementGap.

public void testPositionIncrementGap() throws IOException {
    final int positionIncrementGap = randomIntBetween(1, 1000);
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties").startObject("field").field("type", "text").field("position_increment_gap", positionIncrementGap).endObject().endObject().endObject().endObject().string();
    DocumentMapper mapper = indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, false);
    assertEquals(mapping, mapper.mappingSource().toString());
    ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject().array("field", new String[] { "a", "b" }).endObject().bytes());
    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);
    assertEquals("a", fields[0].stringValue());
    assertEquals("b", fields[1].stringValue());
    IndexShard shard = indexService.getShard(0);
    shard.index(new Engine.Index(new Term("_uid", doc.uid()), doc));
    shard.refresh("test");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum terms = leaf.terms("field").iterator();
        assertTrue(terms.seekExact(new BytesRef("b")));
        PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postings.nextDoc());
        assertEquals(positionIncrementGap + 1, postings.nextPosition());
    }
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) IndexShard(org.elasticsearch.index.shard.IndexShard) Matchers.containsString(org.hamcrest.Matchers.containsString) Term(org.apache.lucene.index.Term) TermsEnum(org.apache.lucene.index.TermsEnum) IndexableField(org.apache.lucene.index.IndexableField) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) PostingsEnum(org.apache.lucene.index.PostingsEnum) Engine(org.elasticsearch.index.engine.Engine) BytesRef(org.apache.lucene.util.BytesRef)

Example 53 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.

the class AllTermQuery method createWeight.

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (needsScores == false) {
        return new TermQuery(term).createWeight(searcher, needsScores);
    }
    final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
    final TermStatistics termStats = searcher.termStatistics(term, termStates);
    final Similarity similarity = searcher.getSimilarity(needsScores);
    final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            return stats.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            stats.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            AllTermScorer scorer = scorer(context);
            if (scorer != null) {
                int newDoc = scorer.iterator().advance(doc);
                if (newDoc == doc) {
                    float score = scorer.score();
                    float freq = scorer.freq();
                    SimScorer docScorer = similarity.simScorer(stats, context);
                    Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
                    Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
                    Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
                    return Explanation.match(score, "weight(" + getQuery() + " in " + doc + ") [" + similarity.getClass().getSimpleName() + "], product of:", termScoreExplanation, payloadBoostExplanation);
                }
            }
            return Explanation.noMatch("no matching term");
        }

        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(term.field());
            if (terms == null) {
                return null;
            }
            final TermsEnum termsEnum = terms.iterator();
            if (termsEnum == null) {
                return null;
            }
            final TermState state = termStates.get(context.ord);
            if (state == null) {
                // Term does not exist in this segment
                return null;
            }
            termsEnum.seekExact(term.bytes(), state);
            PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert docs != null;
            return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
        }
    };
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Set(java.util.Set) Similarity(org.apache.lucene.search.similarities.Similarity) Explanation(org.apache.lucene.search.Explanation) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) Terms(org.apache.lucene.index.Terms) SimScorer(org.apache.lucene.search.similarities.Similarity.SimScorer) TermStatistics(org.apache.lucene.search.TermStatistics) TermContext(org.apache.lucene.index.TermContext) Weight(org.apache.lucene.search.Weight) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) PostingsEnum(org.apache.lucene.index.PostingsEnum)

Example 54 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.

the class FilterableTermsEnum method seekExact.

@Override
public boolean seekExact(BytesRef text) throws IOException {
    int docFreq = 0;
    long totalTermFreq = 0;
    for (Holder anEnum : enums) {
        if (anEnum.termsEnum.seekExact(text)) {
            if (anEnum.bits == null) {
                docFreq += anEnum.termsEnum.docFreq();
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
                    if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
                        totalTermFreq = -1;
                        continue;
                    }
                    totalTermFreq += leafTotalTermFreq;
                }
            } else {
                final PostingsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.postings(anEnum.docsEnum, docsEnumFlag);
                // 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        docFreq++;
                        // docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
                        // is really 1 or unrecorded when filtering like this
                        totalTermFreq += docsEnum.freq();
                    }
                } else {
                    for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        // docsEnum.freq() behaviour is undefined if docsEnumFlag==PostingsEnum.FLAG_NONE so don't bother with call
                        docFreq++;
                    }
                }
            }
        }
    }
    if (docFreq > 0) {
        currentDocFreq = docFreq;
        currentTotalTermFreq = totalTermFreq;
        current = text;
        return true;
    } else {
        currentDocFreq = NOT_FOUND;
        currentTotalTermFreq = NOT_FOUND;
        current = null;
        return false;
    }
}
Also used : PostingsEnum(org.apache.lucene.index.PostingsEnum)

Example 55 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.

the class Versions method loadSeqNo.

/**
     * Returns the sequence number for the given uid term, returning
     * {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found.
     */
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return SequenceNumbersService.UNASSIGNED_SEQ_NO;
    }
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();
        TermsEnum termsEnum = null;
        SortedNumericDocValues dvField = null;
        PostingsEnum docsEnum = null;
        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
                assert dvField != null;
                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        dvField.setDocument(docID);
                        assert dvField.count() == 1 : "expected only a single value for _seq_no but got " + dvField.count();
                        return dvField.valueAt(0);
                    }
                }
            }
        }
    }
    return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Fields(org.apache.lucene.index.Fields) LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

PostingsEnum (org.apache.lucene.index.PostingsEnum)80 BytesRef (org.apache.lucene.util.BytesRef)59 TermsEnum (org.apache.lucene.index.TermsEnum)56 Terms (org.apache.lucene.index.Terms)47 Fields (org.apache.lucene.index.Fields)18 LeafReader (org.apache.lucene.index.LeafReader)17 Term (org.apache.lucene.index.Term)17 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)15 Document (org.apache.lucene.document.Document)13 ArrayList (java.util.ArrayList)12 Bits (org.apache.lucene.util.Bits)11 IndexReader (org.apache.lucene.index.IndexReader)10 TextField (org.apache.lucene.document.TextField)9 Directory (org.apache.lucene.store.Directory)9 IOException (java.io.IOException)8 DirectoryReader (org.apache.lucene.index.DirectoryReader)7 IndexWriter (org.apache.lucene.index.IndexWriter)6 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)6 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)5 XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)5