Search in sources :

Example 86 with LeafReader

use of org.apache.lucene.index.LeafReader in project elasticsearch by elastic.

From the class Versions, the method loadSeqNo.

/**
 * Returns the sequence number recorded for the document matching the given uid term,
 * or {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if no live document matches.
 *
 * @param reader the index reader to search
 * @param term   the uid term to resolve; must target the {@code _uid} field
 * @return the {@code _seq_no} doc value of the last matching live document,
 *         or {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found
 * @throws IOException if reading the index fails
 */
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return SequenceNumbersService.UNASSIGNED_SEQ_NO;
    }
    // iterate segments in reverse so the most recently written documents,
    // which are likely to be in the last segments, are scanned first
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();
        TermsEnum termsEnum = null;
        SortedNumericDocValues dvField = null;
        PostingsEnum docsEnum = null;
        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
                assert dvField != null;
                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    // PostingsEnum.NONE == 0; we need only doc ids, no freqs/positions
                    docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            // skip deleted documents
                            continue;
                        }
                        docID = d;
                    }
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        dvField.setDocument(docID);
                        assert dvField.count() == 1 : "expected only a single value for _seq_no but got " + dvField.count();
                        return dvField.valueAt(0);
                    }
                }
            }
        }
    }
    return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Fields(org.apache.lucene.index.Fields) LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 87 with LeafReader

use of org.apache.lucene.index.LeafReader in project elasticsearch by elastic.

From the class PagedBytesIndexFieldData, the method loadDirect.

/**
 * Eagerly loads fielddata for one segment, building paged-bytes backed terms
 * plus ordinals for the configured field, while accounting the memory used
 * against the fielddata circuit breaker via {@code PagedBytesEstimator}.
 *
 * NOTE(review): the estimator.beforeLoad/afterLoad pairing below is
 * order-sensitive; the finally block must always run afterLoad exactly once.
 */
@Override
public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
    LeafReader reader = context.reader();
    AtomicOrdinalsFieldData data = null;
    PagedBytesEstimator estimator = new PagedBytesEstimator(context, breakerService.getBreaker(CircuitBreaker.FIELDDATA), getFieldName());
    Terms terms = reader.terms(getFieldName());
    if (terms == null) {
        // no terms for this field in this segment: return an empty instance,
        // still reporting its (tiny) footprint to the breaker
        data = AbstractAtomicOrdinalsFieldData.empty();
        estimator.afterLoad(null, data.ramBytesUsed());
        return data;
    }
    final PagedBytes bytes = new PagedBytes(15);
    final PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    final float acceptableTransientOverheadRatio = OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO;
    // Wrap the context in an estimator and use it to either estimate
    // the entire set, or wrap the TermsEnum so it can be calculated
    // per-term
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
        PostingsEnum docsEnum = null;
        // walk every term: copy its bytes, record the byte offset per ordinal,
        // and register each doc that contains the term with the ordinals builder
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            final long termOrd = builder.nextOrdinal();
            assert termOrd == termOrdToBytesOffset.size();
            termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
            docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
            for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                builder.addDoc(docId);
            }
        }
        // freeze the collected bytes (trim=true) before exposing them read-only
        PagedBytes.Reader bytesReader = bytes.freeze(true);
        final Ordinals ordinals = builder.build();
        data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset.build(), ordinals);
        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Call .afterLoad() to adjust the breaker now that we have an exact size
            estimator.afterLoad(termsEnum, data.ramBytesUsed());
        }
    }
}
Also used : Ordinals(org.elasticsearch.index.fielddata.ordinals.Ordinals) PackedLongValues(org.apache.lucene.util.packed.PackedLongValues) LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) RamAccountingTermsEnum(org.elasticsearch.index.fielddata.RamAccountingTermsEnum) TermsEnum(org.apache.lucene.index.TermsEnum) AtomicOrdinalsFieldData(org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData) PagedBytes(org.apache.lucene.util.PagedBytes) OrdinalsBuilder(org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef)

Example 88 with LeafReader

use of org.apache.lucene.index.LeafReader in project elasticsearch by elastic.

From the class ShardCoreKeyMapTests, the method testAddingAClosedReader.

/**
 * Adding a leaf reader whose underlying directory reader has already been
 * closed must fail with {@link AlreadyClosedException}, and the map must not
 * retain any entry for it.
 */
public void testAddingAClosedReader() throws Exception {
    LeafReader reader;
    try (Directory dir = newDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
        writer.addDocument(new Document());
        try (DirectoryReader dirReader = ElasticsearchDirectoryReader.wrap(writer.getReader(), new ShardId("index1", "_na_", 1))) {
            reader = dirReader.leaves().get(0).reader();
        }
        // dirReader is closed here, so `reader` is now stale
    }
    ShardCoreKeyMap map = new ShardCoreKeyMap();
    // expectThrows replaces the try/fail/catch-empty anti-pattern and
    // asserts both that the exception is thrown and of the right type
    expectThrows(AlreadyClosedException.class, () -> map.add(reader));
    assertEquals(0, map.size());
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 89 with LeafReader

use of org.apache.lucene.index.LeafReader in project elasticsearch by elastic.

From the class TextFieldMapperTests, the method testPositionIncrementGap.

/**
 * Indexing a multi-valued text field with a random {@code position_increment_gap}
 * must apply that gap between the positions of consecutive values.
 */
public void testPositionIncrementGap() throws IOException {
    final int gap = randomIntBetween(1, 1000);
    String mappingJson = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties").startObject("field").field("type", "text").field("position_increment_gap", gap).endObject().endObject().endObject().endObject().string();
    DocumentMapper docMapper = indexService.mapperService().merge("type", new CompressedXContent(mappingJson), MergeReason.MAPPING_UPDATE, false);
    assertEquals(mappingJson, docMapper.mappingSource().toString());
    // index a document with two values so a gap is inserted between them
    ParsedDocument parsedDoc = docMapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject().array("field", new String[] { "a", "b" }).endObject().bytes());
    IndexableField[] indexedFields = parsedDoc.rootDoc().getFields("field");
    assertEquals(2, indexedFields.length);
    assertEquals("a", indexedFields[0].stringValue());
    assertEquals("b", indexedFields[1].stringValue());
    IndexShard indexShard = indexService.getShard(0);
    indexShard.index(new Engine.Index(new Term("_uid", parsedDoc.uid()), parsedDoc));
    indexShard.refresh("test");
    try (Engine.Searcher searcher = indexShard.acquireSearcher("test")) {
        LeafReader segmentReader = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum termsIter = segmentReader.terms("field").iterator();
        assertTrue(termsIter.seekExact(new BytesRef("b")));
        PostingsEnum postingsEnum = termsIter.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postingsEnum.nextDoc());
        // the second value starts right after the configured gap
        assertEquals(gap + 1, postingsEnum.nextPosition());
    }
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) IndexShard(org.elasticsearch.index.shard.IndexShard) Matchers.containsString(org.hamcrest.Matchers.containsString) Term(org.apache.lucene.index.Term) TermsEnum(org.apache.lucene.index.TermsEnum) IndexableField(org.apache.lucene.index.IndexableField) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) PostingsEnum(org.apache.lucene.index.PostingsEnum) Engine(org.elasticsearch.index.engine.Engine) BytesRef(org.apache.lucene.util.BytesRef)

Example 90 with LeafReader

use of org.apache.lucene.index.LeafReader in project elasticsearch by elastic.

From the class SimpleLuceneTests, the method testNumericTermDocsFreqs.

/**
     * A test just to verify that term freqs are not stored for numeric fields. <tt>int1</tt> is not storing termFreq
     * and <tt>int2</tt> does.
     */
public void testNumericTermDocsFreqs() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document doc = new Document();
    // first instance of int1: default LegacyIntField type, which does not index freqs
    FieldType type = LegacyIntField.TYPE_NOT_STORED;
    LegacyIntField field = new LegacyIntField("int1", 1, type);
    doc.add(field);
    // build a variant type that also indexes term frequencies
    type = new FieldType(LegacyIntField.TYPE_NOT_STORED);
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    type.freeze();
    // second instance of int1 uses the freq-enabled type;
    // NOTE(review): the test still expects freq == 1 for int1 below, relying on
    // the segment resolving the mixed index options to the weaker DOCS-only
    // setting — confirm against the Lucene version in use
    field = new LegacyIntField("int1", 1, type);
    doc.add(field);
    // int2 is added twice, both times with freqs enabled, so its freq should be 2
    field = new LegacyIntField("int2", 1, type);
    doc.add(field);
    field = new LegacyIntField("int2", 1, type);
    doc.add(field);
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    LeafReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    Terms terms = atomicReader.terms("int1");
    TermsEnum termsEnum = terms.iterator();
    termsEnum.next();
    PostingsEnum termDocs = termsEnum.postings(null);
    assertThat(termDocs.nextDoc(), equalTo(0));
    assertThat(termDocs.docID(), equalTo(0));
    // freq stays 1 for int1: frequencies were not recorded for this field
    assertThat(termDocs.freq(), equalTo(1));
    terms = atomicReader.terms("int2");
    termsEnum = terms.iterator();
    termsEnum.next();
    termDocs = termsEnum.postings(termDocs);
    assertThat(termDocs.nextDoc(), equalTo(0));
    assertThat(termDocs.docID(), equalTo(0));
    // int2 recorded frequencies, and the value was added twice
    assertThat(termDocs.freq(), equalTo(2));
    reader.close();
    indexWriter.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Terms(org.apache.lucene.index.Terms) Document(org.apache.lucene.document.Document) PostingsEnum(org.apache.lucene.index.PostingsEnum) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType) LegacyIntField(org.apache.lucene.document.LegacyIntField) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

LeafReader (org.apache.lucene.index.LeafReader)168 BytesRef (org.apache.lucene.util.BytesRef)65 Document (org.apache.lucene.document.Document)61 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)58 Directory (org.apache.lucene.store.Directory)56 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)55 DirectoryReader (org.apache.lucene.index.DirectoryReader)47 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)42 Test (org.junit.Test)36 IndexWriter (org.apache.lucene.index.IndexWriter)32 Terms (org.apache.lucene.index.Terms)30 TermsEnum (org.apache.lucene.index.TermsEnum)28 NumericDocValues (org.apache.lucene.index.NumericDocValues)24 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)24 SortedDocValues (org.apache.lucene.index.SortedDocValues)22 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)21 IndexReader (org.apache.lucene.index.IndexReader)20 Term (org.apache.lucene.index.Term)20 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)18 Bits (org.apache.lucene.util.Bits)18