Search in sources :

Example 1 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.

In the class LuceneTests, the method testNumDocs:

/**
 * Verifies that {@code Lucene.getNumDocs(SegmentInfos)} counts only documents from the
 * last commit point: uncommitted adds and deletes must be invisible until
 * {@code IndexWriter.commit()} is called.
 */
public void testNumDocs() throws IOException {
    // try-with-resources closes writer then dir (reverse declaration order) even when an
    // assertion fails mid-test; the original trailing close() calls leaked on failure.
    try (MockDirectoryWrapper dir = newMockDirectory();
        IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) {
        Document doc = new Document();
        doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        writer.addDocument(doc);
        writer.commit();
        // one committed document
        SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);
        assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));
        doc = new Document();
        doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        writer.addDocument(doc);
        doc = new Document();
        doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        writer.addDocument(doc);
        // two more docs added but NOT committed: the committed count must stay at 1
        segmentCommitInfos = Lucene.readSegmentInfos(dir);
        assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));
        writer.commit();
        segmentCommitInfos = Lucene.readSegmentInfos(dir);
        assertEquals(3, Lucene.getNumDocs(segmentCommitInfos));
        // a committed delete reduces the count
        writer.deleteDocuments(new Term("id", "2"));
        writer.commit();
        segmentCommitInfos = Lucene.readSegmentInfos(dir);
        assertEquals(2, Lucene.getNumDocs(segmentCommitInfos));
        // randomized bulk add followed by a random number of deletes
        int numDocsToIndex = randomIntBetween(10, 50);
        List<Term> deleteTerms = new ArrayList<>();
        for (int i = 0; i < numDocsToIndex; i++) {
            doc = new Document();
            doc.add(new TextField("id", "extra_" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            deleteTerms.add(new Term("id", "extra_" + i));
            writer.addDocument(doc);
        }
        int numDocsToDelete = randomIntBetween(0, numDocsToIndex);
        Collections.shuffle(deleteTerms, random());
        for (int i = 0; i < numDocsToDelete; i++) {
            // removing the term from the list leaves deleteTerms holding only surviving docs
            writer.deleteDocuments(deleteTerms.remove(0));
        }
        writer.commit();
        segmentCommitInfos = Lucene.readSegmentInfos(dir);
        // 2 survivors from before, plus every extra_* doc whose term was not deleted
        assertEquals(2 + deleteTerms.size(), Lucene.getNumDocs(segmentCommitInfos));
    }
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) SegmentInfos(org.apache.lucene.index.SegmentInfos) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) ArrayList(java.util.ArrayList) TextField(org.apache.lucene.document.TextField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 2 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.

In the class Engine, the method getSegmentInfo:

/**
 * Builds a {@link Segment} view of this engine by merging the segments currently
 * visible to search with those recorded in the last committed {@link SegmentInfos}.
 *
 * @param lastCommittedSegmentInfos segment infos of the last commit, or {@code null} if unavailable
 * @param verbose if {@code true}, attach a per-segment RAM accounting tree to each search segment
 * @return segments sorted by ascending generation
 */
protected Segment[] getSegmentInfo(SegmentInfos lastCommittedSegmentInfos, boolean verbose) {
    ensureOpen();
    Map<String, Segment> segments = new HashMap<>();
    // first, go over and compute the search ones...
    Searcher searcher = acquireSearcher("segments");
    try {
        for (LeafReaderContext reader : searcher.reader().leaves()) {
            // resolve the SegmentReader once; the original called segmentReader() twice
            final SegmentReader segmentReader = segmentReader(reader.reader());
            SegmentCommitInfo info = segmentReader.getSegmentInfo();
            assert !segments.containsKey(info.info.name);
            Segment segment = new Segment(info.info.name);
            segment.search = true;
            segment.docCount = reader.reader().numDocs();
            segment.delDocCount = reader.reader().numDeletedDocs();
            segment.version = info.info.getVersion();
            segment.compound = info.info.getUseCompoundFile();
            try {
                segment.sizeInBytes = info.sizeInBytes();
            } catch (IOException e) {
                // size is best-effort: log and continue with the remaining stats
                logger.trace((Supplier<?>) () -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
            }
            segment.memoryInBytes = segmentReader.ramBytesUsed();
            if (verbose) {
                segment.ramTree = Accountables.namedAccountable("root", segmentReader);
            }
            // TODO: add more fine grained mem stats values to per segment info here
            segments.put(info.info.name, segment);
        }
    } finally {
        searcher.close();
    }
    // now, correlate or add the committed ones...
    if (lastCommittedSegmentInfos != null) {
        for (SegmentCommitInfo info : lastCommittedSegmentInfos) {
            Segment segment = segments.get(info.info.name);
            if (segment == null) {
                // committed but not visible to search (e.g. not yet refreshed)
                segment = new Segment(info.info.name);
                segment.search = false;
                segment.committed = true;
                segment.docCount = info.info.maxDoc();
                segment.delDocCount = info.getDelCount();
                segment.version = info.info.getVersion();
                segment.compound = info.info.getUseCompoundFile();
                try {
                    segment.sizeInBytes = info.sizeInBytes();
                } catch (IOException e) {
                    logger.trace((Supplier<?>) () -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
                }
                segments.put(info.info.name, segment);
            } else {
                // already visible to search; just flag it as committed
                segment.committed = true;
            }
        }
    }
    Segment[] segmentsArr = segments.values().toArray(new Segment[0]);
    // Generations are longs: the original (int) (o1.getGeneration() - o2.getGeneration())
    // cast could overflow for large deltas and yield an incorrect ordering.
    Arrays.sort(segmentsArr, Comparator.comparingLong(Segment::getGeneration));
    return segmentsArr;
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) SegmentCommitInfo(org.apache.lucene.index.SegmentCommitInfo) HashMap(java.util.HashMap) IndexSearcher(org.apache.lucene.search.IndexSearcher) IOException(java.io.IOException) SegmentReader(org.apache.lucene.index.SegmentReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Supplier(org.apache.logging.log4j.util.Supplier) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage)

Example 3 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.

In the class StoreRecovery, the method internalRecoverFromStore:

/**
     * Recovers the state of the shard from the store.
     */
/**
 * Recovers the state of the shard from the store.
 *
 * Reads the last committed segment infos under a store ref, reconciles them with
 * whether the index is expected to exist (recovery source != EMPTY_STORE), then
 * replays the translog (unless recovering from LOCAL_SHARDS) and finalizes recovery.
 * Any failure is rethrown as {@link IndexShardRecoveryException}.
 */
private void internalRecoverFromStore(IndexShard indexShard) throws IndexShardRecoveryException {
    final RecoveryState recoveryState = indexShard.recoveryState();
    // EMPTY_STORE means a fresh index: missing segment infos are then acceptable
    final boolean indexShouldExists = recoveryState.getRecoverySource().getType() != RecoverySource.Type.EMPTY_STORE;
    indexShard.prepareForIndexRecovery();
    long version = -1;
    SegmentInfos si = null;
    final Store store = indexShard.store();
    // hold a ref so the store cannot be closed underneath us; released in finally
    store.incRef();
    try {
        try {
            store.failIfCorrupted();
            try {
                si = store.readLastCommittedSegmentsInfo();
            } catch (Exception e) {
                // best-effort directory listing for the error message only
                String files = "_unknown_";
                try {
                    files = Arrays.toString(store.directory().listAll());
                } catch (Exception inner) {
                    inner.addSuppressed(e);
                    files += " (failure=" + ExceptionsHelper.detailedMessage(inner) + ")";
                }
                if (indexShouldExists) {
                    // an existing index with unreadable segment infos is fatal
                    throw new IndexShardRecoveryException(shardId, "shard allocated for local recovery (post api), should exist, but doesn't, current files: " + files, e);
                }
            }
            if (si != null) {
                if (indexShouldExists) {
                    version = si.getVersion();
                } else {
                    // it exists on the directory, but shouldn't exist on the FS, it's a leftover (possibly dangling)
                    // it's a "new index create" API, we have to do something, so better to clean it than use same data
                    logger.trace("cleaning existing shard, shouldn't exists");
                    Lucene.cleanLuceneIndex(store.directory());
                    si = null;
                }
            }
        } catch (Exception e) {
            throw new IndexShardRecoveryException(shardId, "failed to fetch index version after copying it over", e);
        }
        recoveryState.getIndex().updateVersion(version);
        if (recoveryState.getRecoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) {
            // local-shards recovery provides its own translog handling
            assert indexShouldExists;
            indexShard.skipTranslogRecovery(IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP);
        } else {
            // since we recover from local, just fill the files and size
            try {
                final RecoveryState.Index index = recoveryState.getIndex();
                if (si != null) {
                    addRecoveredFileDetails(si, store, index);
                }
            } catch (IOException e) {
                // stats only; recovery itself proceeds
                logger.debug("failed to list file details", e);
            }
            indexShard.performTranslogRecovery(indexShouldExists);
        }
        indexShard.finalizeRecovery();
        indexShard.postRecovery("post recovery from shard_store");
    } catch (EngineException | IOException e) {
        throw new IndexShardRecoveryException(shardId, "failed to recover from gateway", e);
    } finally {
        store.decRef();
    }
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) EngineException(org.elasticsearch.index.engine.EngineException) Store(org.elasticsearch.index.store.Store) IOException(java.io.IOException) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState) IndexShardRestoreFailedException(org.elasticsearch.index.snapshots.IndexShardRestoreFailedException) EngineException(org.elasticsearch.index.engine.EngineException) IOException(java.io.IOException)

Example 4 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.

In the class Store, the method tryOpenIndex:

/**
     * Tries to open an index for the given location. This includes reading the
     * segment infos and possible corruption markers. If the index can not
     * be opened, an exception is thrown
     */
/**
 * Attempts to open the index at {@code indexLocation} under a shard lock: first
 * checks for corruption markers, then reads the latest segment infos. If the index
 * cannot be opened, the underlying exception propagates to the caller.
 */
public static void tryOpenIndex(Path indexLocation, ShardId shardId, NodeEnvironment.ShardLocker shardLocker, Logger logger) throws IOException, ShardLockObtainFailedException {
    // Acquire the shard lock before opening the directory; both resources are
    // released automatically in reverse order when the try block exits.
    try (ShardLock shardLock = shardLocker.lock(shardId, TimeUnit.SECONDS.toMillis(5));
        Directory directory = new SimpleFSDirectory(indexLocation)) {
        failIfCorrupted(directory, shardId);
        final SegmentInfos segmentInfos = Lucene.readSegmentInfos(directory);
        logger.trace("{} loaded segment info [{}]", shardId, segmentInfos);
    }
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) ShardLock(org.elasticsearch.env.ShardLock) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) Directory(org.apache.lucene.store.Directory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) FilterDirectory(org.apache.lucene.store.FilterDirectory)

Example 5 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

In the class TestCodecSupport, the method assertCompressionMode:

/**
 * Asserts that the most recently written segment of the core's index carries the
 * expected stored-fields compression mode attribute.
 */
protected void assertCompressionMode(String expectedModeString, SolrCore core) throws IOException {
    RefCounted<SolrIndexSearcher> ref = null;
    try {
        ref = core.getSearcher();
        SolrIndexSearcher searcher = ref.get();
        // inspect the newest segment of the latest commit
        SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
        SegmentInfo info = infos.info(infos.size() - 1).info;
        String actualMode = info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY);
        assertEquals("Expecting compression mode string to be " + expectedModeString + " but got: " + actualMode + "\n SegmentInfo: " + info + "\n SegmentInfos: " + infos + "\n Codec: " + core.getCodec(), expectedModeString, actualMode);
    } finally {
        // release the searcher reference even if the assertion fails
        if (ref != null) {
            ref.decref();
        }
    }
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) SegmentInfo(org.apache.lucene.index.SegmentInfo) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher)

Aggregations

SegmentInfos (org.apache.lucene.index.SegmentInfos)23 IOException (java.io.IOException)9 IndexWriter (org.apache.lucene.index.IndexWriter)9 Term (org.apache.lucene.index.Term)8 IndexSearcher (org.apache.lucene.search.IndexSearcher)8 TermQuery (org.apache.lucene.search.TermQuery)7 Document (org.apache.lucene.document.Document)6 TextField (org.apache.lucene.document.TextField)6 DirectoryReader (org.apache.lucene.index.DirectoryReader)6 Directory (org.apache.lucene.store.Directory)6 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)5 SegmentCommitInfo (org.apache.lucene.index.SegmentCommitInfo)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 IndexReader (org.apache.lucene.index.IndexReader)4 Collectors (java.util.stream.Collectors)3 IntStream (java.util.stream.IntStream)3 Store (org.apache.lucene.document.Field.Store)3 StringField (org.apache.lucene.document.StringField)3 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)3