Search in sources :

Example 11 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.

the class LuceneTests method testPruneUnreferencedFiles.

/**
 * Verifies that {@code Lucene.pruneUnreferencedFiles} reduces a directory to a single, older commit.
 * <p>
 * Three commits are written: document 1; documents 2 and 3; document 4 plus a delete of document 2.
 * The directory is then pruned to the second commit. Afterwards only documents 1-3 must be visible,
 * without deletions, and neither a {@code segments_3} file nor any file starting with {@code _2}
 * may remain in the directory.
 */
public void testPruneUnreferencedFiles() throws IOException {
    try (MockDirectoryWrapper dir = newMockDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        // never delete commits and never merge, so the second commit's files are still around when we prune
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
        iwc.setMaxBufferedDocs(2);
        final SegmentInfos segmentCommitInfos;
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // first commit: doc 1
            Document doc = new Document();
            doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();
            // second commit: docs 2 and 3 -- this is the commit we prune back to
            doc = new Document();
            doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            writer.addDocument(doc);
            doc = new Document();
            doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();
            segmentCommitInfos = Lucene.readSegmentInfos(dir);
            // third commit: doc 4 added, doc 2 deleted
            doc = new Document();
            doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            writer.addDocument(doc);
            writer.deleteDocuments(new Term("id", "2"));
            writer.commit();
            try (DirectoryReader open = DirectoryReader.open(writer)) {
                assertEquals(3, open.numDocs());
                assertEquals(1, open.numDeletedDocs());
                assertEquals(4, open.maxDoc());
            }
        }
        SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir);
        assertEquals(segmentCommitInfos.getSegmentsFileName(), si.getSegmentsFileName());
        try (DirectoryReader open = DirectoryReader.open(dir)) {
            assertEquals(3, open.numDocs());
            assertEquals(0, open.numDeletedDocs());
            assertEquals(3, open.maxDoc());
            IndexSearcher s = new IndexSearcher(open);
            assertEquals(1, s.search(new TermQuery(new Term("id", "1")), 1).totalHits);
            assertEquals(1, s.search(new TermQuery(new Term("id", "2")), 1).totalHits);
            assertEquals(1, s.search(new TermQuery(new Term("id", "3")), 1).totalHits);
            assertEquals(0, s.search(new TermQuery(new Term("id", "4")), 1).totalHits);
            for (String file : dir.listAll()) {
                assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2"));
            }
        }
    }
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SegmentInfos(org.apache.lucene.index.SegmentInfos) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) TextField(org.apache.lucene.document.TextField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 12 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.

the class StoreTests method testMarkCorruptedOnTruncatedSegmentsFile.

/**
 * Checks that a store is marked as corrupted when its last committed segments file has been
 * replaced by an empty file and the commit is subsequently read.
 */
public void testMarkCorruptedOnTruncatedSegmentsFile() throws IOException {
    final IndexWriterConfig writerConfig = newIndexWriterConfig();
    final ShardId shardId = new ShardId("index", "_na_", 1);
    final DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    final Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    final IndexWriter writer = new IndexWriter(store.directory(), writerConfig);

    // build between 1 and 10 random documents up front, then index them in one go
    final int numDocs = 1 + random().nextInt(10);
    final List<Document> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
        final Document document = new Document();
        final Field.Store idStored = random().nextBoolean() ? Field.Store.YES : Field.Store.NO;
        document.add(new StringField("id", Integer.toString(i), idStored));
        final String body = TestUtil.randomRealisticUnicodeString(random());
        final Field.Store bodyStored = random().nextBoolean() ? Field.Store.YES : Field.Store.NO;
        document.add(new TextField("body", body, bodyStored));
        final String docValue = TestUtil.randomRealisticUnicodeString(random());
        document.add(new SortedDocValuesField("dv", new BytesRef(docValue)));
        docs.add(document);
    }
    for (int i = 0; i < docs.size(); i++) {
        writer.addDocument(docs.get(i));
    }
    writer.commit();
    writer.close();

    // swap the committed segments file for an empty file of the same name
    final SegmentInfos lastCommit = store.readLastCommittedSegmentsInfo();
    store.directory().deleteFile(lastCommit.getSegmentsFileName());
    try (IndexOutput emptyFile = store.directory().createOutput(lastCommit.getSegmentsFileName(), IOContext.DEFAULT)) {
        // nothing is written: the file is intentionally left empty
    }

    // reading the commit through either entry point must fail with a CorruptIndexException
    try {
        if (randomBoolean()) {
            store.getMetadata(null);
        } else {
            store.readLastCommittedSegmentsInfo();
        }
        fail("corrupted segments_N file");
    } catch (CorruptIndexException expected) {
        // this is the outcome the test is looking for
    }
    assertTrue(store.isMarkedCorrupted());
    // the index is corrupted, so wipe it; otherwise the MockDirectoryWrapper check-index call might fail
    Lucene.cleanLuceneIndex(store.directory());
    store.close();
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) ArrayList(java.util.ArrayList) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IndexOutput(org.apache.lucene.store.IndexOutput) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) DummyShardLock(org.elasticsearch.test.DummyShardLock) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 13 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.

the class StoreRecoveryTests method testAddIndices.

/**
 * Verifies that {@code StoreRecovery.addIndices} copies every source directory into the target.
 * <p>
 * Between 1 and 10 source directories are each filled with the same random number of documents
 * (ids are unique across all directories) and then added to an empty target. The test checks that
 * the number of non-bookkeeping files matches, that the recovery statistics agree with the files
 * found in the target, that no segment was produced by a merge and that all documents are present.
 */
public void testAddIndices() throws IOException {
    Directory[] dirs = new Directory[randomIntBetween(1, 10)];
    final int numDocs = randomIntBetween(50, 100);
    int id = 0;
    for (int i = 0; i < dirs.length; i++) {
        dirs[i] = newFSDirectory(createTempDir());
        try (IndexWriter writer = new IndexWriter(dirs[i], newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE).setOpenMode(IndexWriterConfig.OpenMode.CREATE))) {
            for (int j = 0; j < numDocs; j++) {
                writer.addDocument(Arrays.asList(new StringField("id", Integer.toString(id++), Field.Store.YES)));
            }
            writer.commit();
        }
    }
    StoreRecovery storeRecovery = new StoreRecovery(new ShardId("foo", "bar", 1), logger);
    RecoveryState.Index indexStats = new RecoveryState.Index();
    Directory target = newFSDirectory(createTempDir());
    storeRecovery.addIndices(indexStats, target, dirs);
    // segments files, the write lock and "extra*" files are excluded from the comparison
    Predicate<String> filesFilter = (f) -> f.startsWith("segments") == false && f.equals("write.lock") == false && f.startsWith("extra") == false;
    // long rather than int: Stream.count() returns long and "+=" would silently narrow it
    long numFiles = 0;
    for (Directory d : dirs) {
        numFiles += Arrays.stream(d.listAll()).filter(filesFilter).count();
    }
    final long targetNumFiles = Arrays.stream(target.listAll()).filter(filesFilter).count();
    assertEquals(numFiles, targetNumFiles);
    assertEquals(targetNumFiles, indexStats.totalFileCount());
    if (hardLinksSupported(createTempDir())) {
        assertEquals(targetNumFiles, indexStats.reusedFileCount());
    } else {
        // integral comparison; the former three-argument call picked the floating-point delta overload
        assertEquals(0, indexStats.reusedFileCount());
    }
    try (DirectoryReader reader = DirectoryReader.open(target)) {
        SegmentInfos segmentCommitInfos = SegmentInfos.readLatestCommit(target);
        for (SegmentCommitInfo info : segmentCommitInfos) {
            // check that we didn't merge
            assertEquals("all sources must be flush", "flush", info.info.getDiagnostics().get("source"));
        }
        assertEquals(0, reader.numDeletedDocs());
        assertEquals(id, reader.numDocs());
    }
    target.close();
    IOUtils.close(dirs);
}
Also used : NoMergePolicy(org.apache.lucene.index.NoMergePolicy) Arrays(java.util.Arrays) Files(java.nio.file.Files) StringField(org.apache.lucene.document.StringField) Predicate(java.util.function.Predicate) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOUtils(org.apache.lucene.util.IOUtils) IOException(java.io.IOException) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) SegmentCommitInfo(org.apache.lucene.index.SegmentCommitInfo) SegmentInfos(org.apache.lucene.index.SegmentInfos) IndexWriter(org.apache.lucene.index.IndexWriter) Field(org.apache.lucene.document.Field) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState) CodecUtil(org.apache.lucene.codecs.CodecUtil) Directory(org.apache.lucene.store.Directory) AccessControlException(java.security.AccessControlException) ESTestCase(org.elasticsearch.test.ESTestCase) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) IOContext(org.apache.lucene.store.IOContext) Path(java.nio.file.Path) IndexOutput(org.apache.lucene.store.IndexOutput) SegmentInfos(org.apache.lucene.index.SegmentInfos) SegmentCommitInfo(org.apache.lucene.index.SegmentCommitInfo) DirectoryReader(org.apache.lucene.index.DirectoryReader) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState) Directory(org.apache.lucene.store.Directory)

Example 14 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.

the class Lucene method pruneUnreferencedFiles.

/**
 * Deletes every file in the given directory that the given segments file does not reference.
 * <p>
 * All other segments files are deleted explicitly while holding the directory's write lock. An
 * {@link IndexWriter} is then opened on the retained commit and closed right away, relying on the index
 * file deleter to remove whatever else is unreferenced.
 * <p>
 * <b>Note:</b> this method will fail if another IndexWriter is open on the directory, since it acquires the
 * write lock itself while pruning. The directory is expected to already contain an index that has the
 * given segments file.
 *
 * @param segmentsFileName name of the segments file (commit) to keep
 * @param directory        directory to prune
 * @return the segment infos read from {@code segmentsFileName}
 * @throws IllegalStateException if the directory contains no segments file at all
 */
public static SegmentInfos pruneUnreferencedFiles(String segmentsFileName, Directory directory) throws IOException {
    final SegmentInfos si = readSegmentInfos(segmentsFileName, directory);
    try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
        int foundSegmentFiles = 0;
        for (final String name : directory.listAll()) {
            /*
             * A deletion policy could do this as well, but with snapshot and restore we sometimes
             * restore an index and overwrite files that were referenced by a "future" commit. If the
             * IndexWriter opened such a commit it would likely throw a corrupted index exception
             * because the checksums don't match anymore, so the files are pruned by name right here.
             * Deleting directly also lets the caller know when a segments_N file could not be removed.
             */
            final boolean isSegmentsFile = name.startsWith(IndexFileNames.SEGMENTS) || name.equals(IndexFileNames.OLD_SEGMENTS_GEN);
            if (isSegmentsFile == false) {
                continue;
            }
            foundSegmentFiles++;
            if (name.equals(si.getSegmentsFileName())) {
                // this is the commit we want to keep
                continue;
            }
            directory.deleteFile(name);
        }
        assert SegmentInfos.getLastCommitSegmentsFileName(directory).equals(segmentsFileName);
        if (foundSegmentFiles == 0) {
            throw new IllegalStateException("no commit found in the directory");
        }
    }
    final CommitPoint cp = new CommitPoint(si, directory);
    final IndexWriterConfig config = new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
        .setIndexCommit(cp)
        .setCommitOnClose(false)
        .setMergePolicy(NoMergePolicy.INSTANCE)
        .setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        // nothing to do: opening and closing kicks off the IndexFileDeleter, which removes all pending files
    }
    return si;
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) IndexWriter(org.apache.lucene.index.IndexWriter) Lock(org.apache.lucene.store.Lock) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 15 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

the class TestSimpleTextCodec method test.

/**
 * Checks that the SimpleText codec is in effect at every level: the solrconfig codec factory, the
 * core, the IndexWriter configuration and the most recent segment written after a commit.
 */
public void test() throws Exception {
    final SolrConfig solrConfig = h.getCore().getSolrConfig();
    final String factoryClass = solrConfig.get("codecFactory/@class");
    assertEquals("Unexpected solrconfig codec factory", "solr.SimpleTextCodecFactory", factoryClass);
    assertEquals("Unexpected core codec", "SimpleText", h.getCore().getCodec().getName());

    // the writer handle is reference counted and has to be released again
    final RefCounted<IndexWriter> writerHandle = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
    try {
        final String writerCodec = writerHandle.get().getConfig().getCodec().getName();
        assertEquals("Unexpected codec in IndexWriter config", "SimpleText", writerCodec);
    } finally {
        writerHandle.decref();
    }

    // index and commit a single document so that a segment exists
    assertU(add(doc("id", "1", "text", "textual content goes here")));
    assertU(commit());

    final RefCounted<SolrIndexSearcher> searcherHandle = h.getCore().getSearcher();
    try {
        final SolrIndexSearcher indexSearcher = searcherHandle.get();
        final SegmentInfos commit = SegmentInfos.readLatestCommit(indexSearcher.getIndexReader().directory());
        // look at the last segment of the latest commit
        final int lastIndex = commit.size() - 1;
        final SegmentInfo lastSegment = commit.info(lastIndex).info;
        assertEquals("Unexpected segment codec", "SimpleText", lastSegment.getCodec().getName());
    } finally {
        searcherHandle.decref();
    }
    assertQ(req("q", "id:1"), "*[count(//doc)=1]");
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) IndexWriter(org.apache.lucene.index.IndexWriter) SegmentInfo(org.apache.lucene.index.SegmentInfo) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher)

Aggregations

SegmentInfos (org.apache.lucene.index.SegmentInfos)23 IOException (java.io.IOException)9 IndexWriter (org.apache.lucene.index.IndexWriter)9 Term (org.apache.lucene.index.Term)8 IndexSearcher (org.apache.lucene.search.IndexSearcher)8 TermQuery (org.apache.lucene.search.TermQuery)7 Document (org.apache.lucene.document.Document)6 TextField (org.apache.lucene.document.TextField)6 DirectoryReader (org.apache.lucene.index.DirectoryReader)6 Directory (org.apache.lucene.store.Directory)6 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)5 SegmentCommitInfo (org.apache.lucene.index.SegmentCommitInfo)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 IndexReader (org.apache.lucene.index.IndexReader)4 Collectors (java.util.stream.Collectors)3 IntStream (java.util.stream.IntStream)3 Store (org.apache.lucene.document.Field.Store)3 StringField (org.apache.lucene.document.StringField)3 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)3