Use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.
The class LuceneTests, method testPruneUnreferencedFiles.
public void testPruneUnreferencedFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);
    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    DirectoryReader open = DirectoryReader.open(writer);
    assertEquals(3, open.numDocs());
    assertEquals(1, open.numDeletedDocs());
    assertEquals(4, open.maxDoc());
    open.close();
    writer.close();
    SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir);
    assertEquals(segmentCommitInfos.getSegmentsFileName(), si.getSegmentsFileName());
    open = DirectoryReader.open(dir);
    assertEquals(3, open.numDocs());
    assertEquals(0, open.numDeletedDocs());
    assertEquals(3, open.maxDoc());
    IndexSearcher s = new IndexSearcher(open);
    assertEquals(1, s.search(new TermQuery(new Term("id", "1")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("id", "2")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("id", "3")), 1).totalHits);
    assertEquals(0, s.search(new TermQuery(new Term("id", "4")), 1).totalHits);
    for (String file : dir.listAll()) {
        assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2"));
    }
    open.close();
    dir.close();
}
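The test above captures a commit point with the Elasticsearch helper Lucene.readSegmentInfos(dir). For reference, a minimal standalone sketch of the underlying Lucene call it wraps could look like the following; the index path is a placeholder, and the snippet assumes an existing index in that directory.

import java.nio.file.Paths;

import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ReadLatestCommitExample {
    public static void main(String[] args) throws Exception {
        // "/tmp/example-index" is a hypothetical path to an existing Lucene index
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            // read the segments_N file of the most recent commit point
            SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
            System.out.println("commit file: " + infos.getSegmentsFileName());
            for (SegmentCommitInfo sci : infos) {
                // each entry describes one segment of the commit
                System.out.println(sci.info.name + " maxDoc=" + sci.info.maxDoc()
                    + " delCount=" + sci.getDelCount());
            }
        }
    }
}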
Use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.
The class StoreTests, method testMarkCorruptedOnTruncatedSegmentsFile.
public void testMarkCorruptedOnTruncatedSegmentsFile() throws IOException {
    IndexWriterConfig iwc = newIndexWriterConfig();
    final ShardId shardId = new ShardId("index", "_na_", 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    int numDocs = 1 + random().nextInt(10);
    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        docs.add(doc);
    }
    for (Document d : docs) {
        writer.addDocument(d);
    }
    writer.commit();
    writer.close();
    SegmentInfos segmentCommitInfos = store.readLastCommittedSegmentsInfo();
    store.directory().deleteFile(segmentCommitInfos.getSegmentsFileName());
    try (IndexOutput out = store.directory().createOutput(segmentCommitInfos.getSegmentsFileName(), IOContext.DEFAULT)) {
        // replace the commit point with an empty file
    }
    try {
        if (randomBoolean()) {
            store.getMetadata(null);
        } else {
            store.readLastCommittedSegmentsInfo();
        }
        fail("corrupted segments_N file");
    } catch (CorruptIndexException ex) {
        // expected
    }
    assertTrue(store.isMarkedCorrupted());
    // we have to remove the index since it's corrupted and might fail the MockDirectoryWrapper checkIndex call
    Lucene.cleanLuceneIndex(store.directory());
    store.close();
}
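Store.getMetadata and Store.readLastCommittedSegmentsInfo are Elasticsearch wrappers; the corruption itself is surfaced by Lucene when it parses the truncated segments_N file. A hedged sketch of detecting this with plain Lucene, assuming a hypothetical index path whose commit file has been damaged, might be:

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DetectBrokenCommitExample {
    public static void main(String[] args) throws IOException {
        // placeholder path; assume its segments_N file was truncated or zeroed out
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/broken-index"))) {
            try {
                SegmentInfos.readLatestCommit(dir);
                System.out.println("commit point is readable");
            } catch (CorruptIndexException e) {
                // header or checksum validation of segments_N failed
                System.out.println("corrupted commit: " + e.getMessage());
            } catch (IOException e) {
                // a zero-length file may instead surface as e.g. an EOFException
                System.out.println("unreadable commit: " + e.getMessage());
            }
        }
    }
}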
Use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.
The class StoreRecoveryTests, method testAddIndices.
public void testAddIndices() throws IOException {
    Directory[] dirs = new Directory[randomIntBetween(1, 10)];
    final int numDocs = randomIntBetween(50, 100);
    int id = 0;
    for (int i = 0; i < dirs.length; i++) {
        dirs[i] = newFSDirectory(createTempDir());
        IndexWriter writer = new IndexWriter(dirs[i], newIndexWriterConfig()
            .setMergePolicy(NoMergePolicy.INSTANCE)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
        for (int j = 0; j < numDocs; j++) {
            writer.addDocument(Arrays.asList(new StringField("id", Integer.toString(id++), Field.Store.YES)));
        }
        writer.commit();
        writer.close();
    }
    StoreRecovery storeRecovery = new StoreRecovery(new ShardId("foo", "bar", 1), logger);
    RecoveryState.Index indexStats = new RecoveryState.Index();
    Directory target = newFSDirectory(createTempDir());
    storeRecovery.addIndices(indexStats, target, dirs);
    int numFiles = 0;
    Predicate<String> filesFilter = (f) -> f.startsWith("segments") == false
        && f.equals("write.lock") == false && f.startsWith("extra") == false;
    for (Directory d : dirs) {
        numFiles += Arrays.asList(d.listAll()).stream().filter(filesFilter).count();
    }
    final long targetNumFiles = Arrays.asList(target.listAll()).stream().filter(filesFilter).count();
    assertEquals(numFiles, targetNumFiles);
    assertEquals(indexStats.totalFileCount(), targetNumFiles);
    if (hardLinksSupported(createTempDir())) {
        assertEquals(targetNumFiles, indexStats.reusedFileCount());
    } else {
        assertEquals(0, indexStats.reusedFileCount());
    }
    DirectoryReader reader = DirectoryReader.open(target);
    SegmentInfos segmentCommitInfos = SegmentInfos.readLatestCommit(target);
    for (SegmentCommitInfo info : segmentCommitInfos) {
        // check that we didn't merge
        assertEquals("all sources must be flush", "flush", info.info.getDiagnostics().get("source"));
    }
    assertEquals(0, reader.numDeletedDocs());
    assertEquals(id, reader.numDocs());
    reader.close();
    target.close();
    IOUtils.close(dirs);
}
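The loop over SegmentCommitInfo relies on the diagnostics map that IndexWriter attaches to every segment: a segment created by a flush carries source=flush, while a merged segment carries source=merge. A small sketch of inspecting those diagnostics on an arbitrary index (the path is hypothetical):

import java.nio.file.Paths;
import java.util.Map;

import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SegmentDiagnosticsExample {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
            for (SegmentCommitInfo sci : infos) {
                // "source" records whether the segment came from a flush or a merge
                Map<String, String> diagnostics = sci.info.getDiagnostics();
                System.out.println(sci.info.name + " source=" + diagnostics.get("source"));
            }
        }
    }
}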
Use of org.apache.lucene.index.SegmentInfos in project elasticsearch by elastic.
The class Lucene, method pruneUnreferencedFiles.
/**
 * Removes all files from the given directory that are not referenced by the given segments file.
 * This method opens an IndexWriter and relies on its IndexFileDeleter to remove all unreferenced files. Segments
 * files that are newer than the given one are removed forcefully, to prevent the IndexWriter from opening a
 * potentially broken commit point or leftover.
 * <b>Note:</b> this method will fail if another IndexWriter is open on the given directory. It also acquires
 * the directory's write lock while pruning unused files. It expects the directory to contain an existing index
 * that has the given segments file.
 */
public static SegmentInfos pruneUnreferencedFiles(String segmentsFileName, Directory directory) throws IOException {
    final SegmentInfos si = readSegmentInfos(segmentsFileName, directory);
    try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
        int foundSegmentFiles = 0;
        for (final String file : directory.listAll()) {
            /*
             * We could also use a deletion policy here, but in the snapshot-and-restore case we sometimes
             * restore an index and override files that were referenced by a "future" commit. If the IndexWriter
             * opened such a commit, it would likely throw a corrupted-index exception since the checksums no
             * longer match. That's why we prune by name here directly. We also want the caller to know if we
             * were not able to remove a segments_N file.
             */
            if (file.startsWith(IndexFileNames.SEGMENTS) || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
                foundSegmentFiles++;
                if (file.equals(si.getSegmentsFileName()) == false) {
                    // remove all segments_N files except the one we want to keep
                    directory.deleteFile(file);
                }
            }
        }
        assert SegmentInfos.getLastCommitSegmentsFileName(directory).equals(segmentsFileName);
        if (foundSegmentFiles == 0) {
            throw new IllegalStateException("no commit found in the directory");
        }
    }
    final CommitPoint cp = new CommitPoint(si, directory);
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
            .setIndexCommit(cp)
            .setCommitOnClose(false)
            .setMergePolicy(NoMergePolicy.INSTANCE)
            .setOpenMode(IndexWriterConfig.OpenMode.APPEND))) {
        // do nothing; closing the writer kicks off the IndexFileDeleter, which removes all unreferenced files
    }
    return si;
}
Use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.
The class TestSimpleTextCodec, method test.
public void test() throws Exception {
    SolrConfig config = h.getCore().getSolrConfig();
    String codecFactory = config.get("codecFactory/@class");
    assertEquals("Unexpected solrconfig codec factory", "solr.SimpleTextCodecFactory", codecFactory);
    assertEquals("Unexpected core codec", "SimpleText", h.getCore().getCodec().getName());
    RefCounted<IndexWriter> writerRef = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
    try {
        IndexWriter writer = writerRef.get();
        assertEquals("Unexpected codec in IndexWriter config", "SimpleText", writer.getConfig().getCodec().getName());
    } finally {
        writerRef.decref();
    }
    assertU(add(doc("id", "1", "text", "textual content goes here")));
    assertU(commit());
    RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getSearcher();
    try {
        SolrIndexSearcher searcher = searcherRef.get();
        SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
        SegmentInfo info = infos.info(infos.size() - 1).info;
        assertEquals("Unexpected segment codec", "SimpleText", info.getCodec().getName());
    } finally {
        searcherRef.decref();
    }
    assertQ(req("q", "id:1"), "*[count(//doc)=1]");
}
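Outside of Solr's test harness, the same codec check can be made against any Lucene index, since every SegmentInfo records the Codec that wrote it. A minimal sketch, again with a hypothetical index path:

import java.nio.file.Paths;

import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SegmentCodecExample {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
            for (SegmentCommitInfo sci : infos) {
                // prints the codec name per segment, e.g. "SimpleText" when
                // SimpleTextCodecFactory is configured
                System.out.println(sci.info.name + " codec=" + sci.info.getCodec().getName());
            }
        }
    }
}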