Use of org.apache.lucene.util.FailOnNonBulkMergesInfoStream in project lucene-solr by apache.
From the class TestConsistentFieldNumbers, method testFieldNumberGaps.
public void testFieldNumberGaps() throws IOException {
  int numIters = atLeast(13);
  for (int i = 0; i < numIters; i++) {
    Directory dir = newDirectory();
    {
      // First segment: f1 is assigned field number 0, f2 is assigned 1.
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
      Document d = new Document();
      d.add(new TextField("f1", "d1 first field", Field.Store.YES));
      d.add(new TextField("f2", "d1 second field", Field.Store.YES));
      writer.addDocument(d);
      writer.close();
      SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
      assertEquals(1, sis.size());
      FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
      assertEquals("f1", fis1.fieldInfo(0).name);
      assertEquals("f2", fis1.fieldInfo(1).name);
    }
    {
      // Second segment: reuses f1 (number 0) and adds f3 (number 2); f2 is absent,
      // leaving a gap at field number 1.
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
      Document d = new Document();
      d.add(new TextField("f1", "d2 first field", Field.Store.YES));
      d.add(new StoredField("f3", new byte[] { 1, 2, 3 }));
      writer.addDocument(d);
      writer.close();
      SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
      assertEquals(2, sis.size());
      FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
      FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
      assertEquals("f1", fis1.fieldInfo(0).name);
      assertEquals("f2", fis1.fieldInfo(1).name);
      assertEquals("f1", fis2.fieldInfo(0).name);
      assertNull(fis2.fieldInfo(1));
      assertEquals("f3", fis2.fieldInfo(2).name);
    }
    {
      // Third segment: uses all three fields with their consistent global numbers 0, 1, 2.
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
      Document d = new Document();
      d.add(new TextField("f1", "d3 first field", Field.Store.YES));
      d.add(new TextField("f2", "d3 second field", Field.Store.YES));
      d.add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 }));
      writer.addDocument(d);
      writer.close();
      SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
      assertEquals(3, sis.size());
      FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
      FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
      FieldInfos fis3 = IndexWriter.readFieldInfos(sis.info(2));
      assertEquals("f1", fis1.fieldInfo(0).name);
      assertEquals("f2", fis1.fieldInfo(1).name);
      assertEquals("f1", fis2.fieldInfo(0).name);
      assertNull(fis2.fieldInfo(1));
      assertEquals("f3", fis2.fieldInfo(2).name);
      assertEquals("f1", fis3.fieldInfo(0).name);
      assertEquals("f2", fis3.fieldInfo(1).name);
      assertEquals("f3", fis3.fieldInfo(2).name);
    }
    {
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
      writer.deleteDocuments(new Term("f1", "d1"));
      // nuke the first segment entirely so that the segment with gaps is
      // loaded first!
      writer.forceMergeDeletes();
      writer.close();
    }
    // Merge everything down to one segment; FailOnNonBulkMergesInfoStream fails the
    // test if the merge cannot use the bulk path despite the field-number gaps.
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(new LogByteSizeMergePolicy())
        .setInfoStream(new FailOnNonBulkMergesInfoStream()));
    writer.forceMerge(1);
    writer.close();
    SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(1, sis.size());
    FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
    assertEquals("f1", fis1.fieldInfo(0).name);
    assertEquals("f2", fis1.fieldInfo(1).name);
    assertEquals("f3", fis1.fieldInfo(2).name);
    dir.close();
  }
}
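
The test above boils down to one pattern: write segments whose field numbers diverge while merging is disabled, then force-merge with FailOnNonBulkMergesInfoStream installed so the merge fails if it cannot use the bulk-copy path. The following is a minimal, self-contained sketch of that pattern, not part of the Lucene test: the class name BulkMergeSketch and the use of RAMDirectory are illustrative choices, MockAnalyzer and FailOnNonBulkMergesInfoStream come from the lucene-test-framework jar, and the check is assertion-based, so run with -ea.

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.FailOnNonBulkMergesInfoStream;

public class BulkMergeSketch {

  public static void main(String[] args) throws IOException {
    Random random = new Random();
    Directory dir = new RAMDirectory(); // any Directory works; RAMDirectory keeps the sketch self-contained

    // Build two single-document segments with merging disabled, so each segment
    // writes its own FieldInfos against the shared global field-number map.
    for (int seg = 0; seg < 2; seg++) {
      IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random))
          .setMergePolicy(NoMergePolicy.INSTANCE);
      try (IndexWriter writer = new IndexWriter(dir, cfg)) {
        Document doc = new Document();
        doc.add(new TextField("f1", "segment " + seg, Field.Store.YES));
        if (seg == 0) {
          doc.add(new TextField("f2", "only in the first segment", Field.Store.YES));
        } else {
          // The second segment skips f2, so its FieldInfos has a gap at field number 1.
          doc.add(new StoredField("f3", new byte[] { 1, 2, 3 }));
        }
        writer.addDocument(doc);
      }
    }

    // Force-merge with FailOnNonBulkMergesInfoStream installed: if the merge falls
    // back to the slow, non-bulk path, the info stream trips an assertion.
    IndexWriterConfig mergeCfg = new IndexWriterConfig(new MockAnalyzer(random))
        .setMergePolicy(new LogByteSizeMergePolicy())
        .setInfoStream(new FailOnNonBulkMergesInfoStream());
    try (IndexWriter writer = new IndexWriter(dir, mergeCfg)) {
      writer.forceMerge(1);
    }

    dir.close();
  }
}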
Use of org.apache.lucene.util.FailOnNonBulkMergesInfoStream in project lucene-solr by apache.
From the class ThreadedIndexingAndSearchingTestCase, method runTest.
public void runTest(String testName) throws Exception {
  failed.set(false);
  addCount.set(0);
  delCount.set(0);
  packCount.set(0);
  final long t0 = System.currentTimeMillis();
  Random random = new Random(random().nextLong());
  final LineFileDocs docs = new LineFileDocs(random);
  final Path tempDir = createTempDir(testName);
  // some subclasses rely on this being MDW
  dir = getDirectory(newMockFSDirectory(tempDir));
  if (dir instanceof BaseDirectoryWrapper) {
    // don't double-checkIndex, we do it ourselves.
    ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false);
  }
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
  conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
  if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
    ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
  }
  if (LuceneTestCase.TEST_NIGHTLY) {
    // newIWConfig makes smallish max seg size, which
    // results in tons and tons of segments for this test
    // when run nightly:
    MergePolicy mp = conf.getMergePolicy();
    if (mp instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
    } else if (mp instanceof LogByteSizeMergePolicy) {
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
    } else if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMaxMergeDocs(100000);
    }
    // when running nightly, merging can still have crazy parameters,
    // and might use many per-field codecs. turn on CFS for IW flushes
    // and ensure CFS ratio is reasonable to keep it contained.
    conf.setUseCompoundFile(true);
    mp.setNoCFSRatio(Math.max(0.25d, mp.getNoCFSRatio()));
  }
  conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
    @Override
    public void warm(LeafReader reader) throws IOException {
      if (VERBOSE) {
        System.out.println("TEST: now warm merged reader=" + reader);
      }
      warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
      final int maxDoc = reader.maxDoc();
      final Bits liveDocs = reader.getLiveDocs();
      int sum = 0;
      final int inc = Math.max(1, maxDoc / 50);
      for (int docID = 0; docID < maxDoc; docID += inc) {
        if (liveDocs == null || liveDocs.get(docID)) {
          final Document doc = reader.document(docID);
          sum += doc.getFields().size();
        }
      }
      IndexSearcher searcher = newSearcher(reader, false);
      sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
      if (VERBOSE) {
        System.out.println("TEST: warm visited " + sum + " fields");
      }
    }
  });
  if (VERBOSE) {
    conf.setInfoStream(new PrintStreamInfoStream(System.out) {
      @Override
      public void message(String component, String message) {
        if ("TP".equals(component)) {
          // ignore test points!
          return;
        }
        super.message(component, message);
      }
    });
  }
  writer = new IndexWriter(dir, conf);
  TestUtil.reduceOpenFiles(writer);
  final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
  doAfterWriter(es);
  final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);
  final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;
  final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
  final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
  final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());
  final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
  final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);
  if (VERBOSE) {
    System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis() - t0) + " ms]");
  }
  // Let index build up a bit
  Thread.sleep(100);
  doSearching(es, stopTime);
  if (VERBOSE) {
    System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
  }
  for (Thread thread : indexThreads) {
    thread.join();
  }
  if (VERBOSE) {
    System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis() - t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
  }
  final IndexSearcher s = getFinalSearcher();
  if (VERBOSE) {
    System.out.println("TEST: finalSearcher=" + s);
  }
  assertFalse(failed.get());
  boolean doFail = false;
  // Verify: make sure delIDs are in fact deleted:
  for (String id : delIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
      doFail = true;
    }
  }
  // Verify: make sure delPackIDs are in fact deleted:
  for (String id : delPackIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
      doFail = true;
    }
  }
  // Verify: make sure each group of sub-docs are still in docID order:
  for (SubDocs subDocs : allSubDocs) {
    TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
    if (!subDocs.deleted) {
      // We sort by relevance but the scores should be identical so sort falls back to by docID:
      if (hits.totalHits != subDocs.subIDs.size()) {
        System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
        doFail = true;
      } else {
        int lastDocID = -1;
        int startDocID = -1;
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
          final int docID = scoreDoc.doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          } else {
            startDocID = docID;
          }
          lastDocID = docID;
          final Document doc = s.doc(docID);
          assertEquals(subDocs.packID, doc.get("packID"));
        }
        lastDocID = startDocID - 1;
        for (String subID : subDocs.subIDs) {
          hits = s.search(new TermQuery(new Term("docid", subID)), 1);
          assertEquals(1, hits.totalHits);
          final int docID = hits.scoreDocs[0].doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          }
          lastDocID = docID;
        }
      }
    } else {
      // The pack was deleted -- make sure its sub-docs are deleted too.  We can't
      // verify by packID alone because we can re-use packID for update:
      for (String subID : subDocs.subIDs) {
        assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
      }
    }
  }
  // Verify: make sure all not-deleted docs are in fact
  // not deleted:
  final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
  docs.close();
  for (int id = 0; id < endID; id++) {
    String stringID = "" + id;
    if (!delIDs.contains(stringID)) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
      if (hits.totalHits != 1) {
        System.out.println("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits + "; delIDs=" + delIDs);
        doFail = true;
      }
    }
  }
  assertFalse(doFail);
  assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs());
  releaseSearcher(s);
  writer.commit();
  assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
  doClose();
  try {
    writer.commit();
  } finally {
    writer.close();
  }
  // Can't shut down the executor until after the writer is closed: the writer's
  // merged-segment warmer uses an IndexSearcher (IS) to run
  // searches, and that IS may be using this es!
  if (es != null) {
    es.shutdown();
    es.awaitTermination(1, TimeUnit.SECONDS);
  }
  TestUtil.checkIndex(dir);
  dir.close();
  if (VERBOSE) {
    System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
  }
}
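
The part of runTest that is specific to FailOnNonBulkMergesInfoStream is the config wiring near the top, and it can be distilled into a small helper, sketched below. The standalone class and the helper name configureBulkMergeChecks are illustrative only; the input is assumed to come from LuceneTestCase.newIndexWriterConfig (which may randomly pick MockRandomMergePolicy), and it assumes MockRandomMergePolicy and its setDoNonBulkMerges method are accessible from the caller's package.

import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.util.FailOnNonBulkMergesInfoStream;

// Sketch only: this helper is not part of Lucene; it mirrors the setup in runTest above.
final class BulkMergeCheckConfig {

  static IndexWriterConfig configureBulkMergeChecks(IndexWriterConfig conf) {
    // Fail the test if any merge reports using the non-bulk (doc-by-doc) path.
    conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
    if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
      // MockRandomMergePolicy deliberately forces non-bulk merges at random;
      // turn that off so the info stream above only catches real regressions.
      ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
    }
    return conf;
  }
}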