Use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.
In class TestSegmentMerger, method testMerge:
public void testMerge() throws IOException {
  final Codec codec = Codec.getDefault();
  final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, null, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
  SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(reader1, reader2), si, InfoStream.getDefault(), mergedDir, new FieldInfos.FieldNumbers(), newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))));
  MergeState mergeState = merger.merge();
  int docsMerged = mergeState.segmentInfo.maxDoc();
  assertTrue(docsMerged == 2);
  // Should be able to open a new SegmentReader against the new directory
  SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(mergeState.segmentInfo, 0, -1L, -1L, -1L), Version.LATEST.major, newIOContext(random()));
  assertTrue(mergedReader != null);
  assertTrue(mergedReader.numDocs() == 2);
  Document newDoc1 = mergedReader.document(0);
  assertTrue(newDoc1 != null);
  // There are 2 unstored fields on the document
  assertTrue(DocHelper.numFields(newDoc1) == DocHelper.numFields(doc1) - DocHelper.unstored.size());
  Document newDoc2 = mergedReader.document(1);
  assertTrue(newDoc2 != null);
  assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size());
  PostingsEnum termDocs = TestUtil.docs(random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), null, 0);
  assertTrue(termDocs != null);
  assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  int tvCount = 0;
  for (FieldInfo fieldInfo : mergedReader.getFieldInfos()) {
    if (fieldInfo.hasVectors()) {
      tvCount++;
    }
  }
  //System.out.println("stored size: " + stored.size());
  assertEquals("We do not have 3 fields that were indexed with term vector", 3, tvCount);
  Terms vector = mergedReader.getTermVectors(0).terms(DocHelper.TEXT_FIELD_2_KEY);
  assertNotNull(vector);
  assertEquals(3, vector.size());
  TermsEnum termsEnum = vector.iterator();
  int i = 0;
  while (termsEnum.next() != null) {
    String term = termsEnum.term().utf8ToString();
    int freq = (int) termsEnum.totalTermFreq();
    //System.out.println("Term: " + term + " Freq: " + freq);
    assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
    assertTrue(DocHelper.FIELD_2_FREQS[i] == freq);
    i++;
  }
  TestSegmentReader.checkNorms(mergedReader);
  mergedReader.close();
}
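The test passes -1 for every MergeInfo field, which marks the hint as "unknown". As the other snippets on this page use it, the constructor arguments are, in order: the total document count of the merge, the estimated merge size in bytes, whether the merge involves readers external to the IndexWriter, and the maximum segment count requested by a forceMerge (-1 for unbounded). A minimal sketch with fully specified hints; the concrete numbers are invented for illustration:

  // Hypothetical hints: 10,000 docs, ~64 MB of estimated output, an
  // internal merge, and no forceMerge segment cap (-1 = unbounded).
  MergeInfo mergeHint = new MergeInfo(10000, 64L * 1024 * 1024, false, -1);
  IOContext mergeContext = new IOContext(mergeHint);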
Use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.
In class TestDoc, method merge:
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile) throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));
  SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
  SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context);
  final Codec codec = Codec.getDefault();
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
  SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2), si, InfoStream.getDefault(), trackingDir, new FieldInfos.FieldNumbers(), context);
  MergeState mergeState = merger.merge();
  r1.close();
  r2.close();
  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
  if (useCompoundFile) {
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for (String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }
  return new SegmentCommitInfo(si, 0, -1L, -1L, -1L);
}
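A caller of this helper might drive it as follows; the variable names here are hypothetical, not taken from the test:

  // Hypothetical usage: merge two single-document segments into a new
  // segment named "merge", optionally packing it into a compound file.
  SegmentCommitInfo siMerged = merge(dir, si1, si2, "merge", useCompoundFile);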
Use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.
In class IndexWriter, method addIndexes:
/**
 * Merges the provided indexes into this index.
 *
 * <p>
 * The provided IndexReaders are not closed.
 *
 * <p>
 * See {@link #addIndexes} for details on transactional semantics, temporary
 * free space required in the Directory, and non-CFS segments on an Exception.
 *
 * <p>
 * <b>NOTE:</b> empty segments are dropped by this method and not added to this
 * index.
 *
 * <p>
 * <b>NOTE:</b> this merges all given {@link LeafReader}s in one
 * merge. If you intend to merge a large number of readers, it may be better
 * to call this method multiple times, each time with a small set of readers.
 * In principle, if you use a merge policy with a {@code mergeFactor} or
 * {@code maxMergeAtOnce} parameter, you should pass that many readers in one
 * call.
 *
 * <p>
 * <b>NOTE:</b> this method does not call or make use of the {@link MergeScheduler},
 * so any custom bandwidth throttling is at the moment ignored.
 *
 * @return The <a href="#sequence_number">sequence number</a> for this operation
 *
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 * @throws IllegalArgumentException if addIndexes would cause the index to exceed {@link #MAX_DOCS}
 */
public long addIndexes(CodecReader... readers) throws IOException {
  ensureOpen();
  // long so we can detect int overflow:
  long numDocs = 0;
  Sort indexSort = config.getIndexSort();
  long seqNo;
  try {
    if (infoStream.isEnabled("IW")) {
      infoStream.message("IW", "flush at addIndexes(CodecReader...)");
    }
    flush(false, true);
    String mergedName = newSegmentName();
    for (CodecReader leaf : readers) {
      numDocs += leaf.numDocs();
      validateMergeReader(leaf);
    }
    // Best-effort up front check:
    testReserveDocs(numDocs);
    final IOContext context = new IOContext(new MergeInfo(Math.toIntExact(numDocs), -1, false, UNBOUNDED_MAX_MERGE_SEGMENTS));
    // TODO: somehow we should fix this merge so it's
    // abortable so that IW.close(false) is able to stop it
    TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
    // We set the min version to null for now, it will be set later by SegmentMerger
    SegmentInfo info = new SegmentInfo(directoryOrig, Version.LATEST, null, mergedName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort());
    SegmentMerger merger = new SegmentMerger(Arrays.asList(readers), info, infoStream, trackingDir, globalFieldNumberMap, context);
    if (!merger.shouldMerge()) {
      return docWriter.deleteQueue.getNextSequenceNumber();
    }
    // merge 'em
    merger.merge();
    SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L, -1L);
    info.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
    trackingDir.clearCreatedFiles();
    setDiagnostics(info, SOURCE_ADDINDEXES_READERS);
    final MergePolicy mergePolicy = config.getMergePolicy();
    boolean useCompoundFile;
    synchronized (this) {
      // Guard segmentInfos
      if (stopMerges) {
        // Safe: these files must exist
        deleteNewFiles(infoPerCommit.files());
        return docWriter.deleteQueue.getNextSequenceNumber();
      }
      ensureOpen();
      useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, infoPerCommit, this);
    }
    // Now create the compound file if needed
    if (useCompoundFile) {
      Collection<String> filesToDelete = infoPerCommit.files();
      TrackingDirectoryWrapper trackingCFSDir = new TrackingDirectoryWrapper(directory);
      // createCompoundFile tries to cleanup, but it might not always be able to...
      try {
        createCompoundFile(infoStream, trackingCFSDir, info, context);
      } finally {
        // delete new non cfs files directly: they were never
        // registered with IFD
        deleteNewFiles(filesToDelete);
      }
      info.setUseCompoundFile(true);
    }
    // Have codec write SegmentInfo. Must do this after
    // creating CFS so that 1) .si isn't slurped into CFS,
    // and 2) .si reflects useCompoundFile=true change
    // above:
    codec.segmentInfoFormat().write(trackingDir, info, context);
    info.addFiles(trackingDir.getCreatedFiles());
    // Register the new segment
    synchronized (this) {
      if (stopMerges) {
        // Safe: these files must exist
        deleteNewFiles(infoPerCommit.files());
        return docWriter.deleteQueue.getNextSequenceNumber();
      }
      ensureOpen();
      // Now reserve the docs, just before we update SIS:
      reserveDocs(numDocs);
      segmentInfos.add(infoPerCommit);
      seqNo = docWriter.deleteQueue.getNextSequenceNumber();
      checkpoint();
    }
  } catch (VirtualMachineError tragedy) {
    tragicEvent(tragedy, "addIndexes(CodecReader...)");
    // dead code but javac disagrees:
    seqNo = -1;
  }
  maybeMerge();
  return seqNo;
}
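Callers usually obtain CodecReader instances by wrapping the leaves of an open DirectoryReader. A minimal sketch, assuming a source directory srcDir and an open IndexWriter writer are in scope (both names are placeholders):

  // Copy every segment of the index in srcDir into the index behind writer.
  try (DirectoryReader reader = DirectoryReader.open(srcDir)) {
    CodecReader[] codecReaders = new CodecReader[reader.leaves().size()];
    for (int i = 0; i < codecReaders.length; i++) {
      // SlowCodecReaderWrapper exposes a plain LeafReader through the CodecReader API.
      codecReaders[i] = SlowCodecReaderWrapper.wrap(reader.leaves().get(i).reader());
    }
    long seqNo = writer.addIndexes(codecReaders);
  }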
Use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.
In class BlockDirectoryTest, method ensureCacheConfigurable:
/**
 * Verify the configuration options for the block cache are handled
 * appropriately.
 */
@Test
public void ensureCacheConfigurable() throws Exception {
  IOContext mergeContext = new IOContext(new MergeInfo(1, 1, false, 1));
  BlockDirectory d = directory;
  assertTrue(d.useReadCache("", IOContext.DEFAULT));
  if (d.getCache() instanceof MapperCache) {
    assertTrue(d.useWriteCache("", IOContext.DEFAULT));
  } else {
    assertFalse(d.useWriteCache("", IOContext.DEFAULT));
  }
  assertFalse(d.useWriteCache("", mergeContext));
  d = new BlockDirectory("test", directory, mapperCache, null, true, false);
  assertTrue(d.useReadCache("", IOContext.DEFAULT));
  assertFalse(d.useWriteCache("", IOContext.DEFAULT));
  assertFalse(d.useWriteCache("", mergeContext));
  d = new BlockDirectory("test", directory, mapperCache, null, false, true);
  assertFalse(d.useReadCache("", IOContext.DEFAULT));
  if (d.getCache() instanceof MapperCache) {
    assertTrue(d.useWriteCache("", IOContext.DEFAULT));
  } else {
    assertFalse(d.useWriteCache("", IOContext.DEFAULT));
  }
  assertFalse(d.useWriteCache("", mergeContext));
}
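The mergeContext assertions all expect false: write-caching merge output would evict hotter blocks in favor of files that are written once and read back sequentially. A directory wrapper can detect a merge by inspecting the context kind; this is a sketch of the idea, not BlockDirectory's actual implementation:

  // Sketch: decline the write cache for merge output.
  static boolean shouldWriteCache(IOContext context) {
    return context.context != IOContext.Context.MERGE;
  }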
Use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.
In class LuceneTestCase, method newIOContext:
/** TODO: javadoc */
public static IOContext newIOContext(Random random, IOContext oldContext) {
  final int randomNumDocs = random.nextInt(4192);
  final int size = random.nextInt(512) * randomNumDocs;
  if (oldContext.flushInfo != null) {
    // Always return at least the estimatedSegmentSize of
    // the incoming IOContext:
    return new IOContext(new FlushInfo(randomNumDocs, Math.max(oldContext.flushInfo.estimatedSegmentSize, size)));
  } else if (oldContext.mergeInfo != null) {
    // Always return at least the estimatedMergeBytes of
    // the incoming IOContext:
    return new IOContext(new MergeInfo(randomNumDocs, Math.max(oldContext.mergeInfo.estimatedMergeBytes, size), random.nextBoolean(), TestUtil.nextInt(random, 1, 100)));
  } else {
    // Make a totally random IOContext:
    final IOContext context;
    switch (random.nextInt(5)) {
      case 0:
        context = IOContext.DEFAULT;
        break;
      case 1:
        context = IOContext.READ;
        break;
      case 2:
        context = IOContext.READONCE;
        break;
      case 3:
        context = new IOContext(new MergeInfo(randomNumDocs, size, true, -1));
        break;
      case 4:
        context = new IOContext(new FlushInfo(randomNumDocs, size));
        break;
      default:
        context = IOContext.DEFAULT;
    }
    return context;
  }
}
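The test snippets above use this helper to randomize the hints on a merge context while keeping it a merge context, for example:

  // Randomized doc count, size estimate, and segment cap, but still a merge.
  IOContext ctx = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));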