Use of org.apache.lucene.store.TrackingDirectoryWrapper in project lucene-solr by apache.
The class IndexWriter, method addIndexes:
/**
* Merges the provided indexes into this index.
*
* <p>
* The provided IndexReaders are not closed.
*
* <p>
* See {@link #addIndexes} for details on transactional semantics, temporary
* free space required in the Directory, and non-CFS segments on an Exception.
*
* <p>
* <b>NOTE:</b> empty segments are dropped by this method and not added to this
* index.
*
* <p>
* <b>NOTE:</b> this merges all given {@link LeafReader}s in one
* merge. If you intend to merge a large number of readers, it may be better
* to call this method multiple times, each time with a small set of readers.
* In principle, if you use a merge policy with a {@code mergeFactor} or
* {@code maxMergeAtOnce} parameter, you should pass that many readers in one
* call.
*
* <p>
* <b>NOTE:</b> this method does not call or make use of the {@link MergeScheduler},
* so any custom bandwidth throttling is at the moment ignored.
*
* @return The <a href="#sequence_number">sequence number</a>
* for this operation
*
* @throws CorruptIndexException
* if the index is corrupt
* @throws IOException
* if there is a low-level IO error
* @throws IllegalArgumentException
* if addIndexes would cause the index to exceed {@link #MAX_DOCS}
*/
public long addIndexes(CodecReader... readers) throws IOException {
  ensureOpen();

  // long so we can detect int overflow:
  long numDocs = 0;
  Sort indexSort = config.getIndexSort();

  long seqNo;

  try {
    if (infoStream.isEnabled("IW")) {
      infoStream.message("IW", "flush at addIndexes(CodecReader...)");
    }
    flush(false, true);

    String mergedName = newSegmentName();
    for (CodecReader leaf : readers) {
      numDocs += leaf.numDocs();
      validateMergeReader(leaf);
    }

    // Best-effort up front check:
    testReserveDocs(numDocs);

    final IOContext context = new IOContext(new MergeInfo(Math.toIntExact(numDocs), -1, false, UNBOUNDED_MAX_MERGE_SEGMENTS));

    // TODO: somehow we should fix this merge so it's
    // abortable so that IW.close(false) is able to stop it
    TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);

    // We set the min version to null for now, it will be set later by SegmentMerger
    SegmentInfo info = new SegmentInfo(directoryOrig, Version.LATEST, null, mergedName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort());

    SegmentMerger merger = new SegmentMerger(Arrays.asList(readers), info, infoStream, trackingDir, globalFieldNumberMap, context);

    if (!merger.shouldMerge()) {
      return docWriter.deleteQueue.getNextSequenceNumber();
    }

    // merge 'em
    merger.merge();

    SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L, -1L);

    info.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
    trackingDir.clearCreatedFiles();

    setDiagnostics(info, SOURCE_ADDINDEXES_READERS);

    final MergePolicy mergePolicy = config.getMergePolicy();
    boolean useCompoundFile;
    synchronized (this) { // Guard segmentInfos
      if (stopMerges) {
        // Safe: these files must exist
        deleteNewFiles(infoPerCommit.files());
        return docWriter.deleteQueue.getNextSequenceNumber();
      }
      ensureOpen();
      useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, infoPerCommit, this);
    }

    // Now create the compound file if needed
    if (useCompoundFile) {
      Collection<String> filesToDelete = infoPerCommit.files();
      TrackingDirectoryWrapper trackingCFSDir = new TrackingDirectoryWrapper(directory);
      // createCompoundFile tries to cleanup, but it might not always be able to...
      try {
        createCompoundFile(infoStream, trackingCFSDir, info, context);
      } finally {
        // delete new non cfs files directly: they were never
        // registered with IFD
        deleteNewFiles(filesToDelete);
      }
      info.setUseCompoundFile(true);
    }

    // Have codec write SegmentInfo. Must do this after
    // creating CFS so that 1) .si isn't slurped into CFS,
    // and 2) .si reflects useCompoundFile=true change
    // above:
    codec.segmentInfoFormat().write(trackingDir, info, context);

    info.addFiles(trackingDir.getCreatedFiles());

    // Register the new segment
    synchronized (this) {
      if (stopMerges) {
        // Safe: these files must exist
        deleteNewFiles(infoPerCommit.files());
        return docWriter.deleteQueue.getNextSequenceNumber();
      }
      ensureOpen();

      // Now reserve the docs, just before we update SIS:
      reserveDocs(numDocs);

      segmentInfos.add(infoPerCommit);
      seqNo = docWriter.deleteQueue.getNextSequenceNumber();
      checkpoint();
    }
  } catch (VirtualMachineError tragedy) {
    tragicEvent(tragedy, "addIndexes(CodecReader...)");
    // dead code but javac disagrees:
    seqNo = -1;
  }
  maybeMerge();

  return seqNo;
}
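
For context, the usual way for application code to reach this overload is to wrap the leaves of an existing reader as CodecReaders. The sketch below is illustrative only: the index paths, the StandardAnalyzer, and the two-directory setup are assumptions, not part of the Lucene source above.

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCodecReaderWrapper;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddIndexesSketch {
  public static void main(String[] args) throws Exception {
    try (Directory source = FSDirectory.open(Paths.get("/tmp/source-index")); // assumed existing index
         Directory target = FSDirectory.open(Paths.get("/tmp/target-index"));
         DirectoryReader sourceReader = DirectoryReader.open(source);
         IndexWriter writer = new IndexWriter(target, new IndexWriterConfig(new StandardAnalyzer()))) {

      // Wrap every leaf of the source index as a CodecReader; the readers
      // themselves are not closed by addIndexes.
      List<CodecReader> leaves = new ArrayList<>();
      for (LeafReaderContext ctx : sourceReader.leaves()) {
        leaves.add(SlowCodecReaderWrapper.wrap(ctx.reader()));
      }

      // All given readers are merged into one new segment in the target index.
      long seqNo = writer.addIndexes(leaves.toArray(new CodecReader[0]));
      writer.commit();
      System.out.println("addIndexes sequence number: " + seqNo);
    }
  }
}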
Use of org.apache.lucene.store.TrackingDirectoryWrapper in project lucene-solr by apache.
The class DocumentsWriterPerThread, method sealFlushedSegment:
/**
* Seals the {@link SegmentInfo} for the new flushed segment and persists
* the deleted documents {@link MutableBits}.
*/
void sealFlushedSegment(FlushedSegment flushedSegment, Sorter.DocMap sortMap) throws IOException {
  assert flushedSegment != null;

  SegmentCommitInfo newSegment = flushedSegment.segmentInfo;

  IndexWriter.setDiagnostics(newSegment.info, IndexWriter.SOURCE_FLUSH);

  IOContext context = new IOContext(new FlushInfo(newSegment.info.maxDoc(), newSegment.sizeInBytes()));

  boolean success = false;
  try {
    if (indexWriterConfig.getUseCompoundFile()) {
      Set<String> originalFiles = newSegment.info.files();
      // TODO: like addIndexes, we are relying on createCompoundFile to successfully cleanup...
      indexWriter.createCompoundFile(infoStream, new TrackingDirectoryWrapper(directory), newSegment.info, context);
      filesToDelete.addAll(originalFiles);
      newSegment.info.setUseCompoundFile(true);
    }

    // Have codec write SegmentInfo. Must do this after
    // creating CFS so that 1) .si isn't slurped into CFS,
    // and 2) .si reflects useCompoundFile=true change
    // above:
    codec.segmentInfoFormat().write(directory, newSegment.info, context);

    // Must write deleted docs after the CFS so we don't
    // slurp the del file into CFS:
    if (flushedSegment.liveDocs != null) {
      final int delCount = flushedSegment.delCount;
      assert delCount > 0;
      if (infoStream.isEnabled("DWPT")) {
        infoStream.message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.getDelGen());
      }

      // TODO: we should prune the segment if it's 100%
      // deleted... but merge will also catch it.

      // TODO: in the NRT case it'd be better to hand
      // this del vector over to the
      // shortly-to-be-opened SegmentReader and let it
      // carry the changes; there's no reason to use
      // filesystem as intermediary here.
      SegmentCommitInfo info = flushedSegment.segmentInfo;
      Codec codec = info.info.getCodec();

      final MutableBits bits;
      if (sortMap == null) {
        bits = flushedSegment.liveDocs;
      } else {
        bits = sortLiveDocs(flushedSegment.liveDocs, sortMap);
      }

      codec.liveDocsFormat().writeLiveDocs(bits, directory, info, delCount, context);
      newSegment.setDelCount(delCount);
      newSegment.advanceDelGen();
    }

    success = true;
  } finally {
    if (!success) {
      if (infoStream.isEnabled("DWPT")) {
        infoStream.message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.info.name);
      }
    }
  }
}
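
Both of the methods above rely on the same small contract: TrackingDirectoryWrapper records the names of files created through it, so the caller can copy them into a SegmentInfo (setFiles/addFiles) or delete exactly those files on failure. Below is a minimal, self-contained sketch of that contract; the directory path and file name are illustrative.

import java.nio.file.Paths;
import java.util.Set;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;

public class TrackingDirectorySketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/tracking-demo"))) {
      TrackingDirectoryWrapper tracking = new TrackingDirectoryWrapper(dir);

      // Any file created through the wrapper is remembered by name.
      try (IndexOutput out = tracking.createOutput("_demo.dat", IOContext.DEFAULT)) {
        out.writeInt(42);
      }

      Set<String> created = tracking.getCreatedFiles();
      System.out.println("files created through the wrapper: " + created);

      // Once the names have been copied somewhere durable (e.g. into
      // SegmentInfo.setFiles), the tracked set can be reset for the next phase.
      tracking.clearCreatedFiles();
    }
  }
}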
Use of org.apache.lucene.store.TrackingDirectoryWrapper in project lucene-solr by apache.
The class TestDoc, method merge:
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile) throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));
  SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
  SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context);

  final Codec codec = Codec.getDefault();
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2), si, InfoStream.getDefault(), trackingDir, new FieldInfos.FieldNumbers(), context);

  MergeState mergeState = merger.merge();
  r1.close();
  r2.close();

  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));

  if (useCompoundFile) {
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for (String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }

  return new SegmentCommitInfo(si, 0, -1L, -1L, -1L);
}
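
The test drives SegmentMerger directly so it can inspect the tracked files and exercise the optional compound-file step. For contrast, here is a hedged sketch of reaching the same end state (two small segments merged into one) through the public API, where IndexWriter performs the merge itself; the path, analyzer, field name, and per-document commits are illustrative choices, not taken from the test.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ForceMergeSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/merge-demo"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (int i = 0; i < 2; i++) {
        Document doc = new Document();
        doc.add(new TextField("contents", "document " + i, Field.Store.YES));
        writer.addDocument(doc);
        writer.commit(); // commit per document so we start from two small segments
      }
      // Merge everything down to a single segment, as the test does by hand.
      writer.forceMerge(1);
      writer.commit();
    }
  }
}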
Use of org.apache.lucene.store.TrackingDirectoryWrapper in project lucene-solr by apache.
The class ReadersAndUpdates, method handleBinaryDVUpdates:
@SuppressWarnings("synthetic-access")
private void handleBinaryDVUpdates(FieldInfos infos, Map<String, BinaryDocValuesFieldUpdates> updates, TrackingDirectoryWrapper dir, DocValuesFormat dvFormat, final SegmentReader reader, Map<Integer, Set<String>> fieldFiles) throws IOException {
  for (Entry<String, BinaryDocValuesFieldUpdates> e : updates.entrySet()) {
    final String field = e.getKey();
    final BinaryDocValuesFieldUpdates fieldUpdates = e.getValue();

    final long nextDocValuesGen = info.getNextDocValuesGen();
    final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX);
    final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc();
    final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize));
    final FieldInfo fieldInfo = infos.fieldInfo(field);
    assert fieldInfo != null;
    fieldInfo.setDocValuesGen(nextDocValuesGen);
    final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo });

    // separately also track which files were created for this gen
    final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
    final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);

    try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
      // write the binary updates to a new gen'd docvalues file
      fieldsConsumer.addBinaryField(fieldInfo, new EmptyDocValuesProducer() {
        @Override
        public BinaryDocValues getBinary(FieldInfo fieldInfoIn) throws IOException {
          if (fieldInfoIn != fieldInfo) {
            throw new IllegalArgumentException("wrong fieldInfo");
          }
          final int maxDoc = reader.maxDoc();
          final BinaryDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator();
          updatesIter.reset();

          final BinaryDocValues currentValues = reader.getBinaryDocValues(field);

          // Merge sort of the original doc values with updated doc values:
          return new BinaryDocValues() {
            // merged docID
            private int docIDOut = -1;

            // docID from our original doc values
            private int docIDIn = -1;

            // docID from our updates
            private int updateDocID = -1;

            private BytesRef value;

            @Override
            public int docID() {
              return docIDOut;
            }

            @Override
            public int advance(int target) {
              throw new UnsupportedOperationException();
            }

            @Override
            public boolean advanceExact(int target) throws IOException {
              throw new UnsupportedOperationException();
            }

            @Override
            public long cost() {
              return currentValues.cost();
            }

            @Override
            public BytesRef binaryValue() {
              return value;
            }

            @Override
            public int nextDoc() throws IOException {
              if (docIDIn == docIDOut) {
                if (currentValues == null) {
                  docIDIn = NO_MORE_DOCS;
                } else {
                  docIDIn = currentValues.nextDoc();
                }
              }
              if (updateDocID == docIDOut) {
                updateDocID = updatesIter.nextDoc();
              }
              if (docIDIn < updateDocID) {
                // no update to this doc
                docIDOut = docIDIn;
                value = currentValues.binaryValue();
              } else {
                docIDOut = updateDocID;
                if (docIDOut != NO_MORE_DOCS) {
                  value = updatesIter.value();
                }
              }
              return docIDOut;
            }
          };
        }
      });
    }

    info.advanceDocValuesGen();
    assert !fieldFiles.containsKey(fieldInfo.number);
    fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles());
  }
}
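
The anonymous BinaryDocValues above performs a two-cursor merge of the segment's existing values with the buffered updates, in docID order, letting an update shadow the original value when both sides carry the same docID. The following plain-Java sketch mirrors that logic with maps instead of Lucene iterators so it can run standalone; the class name and sample data are illustrative.

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.TreeMap;

public class DocValuesUpdateMergeSketch {
  static final int NO_MORE_DOCS = Integer.MAX_VALUE;

  // Merge two docID-sorted value streams; an update wins over the original
  // for the same docID, and can also introduce a value for a new docID.
  static Map<Integer, String> merge(TreeMap<Integer, String> original, TreeMap<Integer, String> updates) {
    Map<Integer, String> merged = new LinkedHashMap<>();
    Iterator<Map.Entry<Integer, String>> origIt = original.entrySet().iterator();
    Iterator<Map.Entry<Integer, String>> updIt = updates.entrySet().iterator();
    Map.Entry<Integer, String> orig = origIt.hasNext() ? origIt.next() : null;
    Map.Entry<Integer, String> upd = updIt.hasNext() ? updIt.next() : null;
    while (orig != null || upd != null) {
      int origDoc = orig == null ? NO_MORE_DOCS : orig.getKey();
      int updDoc = upd == null ? NO_MORE_DOCS : upd.getKey();
      if (origDoc < updDoc) {
        // No update for this doc: keep the original value.
        merged.put(origDoc, orig.getValue());
        orig = origIt.hasNext() ? origIt.next() : null;
      } else {
        // An update exists at or before the next original doc: the update wins.
        merged.put(updDoc, upd.getValue());
        if (origDoc == updDoc) {
          // Same doc on both sides: the original value is shadowed, skip it.
          orig = origIt.hasNext() ? origIt.next() : null;
        }
        upd = updIt.hasNext() ? updIt.next() : null;
      }
    }
    return merged;
  }

  public static void main(String[] args) {
    TreeMap<Integer, String> original = new TreeMap<>();
    original.put(0, "a");
    original.put(1, "b");
    original.put(3, "d");
    TreeMap<Integer, String> updates = new TreeMap<>();
    updates.put(1, "B");
    updates.put(2, "C");
    // Prints {0=a, 1=B, 2=C, 3=d}: doc 1 is overridden, doc 2 is newly added.
    System.out.println(merge(original, updates));
  }
}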
Use of org.apache.lucene.store.TrackingDirectoryWrapper in project lucene-solr by apache.
The class ReadersAndUpdates, method writeFieldInfosGen:
private Set<String> writeFieldInfosGen(FieldInfos fieldInfos, Directory dir, DocValuesFormat dvFormat, FieldInfosFormat infosFormat) throws IOException {
  final long nextFieldInfosGen = info.getNextFieldInfosGen();
  final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
  // we write approximately that many bytes (based on Lucene46DVF):
  //   HEADER + FOOTER: 40
  //   90 bytes per-field (over estimating long name and attributes map)
  final long estInfosSize = 40 + 90 * fieldInfos.size();
  final IOContext infosContext = new IOContext(new FlushInfo(info.info.maxDoc(), estInfosSize));

  // separately also track which files were created for this gen
  final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
  infosFormat.write(trackingDir, info.info, segmentSuffix, fieldInfos, infosContext);
  info.advanceFieldInfosGen();
  return trackingDir.getCreatedFiles();
}
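
As in handleBinaryDVUpdates, the generation number is turned into a per-generation file suffix by encoding it in base 36 (Character.MAX_RADIX). A tiny sketch of that encoding; the example file name mentioned in the comment is an assumption for illustration.

public class GenSuffixSketch {
  public static void main(String[] args) {
    long[] gens = { 1, 10, 47, 1234 };
    for (long gen : gens) {
      // Same encoding as Long.toString(nextFieldInfosGen, Character.MAX_RADIX) above;
      // e.g. generation 47 becomes "1b", which would appear in a gen'd file name
      // such as "_0_1b.fnm" (file name shown here only as an illustration).
      String segmentSuffix = Long.toString(gen, Character.MAX_RADIX);
      System.out.println("gen " + gen + " -> suffix \"" + segmentSuffix + "\"");
    }
  }
}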