Use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.
Class DocumentsWriterPerThread, method sealFlushedSegment.
/**
* Seals the {@link SegmentInfo} for the new flushed segment and persists
* the deleted documents {@link MutableBits}.
*/
void sealFlushedSegment(FlushedSegment flushedSegment, Sorter.DocMap sortMap) throws IOException {
  assert flushedSegment != null;
  SegmentCommitInfo newSegment = flushedSegment.segmentInfo;
  IndexWriter.setDiagnostics(newSegment.info, IndexWriter.SOURCE_FLUSH);
  IOContext context = new IOContext(new FlushInfo(newSegment.info.maxDoc(), newSegment.sizeInBytes()));
  boolean success = false;
  try {
    if (indexWriterConfig.getUseCompoundFile()) {
      Set<String> originalFiles = newSegment.info.files();
      // TODO: like addIndexes, we are relying on createCompoundFile to successfully cleanup...
      indexWriter.createCompoundFile(infoStream, new TrackingDirectoryWrapper(directory), newSegment.info, context);
      filesToDelete.addAll(originalFiles);
      newSegment.info.setUseCompoundFile(true);
    }

    // Have codec write SegmentInfo. Must do this after
    // creating CFS so that 1) .si isn't slurped into CFS,
    // and 2) .si reflects useCompoundFile=true change
    // above:
    codec.segmentInfoFormat().write(directory, newSegment.info, context);

    // Must write deleted docs after the CFS so we don't
    // slurp the del file into CFS:
    if (flushedSegment.liveDocs != null) {
      final int delCount = flushedSegment.delCount;
      assert delCount > 0;
      if (infoStream.isEnabled("DWPT")) {
        infoStream.message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.getDelGen());
      }

      // TODO: we should prune the segment if it's 100%
      // deleted... but merge will also catch it.

      // TODO: in the NRT case it'd be better to hand
      // this del vector over to the
      // shortly-to-be-opened SegmentReader and let it
      // carry the changes; there's no reason to use
      // filesystem as intermediary here.
      SegmentCommitInfo info = flushedSegment.segmentInfo;
      Codec codec = info.info.getCodec();
      final MutableBits bits;
      if (sortMap == null) {
        bits = flushedSegment.liveDocs;
      } else {
        bits = sortLiveDocs(flushedSegment.liveDocs, sortMap);
      }
      codec.liveDocsFormat().writeLiveDocs(bits, directory, info, delCount, context);
      newSegment.setDelCount(delCount);
      newSegment.advanceDelGen();
    }

    success = true;
  } finally {
    if (!success) {
      if (infoStream.isEnabled("DWPT")) {
        infoStream.message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.info.name);
      }
    }
  }
}
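Every snippet in this listing follows the same pattern: the expected document count and an estimated byte size are packed into a FlushInfo, which is wrapped in an IOContext so the Directory implementation can tune how the new files are written. Below is a minimal, self-contained sketch of that pattern; the index path and output file name are hypothetical and not taken from the code above.

import java.nio.file.Paths;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

public class FlushInfoSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/flushinfo-sketch"))) {
      // Hint: roughly 1000 docs and about 1 MB expected for this flush.
      FlushInfo flushInfo = new FlushInfo(1000, 1_048_576L);
      IOContext context = new IOContext(flushInfo);
      try (IndexOutput out = dir.createOutput("_sketch.dat", context)) {
        out.writeLong(42L); // the hint does not change what is written, only how the directory handles it
      }
    }
  }
}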
Use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.
Class DocumentsWriterPerThread, method flush.
/** Flush all pending docs to a new segment */
FlushedSegment flush() throws IOException, AbortingException {
  assert numDocsInRAM > 0;
  assert deleteSlice.isEmpty() : "all deletes must be applied in prepareFlush";
  segmentInfo.setMaxDoc(numDocsInRAM);
  final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(), pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
  final double startMBUsed = bytesUsed() / 1024. / 1024.;

  // Apply delete-by-docID now (delete-by-docID only
  // happens when an exception is hit processing that
  // doc, eg if analyzer has some problem w/ the text):
  if (pendingUpdates.docIDs.size() > 0) {
    flushState.liveDocs = codec.liveDocsFormat().newLiveDocs(numDocsInRAM);
    for (int delDocID : pendingUpdates.docIDs) {
      flushState.liveDocs.clear(delDocID);
    }
    flushState.delCountOnFlush = pendingUpdates.docIDs.size();
    pendingUpdates.bytesUsed.addAndGet(-pendingUpdates.docIDs.size() * BufferedUpdates.BYTES_PER_DEL_DOCID);
    pendingUpdates.docIDs.clear();
  }

  if (aborted) {
    if (infoStream.isEnabled("DWPT")) {
      infoStream.message("DWPT", "flush: skip because aborting is set");
    }
    return null;
  }

  long t0 = System.nanoTime();

  if (infoStream.isEnabled("DWPT")) {
    infoStream.message("DWPT", "flush postings as segment " + flushState.segmentInfo.name + " numDocs=" + numDocsInRAM);
  }

  final Sorter.DocMap sortMap;
  try {
    sortMap = consumer.flush(flushState);
    pendingUpdates.terms.clear();
    segmentInfo.setFiles(new HashSet<>(directory.getCreatedFiles()));

    final SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L, -1L);
    if (infoStream.isEnabled("DWPT")) {
      infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : flushState.delCountOnFlush) + " deleted docs");
      infoStream.message("DWPT", "new segment has " + (flushState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " + (flushState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " + (flushState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " + (flushState.fieldInfos.hasFreq() ? "freqs" : "no freqs"));
      infoStream.message("DWPT", "flushedFiles=" + segmentInfoPerCommit.files());
      infoStream.message("DWPT", "flushed codec=" + codec);
    }

    final BufferedUpdates segmentDeletes;
    if (pendingUpdates.queries.isEmpty() && pendingUpdates.numericUpdates.isEmpty() && pendingUpdates.binaryUpdates.isEmpty()) {
      pendingUpdates.clear();
      segmentDeletes = null;
    } else {
      segmentDeletes = pendingUpdates;
    }

    if (infoStream.isEnabled("DWPT")) {
      final double newSegmentSize = segmentInfoPerCommit.sizeInBytes() / 1024. / 1024.;
      infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name + " ramUsed=" + nf.format(startMBUsed) + " MB" + " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + " docs/MB=" + nf.format(flushState.segmentInfo.maxDoc() / newSegmentSize));
    }

    assert segmentInfo != null;

    FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos, segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush);
    sealFlushedSegment(fs, sortMap);
    if (infoStream.isEnabled("DWPT")) {
      infoStream.message("DWPT", "flush time " + ((System.nanoTime() - t0) / 1000000.0) + " msec");
    }
    return fs;
  } catch (Throwable th) {
    abort();
    throw AbortingException.wrap(th);
  }
}
Use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.
Class TermVectorsConsumer, method initTermVectorsWriter.
void initTermVectorsWriter() throws IOException {
  if (writer == null) {
    IOContext context = new IOContext(new FlushInfo(docWriter.getNumDocsInRAM(), docWriter.bytesUsed()));
    writer = docWriter.codec.termVectorsFormat().vectorsWriter(docWriter.directory, docWriter.getSegmentInfo(), context);
    lastDocID = 0;
  }
}
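A term vectors writer is only created when at least one field actually stores term vectors, so initTermVectorsWriter above is exercised only for such fields. A minimal sketch of indexing a field with term vectors enabled; the field name and index path are hypothetical.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class TermVectorsSketch {
  public static void main(String[] args) throws Exception {
    FieldType withVectors = new FieldType(TextField.TYPE_STORED);
    withVectors.setStoreTermVectors(true); // this is what makes the flush create a term vectors writer
    withVectors.freeze();

    try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/tv-sketch"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new Field("body", "hello term vectors", withVectors));
      writer.addDocument(doc);
      writer.commit();
    }
  }
}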
Use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.
Class IndexWriter, method addIndexes.
/**
* Adds all segments from an array of indexes into this index.
*
* <p>This may be used to parallelize batch indexing. A large document
* collection can be broken into sub-collections. Each sub-collection can be
* indexed in parallel, on a different thread, process or machine. The
* complete index can then be created by merging sub-collection indexes
* with this method.
*
* <p>
* <b>NOTE:</b> this method acquires the write lock in
* each directory, to ensure that no {@code IndexWriter}
* is currently open or tries to open while this is
* running.
*
* <p>This method is transactional in how Exceptions are
* handled: it does not commit a new segments_N file until
* all indexes are added. This means if an Exception
* occurs (for example disk full), then either no indexes
* will have been added or they all will have been.
*
* <p>Note that this requires temporary free space in the
* {@link Directory} up to 2X the sum of all input indexes
* (including the starting index). If readers/searchers
* are open against the starting index, then temporary
* free space required will be higher by the size of the
* starting index (see {@link #forceMerge(int)} for details).
*
* <p>This requires this index not be among those to be added.
*
* <p>All added indexes must have been created by the same
* Lucene version as this index.
*
* @return The <a href="#sequence_number">sequence number</a>
* for this operation
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @throws IllegalArgumentException if addIndexes would cause
* the index to exceed {@link #MAX_DOCS}, or if the incoming
* index sort does not match this index's index sort
*/
public long addIndexes(Directory... dirs) throws IOException {
  ensureOpen();

  noDupDirs(dirs);

  List<Lock> locks = acquireWriteLocks(dirs);

  Sort indexSort = config.getIndexSort();

  boolean successTop = false;

  long seqNo;

  try {
    if (infoStream.isEnabled("IW")) {
      infoStream.message("IW", "flush at addIndexes(Directory...)");
    }

    flush(false, true);

    List<SegmentCommitInfo> infos = new ArrayList<>();

    // long so we can detect int overflow:
    long totalMaxDoc = 0;
    List<SegmentInfos> commits = new ArrayList<>(dirs.length);
    for (Directory dir : dirs) {
      if (infoStream.isEnabled("IW")) {
        infoStream.message("IW", "addIndexes: process directory " + dir);
      }

      // read infos from dir
      SegmentInfos sis = SegmentInfos.readLatestCommit(dir);

      if (segmentInfos.getIndexCreatedVersionMajor() != sis.getIndexCreatedVersionMajor()) {
        throw new IllegalArgumentException("Cannot use addIndexes(Directory) with indexes that have been created " + "by a different Lucene version. The current index was generated by Lucene " + segmentInfos.getIndexCreatedVersionMajor() + " while one of the directories contains an index that was generated with Lucene " + sis.getIndexCreatedVersionMajor());
      }
      totalMaxDoc += sis.totalMaxDoc();
      commits.add(sis);
    }

    // Best-effort up front check:
    testReserveDocs(totalMaxDoc);

    boolean success = false;
    try {
      for (SegmentInfos sis : commits) {
        for (SegmentCommitInfo info : sis) {
          assert !infos.contains(info) : "dup info dir=" + info.info.dir + " name=" + info.info.name;

          Sort segmentIndexSort = info.info.getIndexSort();

          if (indexSort != null && segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
            // TODO: we could make this smarter, e.g. if the incoming indexSort is congruent with our sort ("starts with") then it's OK
            throw new IllegalArgumentException("cannot change index sort from " + segmentIndexSort + " to " + indexSort);
          }

          String newSegName = newSegmentName();

          if (infoStream.isEnabled("IW")) {
            infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info);
          }

          IOContext context = new IOContext(new FlushInfo(info.info.maxDoc(), info.sizeInBytes()));

          FieldInfos fis = readFieldInfos(info);
          for (FieldInfo fi : fis) {
            // This will throw exceptions if any of the incoming fields have an illegal schema change:
            globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType(), fi.getPointDimensionCount(), fi.getPointNumBytes());
          }
          infos.add(copySegmentAsIs(info, newSegName, context));
        }
      }
      success = true;
    } finally {
      if (!success) {
        for (SegmentCommitInfo sipc : infos) {
          // Safe: these files must exist
          deleteNewFiles(sipc.files());
        }
      }
    }

    synchronized (this) {
      success = false;
      try {
        ensureOpen();

        // Now reserve the docs, just before we update SIS:
        reserveDocs(totalMaxDoc);

        seqNo = docWriter.deleteQueue.getNextSequenceNumber();

        success = true;
      } finally {
        if (!success) {
          for (SegmentCommitInfo sipc : infos) {
            // Safe: these files must exist
            deleteNewFiles(sipc.files());
          }
        }
      }
      segmentInfos.addAll(infos);
      checkpoint();
    }

    successTop = true;

  } catch (VirtualMachineError tragedy) {
    tragicEvent(tragedy, "addIndexes(Directory...)");
    // dead code but javac disagrees:
    seqNo = -1;
  } finally {
    if (successTop) {
      IOUtils.close(locks);
    } else {
      IOUtils.closeWhileHandlingException(locks);
    }
  }
  maybeMerge();

  return seqNo;
}
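As the Javadoc above describes, the typical use is to build sub-indexes in parallel and then merge them into one index with addIndexes. A minimal sketch of that calling pattern; the directory paths are hypothetical, and the sub-indexes are assumed to already exist and to have been created by the same major Lucene version.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddIndexesSketch {
  public static void main(String[] args) throws Exception {
    try (Directory mainDir = FSDirectory.open(Paths.get("/tmp/main-index"));
         Directory part1 = FSDirectory.open(Paths.get("/tmp/sub-index-1"));
         Directory part2 = FSDirectory.open(Paths.get("/tmp/sub-index-2"));
         IndexWriter writer = new IndexWriter(mainDir, new IndexWriterConfig(new StandardAnalyzer()))) {
      long seqNo = writer.addIndexes(part1, part2); // copies segments; documents are not re-analyzed
      System.out.println("addIndexes sequence number: " + seqNo);
      writer.commit();
    }
  }
}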
Use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.
Class ReadersAndUpdates, method handleNumericDVUpdates.
@SuppressWarnings("synthetic-access")
private void handleNumericDVUpdates(FieldInfos infos, Map<String, NumericDocValuesFieldUpdates> updates, Directory dir, DocValuesFormat dvFormat, final SegmentReader reader, Map<Integer, Set<String>> fieldFiles) throws IOException {
for (Entry<String, NumericDocValuesFieldUpdates> e : updates.entrySet()) {
final String field = e.getKey();
final NumericDocValuesFieldUpdates fieldUpdates = e.getValue();
final long nextDocValuesGen = info.getNextDocValuesGen();
final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX);
final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc();
final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize));
final FieldInfo fieldInfo = infos.fieldInfo(field);
assert fieldInfo != null;
fieldInfo.setDocValuesGen(nextDocValuesGen);
final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo });
// separately also track which files were created for this gen
final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
// write the numeric updates to a new gen'd docvalues file
fieldsConsumer.addNumericField(fieldInfo, new EmptyDocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo fieldInfoIn) throws IOException {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
final int maxDoc = reader.maxDoc();
final NumericDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator();
final NumericDocValues currentValues = reader.getNumericDocValues(field);
updatesIter.reset();
// Merge sort of the original doc values with updated doc values:
return new NumericDocValues() {
// merged docID
private int docIDOut = -1;
// docID from our original doc values
private int docIDIn = -1;
// docID from our updates
private int updateDocID = -1;
private long value;
@Override
public int docID() {
return docIDOut;
}
@Override
public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
// TODO
return 0;
}
@Override
public long longValue() {
return value;
}
@Override
public int nextDoc() throws IOException {
if (docIDIn == docIDOut) {
if (currentValues == null) {
docIDIn = NO_MORE_DOCS;
} else {
docIDIn = currentValues.nextDoc();
}
}
if (updateDocID == docIDOut) {
updateDocID = updatesIter.nextDoc();
}
if (docIDIn < updateDocID) {
// no update to this doc
docIDOut = docIDIn;
value = currentValues.longValue();
} else {
docIDOut = updateDocID;
if (docIDOut != NO_MORE_DOCS) {
value = updatesIter.value();
}
}
return docIDOut;
}
};
}
});
}
info.advanceDocValuesGen();
assert !fieldFiles.containsKey(fieldInfo.number);
fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles());
}
}
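handleNumericDVUpdates is the internal path that writes buffered numeric doc-values updates as a new doc-values generation; the FlushInfo here carries an estimate (ramBytesPerDoc() * maxDoc()) of how large that generation's file will be. From the public API, such updates originate from IndexWriter.updateNumericDocValue. A minimal sketch follows; the field names, term value, and index path are hypothetical.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

public class NumericDVUpdateSketch {
  public static void main(String[] args) throws Exception {
    try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/dv-sketch"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new StringField("id", "doc-42", Field.Store.NO));
      doc.add(new NumericDocValuesField("popularity", 1L));
      writer.addDocument(doc);
      writer.commit();

      // The update is buffered and then written as a new doc-values generation,
      // which is the work shown in handleNumericDVUpdates above.
      writer.updateNumericDocValue(new Term("id", "doc-42"), "popularity", 7L);
      writer.commit();
    }
  }
}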