Example 1 with FlushInfo

the class DocumentsWriterPerThread method sealFlushedSegment.

  /**
   * Seals the {@link SegmentInfo} for the new flushed segment and persists
   * the deleted documents {@link MutableBits}.
   */
void sealFlushedSegment(FlushedSegment flushedSegment, Sorter.DocMap sortMap) throws IOException {
    // NOTE(review): this excerpt has lost several argument expressions and
    // closing braces; the comments below describe only what is still visible.
    assert flushedSegment != null;
    SegmentCommitInfo newSegment = flushedSegment.segmentInfo;
    IndexWriter.setDiagnostics(, IndexWriter.SOURCE_FLUSH);
    // Flush-typed IO context whose FlushInfo carries the new segment's size in bytes.
    IOContext context = new IOContext(new FlushInfo(, newSegment.sizeInBytes()));
    // Becomes true only after every write below has completed; the finally
    // block logs a failure message while it is still false.
    boolean success = false;
    try {
        // Compound-file packing happens only when the writer config asks for it.
        if (indexWriterConfig.getUseCompoundFile()) {
            Set<String> originalFiles =;
            // TODO: like addIndexes, we are relying on createCompoundFile to successfully cleanup...
            indexWriter.createCompoundFile(infoStream, new TrackingDirectoryWrapper(directory),, context);
        // Have codec write SegmentInfo.  Must do this after
        // creating CFS so that 1) .si isn't slurped into CFS,
        // and 2) .si reflects useCompoundFile=true change
        // above:
        codec.segmentInfoFormat().write(directory,, context);
        // slurp the del file into CFS:
        // A non-null liveDocs means the segment was flushed with deletions;
        // the delete count is asserted to be positive in that case.
        if (flushedSegment.liveDocs != null) {
            final int delCount = flushedSegment.delCount;
            assert delCount > 0;
            if (infoStream.isEnabled("DWPT")) {
                infoStream.message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.getDelGen());
            // TODO: we should prune the segment if it's 100%
            // deleted... but merge will also catch it.
            // TODO: in the NRT case it'd be better to hand
            // this del vector over to the
            // shortly-to-be-opened SegmentReader and let it
            // carry the changes; there's no reason to use
            // filesystem as intermediary here.
            SegmentCommitInfo info = flushedSegment.segmentInfo;
            Codec codec =;
            // Without a sort map the live docs are written as-is; otherwise they
            // are first passed through sortLiveDocs together with the sort map.
            final MutableBits bits;
            if (sortMap == null) {
                bits = flushedSegment.liveDocs;
            } else {
                bits = sortLiveDocs(flushedSegment.liveDocs, sortMap);
            codec.liveDocsFormat().writeLiveDocs(bits, directory, info, delCount, context);
        success = true;
    } finally {
        if (!success) {
            if (infoStream.isEnabled("DWPT")) {
                infoStream.message("DWPT", "hit exception creating compound file for newly flushed segment " +;
Example 2 with FlushInfo

the class DocumentsWriterPerThread method flush.

/**
 * Flush all pending docs to a new segment.
 *
 * <p>Requires at least one buffered document and an empty delete slice (both
 * asserted). When {@code aborted} is set the method logs and returns
 * {@code null}. Otherwise it flushes through {@code consumer}, wraps the result
 * in a {@link FlushedSegment}, seals it with {@code sealFlushedSegment} and
 * returns it.
 *
 * @return the flushed segment, or {@code null} when {@code aborted} is set
 * @throws AbortingException wrapping any {@link Throwable} raised inside the try block
 */
FlushedSegment flush() throws IOException, AbortingException {
    assert numDocsInRAM > 0;
    assert deleteSlice.isEmpty() : "all deletes must be applied in prepareFlush";
    // Write state handed to the consumer; its IO context is flush-typed and
    // built from the in-RAM doc count and the bytes currently used.
    final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(), pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
    // RAM usage in MB before flushing; reported in the "flushed:" log line below.
    final double startMBUsed = bytesUsed() / 1024. / 1024.;
    // NOTE(review): the start of the following comment was lost in this
    // excerpt; the block below handles buffered delete-by-docID entries.
    // doc, eg if analyzer has some problem w/ the text):
    if (pendingUpdates.docIDs.size() > 0) {
        flushState.liveDocs = codec.liveDocsFormat().newLiveDocs(numDocsInRAM);
        for (int delDocID : pendingUpdates.docIDs) {
        flushState.delCountOnFlush = pendingUpdates.docIDs.size();
        // Subtract the per-docID cost of the buffered deletes from the
        // buffered-updates byte counter.
        pendingUpdates.bytesUsed.addAndGet(-pendingUpdates.docIDs.size() * BufferedUpdates.BYTES_PER_DEL_DOCID);
    if (aborted) {
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "flush: skip because aborting is set");
        return null;
    // Start timestamp for the "flush time" log line.
    long t0 = System.nanoTime();
    if (infoStream.isEnabled("DWPT")) {
        infoStream.message("DWPT", "flush postings as segment " + + " numDocs=" + numDocsInRAM);
    final Sorter.DocMap sortMap;
    try {
        sortMap = consumer.flush(flushState);
        // Snapshot the files the directory reports as created into the segment info.
        segmentInfo.setFiles(new HashSet<>(directory.getCreatedFiles()));
        final SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L, -1L);
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : flushState.delCountOnFlush) + " deleted docs");
            infoStream.message("DWPT", "new segment has " + (flushState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " + (flushState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " + (flushState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " + (flushState.fieldInfos.hasFreq() ? "freqs" : "no freqs"));
            infoStream.message("DWPT", "flushedFiles=" + segmentInfoPerCommit.files());
            infoStream.message("DWPT", "flushed codec=" + codec);
        // pendingUpdates is carried into the FlushedSegment only when it holds
        // queries, numeric updates or binary updates; otherwise null is passed.
        final BufferedUpdates segmentDeletes;
        if (pendingUpdates.queries.isEmpty() && pendingUpdates.numericUpdates.isEmpty() && pendingUpdates.binaryUpdates.isEmpty()) {
            segmentDeletes = null;
        } else {
            segmentDeletes = pendingUpdates;
        if (infoStream.isEnabled("DWPT")) {
            final double newSegmentSize = segmentInfoPerCommit.sizeInBytes() / 1024. / 1024.;
            infoStream.message("DWPT", "flushed: segment=" + + " ramUsed=" + nf.format(startMBUsed) + " MB" + " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + " docs/MB=" + nf.format(flushState.segmentInfo.maxDoc() / newSegmentSize));
        assert segmentInfo != null;
        FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos, segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush);
        sealFlushedSegment(fs, sortMap);
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "flush time " + ((System.nanoTime() - t0) / 1000000.0) + " msec");
        return fs;
    } catch (Throwable th) {
        // Every failure inside the try block is rethrown as an AbortingException.
        throw AbortingException.wrap(th);
Example 3 with FlushInfo

the class TermVectorsConsumer method initTermVectorsWriter.

/**
 * Creates the term vectors writer on first use; does nothing when
 * {@code writer} is already set.
 *
 * @throws IOException declared by this method; presumably raised by the codec
 *     when creating the writer (confirm against the codec API)
 */
void initTermVectorsWriter() throws IOException {
    if (writer == null) {
        // Flush-typed IO context built from docWriter's in-RAM doc count and bytes used.
        IOContext context = new IOContext(new FlushInfo(docWriter.getNumDocsInRAM(), docWriter.bytesUsed()));
        // NOTE(review): the first argument of vectorsWriter(...) is missing in this excerpt.
        writer = docWriter.codec.termVectorsFormat().vectorsWriter(, docWriter.getSegmentInfo(), context);
        // A freshly created writer starts counting from document 0.
        lastDocID = 0;
Example 4 with FlushInfo

the class IndexWriter method addIndexes.

   * Adds all segments from an array of indexes into this index.
   * <p>This may be used to parallelize batch indexing. A large document
   * collection can be broken into sub-collections. Each sub-collection can be
   * indexed in parallel, on a different thread, process or machine. The
   * complete index can then be created by merging sub-collection indexes
   * with this method.
   * <p>
   * <b>NOTE:</b> this method acquires the write lock in
   * each directory, to ensure that no {@code IndexWriter}
   * is currently open or tries to open while this is
   * running.
   * <p>This method is transactional in how Exceptions are
   * handled: it does not commit a new segments_N file until
   * all indexes are added.  This means if an Exception
   * occurs (for example disk full), then either no indexes
   * will have been added or they all will have been.
   * <p>Note that this requires temporary free space in the
   * {@link Directory} up to 2X the sum of all input indexes
   * (including the starting index). If readers/searchers
   * are open against the starting index, then temporary
   * free space required will be higher by the size of the
   * starting index (see {@link #forceMerge(int)} for details).
   * <p>This requires this index not be among those to be added.
   * <p>All added indexes must have been created by the same
   * Lucene version as this index.
   * @return The <a href="#sequence_number">sequence number</a>
   * for this operation
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @throws IllegalArgumentException if addIndexes would cause
   *   the index to exceed {@link #MAX_DOCS}, or if the incoming
   *   index sort does not match this index's index sort
public long addIndexes(Directory... dirs) throws IOException {
    // NOTE(review): this excerpt has lost several expressions, loop bodies and
    // closing braces; the comments below describe only what is still visible.
    // Locks obtained for the source directories via acquireWriteLocks.
    List<Lock> locks = acquireWriteLocks(dirs);
    Sort indexSort = config.getIndexSort();
    // Set to true once the whole try block has completed; the outer finally
    // branches on it.
    boolean successTop = false;
    long seqNo;
    try {
        if (infoStream.isEnabled("IW")) {
            infoStream.message("IW", "flush at addIndexes(Directory...)");
        flush(false, true);
        List<SegmentCommitInfo> infos = new ArrayList<>();
        // long so we can detect int overflow:
        long totalMaxDoc = 0;
        List<SegmentInfos> commits = new ArrayList<>(dirs.length);
        for (Directory dir : dirs) {
            if (infoStream.isEnabled("IW")) {
                infoStream.message("IW", "addIndexes: process directory " + dir);
            // read infos from dir
            SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
            // Reject a source whose index-created major version differs from this index's.
            if (segmentInfos.getIndexCreatedVersionMajor() != sis.getIndexCreatedVersionMajor()) {
                throw new IllegalArgumentException("Cannot use addIndexes(Directory) with indexes that have been created " + "by a different Lucene version. The current index was generated by Lucene " + segmentInfos.getIndexCreatedVersionMajor() + " while one of the directories contains an index that was generated with Lucene " + sis.getIndexCreatedVersionMajor());
            totalMaxDoc += sis.totalMaxDoc();
        // Best-effort up front check:
        boolean success = false;
        try {
            for (SegmentInfos sis : commits) {
                for (SegmentCommitInfo info : sis) {
                    assert !infos.contains(info) : "dup info dir=" + + " name=" +;
                    Sort segmentIndexSort =;
                    // When both this index and the incoming segment declare a sort, they must be equal.
                    if (indexSort != null && segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
                        // TODO: we could make this smarter, e.g. if the incoming indexSort is congruent with our sort ("starts with") then it's OK
                        throw new IllegalArgumentException("cannot change index sort from " + segmentIndexSort + " to " + indexSort);
                    String newSegName = newSegmentName();
                    if (infoStream.isEnabled("IW")) {
                        infoStream.message("IW", "addIndexes: process segment origName=" + + " newName=" + newSegName + " info=" + info);
                    // Flush-typed IO context sized with the incoming segment's byte size.
                    IOContext context = new IOContext(new FlushInfo(, info.sizeInBytes()));
                    FieldInfos fis = readFieldInfos(info);
                    for (FieldInfo fi : fis) {
                        // This will throw exceptions if any of the incoming fields have an illegal schema change:
                        globalFieldNumberMap.addOrGet(, fi.number, fi.getDocValuesType(), fi.getPointDimensionCount(), fi.getPointNumBytes());
                    // The segment is copied under its new name and collected in infos.
                    infos.add(copySegmentAsIs(info, newSegName, context));
            success = true;
        } finally {
            if (!success) {
                for (SegmentCommitInfo sipc : infos) {
                    // Safe: these files must exist
        // The sequence number is taken while holding this writer's monitor.
        synchronized (this) {
            success = false;
            try {
                // Now reserve the docs, just before we update SIS:
                seqNo = docWriter.deleteQueue.getNextSequenceNumber();
                success = true;
            } finally {
                if (!success) {
                    for (SegmentCommitInfo sipc : infos) {
                        // Safe: these files must exist
        successTop = true;
    } catch (VirtualMachineError tragedy) {
        tragicEvent(tragedy, "addIndexes(Directory...)");
        // dead code but javac disagrees:
        seqNo = -1;
    } finally {
        // NOTE(review): both branches are empty in this excerpt; their bodies were lost.
        if (successTop) {
        } else {
    return seqNo;
Example 5 with FlushInfo

Use of FlushInfo in project lucene-solr by apache.

the class ReadersAndUpdates method handleNumericDVUpdates.

/**
 * For every entry in {@code updates}, writes the field's numeric doc-values
 * updates through a {@link DocValuesConsumer} obtained from {@code dvFormat},
 * then records the files created for that field in {@code fieldFiles}, keyed by
 * field number.
 *
 * <p>NOTE(review): this excerpt has lost several expressions and closing
 * braces; the comments describe only what is still visible.
 *
 * @param infos field infos used to look up each updated field (asserted non-null per field)
 * @param updates numeric doc-values updates keyed by field name
 * @param dir directory wrapped in a {@link TrackingDirectoryWrapper} for each field
 * @param dvFormat format that supplies the doc-values consumer
 * @param reader source of {@code maxDoc()} and the field's current numeric doc values
 * @param fieldFiles output map from field number to the files created for it
 * @throws IOException declared by this method and by the iterator methods it defines
 */
private void handleNumericDVUpdates(FieldInfos infos, Map<String, NumericDocValuesFieldUpdates> updates, Directory dir, DocValuesFormat dvFormat, final SegmentReader reader, Map<Integer, Set<String>> fieldFiles) throws IOException {
    for (Entry<String, NumericDocValuesFieldUpdates> e : updates.entrySet()) {
        final String field = e.getKey();
        final NumericDocValuesFieldUpdates fieldUpdates = e.getValue();
        final long nextDocValuesGen = info.getNextDocValuesGen();
        // The generation rendered in radix Character.MAX_RADIX is used as the segment suffix.
        final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX);
        // Estimated size of the updates; passed to FlushInfo for the IO context.
        final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() *;
        final IOContext updatesContext = new IOContext(new FlushInfo(, estUpdatesSize));
        final FieldInfo fieldInfo = infos.fieldInfo(field);
        assert fieldInfo != null;
        // A FieldInfos holding only the field being updated.
        final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo });
        // separately also track which files were created for this gen
        final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
        final SegmentWriteState state = new SegmentWriteState(null, trackingDir,, fieldInfos, null, updatesContext, segmentSuffix);
        try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
            // write the numeric updates to a new gen'd docvalues file
            fieldsConsumer.addNumericField(fieldInfo, new EmptyDocValuesProducer() {

                public NumericDocValues getNumeric(FieldInfo fieldInfoIn) throws IOException {
                    // Only the exact FieldInfo instance being written is accepted (identity comparison).
                    if (fieldInfoIn != fieldInfo) {
                        throw new IllegalArgumentException("wrong fieldInfo");
                    final int maxDoc = reader.maxDoc();
                    final NumericDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator();
                    final NumericDocValues currentValues = reader.getNumericDocValues(field);
                    // Merge sort of the original doc values with updated doc values:
                    return new NumericDocValues() {

                        // merged docID
                        private int docIDOut = -1;

                        // docID from our original doc values
                        private int docIDIn = -1;

                        // docID from our updates
                        private int updateDocID = -1;

                        private long value;

                        public int docID() {
                            return docIDOut;

                        // Random access is not supported; callers must iterate with nextDoc().
                        public int advance(int target) {
                            throw new UnsupportedOperationException();

                        public boolean advanceExact(int target) throws IOException {
                            throw new UnsupportedOperationException();

                        public long cost() {
                            // TODO
                            return 0;

                        public long longValue() {
                            return value;

                        public int nextDoc() throws IOException {
                            // Advance whichever input(s) supplied the previously returned doc.
                            if (docIDIn == docIDOut) {
                                if (currentValues == null) {
                                    docIDIn = NO_MORE_DOCS;
                                } else {
                                    docIDIn = currentValues.nextDoc();
                            if (updateDocID == docIDOut) {
                                updateDocID = updatesIter.nextDoc();
                            // The original value is used only when its doc comes strictly first;
                            // on a tie the update's value is taken instead.
                            if (docIDIn < updateDocID) {
                                // no update to this doc
                                docIDOut = docIDIn;
                                // presumably currentValues is non-null here because a null source
                                // pins docIDIn at NO_MORE_DOCS — TODO confirm NO_MORE_DOCS is the max docID
                                value = currentValues.longValue();
                            } else {
                                docIDOut = updateDocID;
                                if (docIDOut != NO_MORE_DOCS) {
                                    value = updatesIter.value();
                            return docIDOut;
        // Each field number is expected to be recorded at most once.
        assert !fieldFiles.containsKey(fieldInfo.number);
        fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles());
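Also used : DocValuesConsumer (org.apache.lucene.codecs.DocValuesConsumer), IOException (java.io.IOException), TrackingDirectoryWrapper (org.apache.lucene.store.TrackingDirectoryWrapper), IOContext (org.apache.lucene.store.IOContext), FlushInfo (org.apache.lucene.store.FlushInfo)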


