
Example 1 with FlushInfo

use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.

In the class DocumentsWriterPerThread, the method sealFlushedSegment:

/**
   * Seals the {@link SegmentInfo} for the new flushed segment and persists
   * the deleted documents {@link MutableBits}.
   */
void sealFlushedSegment(FlushedSegment flushedSegment, Sorter.DocMap sortMap) throws IOException {
    assert flushedSegment != null;
    SegmentCommitInfo newSegment = flushedSegment.segmentInfo;
    IndexWriter.setDiagnostics(newSegment.info, IndexWriter.SOURCE_FLUSH);
    IOContext context = new IOContext(new FlushInfo(newSegment.info.maxDoc(), newSegment.sizeInBytes()));
    boolean success = false;
    try {
        if (indexWriterConfig.getUseCompoundFile()) {
            Set<String> originalFiles = newSegment.info.files();
            // TODO: like addIndexes, we are relying on createCompoundFile to successfully cleanup...
            indexWriter.createCompoundFile(infoStream, new TrackingDirectoryWrapper(directory), newSegment.info, context);
            filesToDelete.addAll(originalFiles);
            newSegment.info.setUseCompoundFile(true);
        }
        // Have codec write SegmentInfo.  Must do this after
        // creating CFS so that 1) .si isn't slurped into CFS,
        // and 2) .si reflects useCompoundFile=true change
        // above:
        codec.segmentInfoFormat().write(directory, newSegment.info, context);
        // Must write deleted docs after the CFS so we don't
        // slurp the del file into CFS:
        if (flushedSegment.liveDocs != null) {
            final int delCount = flushedSegment.delCount;
            assert delCount > 0;
            if (infoStream.isEnabled("DWPT")) {
                infoStream.message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.getDelGen());
            }
            // TODO: we should prune the segment if it's 100%
            // deleted... but merge will also catch it.
            // TODO: in the NRT case it'd be better to hand
            // this del vector over to the
            // shortly-to-be-opened SegmentReader and let it
            // carry the changes; there's no reason to use
            // filesystem as intermediary here.
            SegmentCommitInfo info = flushedSegment.segmentInfo;
            Codec codec = info.info.getCodec();
            final MutableBits bits;
            if (sortMap == null) {
                bits = flushedSegment.liveDocs;
            } else {
                bits = sortLiveDocs(flushedSegment.liveDocs, sortMap);
            }
            codec.liveDocsFormat().writeLiveDocs(bits, directory, info, delCount, context);
            newSegment.setDelCount(delCount);
            newSegment.advanceDelGen();
        }
        success = true;
    } finally {
        if (!success) {
            if (infoStream.isEnabled("DWPT")) {
                infoStream.message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.info.name);
            }
        }
    }
}
Also used : Codec(org.apache.lucene.codecs.Codec) MutableBits(org.apache.lucene.util.MutableBits) IOContext(org.apache.lucene.store.IOContext) FlushInfo(org.apache.lucene.store.FlushInfo) TrackingDirectoryWrapper(org.apache.lucene.store.TrackingDirectoryWrapper)
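
The common thread in these examples is the IOContext built from a FlushInfo: the writer advertises how many documents are about to be flushed and roughly how many bytes the new segment will take, and Directory and codec implementations can read those hints. Below is a minimal, self-contained sketch (not taken from the Lucene sources above; the numbers are illustrative) of the constructor and the public fields involved:

import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;

public class FlushInfoSketch {
    public static void main(String[] args) {
        int numDocs = 1_000;              // documents buffered in RAM
        long estimatedBytes = 4_194_304;  // rough size of the segment about to be written
        IOContext context = new IOContext(new FlushInfo(numDocs, estimatedBytes));

        // The context records that this I/O belongs to a flush and carries the hints:
        System.out.println(context.context);                        // FLUSH
        System.out.println(context.flushInfo.numDocs);              // 1000
        System.out.println(context.flushInfo.estimatedSegmentSize); // 4194304
    }
}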

Example 2 with FlushInfo

use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.

In the class DocumentsWriterPerThread, the method flush:

/** Flush all pending docs to a new segment */
FlushedSegment flush() throws IOException, AbortingException {
    assert numDocsInRAM > 0;
    assert deleteSlice.isEmpty() : "all deletes must be applied in prepareFlush";
    segmentInfo.setMaxDoc(numDocsInRAM);
    final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(), pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
    final double startMBUsed = bytesUsed() / 1024. / 1024.;
    // Apply delete-by-docID now (delete-by-docID only happens when an exception
    // is hit processing that doc, e.g. if the analyzer has some problem with the text):
    if (pendingUpdates.docIDs.size() > 0) {
        flushState.liveDocs = codec.liveDocsFormat().newLiveDocs(numDocsInRAM);
        for (int delDocID : pendingUpdates.docIDs) {
            flushState.liveDocs.clear(delDocID);
        }
        flushState.delCountOnFlush = pendingUpdates.docIDs.size();
        pendingUpdates.bytesUsed.addAndGet(-pendingUpdates.docIDs.size() * BufferedUpdates.BYTES_PER_DEL_DOCID);
        pendingUpdates.docIDs.clear();
    }
    if (aborted) {
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "flush: skip because aborting is set");
        }
        return null;
    }
    long t0 = System.nanoTime();
    if (infoStream.isEnabled("DWPT")) {
        infoStream.message("DWPT", "flush postings as segment " + flushState.segmentInfo.name + " numDocs=" + numDocsInRAM);
    }
    final Sorter.DocMap sortMap;
    try {
        sortMap = consumer.flush(flushState);
        pendingUpdates.terms.clear();
        segmentInfo.setFiles(new HashSet<>(directory.getCreatedFiles()));
        final SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L, -1L);
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : flushState.delCountOnFlush) + " deleted docs");
            infoStream.message("DWPT", "new segment has " + (flushState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " + (flushState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " + (flushState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " + (flushState.fieldInfos.hasFreq() ? "freqs" : "no freqs"));
            infoStream.message("DWPT", "flushedFiles=" + segmentInfoPerCommit.files());
            infoStream.message("DWPT", "flushed codec=" + codec);
        }
        final BufferedUpdates segmentDeletes;
        if (pendingUpdates.queries.isEmpty() && pendingUpdates.numericUpdates.isEmpty() && pendingUpdates.binaryUpdates.isEmpty()) {
            pendingUpdates.clear();
            segmentDeletes = null;
        } else {
            segmentDeletes = pendingUpdates;
        }
        if (infoStream.isEnabled("DWPT")) {
            final double newSegmentSize = segmentInfoPerCommit.sizeInBytes() / 1024. / 1024.;
            infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name + " ramUsed=" + nf.format(startMBUsed) + " MB" + " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + " docs/MB=" + nf.format(flushState.segmentInfo.maxDoc() / newSegmentSize));
        }
        assert segmentInfo != null;
        FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos, segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush);
        sealFlushedSegment(fs, sortMap);
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "flush time " + ((System.nanoTime() - t0) / 1000000.0) + " msec");
        }
        return fs;
    } catch (Throwable th) {
        abort();
        throw AbortingException.wrap(th);
    }
}
Also used : IOContext(org.apache.lucene.store.IOContext) FlushInfo(org.apache.lucene.store.FlushInfo)

Example 3 with FlushInfo

use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.

In the class TermVectorsConsumer, the method initTermVectorsWriter:

void initTermVectorsWriter() throws IOException {
    if (writer == null) {
        IOContext context = new IOContext(new FlushInfo(docWriter.getNumDocsInRAM(), docWriter.bytesUsed()));
        writer = docWriter.codec.termVectorsFormat().vectorsWriter(docWriter.directory, docWriter.getSegmentInfo(), context);
        lastDocID = 0;
    }
}
Also used : IOContext(org.apache.lucene.store.IOContext) FlushInfo(org.apache.lucene.store.FlushInfo)
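
initTermVectorsWriter passes the flush-time context down to the codec, which in turn hands it to the Directory when files are created. A hedged sketch of how a Directory wrapper could react to that hint follows; FlushAwareDirectory is a hypothetical name, while FilterDirectory, IOContext.Context and FlushInfo are the real Lucene classes:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

public class FlushAwareDirectory extends FilterDirectory {

    public FlushAwareDirectory(Directory in) {
        super(in);
    }

    @Override
    public IndexOutput createOutput(String name, IOContext context) throws IOException {
        if (context.context == IOContext.Context.FLUSH && context.flushInfo != null) {
            // The hint says roughly how large the new segment will be and how many
            // docs it holds; this could drive buffer sizing, throttling or logging.
            System.out.println("flush of ~" + context.flushInfo.estimatedSegmentSize
                    + " bytes (" + context.flushInfo.numDocs + " docs) -> " + name);
        }
        return super.createOutput(name, context);
    }
}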

Example 4 with FlushInfo

use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.

In the class IndexWriter, the method addIndexes:

/**
   * Adds all segments from an array of indexes into this index.
   *
   * <p>This may be used to parallelize batch indexing. A large document
   * collection can be broken into sub-collections. Each sub-collection can be
   * indexed in parallel, on a different thread, process or machine. The
   * complete index can then be created by merging sub-collection indexes
   * with this method.
   *
   * <p>
   * <b>NOTE:</b> this method acquires the write lock in
   * each directory, to ensure that no {@code IndexWriter}
   * is currently open or tries to open while this is
   * running.
   *
   * <p>This method is transactional in how Exceptions are
   * handled: it does not commit a new segments_N file until
   * all indexes are added.  This means if an Exception
   * occurs (for example disk full), then either no indexes
   * will have been added or they all will have been.
   *
   * <p>Note that this requires temporary free space in the
   * {@link Directory} up to 2X the sum of all input indexes
   * (including the starting index). If readers/searchers
   * are open against the starting index, then temporary
   * free space required will be higher by the size of the
   * starting index (see {@link #forceMerge(int)} for details).
   *
   * <p>This requires this index not be among those to be added.
   *
   * <p>All added indexes must have been created by the same
   * Lucene version as this index.
   *
   * @return The <a href="#sequence_number">sequence number</a>
   * for this operation
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @throws IllegalArgumentException if addIndexes would cause
   *   the index to exceed {@link #MAX_DOCS}, or if the incoming
   *   index sort does not match this index's index sort
   */
public long addIndexes(Directory... dirs) throws IOException {
    ensureOpen();
    noDupDirs(dirs);
    List<Lock> locks = acquireWriteLocks(dirs);
    Sort indexSort = config.getIndexSort();
    boolean successTop = false;
    long seqNo;
    try {
        if (infoStream.isEnabled("IW")) {
            infoStream.message("IW", "flush at addIndexes(Directory...)");
        }
        flush(false, true);
        List<SegmentCommitInfo> infos = new ArrayList<>();
        // long so we can detect int overflow:
        long totalMaxDoc = 0;
        List<SegmentInfos> commits = new ArrayList<>(dirs.length);
        for (Directory dir : dirs) {
            if (infoStream.isEnabled("IW")) {
                infoStream.message("IW", "addIndexes: process directory " + dir);
            }
            // read infos from dir
            SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
            if (segmentInfos.getIndexCreatedVersionMajor() != sis.getIndexCreatedVersionMajor()) {
                throw new IllegalArgumentException("Cannot use addIndexes(Directory) with indexes that have been created " + "by a different Lucene version. The current index was generated by Lucene " + segmentInfos.getIndexCreatedVersionMajor() + " while one of the directories contains an index that was generated with Lucene " + sis.getIndexCreatedVersionMajor());
            }
            totalMaxDoc += sis.totalMaxDoc();
            commits.add(sis);
        }
        // Best-effort up front check:
        testReserveDocs(totalMaxDoc);
        boolean success = false;
        try {
            for (SegmentInfos sis : commits) {
                for (SegmentCommitInfo info : sis) {
                    assert !infos.contains(info) : "dup info dir=" + info.info.dir + " name=" + info.info.name;
                    Sort segmentIndexSort = info.info.getIndexSort();
                    if (indexSort != null && segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
                        // TODO: we could make this smarter, e.g. if the incoming indexSort is congruent with our sort ("starts with") then it's OK
                        throw new IllegalArgumentException("cannot change index sort from " + segmentIndexSort + " to " + indexSort);
                    }
                    String newSegName = newSegmentName();
                    if (infoStream.isEnabled("IW")) {
                        infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info);
                    }
                    IOContext context = new IOContext(new FlushInfo(info.info.maxDoc(), info.sizeInBytes()));
                    FieldInfos fis = readFieldInfos(info);
                    for (FieldInfo fi : fis) {
                        // This will throw exceptions if any of the incoming fields have an illegal schema change:
                        globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType(), fi.getPointDimensionCount(), fi.getPointNumBytes());
                    }
                    infos.add(copySegmentAsIs(info, newSegName, context));
                }
            }
            success = true;
        } finally {
            if (!success) {
                for (SegmentCommitInfo sipc : infos) {
                    // Safe: these files must exist
                    deleteNewFiles(sipc.files());
                }
            }
        }
        synchronized (this) {
            success = false;
            try {
                ensureOpen();
                // Now reserve the docs, just before we update SIS:
                reserveDocs(totalMaxDoc);
                seqNo = docWriter.deleteQueue.getNextSequenceNumber();
                success = true;
            } finally {
                if (!success) {
                    for (SegmentCommitInfo sipc : infos) {
                        // Safe: these files must exist
                        deleteNewFiles(sipc.files());
                    }
                }
            }
            segmentInfos.addAll(infos);
            checkpoint();
        }
        successTop = true;
    } catch (VirtualMachineError tragedy) {
        tragicEvent(tragedy, "addIndexes(Directory...)");
        // dead code but javac disagrees:
        seqNo = -1;
    } finally {
        if (successTop) {
            IOUtils.close(locks);
        } else {
            IOUtils.closeWhileHandlingException(locks);
        }
    }
    maybeMerge();
    return seqNo;
}
Also used : ArrayList(java.util.ArrayList) Lock(org.apache.lucene.store.Lock) Sort(org.apache.lucene.search.Sort) IOContext(org.apache.lucene.store.IOContext) FlushInfo(org.apache.lucene.store.FlushInfo) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)
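
The javadoc above describes addIndexes(Directory...) as a way to merge independently built sub-indexes into one index. A hedged usage sketch follows; the paths and analyzer choice are illustrative, and no other IndexWriter may be open on the source directories while the call runs:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddIndexesExample {
    public static void main(String[] args) throws Exception {
        try (Directory main  = FSDirectory.open(Paths.get("/tmp/main-index"));
             Directory partA = FSDirectory.open(Paths.get("/tmp/part-a"));
             Directory partB = FSDirectory.open(Paths.get("/tmp/part-b"));
             IndexWriter writer = new IndexWriter(main, new IndexWriterConfig(new StandardAnalyzer()))) {
            // Copies the segments of partA and partB into the main index as-is;
            // each copied segment gets a FlushInfo-based IOContext as in the code above.
            long seqNo = writer.addIndexes(partA, partB);
            writer.commit();
            System.out.println("addIndexes sequence number: " + seqNo);
        }
    }
}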

Example 5 with FlushInfo

use of org.apache.lucene.store.FlushInfo in project lucene-solr by apache.

In the class ReadersAndUpdates, the method handleNumericDVUpdates:

@SuppressWarnings("synthetic-access")
private void handleNumericDVUpdates(FieldInfos infos, Map<String, NumericDocValuesFieldUpdates> updates, Directory dir, DocValuesFormat dvFormat, final SegmentReader reader, Map<Integer, Set<String>> fieldFiles) throws IOException {
    for (Entry<String, NumericDocValuesFieldUpdates> e : updates.entrySet()) {
        final String field = e.getKey();
        final NumericDocValuesFieldUpdates fieldUpdates = e.getValue();
        final long nextDocValuesGen = info.getNextDocValuesGen();
        final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX);
        final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc();
        final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize));
        final FieldInfo fieldInfo = infos.fieldInfo(field);
        assert fieldInfo != null;
        fieldInfo.setDocValuesGen(nextDocValuesGen);
        final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo });
        // separately also track which files were created for this gen
        final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
        final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
        try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
            // write the numeric updates to a new gen'd docvalues file
            fieldsConsumer.addNumericField(fieldInfo, new EmptyDocValuesProducer() {

                @Override
                public NumericDocValues getNumeric(FieldInfo fieldInfoIn) throws IOException {
                    if (fieldInfoIn != fieldInfo) {
                        throw new IllegalArgumentException("wrong fieldInfo");
                    }
                    final int maxDoc = reader.maxDoc();
                    final NumericDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator();
                    final NumericDocValues currentValues = reader.getNumericDocValues(field);
                    updatesIter.reset();
                    // Merge sort of the original doc values with updated doc values:
                    return new NumericDocValues() {

                        // merged docID
                        private int docIDOut = -1;

                        // docID from our original doc values
                        private int docIDIn = -1;

                        // docID from our updates
                        private int updateDocID = -1;

                        private long value;

                        @Override
                        public int docID() {
                            return docIDOut;
                        }

                        @Override
                        public int advance(int target) {
                            throw new UnsupportedOperationException();
                        }

                        @Override
                        public boolean advanceExact(int target) throws IOException {
                            throw new UnsupportedOperationException();
                        }

                        @Override
                        public long cost() {
                            // TODO
                            return 0;
                        }

                        @Override
                        public long longValue() {
                            return value;
                        }

                        @Override
                        public int nextDoc() throws IOException {
                            if (docIDIn == docIDOut) {
                                if (currentValues == null) {
                                    docIDIn = NO_MORE_DOCS;
                                } else {
                                    docIDIn = currentValues.nextDoc();
                                }
                            }
                            if (updateDocID == docIDOut) {
                                updateDocID = updatesIter.nextDoc();
                            }
                            if (docIDIn < updateDocID) {
                                // no update to this doc
                                docIDOut = docIDIn;
                                value = currentValues.longValue();
                            } else {
                                docIDOut = updateDocID;
                                if (docIDOut != NO_MORE_DOCS) {
                                    value = updatesIter.value();
                                }
                            }
                            return docIDOut;
                        }
                    };
                }
            });
        }
        info.advanceDocValuesGen();
        assert !fieldFiles.containsKey(fieldInfo.number);
        fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles());
    }
}
Also used : DocValuesConsumer(org.apache.lucene.codecs.DocValuesConsumer) IOException(java.io.IOException) TrackingDirectoryWrapper(org.apache.lucene.store.TrackingDirectoryWrapper) IOContext(org.apache.lucene.store.IOContext) FlushInfo(org.apache.lucene.store.FlushInfo)
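
The anonymous NumericDocValues returned by getNumeric above is essentially a merge of two sorted docID streams, with updated values shadowing the originals. The following standalone sketch (plain arrays and a NO_MORE_DOCS sentinel, not Lucene API) shows the same walk in isolation:

public class MergeSortedUpdates {

    static final int NO_MORE_DOCS = Integer.MAX_VALUE;

    public static void main(String[] args) {
        // sorted base doc values and a sorted overlay of updates
        int[] baseDocs = { 0, 2, 3, 7 };
        long[] baseVals = { 10, 20, 30, 70 };
        int[] updDocs = { 2, 5 };
        long[] updVals = { 999, 555 };

        int bi = 0, ui = 0;
        while (true) {
            int baseDoc = bi < baseDocs.length ? baseDocs[bi] : NO_MORE_DOCS;
            int updDoc = ui < updDocs.length ? updDocs[ui] : NO_MORE_DOCS;
            if (baseDoc == NO_MORE_DOCS && updDoc == NO_MORE_DOCS) {
                break;
            }
            if (baseDoc < updDoc) {
                // no update for this doc: keep the original value
                System.out.println("doc " + baseDoc + " -> " + baseVals[bi]);
                bi++;
            } else {
                // the update wins; if the base stream also has this doc, skip its value
                System.out.println("doc " + updDoc + " -> " + updVals[ui]);
                if (baseDoc == updDoc) {
                    bi++;
                }
                ui++;
            }
        }
        // prints: 0 -> 10, 2 -> 999, 3 -> 30, 5 -> 555, 7 -> 70
    }
}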

Aggregations

FlushInfo (org.apache.lucene.store.FlushInfo) 12
IOContext (org.apache.lucene.store.IOContext) 12
TrackingDirectoryWrapper (org.apache.lucene.store.TrackingDirectoryWrapper) 4
IOException (java.io.IOException) 3
DocValuesConsumer (org.apache.lucene.codecs.DocValuesConsumer) 3
Directory (org.apache.lucene.store.Directory) 3
Codec (org.apache.lucene.codecs.Codec) 2
FieldsConsumer (org.apache.lucene.codecs.FieldsConsumer) 2
BytesRef (org.apache.lucene.util.BytesRef) 2
ArrayList (java.util.ArrayList) 1
HashMap (java.util.HashMap) 1
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 1
DocValuesProducer (org.apache.lucene.codecs.DocValuesProducer) 1
FieldsProducer (org.apache.lucene.codecs.FieldsProducer) 1
NormsConsumer (org.apache.lucene.codecs.NormsConsumer) 1
NormsProducer (org.apache.lucene.codecs.NormsProducer) 1
StoredFieldsReader (org.apache.lucene.codecs.StoredFieldsReader) 1
StoredFieldsWriter (org.apache.lucene.codecs.StoredFieldsWriter) 1
TermVectorsReader (org.apache.lucene.codecs.TermVectorsReader) 1
TermVectorsWriter (org.apache.lucene.codecs.TermVectorsWriter) 1