Example 1 with MergeInfo

use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.

the class TestSegmentMerger method testMerge.

public void testMerge() throws IOException {
    final Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, null, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(reader1, reader2), si, InfoStream.getDefault(), mergedDir, new FieldInfos.FieldNumbers(), newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))));
    MergeState mergeState = merger.merge();
    int docsMerged = mergeState.segmentInfo.maxDoc();
    assertTrue(docsMerged == 2);
    //Should be able to open a new SegmentReader against the new directory
    SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(mergeState.segmentInfo, 0, -1L, -1L, -1L), Version.LATEST.major, newIOContext(random()));
    assertTrue(mergedReader != null);
    assertTrue(mergedReader.numDocs() == 2);
    Document newDoc1 = mergedReader.document(0);
    assertTrue(newDoc1 != null);
    //There are 2 unstored fields on the document
    assertTrue(DocHelper.numFields(newDoc1) == DocHelper.numFields(doc1) - DocHelper.unstored.size());
    Document newDoc2 = mergedReader.document(1);
    assertTrue(newDoc2 != null);
    assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size());
    PostingsEnum termDocs = TestUtil.docs(random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), null, 0);
    assertTrue(termDocs != null);
    assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    int tvCount = 0;
    for (FieldInfo fieldInfo : mergedReader.getFieldInfos()) {
        if (fieldInfo.hasVectors()) {
            tvCount++;
        }
    }
    //System.out.println("stored size: " + stored.size());
    assertEquals("We do not have 3 fields that were indexed with term vector", 3, tvCount);
    Terms vector = mergedReader.getTermVectors(0).terms(DocHelper.TEXT_FIELD_2_KEY);
    assertNotNull(vector);
    assertEquals(3, vector.size());
    TermsEnum termsEnum = vector.iterator();
    int i = 0;
    while (termsEnum.next() != null) {
        String term = termsEnum.term().utf8ToString();
        int freq = (int) termsEnum.totalTermFreq();
        //System.out.println("Term: " + term + " Freq: " + freq);
        assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
        assertTrue(DocHelper.FIELD_2_FREQS[i] == freq);
        i++;
    }
    TestSegmentReader.checkNorms(mergedReader);
    mergedReader.close();
}
Also used : MergeInfo(org.apache.lucene.store.MergeInfo) Document(org.apache.lucene.document.Document) Codec(org.apache.lucene.codecs.Codec) IOContext(org.apache.lucene.store.IOContext) BytesRef(org.apache.lucene.util.BytesRef)
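
The pattern shared by the examples on this page is wrapping a MergeInfo in an IOContext so that directories and codecs know the I/O belongs to a merge. Below is a minimal, self-contained sketch of that construction; it assumes the Lucene 6.x/7.x-era API used above, where MergeInfo takes (totalMaxDoc, estimatedMergeBytes, isExternal, mergeMaxNumSegments) and -1 marks an unknown value.

import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MergeInfo;

public class MergeContextSketch {
    public static void main(String[] args) {
        // MergeInfo(totalMaxDoc, estimatedMergeBytes, isExternal, mergeMaxNumSegments);
        // -1 means "unknown", as in new MergeInfo(-1, -1, false, -1) in the test above.
        MergeInfo mergeInfo = new MergeInfo(-1, -1, false, -1);
        IOContext context = new IOContext(mergeInfo);
        // The context is now flagged as merge I/O; directories may use this hint,
        // for example to bypass caches for large sequential merge writes.
        System.out.println(context.context);                // MERGE
        System.out.println(context.mergeInfo == mergeInfo); // true
    }
}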

Example 2 with MergeInfo

use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.

the class TestDoc method merge.

private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile) throws Exception {
    IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));
    SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
    SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context);
    final Codec codec = Codec.getDefault();
    TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
    final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2), si, InfoStream.getDefault(), trackingDir, new FieldInfos.FieldNumbers(), context);
    MergeState mergeState = merger.merge();
    r1.close();
    r2.close();
    si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
    if (useCompoundFile) {
        Collection<String> filesToDelete = si.files();
        codec.compoundFormat().write(dir, si, context);
        si.setUseCompoundFile(true);
        for (String name : filesToDelete) {
            si1.info.dir.deleteFile(name);
        }
    }
    return new SegmentCommitInfo(si, 0, -1L, -1L, -1L);
}
Also used : MergeInfo(org.apache.lucene.store.MergeInfo) TrackingDirectoryWrapper(org.apache.lucene.store.TrackingDirectoryWrapper) Codec(org.apache.lucene.codecs.Codec) IOContext(org.apache.lucene.store.IOContext)

Example 3 with MergeInfo

use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.

the class IndexWriter method addIndexes.

/**
   * Merges the provided indexes into this index.
   * 
   * <p>
   * The provided IndexReaders are not closed.
   * 
   * <p>
   * See {@link #addIndexes} for details on transactional semantics, temporary
   * free space required in the Directory, and non-CFS segments on an Exception.
   * 
   * <p>
   * <b>NOTE:</b> empty segments are dropped by this method and not added to this
   * index.
   * 
   * <p>
   * <b>NOTE:</b> this merges all given {@link LeafReader}s in one
   * merge. If you intend to merge a large number of readers, it may be better
   * to call this method multiple times, each time with a small set of readers.
   * In principle, if you use a merge policy with a {@code mergeFactor} or
   * {@code maxMergeAtOnce} parameter, you should pass that many readers in one
   * call.
   * 
   * <p>
   * <b>NOTE:</b> this method does not call or make use of the {@link MergeScheduler},
   * so any custom bandwidth throttling is at the moment ignored.
   * 
   * @return The <a href="#sequence_number">sequence number</a>
   * for this operation
   *
   * @throws CorruptIndexException
   *           if the index is corrupt
   * @throws IOException
   *           if there is a low-level IO error
   * @throws IllegalArgumentException
   *           if addIndexes would cause the index to exceed {@link #MAX_DOCS}
   */
public long addIndexes(CodecReader... readers) throws IOException {
    ensureOpen();
    // long so we can detect int overflow:
    long numDocs = 0;
    Sort indexSort = config.getIndexSort();
    long seqNo;
    try {
        if (infoStream.isEnabled("IW")) {
            infoStream.message("IW", "flush at addIndexes(CodecReader...)");
        }
        flush(false, true);
        String mergedName = newSegmentName();
        for (CodecReader leaf : readers) {
            numDocs += leaf.numDocs();
            validateMergeReader(leaf);
        }
        // Best-effort up front check:
        testReserveDocs(numDocs);
        final IOContext context = new IOContext(new MergeInfo(Math.toIntExact(numDocs), -1, false, UNBOUNDED_MAX_MERGE_SEGMENTS));
        // TODO: somehow we should fix this merge so it's
        // abortable so that IW.close(false) is able to stop it
        TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
        // We set the min version to null for now, it will be set later by SegmentMerger
        SegmentInfo info = new SegmentInfo(directoryOrig, Version.LATEST, null, mergedName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort());
        SegmentMerger merger = new SegmentMerger(Arrays.asList(readers), info, infoStream, trackingDir, globalFieldNumberMap, context);
        if (!merger.shouldMerge()) {
            return docWriter.deleteQueue.getNextSequenceNumber();
        }
        // merge 'em
        merger.merge();
        SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L, -1L);
        info.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
        trackingDir.clearCreatedFiles();
        setDiagnostics(info, SOURCE_ADDINDEXES_READERS);
        final MergePolicy mergePolicy = config.getMergePolicy();
        boolean useCompoundFile;
        synchronized (this) {
            // Guard segmentInfos
            if (stopMerges) {
                // Safe: these files must exist
                deleteNewFiles(infoPerCommit.files());
                return docWriter.deleteQueue.getNextSequenceNumber();
            }
            ensureOpen();
            useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, infoPerCommit, this);
        }
        // Now create the compound file if needed
        if (useCompoundFile) {
            Collection<String> filesToDelete = infoPerCommit.files();
            TrackingDirectoryWrapper trackingCFSDir = new TrackingDirectoryWrapper(directory);
            // createCompoundFile tries to cleanup, but it might not always be able to...
            try {
                createCompoundFile(infoStream, trackingCFSDir, info, context);
            } finally {
                // delete new non cfs files directly: they were never
                // registered with IFD
                deleteNewFiles(filesToDelete);
            }
            info.setUseCompoundFile(true);
        }
        // Have codec write SegmentInfo.  Must do this after
        // creating CFS so that 1) .si isn't slurped into CFS,
        // and 2) .si reflects useCompoundFile=true change
        // above:
        codec.segmentInfoFormat().write(trackingDir, info, context);
        info.addFiles(trackingDir.getCreatedFiles());
        // Register the new segment
        synchronized (this) {
            if (stopMerges) {
                // Safe: these files must exist
                deleteNewFiles(infoPerCommit.files());
                return docWriter.deleteQueue.getNextSequenceNumber();
            }
            ensureOpen();
            // Now reserve the docs, just before we update SIS:
            reserveDocs(numDocs);
            segmentInfos.add(infoPerCommit);
            seqNo = docWriter.deleteQueue.getNextSequenceNumber();
            checkpoint();
        }
    } catch (VirtualMachineError tragedy) {
        tragicEvent(tragedy, "addIndexes(CodecReader...)");
        // dead code but javac disagrees:
        seqNo = -1;
    }
    maybeMerge();
    return seqNo;
}
Also used : MergeInfo(org.apache.lucene.store.MergeInfo) TrackingDirectoryWrapper(org.apache.lucene.store.TrackingDirectoryWrapper) Sort(org.apache.lucene.search.Sort) IOContext(org.apache.lucene.store.IOContext)
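
The NOTE in the javadoc above recommends passing a small set of readers per call rather than merging a very large number of readers in a single addIndexes invocation. The helper below is a hypothetical illustration of that advice (addInBatches and batchSize are not part of the Lucene API); it simply slices the readers and calls IndexWriter.addIndexes once per slice.

import java.io.IOException;
import java.util.List;

import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.IndexWriter;

public class BatchedAddIndexes {
    // Hypothetical helper: feed readers to addIndexes in small batches so each
    // call performs one reasonably sized merge instead of one huge merge.
    static long addInBatches(IndexWriter writer, List<CodecReader> readers, int batchSize) throws IOException {
        long lastSeqNo = -1;
        for (int start = 0; start < readers.size(); start += batchSize) {
            int end = Math.min(start + batchSize, readers.size());
            CodecReader[] batch = readers.subList(start, end).toArray(new CodecReader[0]);
            lastSeqNo = writer.addIndexes(batch);
        }
        return lastSeqNo;
    }
}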

Example 4 with MergeInfo

use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.

the class BlockDirectoryTest method ensureCacheConfigurable.

/**
   * Verify the configuration options for the block cache are handled
   * appropriately.
   */
@Test
public void ensureCacheConfigurable() throws Exception {
    IOContext mergeContext = new IOContext(new MergeInfo(1, 1, false, 1));
    BlockDirectory d = directory;
    assertTrue(d.useReadCache("", IOContext.DEFAULT));
    if (d.getCache() instanceof MapperCache) {
        assertTrue(d.useWriteCache("", IOContext.DEFAULT));
    } else {
        assertFalse(d.useWriteCache("", IOContext.DEFAULT));
    }
    assertFalse(d.useWriteCache("", mergeContext));
    d = new BlockDirectory("test", directory, mapperCache, null, true, false);
    assertTrue(d.useReadCache("", IOContext.DEFAULT));
    assertFalse(d.useWriteCache("", IOContext.DEFAULT));
    assertFalse(d.useWriteCache("", mergeContext));
    d = new BlockDirectory("test", directory, mapperCache, null, false, true);
    assertFalse(d.useReadCache("", IOContext.DEFAULT));
    if (d.getCache() instanceof MapperCache) {
        assertTrue(d.useWriteCache("", IOContext.DEFAULT));
    } else {
        assertFalse(d.useWriteCache("", IOContext.DEFAULT));
    }
    assertFalse(d.useWriteCache("", mergeContext));
}
Also used : MergeInfo(org.apache.lucene.store.MergeInfo) IOContext(org.apache.lucene.store.IOContext) Test(org.junit.Test)
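
What the test above exercises is the directory's decision not to write-cache merge traffic: assertFalse(d.useWriteCache("", mergeContext)) holds in every configuration, because merged files tend to be large and written once. The snippet below is only an illustrative sketch of that kind of check; it is not BlockDirectory's actual implementation, which also consults its configuration flags and cache type.

import org.apache.lucene.store.IOContext;

public class WriteCachePolicySketch {
    // Illustrative only: skip the write cache whenever the IOContext says the
    // bytes belong to a merge, mirroring assertFalse(d.useWriteCache("", mergeContext)).
    static boolean shouldUseWriteCache(IOContext context, boolean writeCachingEnabled) {
        if (!writeCachingEnabled) {
            return false;
        }
        return context.context != IOContext.Context.MERGE;
    }
}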

Example 5 with MergeInfo

use of org.apache.lucene.store.MergeInfo in project lucene-solr by apache.

the class LuceneTestCase method newIOContext.

/** Returns an IOContext derived from {@code oldContext}: flush and merge contexts keep their flavor with randomized sizes; otherwise a random context is chosen. */
public static IOContext newIOContext(Random random, IOContext oldContext) {
    final int randomNumDocs = random.nextInt(4192);
    final int size = random.nextInt(512) * randomNumDocs;
    if (oldContext.flushInfo != null) {
        // Always return at least the estimatedSegmentSize of the incoming IOContext:
        return new IOContext(new FlushInfo(randomNumDocs, Math.max(oldContext.flushInfo.estimatedSegmentSize, size)));
    } else if (oldContext.mergeInfo != null) {
        // Always return at least the estimatedMergeBytes of the incoming IOContext:
        return new IOContext(new MergeInfo(randomNumDocs, Math.max(oldContext.mergeInfo.estimatedMergeBytes, size), random.nextBoolean(), TestUtil.nextInt(random, 1, 100)));
    } else {
        // Make a totally random IOContext:
        final IOContext context;
        switch(random.nextInt(5)) {
            case 0:
                context = IOContext.DEFAULT;
                break;
            case 1:
                context = IOContext.READ;
                break;
            case 2:
                context = IOContext.READONCE;
                break;
            case 3:
                context = new IOContext(new MergeInfo(randomNumDocs, size, true, -1));
                break;
            case 4:
                context = new IOContext(new FlushInfo(randomNumDocs, size));
                break;
            default:
                context = IOContext.DEFAULT;
        }
        return context;
    }
}
Also used : MergeInfo(org.apache.lucene.store.MergeInfo) IOContext(org.apache.lucene.store.IOContext) FlushInfo(org.apache.lucene.store.FlushInfo)

Aggregations

IOContext (org.apache.lucene.store.IOContext): 5 uses
MergeInfo (org.apache.lucene.store.MergeInfo): 5 uses
Codec (org.apache.lucene.codecs.Codec): 2 uses
TrackingDirectoryWrapper (org.apache.lucene.store.TrackingDirectoryWrapper): 2 uses
Document (org.apache.lucene.document.Document): 1 use
Sort (org.apache.lucene.search.Sort): 1 use
FlushInfo (org.apache.lucene.store.FlushInfo): 1 use
BytesRef (org.apache.lucene.util.BytesRef): 1 use
Test (org.junit.Test): 1 use