Example 41 with IndexInput

Use of org.apache.lucene.store.IndexInput in project lucene-solr by Apache.

From class Node, method readLocalFileMetaData.

/** Opens the specified file, reads its identifying information, including file length, full index header (includes the unique segment
   *  ID) and the full footer (includes checksum), and returns the resulting {@link FileMetaData}.
   *
   *  <p>This returns null, logging a message, if there are any problems (the file does not exist, is corrupt, truncated, etc.).</p> */
public FileMetaData readLocalFileMetaData(String fileName) throws IOException {
    Map<String, FileMetaData> cache = lastFileMetaData;
    FileMetaData result;
    if (cache != null) {
        // We may already have this file cached from the last NRT point:
        result = cache.get(fileName);
    } else {
        result = null;
    }
    if (result == null) {
        // Pull from the filesystem
        long checksum;
        long length;
        byte[] header;
        byte[] footer;
        try (IndexInput in = dir.openInput(fileName, IOContext.DEFAULT)) {
            try {
                length = in.length();
                header = CodecUtil.readIndexHeader(in);
                footer = CodecUtil.readFooter(in);
                checksum = CodecUtil.retrieveChecksum(in);
            } catch (EOFException | CorruptIndexException cie) {
                // File exists but is busted (e.g. the node crashed, leaving a corrupt, un-fsync'd file), so we must copy it again. On init we try
                // to delete such unreferenced files, but a virus checker can block that, leaving this bad file.
                if (VERBOSE_FILES) {
                    message("file " + fileName + ": will copy [existing file is corrupt]");
                }
                return null;
            }
            if (VERBOSE_FILES) {
                message("file " + fileName + " has length=" + bytesToString(length));
            }
        } catch (FileNotFoundException | NoSuchFileException e) {
            if (VERBOSE_FILES) {
                message("file " + fileName + ": will copy [file does not exist]");
            }
            return null;
        }
        // NOTE: checksum is redundant w/ footer, but we break it out separately because when the bits cross the wire we need direct access to
        // checksum when copying to catch bit flips:
        result = new FileMetaData(header, footer, length, checksum);
    }
    return result;
}
Also used : EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) NoSuchFileException(java.nio.file.NoSuchFileException) IndexInput(org.apache.lucene.store.IndexInput) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)
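
For context, the header/footer/checksum reads above can be exercised in isolation. The sketch below writes a small file with CodecUtil and reads its metadata back the same way; the file name, codec name, and payload are made up for illustration, and error handling is omitted.

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.StringHelper;

public class FileMetaDataSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            // Hypothetical file: a real index header carries a 16-byte segment id.
            byte[] segmentId = StringHelper.randomId();
            try (IndexOutput out = dir.createOutput("demo.dat", IOContext.DEFAULT)) {
                CodecUtil.writeIndexHeader(out, "DemoCodec", 0, segmentId, "");
                // arbitrary payload between header and footer
                out.writeVInt(42);
                // the footer records the checksum
                CodecUtil.writeFooter(out);
            }
            // Same read pattern as readLocalFileMetaData above:
            try (IndexInput in = dir.openInput("demo.dat", IOContext.DEFAULT)) {
                long length = in.length();
                byte[] header = CodecUtil.readIndexHeader(in);
                byte[] footer = CodecUtil.readFooter(in);
                long checksum = CodecUtil.retrieveChecksum(in);
                System.out.println("length=" + length + " headerBytes=" + header.length
                    + " footerBytes=" + footer.length + " checksum=" + checksum);
            }
        }
    }
}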

Example 42 with IndexInput

Use of org.apache.lucene.store.IndexInput in project lucene-solr by Apache.

From class CompressingTermVectorsWriter, method merge.

@Override
public int merge(MergeState mergeState) throws IOException {
    if (mergeState.needsIndexSort) {
        // TODO: can we gain back some of the optimizations even if the index is sorted?
        // E.g. if the sort results in large chunks of contiguous docs from one sub being copied over...?
        return super.merge(mergeState);
    }
    int docCount = 0;
    int numReaders = mergeState.maxDocs.length;
    MatchingReaders matching = new MatchingReaders(mergeState);
    for (int readerIndex = 0; readerIndex < numReaders; readerIndex++) {
        CompressingTermVectorsReader matchingVectorsReader = null;
        final TermVectorsReader vectorsReader = mergeState.termVectorsReaders[readerIndex];
        if (matching.matchingReaders[readerIndex]) {
            // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
            if (vectorsReader != null && vectorsReader instanceof CompressingTermVectorsReader) {
                matchingVectorsReader = (CompressingTermVectorsReader) vectorsReader;
            }
        }
        final int maxDoc = mergeState.maxDocs[readerIndex];
        final Bits liveDocs = mergeState.liveDocs[readerIndex];
        if (matchingVectorsReader != null
                && matchingVectorsReader.getCompressionMode() == compressionMode
                && matchingVectorsReader.getChunkSize() == chunkSize
                && matchingVectorsReader.getVersion() == VERSION_CURRENT
                && matchingVectorsReader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT
                && BULK_MERGE_ENABLED
                && liveDocs == null
                && !tooDirty(matchingVectorsReader)) {
            // optimized merge, raw byte copy
            // it's not worth fine-graining this if there are deletions.
            matchingVectorsReader.checkIntegrity();
            // flush any pending chunks
            if (!pendingDocs.isEmpty()) {
                flush();
                // incomplete: we had to force this flush
                numDirtyChunks++;
            }
            // iterate over each chunk. we use the vectors index to find chunk boundaries,
            // read the docstart + doccount from the chunk header (we write a new header, since doc numbers will change),
            // and just copy the bytes directly.
            IndexInput rawDocs = matchingVectorsReader.getVectorsStream();
            CompressingStoredFieldsIndexReader index = matchingVectorsReader.getIndexReader();
            rawDocs.seek(index.getStartPointer(0));
            int docID = 0;
            while (docID < maxDoc) {
                // read header
                int base = rawDocs.readVInt();
                if (base != docID) {
                    throw new CorruptIndexException("invalid state: base=" + base + ", docID=" + docID, rawDocs);
                }
                int bufferedDocs = rawDocs.readVInt();
                // write a new index entry and new header for this chunk.
                indexWriter.writeIndex(bufferedDocs, vectorsStream.getFilePointer());
                // rebase
                vectorsStream.writeVInt(docCount);
                vectorsStream.writeVInt(bufferedDocs);
                docID += bufferedDocs;
                docCount += bufferedDocs;
                numDocs += bufferedDocs;
                if (docID > maxDoc) {
                    throw new CorruptIndexException("invalid state: base=" + base + ", count=" + bufferedDocs + ", maxDoc=" + maxDoc, rawDocs);
                }
                // copy bytes until the next chunk boundary (or end of chunk data).
                // using the stored fields index for this isn't the most efficient, but fast enough
                // and is a source of redundancy for detecting bad things.
                final long end;
                if (docID == maxDoc) {
                    end = matchingVectorsReader.getMaxPointer();
                } else {
                    end = index.getStartPointer(docID);
                }
                vectorsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
            }
            if (rawDocs.getFilePointer() != matchingVectorsReader.getMaxPointer()) {
                throw new CorruptIndexException("invalid state: pos=" + rawDocs.getFilePointer() + ", max=" + matchingVectorsReader.getMaxPointer(), rawDocs);
            }
            // since we bulk merged all chunks, we inherit any dirty ones from this segment.
            numChunks += matchingVectorsReader.getNumChunks();
            numDirtyChunks += matchingVectorsReader.getNumDirtyChunks();
        } else {
            // naive merge...
            if (vectorsReader != null) {
                vectorsReader.checkIntegrity();
            }
            for (int i = 0; i < maxDoc; i++) {
                if (liveDocs != null && liveDocs.get(i) == false) {
                    continue;
                }
                Fields vectors;
                if (vectorsReader == null) {
                    vectors = null;
                } else {
                    vectors = vectorsReader.get(i);
                }
                addAllDocVectors(vectors, mergeState);
                ++docCount;
            }
        }
    }
    finish(mergeState.mergeFieldInfos, docCount);
    return docCount;
}
Also used : Fields(org.apache.lucene.index.Fields) Bits(org.apache.lucene.util.Bits) IndexInput(org.apache.lucene.store.IndexInput) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) TermVectorsReader(org.apache.lucene.codecs.TermVectorsReader)
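
The bulk path above boils down to seeking the source IndexInput to a chunk boundary and streaming a byte range into the destination with copyBytes. A minimal, self-contained sketch of that primitive, with made-up file names and data:

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class RawCopySketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            // Write a small source file.
            try (IndexOutput out = dir.createOutput("src.bin", IOContext.DEFAULT)) {
                for (int i = 0; i < 1000; i++) {
                    out.writeVInt(i);
                }
            }
            // Seek the input, then copy a raw byte range into the output;
            // the optimized merge above does this chunk by chunk.
            try (IndexInput in = dir.openInput("src.bin", IOContext.DEFAULT);
                 IndexOutput out = dir.createOutput("dst.bin", IOContext.DEFAULT)) {
                in.seek(0);
                out.copyBytes(in, in.length());
            }
            System.out.println("copied " + dir.fileLength("dst.bin") + " bytes");
        }
    }
}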

Example 43 with IndexInput

Use of org.apache.lucene.store.IndexInput in project lucene-solr by Apache.

From class HdfsDirectoryTest, method assertInputsEquals.

private void assertInputsEquals(String name, Directory fsDir, HdfsDirectory hdfs) throws IOException {
    int reads = random.nextInt(MAX_NUMBER_OF_READS);
    IndexInput fsInput = fsDir.openInput(name, new IOContext());
    IndexInput hdfsInput = hdfs.openInput(name, new IOContext());
    assertEquals(fsInput.length(), hdfsInput.length());
    int fileLength = (int) fsInput.length();
    for (int i = 0; i < reads; i++) {
        int nextInt = Math.min(MAX_BUFFER_SIZE - MIN_BUFFER_SIZE, fileLength);
        byte[] fsBuf = new byte[random.nextInt(nextInt > 0 ? nextInt : 1) + MIN_BUFFER_SIZE];
        byte[] hdfsBuf = new byte[fsBuf.length];
        int offset = random.nextInt(fsBuf.length);
        nextInt = fsBuf.length - offset;
        int length = random.nextInt(nextInt > 0 ? nextInt : 1);
        nextInt = fileLength - length;
        int pos = random.nextInt(nextInt > 0 ? nextInt : 1);
        fsInput.seek(pos);
        fsInput.readBytes(fsBuf, offset, length);
        hdfsInput.seek(pos);
        hdfsInput.readBytes(hdfsBuf, offset, length);
        for (int f = offset; f < length; f++) {
            if (fsBuf[f] != hdfsBuf[f]) {
                fail();
            }
        }
    }
    fsInput.close();
    hdfsInput.close();
}
Also used : IndexInput(org.apache.lucene.store.IndexInput) IOContext(org.apache.lucene.store.IOContext)
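
The test above relies on positional reads: seek to an absolute offset in the file, then read into a buffer at a given offset and length. A self-contained sketch of that pattern (file name and sizes are arbitrary):

import java.util.Arrays;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class SeekReadSketch {
    public static void main(String[] args) throws Exception {
        byte[] data = new byte[256];
        for (int i = 0; i < data.length; i++) {
            data[i] = (byte) i;
        }
        try (Directory dir = new RAMDirectory()) {
            try (IndexOutput out = dir.createOutput("data.bin", IOContext.DEFAULT)) {
                out.writeBytes(data, 0, data.length);
            }
            try (IndexInput in = dir.openInput("data.bin", IOContext.DEFAULT)) {
                byte[] buf = new byte[64];
                // absolute position in the file, then a slice into buf[16..48)
                in.seek(100);
                in.readBytes(buf, 16, 32);
                boolean match = Arrays.equals(
                    Arrays.copyOfRange(data, 100, 132),
                    Arrays.copyOfRange(buf, 16, 48));
                System.out.println("slice matches: " + match);
            }
        }
    }
}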

Example 44 with IndexInput

Use of org.apache.lucene.store.IndexInput in project lucene-solr by Apache.

From class HdfsDirectoryTest, method testEof.

private void testEof(String name, Directory directory, long length) throws IOException {
    IndexInput input = directory.openInput(name, new IOContext());
    input.seek(length);
    try {
        input.readByte();
        fail("should throw eof");
    } catch (IOException e) {
        // expected: reading at or past EOF must fail
    }
}
Also used : IndexInput(org.apache.lucene.store.IndexInput) IOContext(org.apache.lucene.store.IOContext) IOException(java.io.IOException)
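
Reading at or past the end of an IndexInput is required to fail; which exact IOException subclass is thrown (often EOFException) depends on the Directory implementation. A minimal sketch using an in-memory directory and a made-up file name:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class EofSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            try (IndexOutput out = dir.createOutput("tiny.bin", IOContext.DEFAULT)) {
                out.writeByte((byte) 42);
            }
            try (IndexInput in = dir.openInput("tiny.bin", IOContext.DEFAULT)) {
                // position exactly at end of file
                in.seek(in.length());
                try {
                    in.readByte();
                    System.out.println("unexpected: read past EOF succeeded");
                } catch (IOException expected) {
                    System.out.println("got expected exception: " + expected);
                }
            }
        }
    }
}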

Example 45 with IndexInput

Use of org.apache.lucene.store.IndexInput in project lucene-solr by Apache.

From class HdfsDirectoryTest, method testRename.

public void testRename() throws IOException {
    String[] listAll = directory.listAll();
    for (String file : listAll) {
        directory.deleteFile(file);
    }
    IndexOutput output = directory.createOutput("testing.test", new IOContext());
    output.writeInt(12345);
    output.close();
    directory.rename("testing.test", "testing.test.renamed");
    assertFalse(slowFileExists(directory, "testing.test"));
    assertTrue(slowFileExists(directory, "testing.test.renamed"));
    IndexInput input = directory.openInput("testing.test.renamed", new IOContext());
    assertEquals(12345, input.readInt());
    assertEquals(input.getFilePointer(), input.length());
    input.close();
    directory.deleteFile("testing.test.renamed");
    assertFalse(slowFileExists(directory, "testing.test.renamed"));
}
Also used : IOContext(org.apache.lucene.store.IOContext) IndexInput(org.apache.lucene.store.IndexInput) IndexOutput(org.apache.lucene.store.IndexOutput)
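
The same write/rename/read round trip can be reproduced against any Directory implementation. A sketch with an in-memory directory and arbitrary file names (Directory.rename requires that the destination not already exist):

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class RenameSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            try (IndexOutput out = dir.createOutput("testing.test", IOContext.DEFAULT)) {
                out.writeInt(12345);
            }
            // rename within the same Directory; the destination must not exist yet
            dir.rename("testing.test", "testing.test.renamed");
            try (IndexInput in = dir.openInput("testing.test.renamed", IOContext.DEFAULT)) {
                int value = in.readInt();
                boolean fullyRead = in.getFilePointer() == in.length();
                System.out.println("value=" + value + " fullyRead=" + fullyRead);
            }
            dir.deleteFile("testing.test.renamed");
        }
    }
}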

Aggregations

IndexInput (org.apache.lucene.store.IndexInput): 173
Directory (org.apache.lucene.store.Directory): 75
IndexOutput (org.apache.lucene.store.IndexOutput): 75
ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 49
IOException (java.io.IOException): 26
RAMDirectory (org.apache.lucene.store.RAMDirectory): 25
FilterDirectory (org.apache.lucene.store.FilterDirectory): 23
CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 21
BytesRef (org.apache.lucene.util.BytesRef): 18
ArrayList (java.util.ArrayList): 17
BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput): 17
Test (org.junit.Test): 17
BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 13
NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder): 10
IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException): 10
IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException): 10
CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput): 10
NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory): 10
IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor): 9
Relation (org.apache.lucene.index.PointValues.Relation): 9