Search in sources :

Example 21 with ChecksumIndexInput

use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.

the class SimpleTextTermVectorsReader method readIndex.

// we don't actually write a .tvx-like index, instead we read the
// vectors file in entirety up-front and save the offsets
// so we can seek to the data later.
/**
 * Scans the vectors input line by line until the {@code END} marker is seen, storing in
 * {@code offsets} the file pointer reached right after each line that starts with {@code DOC},
 * and finally checks the footer of the input.
 *
 * @param maxDoc length of the {@code offsets} array; asserted to equal the number of
 *     {@code DOC} lines encountered
 * @throws IOException propagated from reading lines or from the footer check
 */
private void readIndex(int maxDoc) throws IOException {
    final ChecksumIndexInput checksumIn = new BufferedChecksumIndexInput(in);
    offsets = new long[maxDoc];
    int docsSeen = 0;
    // The scratch buffer is inspected before each read, exactly as it was left by the previous one.
    while (scratch.get().equals(END) == false) {
        SimpleTextUtil.readLine(checksumIn, scratch);
        final boolean isDocLine = StringHelper.startsWith(scratch.get(), DOC);
        if (isDocLine) {
            // Position just past the DOC line: this is where the document's data begins.
            offsets[docsSeen++] = checksumIn.getFilePointer();
        }
    }
    SimpleTextUtil.checkFooter(checksumIn);
    assert docsSeen == offsets.length;
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput)

Example 22 with ChecksumIndexInput

use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.

the class BKDWriter method sort.

// useful for debugging:
/*
  private void printPathSlice(String desc, PathSlice slice, int dim) throws IOException {
    System.out.println("    " + desc + " dim=" + dim + " count=" + slice.count + ":");    
    try(PointReader r = slice.writer.getReader(slice.start, slice.count)) {
      int count = 0;
      while (r.next()) {
        byte[] v = r.packedValue();
        System.out.println("      " + count + ": " + new BytesRef(v, dim*bytesPerDim, bytesPerDim));
        count++;
        if (count == slice.count) {
          break;
        }
      }
    }
  }
  */
/**
 * Sorts all points by the given dimension and returns a writer holding them in sorted order.
 *
 * <p>If the points are still held by {@code heapPointWriter}, they are sorted in heap:
 * dimension 0 sorts that writer directly, any other dimension sorts a private copy.
 * Otherwise the temp file named by {@code tempInput} is sorted with an {@link OfflineSorter}.
 *
 * @param dim dimension to sort by; asserted to lie in {@code [0, numDims)}
 * @return the (closed) heap writer in the in-heap case, or a new {@link OfflinePointWriter}
 *     over the sorted file in the offline case
 * @throws IOException propagated from the heap sort or the offline sort
 */
private PointWriter sort(int dim) throws IOException {
    assert dim >= 0 && dim < numDims;

    if (heapPointWriter == null) {
        // Points were spilled to disk: sort the temp file offline.
        assert tempInput != null;
        final int offset = bytesPerDim * dim;
        final Comparator<BytesRef> comparator;
        if (dim == numDims - 1) {
            // Last dimension: its bytes and the doc id bytes sit next to each other,
            // so a single linear read suffices and no branch is needed.
            comparator = new BytesRefComparator(bytesPerDim + Integer.BYTES) {

                @Override
                protected int byteAt(BytesRef ref, int i) {
                    return ref.bytes[ref.offset + offset + i] & 0xff;
                }
            };
        } else {
            // Other dimensions: the first bytesPerDim bytes come from the dimension's slot,
            // the remaining ones from the region starting at packedBytesLength.
            comparator = new BytesRefComparator(bytesPerDim + Integer.BYTES) {

                @Override
                protected int byteAt(BytesRef ref, int i) {
                    final int index = i < bytesPerDim ? offset + i : packedBytesLength + i - bytesPerDim;
                    return ref.bytes[ref.offset + index] & 0xff;
                }
            };
        }

        final OfflineSorter offlineSorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, comparator, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc, null, 0) {

            /** Writes fixed-width records (bytesPerDoc bytes each), the layout {@link OfflinePointReader} reads. */
            @Override
            protected ByteSequencesWriter getWriter(IndexOutput out, long count) {
                return new ByteSequencesWriter(out) {

                    @Override
                    public void write(byte[] bytes, int off, int len) throws IOException {
                        assert len == bytesPerDoc : "len=" + len + " bytesPerDoc=" + bytesPerDoc;
                        out.writeBytes(bytes, off, len);
                    }
                };
            }

            /** Reads back fixed-width records (bytesPerDoc bytes each), the layout {@link OfflinePointReader} reads. */
            @Override
            protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
                return new ByteSequencesReader(in, name) {

                    // One buffer, refilled and handed out again on every call to next().
                    final BytesRef scratch = new BytesRef(new byte[bytesPerDoc]);

                    @Override
                    public BytesRef next() throws IOException {
                        if (in.getFilePointer() >= end) {
                            return null;
                        }
                        in.readBytes(scratch.bytes, 0, bytesPerDoc);
                        return scratch;
                    }
                };
            }
        };

        final String sortedFileName = offlineSorter.sort(tempInput.getName());
        return new OfflinePointWriter(tempDir, sortedFileName, packedBytesLength, pointCount, longOrds, singleValuePerDoc);
    }

    // Points never left the heap, so sort them in memory.
    assert tempInput == null;
    final HeapPointWriter heapSorted;
    if (dim != 0) {
        // Every dimension after the first works on its own copy of the points.
        heapSorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds, singleValuePerDoc);
        heapSorted.copyFrom(heapPointWriter);
    } else {
        // The first dimension can sort the existing heap writer in place.
        heapSorted = heapPointWriter;
    }
    sortHeapPointWriter(heapSorted, dim);
    heapSorted.close();
    return heapSorted;
}
Also used : OfflineSorter(org.apache.lucene.util.OfflineSorter) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) BytesRefComparator(org.apache.lucene.util.BytesRefComparator) IndexOutput(org.apache.lucene.store.IndexOutput) BytesRef(org.apache.lucene.util.BytesRef)

Example 23 with ChecksumIndexInput

use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.

the class Lucene50RWSegmentInfoFormat method read.

/**
 * Reads the segment info file of {@code segment} from {@code dir} and builds a
 * {@link SegmentInfo} from it.
 *
 * <p>Any failure while decoding is captured and handed to
 * {@code CodecUtil.checkFooter(input, priorException)}, which is always invoked before returning.
 *
 * @param dir directory the file is opened from
 * @param segment segment name, used to derive the file name
 * @param segmentID id passed to the index header check and stored in the result
 * @param context IO context used to open the file
 * @return the decoded segment info
 * @throws IOException propagated from opening, reading or verifying the file; a
 *     {@link CorruptIndexException} is raised for a negative doc count
 */
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
    final String siFileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
    try (ChecksumIndexInput input = dir.openChecksumInput(siFileName, context)) {
        SegmentInfo info = null;
        Throwable failure = null;
        try {
            CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME, Lucene50SegmentInfoFormat.VERSION_START, Lucene50SegmentInfoFormat.VERSION_CURRENT, segmentID, "");

            // The version is stored as three consecutive ints, read in this order.
            final int firstVersionBits = input.readInt();
            final int secondVersionBits = input.readInt();
            final int thirdVersionBits = input.readInt();
            final Version version = Version.fromBits(firstVersionBits, secondVersionBits, thirdVersionBits);

            final int docCount = input.readInt();
            if (docCount < 0) {
                throw new CorruptIndexException("invalid docCount: " + docCount, input);
            }

            final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
            final Map<String, String> diagnostics = input.readMapOfStrings();
            final Set<String> files = input.readSetOfStrings();
            final Map<String, String> attributes = input.readMapOfStrings();

            info = new SegmentInfo(dir, version, null, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null);
            info.setFiles(files);
        } catch (Throwable t) {
            // Remember the failure so the footer check can take it into account.
            failure = t;
        } finally {
            CodecUtil.checkFooter(input, failure);
        }
        return info;
    }
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) Version(org.apache.lucene.util.Version) SegmentInfo(org.apache.lucene.index.SegmentInfo) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Example 24 with ChecksumIndexInput

use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.

the class TestCodecUtil method testCheckFooterValid.

/**
 * Checks that {@code CodecUtil.checkFooter(input, priorException)} rethrows the prior
 * exception and attaches exactly one suppressed exception whose message contains
 * "checksum passed" when the file ends with a footer written by {@code CodecUtil.writeFooter}.
 */
public void testCheckFooterValid() throws Exception {
    RAMFile file = new RAMFile();
    // try-with-resources closes the output before the file is read back, and also
    // closes it if one of the writes throws.
    try (IndexOutput output = new RAMOutputStream(file, true)) {
        CodecUtil.writeHeader(output, "FooBar", 5);
        output.writeString("this is the data");
        CodecUtil.writeFooter(output);
    }
    // The input is released even when one of the assertions below fails.
    try (ChecksumIndexInput input = new BufferedChecksumIndexInput(new RAMInputStream("file", file))) {
        Exception mine = new RuntimeException("fake exception");
        RuntimeException expected = expectThrows(RuntimeException.class, () -> {
            CodecUtil.checkFooter(input, mine);
        });
        assertEquals("fake exception", expected.getMessage());
        Throwable[] suppressed = expected.getSuppressed();
        assertEquals(1, suppressed.length);
        assertTrue(suppressed[0].getMessage().contains("checksum passed"));
    }
}
Also used : RAMFile(org.apache.lucene.store.RAMFile) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput) RAMInputStream(org.apache.lucene.store.RAMInputStream) RAMOutputStream(org.apache.lucene.store.RAMOutputStream) IndexOutput(org.apache.lucene.store.IndexOutput) IOException(java.io.IOException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Example 25 with ChecksumIndexInput

use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.

the class TestCodecUtil method testCheckFooterInvalid.

/**
 * Checks that {@code CodecUtil.checkFooter(input, priorException)} rethrows the prior
 * exception and attaches exactly one suppressed exception whose message contains
 * "checksum failed" when the file ends with a hand-written footer carrying a bogus checksum.
 */
public void testCheckFooterInvalid() throws Exception {
    RAMFile file = new RAMFile();
    // try-with-resources closes the output before the file is read back, and also
    // closes it if one of the writes throws.
    try (IndexOutput output = new RAMOutputStream(file, true)) {
        CodecUtil.writeHeader(output, "FooBar", 5);
        output.writeString("this is the data");
        // Hand-written footer: magic, a zero int, then the checksum value.
        output.writeInt(CodecUtil.FOOTER_MAGIC);
        output.writeInt(0);
        // write a bogus checksum
        output.writeLong(1234567);
    }
    // The input is released even when one of the assertions below fails.
    try (ChecksumIndexInput input = new BufferedChecksumIndexInput(new RAMInputStream("file", file))) {
        CodecUtil.checkHeader(input, "FooBar", 5, 5);
        assertEquals("this is the data", input.readString());
        Exception mine = new RuntimeException("fake exception");
        RuntimeException expected = expectThrows(RuntimeException.class, () -> {
            CodecUtil.checkFooter(input, mine);
        });
        assertEquals("fake exception", expected.getMessage());
        Throwable[] suppressed = expected.getSuppressed();
        assertEquals(1, suppressed.length);
        assertTrue(suppressed[0].getMessage().contains("checksum failed"));
    }
}
Also used : RAMFile(org.apache.lucene.store.RAMFile) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput) RAMInputStream(org.apache.lucene.store.RAMInputStream) RAMOutputStream(org.apache.lucene.store.RAMOutputStream) IndexOutput(org.apache.lucene.store.IndexOutput) IOException(java.io.IOException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Aggregations

ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 36; CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 19; BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput): 11; IndexOutput (org.apache.lucene.store.IndexOutput): 10; IndexInput (org.apache.lucene.store.IndexInput): 9; IOException (java.io.IOException): 8; BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 6; SegmentInfo (org.apache.lucene.index.SegmentInfo): 5; Directory (org.apache.lucene.store.Directory): 5; Path (java.nio.file.Path): 4; RAMFile (org.apache.lucene.store.RAMFile): 4; RAMInputStream (org.apache.lucene.store.RAMInputStream): 4; RAMOutputStream (org.apache.lucene.store.RAMOutputStream): 4; BytesRef (org.apache.lucene.util.BytesRef): 4; Version (org.apache.lucene.util.Version): 4; FileChannel (java.nio.channels.FileChannel): 3; IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException): 3; IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException): 3; Sort (org.apache.lucene.search.Sort): 3; SortField (org.apache.lucene.search.SortField): 3