use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
the class SimpleTextTermVectorsReader method readIndex.
// we don't actually write a .tvx-like index, instead we read the
// vectors file in entirety up-front and save the offsets
// so we can seek to the data later.
private void readIndex(int maxDoc) throws IOException {
ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
offsets = new long[maxDoc];
int upto = 0;
while (!scratch.get().equals(END)) {
SimpleTextUtil.readLine(input, scratch);
if (StringHelper.startsWith(scratch.get(), DOC)) {
offsets[upto] = input.getFilePointer();
upto++;
}
}
SimpleTextUtil.checkFooter(input);
assert upto == offsets.length;
}
use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
the class BKDWriter method sort.
// useful for debugging:
/*
private void printPathSlice(String desc, PathSlice slice, int dim) throws IOException {
System.out.println(" " + desc + " dim=" + dim + " count=" + slice.count + ":");
try(PointReader r = slice.writer.getReader(slice.start, slice.count)) {
int count = 0;
while (r.next()) {
byte[] v = r.packedValue();
System.out.println(" " + count + ": " + new BytesRef(v, dim*bytesPerDim, bytesPerDim));
count++;
if (count == slice.count) {
break;
}
}
}
}
*/
private PointWriter sort(int dim) throws IOException {
assert dim >= 0 && dim < numDims;
if (heapPointWriter != null) {
assert tempInput == null;
// We never spilled the incoming points to disk, so now we sort in heap:
HeapPointWriter sorted;
if (dim == 0) {
// First dim can re-use the current heap writer
sorted = heapPointWriter;
} else {
// Subsequent dims need a private copy
sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds, singleValuePerDoc);
sorted.copyFrom(heapPointWriter);
}
//long t0 = System.nanoTime();
sortHeapPointWriter(sorted, dim);
//long t1 = System.nanoTime();
//System.out.println("BKD: sort took " + ((t1-t0)/1000000.0) + " msec");
sorted.close();
return sorted;
} else {
// Offline sort:
assert tempInput != null;
final int offset = bytesPerDim * dim;
Comparator<BytesRef> cmp;
if (dim == numDims - 1) {
// in that case the bytes for the dimension and for the doc id are contiguous,
// so we don't need a branch
cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
@Override
protected int byteAt(BytesRef ref, int i) {
return ref.bytes[ref.offset + offset + i] & 0xff;
}
};
} else {
cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
@Override
protected int byteAt(BytesRef ref, int i) {
if (i < bytesPerDim) {
return ref.bytes[ref.offset + offset + i] & 0xff;
} else {
return ref.bytes[ref.offset + packedBytesLength + i - bytesPerDim] & 0xff;
}
}
};
}
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc, null, 0) {
/** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
@Override
protected ByteSequencesWriter getWriter(IndexOutput out, long count) {
return new ByteSequencesWriter(out) {
@Override
public void write(byte[] bytes, int off, int len) throws IOException {
assert len == bytesPerDoc : "len=" + len + " bytesPerDoc=" + bytesPerDoc;
out.writeBytes(bytes, off, len);
}
};
}
/** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
@Override
protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
return new ByteSequencesReader(in, name) {
final BytesRef scratch = new BytesRef(new byte[bytesPerDoc]);
@Override
public BytesRef next() throws IOException {
if (in.getFilePointer() >= end) {
return null;
}
in.readBytes(scratch.bytes, 0, bytesPerDoc);
return scratch;
}
};
}
};
String name = sorter.sort(tempInput.getName());
return new OfflinePointWriter(tempDir, name, packedBytesLength, pointCount, longOrds, singleValuePerDoc);
}
}
use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
the class Lucene50RWSegmentInfoFormat method read.
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
Throwable priorE = null;
SegmentInfo si = null;
try {
CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME, Lucene50SegmentInfoFormat.VERSION_START, Lucene50SegmentInfoFormat.VERSION_CURRENT, segmentID, "");
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String, String> diagnostics = input.readMapOfStrings();
final Set<String> files = input.readSetOfStrings();
final Map<String, String> attributes = input.readMapOfStrings();
si = new SegmentInfo(dir, version, null, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null);
si.setFiles(files);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return si;
}
}
use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
the class TestCodecUtil method testCheckFooterValid.
public void testCheckFooterValid() throws Exception {
RAMFile file = new RAMFile();
IndexOutput output = new RAMOutputStream(file, true);
CodecUtil.writeHeader(output, "FooBar", 5);
output.writeString("this is the data");
CodecUtil.writeFooter(output);
output.close();
ChecksumIndexInput input = new BufferedChecksumIndexInput(new RAMInputStream("file", file));
Exception mine = new RuntimeException("fake exception");
RuntimeException expected = expectThrows(RuntimeException.class, () -> {
CodecUtil.checkFooter(input, mine);
});
assertEquals("fake exception", expected.getMessage());
Throwable[] suppressed = expected.getSuppressed();
assertEquals(1, suppressed.length);
assertTrue(suppressed[0].getMessage().contains("checksum passed"));
input.close();
}
use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
the class TestCodecUtil method testCheckFooterInvalid.
public void testCheckFooterInvalid() throws Exception {
RAMFile file = new RAMFile();
IndexOutput output = new RAMOutputStream(file, true);
CodecUtil.writeHeader(output, "FooBar", 5);
output.writeString("this is the data");
output.writeInt(CodecUtil.FOOTER_MAGIC);
output.writeInt(0);
// write a bogus checksum
output.writeLong(1234567);
output.close();
ChecksumIndexInput input = new BufferedChecksumIndexInput(new RAMInputStream("file", file));
CodecUtil.checkHeader(input, "FooBar", 5, 5);
assertEquals("this is the data", input.readString());
Exception mine = new RuntimeException("fake exception");
RuntimeException expected = expectThrows(RuntimeException.class, () -> {
CodecUtil.checkFooter(input, mine);
});
assertEquals("fake exception", expected.getMessage());
Throwable[] suppressed = expected.getSuppressed();
assertEquals(1, suppressed.length);
assertTrue(suppressed[0].getMessage().contains("checksum failed"));
input.close();
}
Aggregations