Use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
Class Lucene50CompoundFormat, method write.
@Override
public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException {
  String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
  String entriesFile = IndexFileNames.segmentFileName(si.name, "", ENTRIES_EXTENSION);
  byte[] expectedID = si.getId();
  byte[] id = new byte[StringHelper.ID_LENGTH];
  try (IndexOutput data = dir.createOutput(dataFile, context);
       IndexOutput entries = dir.createOutput(entriesFile, context)) {
    CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), "");
    CodecUtil.writeIndexHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), "");
    // write number of files
    entries.writeVInt(si.files().size());
    for (String file : si.files()) {
      // write bytes for file
      long startOffset = data.getFilePointer();
      try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {
        // just copies the index header, verifying that its id matches what we expect
        CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId());
        // copy all bytes except the footer
        long numBytesToCopy = in.length() - CodecUtil.footerLength() - in.getFilePointer();
        data.copyBytes(in, numBytesToCopy);
        // verify footer (checksum) matches for the incoming file we are copying
        long checksum = CodecUtil.checkFooter(in);
        // this is poached from CodecUtil.writeFooter, but we need to use our own checksum, not data.getChecksum(), but I think
        // adding a public method to CodecUtil to do that is somewhat dangerous:
        data.writeInt(CodecUtil.FOOTER_MAGIC);
        data.writeInt(0);
        data.writeLong(checksum);
      }
      long endOffset = data.getFilePointer();
      long length = endOffset - startOffset;
      // write entry for file
      entries.writeString(IndexFileNames.stripSegmentName(file));
      entries.writeLong(startOffset);
      entries.writeLong(length);
    }
    CodecUtil.writeFooter(data);
    CodecUtil.writeFooter(entries);
  }
}
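The write() method relies on ChecksumIndexInput in two places: CodecUtil.verifyAndCopyIndexHeader checks each sub-file's segment id while copying its header, and CodecUtil.checkFooter validates the sub-file's checksum once its body has been streamed into the .cfs data file. As a rough sketch, the same footer validation can also be done stand-alone before packing; the helper name verifyWholeFile below is an assumption for illustration, not part of the Lucene API (it assumes org.apache.lucene.codecs.CodecUtil and org.apache.lucene.store.* imports).

// Hypothetical helper: validate one file's footer checksum by streaming it
// through a ChecksumIndexInput.
static long verifyWholeFile(Directory dir, String file) throws IOException {
  try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {
    // a forward seek on a ChecksumIndexInput reads (and checksums) the skipped bytes,
    // so after this call the running checksum covers everything except the footer
    in.seek(in.length() - CodecUtil.footerLength());
    // compares the stored checksum against in.getChecksum(), throwing
    // CorruptIndexException on mismatch, and returns the checksum value
    return CodecUtil.checkFooter(in);
  }
}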
Use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
Class Lucene50CompoundReader, method readEntries.
/** Helper method that reads CFS entries from an input stream */
private Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
  Map<String, FileEntry> mapping = null;
  try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
    Throwable priorE = null;
    try {
      version = CodecUtil.checkIndexHeader(entriesStream, Lucene50CompoundFormat.ENTRY_CODEC,
          Lucene50CompoundFormat.VERSION_START, Lucene50CompoundFormat.VERSION_CURRENT, segmentID, "");
      final int numEntries = entriesStream.readVInt();
      mapping = new HashMap<>(numEntries);
      for (int i = 0; i < numEntries; i++) {
        final FileEntry fileEntry = new FileEntry();
        final String id = entriesStream.readString();
        FileEntry previous = mapping.put(id, fileEntry);
        if (previous != null) {
          throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", entriesStream);
        }
        fileEntry.offset = entriesStream.readLong();
        fileEntry.length = entriesStream.readLong();
      }
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      CodecUtil.checkFooter(entriesStream, priorE);
    }
  }
  return Collections.unmodifiableMap(mapping);
}
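readEntries() follows the standard Lucene read pattern for checksummed files: open a ChecksumIndexInput, validate the index header, read the payload inside a try/catch that only records the first failure, and call CodecUtil.checkFooter(in, priorE) in a finally block so the checksum is always inspected and corruption is reported alongside, rather than masked by, any earlier exception. A stripped-down sketch of that pattern, with placeholder names (exampleFile, ExampleCodec, readBody) that are not part of the real Lucene50 code:

try (ChecksumIndexInput in = dir.openChecksumInput("exampleFile", IOContext.READONCE)) {
  Throwable priorE = null;
  try {
    CodecUtil.checkIndexHeader(in, "ExampleCodec", 0, 0, segmentID, "");
    readBody(in); // placeholder for the format-specific payload
  } catch (Throwable t) {
    priorE = t; // remember the first failure, do not rethrow yet
  } finally {
    // verifies the checksum; if priorE is non-null it is rethrown with the
    // checksum status attached, so corruption is never silently swallowed
    CodecUtil.checkFooter(in, priorE);
  }
}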
Use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
Class MemoryPostingsFormat, method fieldsProducer.
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
  final SortedMap<String, TermsReader> fields = new TreeMap<>();
  try (ChecksumIndexInput in = state.directory.openChecksumInput(fileName, IOContext.READONCE)) {
    Throwable priorE = null;
    try {
      CodecUtil.checkIndexHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      while (true) {
        final int termCount = in.readVInt();
        if (termCount == 0) {
          break;
        }
        final TermsReader termsReader = new TermsReader(state.fieldInfos, in, termCount);
        // System.out.println("load field=" + termsReader.field.name);
        fields.put(termsReader.field.name, termsReader);
      }
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      CodecUtil.checkFooter(in, priorE);
    }
  }
  return new FieldsProducer() {

    @Override
    public Iterator<String> iterator() {
      return Collections.unmodifiableSet(fields.keySet()).iterator();
    }

    @Override
    public Terms terms(String field) {
      return fields.get(field);
    }

    @Override
    public int size() {
      return fields.size();
    }

    @Override
    public void close() {
      // Drop ref to FST:
      for (TermsReader termsReader : fields.values()) {
        termsReader.fst = null;
      }
    }

    @Override
    public long ramBytesUsed() {
      long sizeInBytes = 0;
      for (Map.Entry<String, TermsReader> entry : fields.entrySet()) {
        sizeInBytes += (entry.getKey().length() * Character.BYTES);
        sizeInBytes += entry.getValue().ramBytesUsed();
      }
      return sizeInBytes;
    }

    @Override
    public Collection<Accountable> getChildResources() {
      return Accountables.namedAccountables("field", fields);
    }

    @Override
    public String toString() {
      return "MemoryPostings(fields=" + fields.size() + ")";
    }

    @Override
    public void checkIntegrity() throws IOException {
    }
  };
}
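Because the whole postings payload is loaded into the fields map up front, and its checksum has already been verified by CodecUtil.checkFooter inside fieldsProducer, the anonymous FieldsProducer can leave checkIntegrity() empty. For comparison, a hedged sketch of what checkIntegrity typically looks like in a disk-backed producer that keeps its file open; dataIn is a hypothetical IndexInput field of such a producer, not part of this class:

@Override
public void checkIntegrity() throws IOException {
  // re-reads the entire file through a checksum and compares it against the footer
  CodecUtil.checksumEntireFile(dataIn);
}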
Use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
Class SimpleTextBKDWriter, method sort.
private PointWriter sort(int dim) throws IOException {
  assert dim >= 0 && dim < numDims;
  if (heapPointWriter != null) {
    assert tempInput == null;
    // We never spilled the incoming points to disk, so now we sort in heap:
    HeapPointWriter sorted;
    if (dim == 0) {
      // First dim can re-use the current heap writer
      sorted = heapPointWriter;
    } else {
      // Subsequent dims need a private copy
      sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds, singleValuePerDoc);
      sorted.copyFrom(heapPointWriter);
    }
    //long t0 = System.nanoTime();
    sortHeapPointWriter(sorted, dim);
    //long t1 = System.nanoTime();
    //System.out.println("BKD: sort took " + ((t1-t0)/1000000.0) + " msec");
    sorted.close();
    return sorted;
  } else {
    // Offline sort:
    assert tempInput != null;
    final int offset = bytesPerDim * dim;
    Comparator<BytesRef> cmp;
    if (dim == numDims - 1) {
      // in that case the bytes for the dimension and for the doc id are contiguous,
      // so we don't need a branch
      cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
        @Override
        protected int byteAt(BytesRef ref, int i) {
          return ref.bytes[ref.offset + offset + i] & 0xff;
        }
      };
    } else {
      cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
        @Override
        protected int byteAt(BytesRef ref, int i) {
          if (i < bytesPerDim) {
            return ref.bytes[ref.offset + offset + i] & 0xff;
          } else {
            return ref.bytes[ref.offset + packedBytesLength + i - bytesPerDim] & 0xff;
          }
        }
      };
    }
    OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp,
        offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc, null, 0) {

      /** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
      @Override
      protected ByteSequencesWriter getWriter(IndexOutput out, long count) {
        return new ByteSequencesWriter(out) {
          @Override
          public void write(byte[] bytes, int off, int len) throws IOException {
            assert len == bytesPerDoc : "len=" + len + " bytesPerDoc=" + bytesPerDoc;
            out.writeBytes(bytes, off, len);
          }
        };
      }

      /** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
      @Override
      protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
        return new ByteSequencesReader(in, name) {
          final BytesRef scratch = new BytesRef(new byte[bytesPerDoc]);

          @Override
          public BytesRef next() throws IOException {
            if (in.getFilePointer() >= end) {
              return null;
            }
            in.readBytes(scratch.bytes, 0, bytesPerDoc);
            return scratch;
          }
        };
      }
    };
    String name = sorter.sort(tempInput.getName());
    return new OfflinePointWriter(tempDir, name, packedBytesLength, pointCount, longOrds, singleValuePerDoc);
  }
}
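In the offline path, ChecksumIndexInput shows up in the overridden OfflineSorter.getReader: next() reads fixed-width records (bytesPerDoc bytes each) straight off the checksummed input, so corruption in the temporary sort files can surface as a checksum failure. As a rough, hedged sketch of consuming such a fixed-width file yourself and verifying its footer afterwards; recordLen and processRecord are placeholders, and the sketch assumes the file was written with a CodecUtil footer:

try (ChecksumIndexInput in = tempDir.openChecksumInput(name, IOContext.READONCE)) {
  long end = in.length() - CodecUtil.footerLength();
  byte[] record = new byte[recordLen];
  while (in.getFilePointer() < end) {
    in.readBytes(record, 0, record.length);
    processRecord(record); // placeholder for per-record handling
  }
  // every payload byte has passed through the checksum; now validate the footer
  CodecUtil.checkFooter(in);
}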
Use of org.apache.lucene.store.ChecksumIndexInput in project crate by crate.
Class Store, method failIfCorrupted.
private static void failIfCorrupted(Directory directory) throws IOException {
  final String[] files = directory.listAll();
  List<CorruptIndexException> ex = new ArrayList<>();
  for (String file : files) {
    if (file.startsWith(CORRUPTED_MARKER_NAME_PREFIX)) {
      try (ChecksumIndexInput input = directory.openChecksumInput(file, IOContext.READONCE)) {
        CodecUtil.checkHeader(input, CODEC, CORRUPTED_MARKER_CODEC_VERSION, CORRUPTED_MARKER_CODEC_VERSION);
        final int size = input.readVInt();
        final byte[] buffer = new byte[size];
        input.readBytes(buffer, 0, buffer.length);
        StreamInput in = StreamInput.wrap(buffer);
        Exception t = in.readException();
        if (t instanceof CorruptIndexException) {
          ex.add((CorruptIndexException) t);
        } else {
          ex.add(new CorruptIndexException(t.getMessage(), "preexisting_corruption", t));
        }
        CodecUtil.checkFooter(input);
      }
    }
  }
  if (ex.isEmpty() == false) {
    ExceptionsHelper.rethrowAndSuppress(ex);
  }
}
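failIfCorrupted is the reading side: any file whose name starts with CORRUPTED_MARKER_NAME_PREFIX is opened through a ChecksumIndexInput, the recorded exception is deserialized, and the marker's own footer is verified before the corruption is rethrown. A hedged sketch of what the writing counterpart might look like (the store's actual marker-writing method is not shown here); markerName and payload are assumptions, with payload standing in for the already-serialized exception bytes:

void writeCorruptionMarker(Directory directory, String markerName, byte[] payload) throws IOException {
  try (IndexOutput output = directory.createOutput(markerName, IOContext.DEFAULT)) {
    // same header/footer framing that failIfCorrupted expects to find
    CodecUtil.writeHeader(output, CODEC, CORRUPTED_MARKER_CODEC_VERSION);
    output.writeVInt(payload.length);
    output.writeBytes(payload, 0, payload.length);
    CodecUtil.writeFooter(output);
  }
}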