Use of org.apache.lucene.store.ChecksumIndexInput in project crate by crate.
The class CorruptionUtils, method corruptFile.
/**
 * Corrupts a random file at a random position.
 *
 * <p>One of {@code files} is picked at random, its checksum is retrieved, a random position
 * inside the file is corrupted via {@code corruptAt}, and the checksum is then read again.
 * If neither the checksum reported by the checksum input nor the value read from the last
 * eight bytes of the file differs from the checksum retrieved before the corruption, the
 * {@code assumeTrue} guard fails with a "Checksum collision" message.
 *
 * @param random source of randomness used to pick the file and the position
 * @param files  candidate files; must be non-empty, and the chosen one must be a regular file
 * @throws IOException if reading or modifying the chosen file fails
 */
public static void corruptFile(Random random, Path... files) throws IOException {
    assertTrue("files must be non-empty", files.length > 0);
    final Path fileToCorrupt = RandomPicks.randomFrom(random, files);
    assertTrue(fileToCorrupt + " is not a file", Files.isRegularFile(fileToCorrupt));
    // The bare file name is what the Directory API expects; compute it once.
    final String fileName = fileToCorrupt.getFileName().toString();
    try (Directory dir = FSDirectory.open(fileToCorrupt.toAbsolutePath().getParent())) {
        long checksumBeforeCorruption;
        try (IndexInput input = dir.openInput(fileName, IOContext.DEFAULT)) {
            checksumBeforeCorruption = CodecUtil.retrieveChecksum(input);
        }
        try (FileChannel raf = FileChannel.open(fileToCorrupt, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
            long maxPosition = raf.size();
            // Random.nextInt takes an int bound, so positions are capped at Integer.MAX_VALUE.
            final int position = random.nextInt((int) Math.min(Integer.MAX_VALUE, maxPosition));
            corruptAt(fileToCorrupt, raf, position);
        }
        long checksumAfterCorruption;
        long actualChecksumAfterCorruption;
        try (ChecksumIndexInput input = dir.openChecksumInput(fileName, IOContext.DEFAULT)) {
            assertThat(input.getFilePointer(), is(0L));
            // one long is the checksum... 8 bytes
            input.seek(input.length() - 8);
            checksumAfterCorruption = input.getChecksum();
            actualChecksumAfterCorruption = input.readLong();
        }
        // we need to add assumptions here that the checksums actually really don't match; there is a small
        // chance to get collisions in the checksum, which is ok though....
        StringBuilder msg = new StringBuilder();
        msg.append("before: [").append(checksumBeforeCorruption).append("] ");
        msg.append("after: [").append(checksumAfterCorruption).append("] ");
        // Opening bracket added so the value is delimited like the two entries above.
        msg.append("checksum value after corruption: [").append(actualChecksumAfterCorruption).append("] ");
        msg.append("file: ").append(fileToCorrupt.getFileName()).append(" length: ");
        msg.append(dir.fileLength(fileName));
        logger.info("Checksum {}", msg);
        assumeTrue("Checksum collision - " + msg.toString(), // collision
            checksumAfterCorruption != checksumBeforeCorruption || // checksum corrupted
            actualChecksumAfterCorruption != checksumBeforeCorruption);
        assertThat("no file corrupted", fileToCorrupt, notNullValue());
    }
}
Use of org.apache.lucene.store.ChecksumIndexInput in project crate by crate.
The class StoreTests, method testVerifyingIndexInput.
/**
 * Writes random bytes followed by a codec footer to {@code foo.bar}, checks that a
 * {@code Store.VerifyingIndexInput} over that file verifies and reports the same checksum
 * as {@code CodecUtil.retrieveChecksum}, then calls {@code corruptFile} and checks that
 * verifying {@code foo1.bar} (presumably the corrupted copy) fails.
 */
public void testVerifyingIndexInput() throws IOException {
    final String intactName = "foo.bar";
    final String corruptedName = "foo1.bar";
    Directory dir = newDirectory();

    // Write a random number of random unicode chunks, then the footer.
    IndexOutput out = dir.createOutput(intactName, IOContext.DEFAULT);
    final int rounds = scaledRandomIntBetween(10, 100);
    int written = 0;
    while (written < rounds) {
        BytesRef chunk = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
        out.writeBytes(chunk.bytes, chunk.offset, chunk.length);
        written++;
    }
    CodecUtil.writeFooter(out);
    out.close();

    // Check file
    IndexInput plainInput = dir.openInput(intactName, IOContext.DEFAULT);
    long expectedChecksum = CodecUtil.retrieveChecksum(plainInput);
    plainInput.seek(0);
    IndexInput verifying = new Store.VerifyingIndexInput(dir.openInput(intactName, IOContext.DEFAULT));
    readIndexInputFullyWithRandomSeeks(verifying);
    Store.verify(verifying);
    ChecksumIndexInput asChecksumInput = (ChecksumIndexInput) verifying;
    assertThat(expectedChecksum, equalTo(asChecksumInput.getChecksum()));
    IOUtils.close(plainInput, verifying);

    // Corrupt file and check again
    corruptFile(dir, intactName, corruptedName);
    verifying = new Store.VerifyingIndexInput(dir.openInput(corruptedName, IOContext.DEFAULT));
    readIndexInputFullyWithRandomSeeks(verifying);
    try {
        Store.verify(verifying);
        fail("should be a corrupted index");
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException expected) {
        // ok
    }
    IOUtils.close(verifying);
    IOUtils.close(dir);
}
Use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
The class SimpleTextFieldsReader, method readFields.
/**
 * Reads {@code in} line by line through a checksumming wrapper and records, for every line
 * starting with {@code FIELD}, the file pointer right after that line, keyed by the field
 * name that follows the prefix. Reading stops at the {@code END} line, after which the
 * footer is checked.
 *
 * @param in input to scan
 * @return field names mapped to the file pointer following their field line
 * @throws IOException if reading fails or the footer check fails
 */
private TreeMap<String, Long> readFields(IndexInput in) throws IOException {
    final ChecksumIndexInput checksumIn = new BufferedChecksumIndexInput(in);
    final BytesRefBuilder line = new BytesRefBuilder();
    final TreeMap<String, Long> offsetsByField = new TreeMap<>();
    for (;;) {
        SimpleTextUtil.readLine(checksumIn, line);
        if (line.get().equals(END)) {
            break;
        }
        if (!StringHelper.startsWith(line.get(), FIELD)) {
            continue;
        }
        // The field name is everything on the line after the FIELD prefix.
        final String name = new String(line.bytes(), FIELD.length, line.length() - FIELD.length, StandardCharsets.UTF_8);
        offsetsByField.put(name, checksumIn.getFilePointer());
    }
    SimpleTextUtil.checkFooter(checksumIn);
    return offsetsByField;
}
Use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
The class SimpleTextSegmentInfoFormat, method read.
/**
 * Reads a segment info file for {@code segmentName} from {@code directory}.
 *
 * <p>The file is parsed line by line in a fixed order: version, minimum version, document
 * count, compound-file flag, diagnostics, attributes, file names, segment id, index sort
 * fields, and finally the footer (checked via {@code SimpleTextUtil.checkFooter}).
 *
 * @param directory   directory to open the segment info file from
 * @param segmentName name of the segment, used to derive the file name
 * @param segmentID   expected segment id; must equal the id stored in the file
 * @param context     IO context used to open the file
 * @return the segment info described by the file
 * @throws CorruptIndexException if a version string, sort type or string missing-value
 *                               cannot be parsed, or the stored id differs from {@code segmentID}
 * @throws IOException           if reading the file fails
 */
@Override
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
    BytesRefBuilder scratch = new BytesRefBuilder();
    String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(segFileName, context)) {
        // Version that wrote the segment.
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_VERSION);
        final Version version;
        try {
            version = Version.parse(readString(SI_VERSION.length, scratch));
        } catch (ParseException pe) {
            throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
        }

        // Minimum version; the literal "null" stands for an absent value.
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_MIN_VERSION);
        Version minVersion;
        try {
            String versionString = readString(SI_MIN_VERSION.length, scratch);
            if (versionString.equals("null")) {
                minVersion = null;
            } else {
                minVersion = Version.parse(versionString);
            }
        } catch (ParseException pe) {
            throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
        }

        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT);
        final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch));

        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND);
        final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch));

        // Diagnostics: a count followed by that many key/value line pairs.
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG);
        int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch));
        Map<String, String> diagnostics = new HashMap<>();
        for (int i = 0; i < numDiag; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY);
            String key = readString(SI_DIAG_KEY.length, scratch);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE);
            String value = readString(SI_DIAG_VALUE.length, scratch);
            diagnostics.put(key, value);
        }

        // Attributes: a count followed by that many key/value line pairs.
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_NUM_ATT);
        int numAtt = Integer.parseInt(readString(SI_NUM_ATT.length, scratch));
        Map<String, String> attributes = new HashMap<>(numAtt);
        for (int i = 0; i < numAtt; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_ATT_KEY);
            String key = readString(SI_ATT_KEY.length, scratch);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_ATT_VALUE);
            String value = readString(SI_ATT_VALUE.length, scratch);
            attributes.put(key, value);
        }

        // File names belonging to the segment.
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES);
        int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
        Set<String> files = new HashSet<>();
        for (int i = 0; i < numFiles; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_FILE);
            String fileName = readString(SI_FILE.length, scratch);
            files.add(fileName);
        }

        // Segment id: raw bytes after the prefix, compared against the caller's expectation.
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_ID);
        final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
        if (!Arrays.equals(segmentID, id)) {
            throw new CorruptIndexException("file mismatch, expected: " + StringHelper.idToString(segmentID) + ", got: " + StringHelper.idToString(id), input);
        }

        // Index sort: a count followed by one record per sort field.
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_SORT);
        final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch));
        SortField[] sortField = new SortField[numSortFields];
        for (int i = 0; i < numSortFields; ++i) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_SORT_FIELD);
            final String field = readString(SI_SORT_FIELD.length, scratch);

            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE);
            final String typeAsString = readString(SI_SORT_TYPE.length, scratch);

            // Multi-valued types carry an extra selector that is read right after the type.
            final SortField.Type type;
            SortedSetSelector.Type selectorSet = null;
            SortedNumericSelector.Type selectorNumeric = null;
            switch (typeAsString) {
                case "string":
                    type = SortField.Type.STRING;
                    break;
                case "long":
                    type = SortField.Type.LONG;
                    break;
                case "int":
                    type = SortField.Type.INT;
                    break;
                case "double":
                    type = SortField.Type.DOUBLE;
                    break;
                case "float":
                    type = SortField.Type.FLOAT;
                    break;
                case "multi_valued_string":
                    type = SortField.Type.STRING;
                    selectorSet = readSetSelector(input, scratch);
                    break;
                case "multi_valued_long":
                    type = SortField.Type.LONG;
                    selectorNumeric = readNumericSelector(input, scratch);
                    break;
                case "multi_valued_int":
                    type = SortField.Type.INT;
                    selectorNumeric = readNumericSelector(input, scratch);
                    break;
                case "multi_valued_double":
                    type = SortField.Type.DOUBLE;
                    selectorNumeric = readNumericSelector(input, scratch);
                    break;
                case "multi_valued_float":
                    type = SortField.Type.FLOAT;
                    selectorNumeric = readNumericSelector(input, scratch);
                    break;
                default:
                    throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
            }

            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_SORT_REVERSE);
            final boolean reverse = Boolean.parseBoolean(readString(SI_SORT_REVERSE.length, scratch));

            // Missing value: "null" means none; otherwise it is parsed according to the sort type.
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_SORT_MISSING);
            final String missingLastAsString = readString(SI_SORT_MISSING.length, scratch);
            final Object missingValue;
            switch (type) {
                case STRING:
                    switch (missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        case "first":
                            missingValue = SortField.STRING_FIRST;
                            break;
                        case "last":
                            missingValue = SortField.STRING_LAST;
                            break;
                        default:
                            // Report the value that failed to parse, not the sort type.
                            throw new CorruptIndexException("unable to parse missing string: " + missingLastAsString, input);
                    }
                    break;
                case LONG:
                    switch (missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        default:
                            missingValue = Long.parseLong(missingLastAsString);
                            break;
                    }
                    break;
                case INT:
                    switch (missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        default:
                            missingValue = Integer.parseInt(missingLastAsString);
                            break;
                    }
                    break;
                case DOUBLE:
                    switch (missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        default:
                            missingValue = Double.parseDouble(missingLastAsString);
                            break;
                    }
                    break;
                case FLOAT:
                    switch (missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        default:
                            missingValue = Float.parseFloat(missingLastAsString);
                            break;
                    }
                    break;
                default:
                    throw new AssertionError();
            }

            // Pass the parsed selector on to the sort field; without it the selector stored
            // in the file would be dropped in favour of the constructor's default.
            if (selectorSet != null) {
                sortField[i] = new SortedSetSortField(field, reverse, selectorSet);
            } else if (selectorNumeric != null) {
                sortField[i] = new SortedNumericSortField(field, type, reverse, selectorNumeric);
            } else {
                sortField[i] = new SortField(field, type, reverse);
            }
            if (missingValue != null) {
                sortField[i].setMissingValue(missingValue);
            }
        }
        // No sort fields means the segment has no index sort.
        Sort indexSort = sortField.length == 0 ? null : new Sort(sortField);

        SimpleTextUtil.checkFooter(input);

        SegmentInfo info = new SegmentInfo(directory, version, minVersion, segmentName, docCount, isCompoundFile, null, Collections.unmodifiableMap(diagnostics), id, Collections.unmodifiableMap(attributes), indexSort);
        info.setFiles(files);
        return info;
    }
}
Use of org.apache.lucene.store.ChecksumIndexInput in project lucene-solr by apache.
The class SimpleTextStoredFieldsReader, method readIndex.
// No .fdx-like index is written. Instead the stored fields file is read in its
// entirety up-front, and the file pointer following each DOC line is saved so that
// documents can be seeked to later.
private void readIndex(int size) throws IOException {
    final ChecksumIndexInput checksumIn = new BufferedChecksumIndexInput(in);
    offsets = new long[size];
    int docsSeen = 0;
    for (;;) {
        // The END check comes before the read, so the current content of scratch decides
        // whether the first line is read at all.
        if (scratch.get().equals(END)) {
            break;
        }
        SimpleTextUtil.readLine(checksumIn, scratch);
        if (!StringHelper.startsWith(scratch.get(), DOC)) {
            continue;
        }
        offsets[docsSeen] = checksumIn.getFilePointer();
        docsSeen++;
    }
    SimpleTextUtil.checkFooter(checksumIn);
    assert docsSeen == offsets.length;
}
Aggregations