Use of org.apache.lucene.index.CorruptIndexException in project elasticsearch by elastic.
The class StoreTests, method testVerifyingIndexOutputOnEmptyFile.
public void testVerifyingIndexOutputOnEmptyFile() throws IOException {
    Directory dir = newDirectory();
    IndexOutput verifyingOutput = new Store.LuceneVerifyingIndexOutput(new StoreFileMetaData("foo.bar", 0, Store.digestToString(0)),
        dir.createOutput("foo1.bar", IOContext.DEFAULT));
    try {
        Store.verify(verifyingOutput);
        fail("should be a corrupted index");
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
        // ok
    }
    IOUtils.close(verifyingOutput, dir);
}
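The test above expects Store.verify to flag an empty output as corrupted, since there is no valid checksum footer to verify. The same family of exceptions surfaces when verifying an arbitrary index file with Lucene's CodecUtil. The following is a minimal standalone sketch (not taken from either project; the class name and command-line arguments are hypothetical) of that pattern:

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

public class ChecksumCheckSketch {
    public static void main(String[] args) throws IOException {
        // args[0] = index directory, args[1] = file name inside it (hypothetical CLI arguments)
        try (Directory dir = FSDirectory.open(Paths.get(args[0]));
             IndexInput in = dir.openInput(args[1], IOContext.READONCE)) {
            // reads the whole file and verifies the checksum stored in the codec footer
            CodecUtil.checksumEntireFile(in);
            System.out.println("checksum OK");
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
            System.out.println("index file is corrupted or unreadable: " + e.getMessage());
        }
    }
}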
Use of org.apache.lucene.index.CorruptIndexException in project elasticsearch by elastic.
The class ExceptionSerializationTests, method testWriteThrowable.
public void testWriteThrowable() throws IOException {
    final QueryShardException queryShardException = new QueryShardException(new Index("foo", "_na_"), "foobar", null);
    final UnknownException unknownException = new UnknownException("this exception is unknown", queryShardException);
    final Exception[] causes = new Exception[] {
        new IllegalStateException("foobar"),
        new IllegalArgumentException("alalaal"),
        new NullPointerException("boom"),
        new EOFException("dadada"),
        new ElasticsearchSecurityException("nono!"),
        new NumberFormatException("not a number"),
        new CorruptIndexException("baaaam booom", "this is my resource"),
        new IndexFormatTooNewException("tooo new", 1, 2, 3),
        new IndexFormatTooOldException("tooo new", 1, 2, 3),
        new IndexFormatTooOldException("tooo new", "very old version"),
        new ArrayIndexOutOfBoundsException("booom"),
        new StringIndexOutOfBoundsException("booom"),
        new FileNotFoundException("booom"),
        new NoSuchFileException("booom"),
        new AlreadyClosedException("closed!!", new NullPointerException()),
        new LockObtainFailedException("can't lock directory", new NullPointerException()),
        unknownException };
    for (final Exception cause : causes) {
        ElasticsearchException ex = new ElasticsearchException("topLevel", cause);
        ElasticsearchException deserialized = serialize(ex);
        assertEquals(deserialized.getMessage(), ex.getMessage());
        assertTrue("Expected: " + deserialized.getCause().getMessage() + " to contain: " + ex.getCause().getClass().getName() + " but it didn't",
            deserialized.getCause().getMessage().contains(ex.getCause().getMessage()));
        if (ex.getCause().getClass() != UnknownException.class) {
            // unknown exception is not directly mapped
            assertEquals(deserialized.getCause().getClass(), ex.getCause().getClass());
        } else {
            assertEquals(deserialized.getCause().getClass(), NotSerializableExceptionWrapper.class);
        }
        assertArrayEquals(deserialized.getStackTrace(), ex.getStackTrace());
        assertTrue(deserialized.getStackTrace().length > 1);
        assertVersionSerializable(VersionUtils.randomVersion(random()), cause);
        assertVersionSerializable(VersionUtils.randomVersion(random()), ex);
        assertVersionSerializable(VersionUtils.randomVersion(random()), deserialized);
    }
}
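One detail worth noting in the round-trip assertions above is that CorruptIndexException is constructed with both a message and a resource description, and the resource is folded into the exception message, so the message-containment check still passes after serialization. A minimal sketch (not from the test suite; the exact message layout is an assumption about Lucene's formatting and is only illustrative):

import org.apache.lucene.index.CorruptIndexException;

public class CorruptIndexMessageSketch {
    public static void main(String[] args) {
        // Same (message, resourceDescription) constructor used by the test above.
        CorruptIndexException e = new CorruptIndexException("baaaam booom", "this is my resource");
        // The resource description is appended to the message,
        // e.g. "baaaam booom (resource=this is my resource)" -- treat the exact layout as illustrative.
        System.out.println(e.getMessage());
    }
}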
Use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.
The class CompressingStoredFieldsWriter, method merge.
@Override
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  int numReaders = mergeState.maxDocs.length;
  MatchingReaders matching = new MatchingReaders(mergeState);
  if (mergeState.needsIndexSort) {
    /**
     * If all readers are compressed and they have the same fieldinfos then we can merge the serialized document
     * directly.
     */
    List<CompressingStoredFieldsMergeSub> subs = new ArrayList<>();
    for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
      if (matching.matchingReaders[i] && mergeState.storedFieldsReaders[i] instanceof CompressingStoredFieldsReader) {
        CompressingStoredFieldsReader storedFieldsReader = (CompressingStoredFieldsReader) mergeState.storedFieldsReaders[i];
        storedFieldsReader.checkIntegrity();
        subs.add(new CompressingStoredFieldsMergeSub(storedFieldsReader, mergeState.docMaps[i], mergeState.maxDocs[i]));
      } else {
        return super.merge(mergeState);
      }
    }
    final DocIDMerger<CompressingStoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, true);
    while (true) {
      CompressingStoredFieldsMergeSub sub = docIDMerger.next();
      if (sub == null) {
        break;
      }
      assert sub.mappedDocID == docCount;
      SerializedDocument doc = sub.reader.document(sub.docID);
      startDocument();
      bufferedDocs.copyBytes(doc.in, doc.length);
      numStoredFieldsInDoc = doc.numStoredFields;
      finishDocument();
      ++docCount;
    }
    finish(mergeState.mergeFieldInfos, docCount);
    return docCount;
  }
  for (int readerIndex = 0; readerIndex < numReaders; readerIndex++) {
    MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
    CompressingStoredFieldsReader matchingFieldsReader = null;
    if (matching.matchingReaders[readerIndex]) {
      final StoredFieldsReader fieldsReader = mergeState.storedFieldsReaders[readerIndex];
      // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
      if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
        matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
      }
    }
    final int maxDoc = mergeState.maxDocs[readerIndex];
    final Bits liveDocs = mergeState.liveDocs[readerIndex];
    // if its some other format, or an older version of this format, or safety switch:
    if (matchingFieldsReader == null || matchingFieldsReader.getVersion() != VERSION_CURRENT || BULK_MERGE_ENABLED == false) {
      // naive merge...
      StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[readerIndex];
      if (storedFieldsReader != null) {
        storedFieldsReader.checkIntegrity();
      }
      for (int docID = 0; docID < maxDoc; docID++) {
        if (liveDocs != null && liveDocs.get(docID) == false) {
          continue;
        }
        startDocument();
        storedFieldsReader.visitDocument(docID, visitor);
        finishDocument();
        ++docCount;
      }
    } else if (matchingFieldsReader.getCompressionMode() == compressionMode &&
               matchingFieldsReader.getChunkSize() == chunkSize &&
               matchingFieldsReader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT &&
               liveDocs == null &&
               !tooDirty(matchingFieldsReader)) {
      // if the format is older, its always handled by the naive merge case above
      assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
      matchingFieldsReader.checkIntegrity();
      // flush any pending chunks
      if (numBufferedDocs > 0) {
        flush();
        // incomplete: we had to force this flush
        numDirtyChunks++;
      }
      // iterate over each chunk. we use the stored fields index to find chunk boundaries,
      // read the docstart + doccount from the chunk header (we write a new header, since doc numbers will change),
      // and just copy the bytes directly.
      IndexInput rawDocs = matchingFieldsReader.getFieldsStream();
      CompressingStoredFieldsIndexReader index = matchingFieldsReader.getIndexReader();
      rawDocs.seek(index.getStartPointer(0));
      int docID = 0;
      while (docID < maxDoc) {
        // read header
        int base = rawDocs.readVInt();
        if (base != docID) {
          throw new CorruptIndexException("invalid state: base=" + base + ", docID=" + docID, rawDocs);
        }
        int code = rawDocs.readVInt();
        // write a new index entry and new header for this chunk.
        int bufferedDocs = code >>> 1;
        indexWriter.writeIndex(bufferedDocs, fieldsStream.getFilePointer());
        // rebase
        fieldsStream.writeVInt(docBase);
        fieldsStream.writeVInt(code);
        docID += bufferedDocs;
        docBase += bufferedDocs;
        docCount += bufferedDocs;
        if (docID > maxDoc) {
          throw new CorruptIndexException("invalid state: base=" + base + ", count=" + bufferedDocs + ", maxDoc=" + maxDoc, rawDocs);
        }
        // copy bytes until the next chunk boundary (or end of chunk data).
        // using the stored fields index for this isn't the most efficient, but fast enough
        // and is a source of redundancy for detecting bad things.
        final long end;
        if (docID == maxDoc) {
          end = matchingFieldsReader.getMaxPointer();
        } else {
          end = index.getStartPointer(docID);
        }
        fieldsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
      }
      if (rawDocs.getFilePointer() != matchingFieldsReader.getMaxPointer()) {
        throw new CorruptIndexException("invalid state: pos=" + rawDocs.getFilePointer() + ", max=" + matchingFieldsReader.getMaxPointer(), rawDocs);
      }
      // since we bulk merged all chunks, we inherit any dirty ones from this segment.
      numChunks += matchingFieldsReader.getNumChunks();
      numDirtyChunks += matchingFieldsReader.getNumDirtyChunks();
    } else {
      // if the format is older, its always handled by the naive merge case above
      assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
      matchingFieldsReader.checkIntegrity();
      for (int docID = 0; docID < maxDoc; docID++) {
        if (liveDocs != null && liveDocs.get(docID) == false) {
          continue;
        }
        SerializedDocument doc = matchingFieldsReader.document(docID);
        startDocument();
        bufferedDocs.copyBytes(doc.in, doc.length);
        numStoredFieldsInDoc = doc.numStoredFields;
        finishDocument();
        ++docCount;
      }
    }
  }
  finish(mergeState.mergeFieldInfos, docCount);
  return docCount;
}
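In the bulk-copy path above, corruption is detected by cross-checking each chunk header against the doc ID the merge expects and against the stored fields index, and any mismatch is raised as a CorruptIndexException bound to the offending IndexInput. A hypothetical standalone helper (not part of CompressingStoredFieldsWriter) condensing that check:

import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.IndexInput;

final class ChunkHeaderCheck {
  /**
   * Hypothetical helper mirroring the header validation above: reads a chunk header
   * (docBase, then a code whose upper bits carry the chunk's doc count) and throws
   * CorruptIndexException, with the input as resource context, when docBase disagrees
   * with the doc ID the caller expects.
   */
  static int readChunkDocCount(IndexInput rawDocs, int expectedDocID) throws IOException {
    int base = rawDocs.readVInt();
    if (base != expectedDocID) {
      throw new CorruptIndexException("invalid state: base=" + base + ", docID=" + expectedDocID, rawDocs);
    }
    int code = rawDocs.readVInt();
    // doc count is in the upper bits; the low bit is a format flag, as in the merge loop above
    return code >>> 1;
  }
}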
Use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.
The class SimpleTextSegmentInfoFormat, method read.
@Override
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
  BytesRefBuilder scratch = new BytesRefBuilder();
  String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
  try (ChecksumIndexInput input = directory.openChecksumInput(segFileName, context)) {
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_VERSION);
    final Version version;
    try {
      version = Version.parse(readString(SI_VERSION.length, scratch));
    } catch (ParseException pe) {
      throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
    }
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_MIN_VERSION);
    Version minVersion;
    try {
      String versionString = readString(SI_MIN_VERSION.length, scratch);
      if (versionString.equals("null")) {
        minVersion = null;
      } else {
        minVersion = Version.parse(versionString);
      }
    } catch (ParseException pe) {
      throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
    }
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT);
    final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch));
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND);
    final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch));
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG);
    int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch));
    Map<String, String> diagnostics = new HashMap<>();
    for (int i = 0; i < numDiag; i++) {
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY);
      String key = readString(SI_DIAG_KEY.length, scratch);
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE);
      String value = readString(SI_DIAG_VALUE.length, scratch);
      diagnostics.put(key, value);
    }
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_NUM_ATT);
    int numAtt = Integer.parseInt(readString(SI_NUM_ATT.length, scratch));
    Map<String, String> attributes = new HashMap<>(numAtt);
    for (int i = 0; i < numAtt; i++) {
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_ATT_KEY);
      String key = readString(SI_ATT_KEY.length, scratch);
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_ATT_VALUE);
      String value = readString(SI_ATT_VALUE.length, scratch);
      attributes.put(key, value);
    }
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES);
    int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
    Set<String> files = new HashSet<>();
    for (int i = 0; i < numFiles; i++) {
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_FILE);
      String fileName = readString(SI_FILE.length, scratch);
      files.add(fileName);
    }
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_ID);
    final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
    if (!Arrays.equals(segmentID, id)) {
      throw new CorruptIndexException("file mismatch, expected: " + StringHelper.idToString(segmentID) + ", got: " + StringHelper.idToString(id), input);
    }
    SimpleTextUtil.readLine(input, scratch);
    assert StringHelper.startsWith(scratch.get(), SI_SORT);
    final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch));
    SortField[] sortField = new SortField[numSortFields];
    for (int i = 0; i < numSortFields; ++i) {
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_SORT_FIELD);
      final String field = readString(SI_SORT_FIELD.length, scratch);
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE);
      final String typeAsString = readString(SI_SORT_TYPE.length, scratch);
      final SortField.Type type;
      SortedSetSelector.Type selectorSet = null;
      SortedNumericSelector.Type selectorNumeric = null;
      switch (typeAsString) {
        case "string":
          type = SortField.Type.STRING;
          break;
        case "long":
          type = SortField.Type.LONG;
          break;
        case "int":
          type = SortField.Type.INT;
          break;
        case "double":
          type = SortField.Type.DOUBLE;
          break;
        case "float":
          type = SortField.Type.FLOAT;
          break;
        case "multi_valued_string":
          type = SortField.Type.STRING;
          selectorSet = readSetSelector(input, scratch);
          break;
        case "multi_valued_long":
          type = SortField.Type.LONG;
          selectorNumeric = readNumericSelector(input, scratch);
          break;
        case "multi_valued_int":
          type = SortField.Type.INT;
          selectorNumeric = readNumericSelector(input, scratch);
          break;
        case "multi_valued_double":
          type = SortField.Type.DOUBLE;
          selectorNumeric = readNumericSelector(input, scratch);
          break;
        case "multi_valued_float":
          type = SortField.Type.FLOAT;
          selectorNumeric = readNumericSelector(input, scratch);
          break;
        default:
          throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
      }
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_SORT_REVERSE);
      final boolean reverse = Boolean.parseBoolean(readString(SI_SORT_REVERSE.length, scratch));
      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), SI_SORT_MISSING);
      final String missingLastAsString = readString(SI_SORT_MISSING.length, scratch);
      final Object missingValue;
      switch (type) {
        case STRING:
          switch (missingLastAsString) {
            case "null":
              missingValue = null;
              break;
            case "first":
              missingValue = SortField.STRING_FIRST;
              break;
            case "last":
              missingValue = SortField.STRING_LAST;
              break;
            default:
              throw new CorruptIndexException("unable to parse missing string: " + typeAsString, input);
          }
          break;
        case LONG:
          switch (missingLastAsString) {
            case "null":
              missingValue = null;
              break;
            default:
              missingValue = Long.parseLong(missingLastAsString);
              break;
          }
          break;
        case INT:
          switch (missingLastAsString) {
            case "null":
              missingValue = null;
              break;
            default:
              missingValue = Integer.parseInt(missingLastAsString);
              break;
          }
          break;
        case DOUBLE:
          switch (missingLastAsString) {
            case "null":
              missingValue = null;
              break;
            default:
              missingValue = Double.parseDouble(missingLastAsString);
              break;
          }
          break;
        case FLOAT:
          switch (missingLastAsString) {
            case "null":
              missingValue = null;
              break;
            default:
              missingValue = Float.parseFloat(missingLastAsString);
              break;
          }
          break;
        default:
          throw new AssertionError();
      }
      if (selectorSet != null) {
        sortField[i] = new SortedSetSortField(field, reverse);
      } else if (selectorNumeric != null) {
        sortField[i] = new SortedNumericSortField(field, type, reverse);
      } else {
        sortField[i] = new SortField(field, type, reverse);
      }
      if (missingValue != null) {
        sortField[i].setMissingValue(missingValue);
      }
    }
    Sort indexSort = sortField.length == 0 ? null : new Sort(sortField);
    SimpleTextUtil.checkFooter(input);
    SegmentInfo info = new SegmentInfo(directory, version, minVersion, segmentName, docCount, isCompoundFile,
        null, Collections.unmodifiableMap(diagnostics), id, Collections.unmodifiableMap(attributes), indexSort);
    info.setFiles(files);
    return info;
  }
}
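A recurring pattern in the method above is converting parse failures into CorruptIndexException, keeping the original ParseException as the cause and the input being read as the resource, so the caller can see exactly which file was corrupt. A hypothetical standalone helper (not part of SimpleTextSegmentInfoFormat) isolating that conversion:

import java.io.IOException;
import java.text.ParseException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.Version;

final class VersionParsing {
  /**
   * Hypothetical helper: parse a Lucene version string read from an index file, rethrowing
   * parse failures as CorruptIndexException so the failure carries the offending input.
   */
  static Version parseVersionOrCorrupt(String versionString, DataInput input) throws IOException {
    try {
      return Version.parse(versionString);
    } catch (ParseException pe) {
      throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
    }
  }
}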
Use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.
The class Lucene50RWSegmentInfoFormat, method read.
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
  try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
    Throwable priorE = null;
    SegmentInfo si = null;
    try {
      CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
          Lucene50SegmentInfoFormat.VERSION_START, Lucene50SegmentInfoFormat.VERSION_CURRENT, segmentID, "");
      final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
      final int docCount = input.readInt();
      if (docCount < 0) {
        throw new CorruptIndexException("invalid docCount: " + docCount, input);
      }
      final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
      final Map<String, String> diagnostics = input.readMapOfStrings();
      final Set<String> files = input.readSetOfStrings();
      final Map<String, String> attributes = input.readMapOfStrings();
      si = new SegmentInfo(dir, version, null, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null);
      si.setFiles(files);
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      CodecUtil.checkFooter(input, priorE);
    }
    return si;
  }
}
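The read method above uses Lucene's standard "priorE" idiom: any exception raised while parsing is captured and handed to CodecUtil.checkFooter, which validates the checksum footer and reports corruption together with the original failure instead of masking it. A generic skeleton of the same idiom, with a hypothetical file name and codec name:

import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

final class PriorESketch {
  // Hypothetical names: "example.dat" and "ExampleCodec" are illustrative, not a real codec.
  static void readExampleFile(Directory dir, byte[] expectedID) throws IOException {
    try (ChecksumIndexInput input = dir.openChecksumInput("example.dat", IOContext.READONCE)) {
      Throwable priorE = null;
      try {
        CodecUtil.checkIndexHeader(input, "ExampleCodec", 0, 0, expectedID, "");
        // ... read and validate the payload here ...
      } catch (Throwable exception) {
        priorE = exception;
      } finally {
        // verifies the checksum footer; rethrows priorE (with corruption info) if parsing failed
        CodecUtil.checkFooter(input, priorE);
      }
    }
  }
}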