Use of org.apache.lucene.index.CorruptIndexException in the elasticsearch project (by elastic):
class Store, method failIfCorrupted.
/**
 * Scans {@code directory} for corruption marker files (names starting with
 * {@code CORRUPTED}), decodes the corruption details each one carries, and if any
 * were found rethrows them as {@link CorruptIndexException}s (the first is thrown,
 * the rest are attached as suppressed exceptions).
 *
 * @param directory the shard store directory to inspect
 * @param shardId   the owning shard, used to prefix the legacy-format message
 * @throws IOException if a marker file cannot be read, or corruption markers exist
 */
private static void failIfCorrupted(Directory directory, ShardId shardId) throws IOException {
    final List<CorruptIndexException> corruptions = new ArrayList<>();
    for (String name : directory.listAll()) {
        if (name.startsWith(CORRUPTED) == false) {
            continue;
        }
        try (ChecksumIndexInput input = directory.openChecksumInput(name, IOContext.READONCE)) {
            final int version = CodecUtil.checkHeader(input, CODEC, VERSION_START, VERSION);
            if (version == VERSION_WRITE_THROWABLE) {
                // current format: the original exception was serialized into the marker
                final byte[] payload = new byte[input.readVInt()];
                input.readBytes(payload, 0, payload.length);
                final Exception cause = StreamInput.wrap(payload).readException();
                corruptions.add(cause instanceof CorruptIndexException
                        ? (CorruptIndexException) cause
                        : new CorruptIndexException(cause.getMessage(), "preexisting_corruption", cause));
            } else {
                // legacy formats: a plain message, optionally followed by a stack trace string
                assert version == VERSION_START || version == VERSION_STACK_TRACE;
                final StringBuilder message = new StringBuilder(shardId.toString())
                        .append(" Preexisting corrupted index [")
                        .append(name)
                        .append("] caused by: ")
                        .append(input.readString());
                if (version == VERSION_STACK_TRACE) {
                    message.append(System.lineSeparator()).append(input.readString());
                }
                corruptions.add(new CorruptIndexException(message.toString(), "preexisting_corruption"));
            }
            CodecUtil.checkFooter(input);
        }
    }
    if (corruptions.isEmpty() == false) {
        ExceptionsHelper.rethrowAndSuppress(corruptions);
    }
}
Use of org.apache.lucene.index.CorruptIndexException in the Solbase project (by Photobucket):
class DocumentLoader, method loadObject.
/**
 * Loads the Lucene {@link Document} stored in HBase for {@code docNum} and wraps it
 * with its version identifier for the layered cache.
 * <p>
 * Each HBase cell in the {@code field} family holds one field value, with a trailing
 * marker byte: {@code Byte.MAX_VALUE} for binary values, {@code Byte.MIN_VALUE} for
 * strings (possibly multi-valued, joined by the Solbase delimiter).
 *
 * @param docNum the document id to load
 * @param start  unused range bound (kept for interface compatibility)
 * @param end    unused range bound (kept for interface compatibility)
 * @param cache  the owning cache (unused here; part of the loader contract)
 * @return the wrapped document, or {@code null} if no row exists for {@code docNum}
 * @throws IOException on HBase access failure, or {@link CorruptIndexException} if a
 *         cell is missing its encoding marker byte
 */
public CachedObjectWrapper<Document, Long> loadObject(Integer docNum, int start, int end, LayeredCache<Integer, Document, Long, ParsedDoc> cache) throws IOException {
    Document document = new Document();
    Get documentGet = new Get(SolbaseUtil.randomize(docNum));
    if (fieldNames == null || fieldNames.size() == 0) {
        // no projection requested: get all columns (this skips meta info)
        documentGet.addFamily(Bytes.toBytes("field"));
    } else {
        for (byte[] fieldName : fieldNames) {
            documentGet.addColumn(Bytes.toBytes("field"), fieldName);
        }
    }
    Result documentResult;
    // if docTable is set up, reuse instance, otherwise create brand new one and close after done
    if (this.docTable == null) {
        // renamed from docTable to avoid shadowing the instance field
        HTableInterface table = null;
        try {
            table = SolbaseUtil.getDocTable();
            documentResult = table.get(documentGet);
        } finally {
            SolbaseUtil.releaseTable(table);
        }
    } else {
        documentResult = this.docTable.get(documentGet);
    }
    if (documentResult == null || documentResult.isEmpty()) {
        return null;
    }
    // TODO, get from result
    Long versionIdentifier = 0L;
    NavigableMap<byte[], byte[]> familyMap = documentResult.getFamilyMap(Bytes.toBytes("field"));
    for (Map.Entry<byte[], byte[]> fieldColumn : familyMap.entrySet()) {
        String fieldName = Bytes.toString(fieldColumn.getKey());
        ByteBuffer v = ByteBuffer.wrap(fieldColumn.getValue());
        int vlimit = v.limit() + v.arrayOffset();
        byte marker = v.array()[vlimit - 1];
        if (marker != Byte.MAX_VALUE && marker != Byte.MIN_VALUE) {
            throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")");
        }
        // strip the trailing marker byte from the stored value
        byte[] value = new byte[vlimit - 1];
        System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1);
        if (marker == Byte.MAX_VALUE) {
            // Binary value: store raw bytes
            document.add(new Field(fieldName, value, Store.YES));
        } else {
            // String value; check for multi-fields joined with the Solbase delimiter
            String fieldString = new String(value, "UTF-8");
            String delimiter = Bytes.toString(SolbaseUtil.delimiter);
            if (fieldString.indexOf(delimiter) >= 0) {
                StringTokenizer tok = new StringTokenizer(fieldString, delimiter);
                while (tok.hasMoreTokens()) {
                    addStringField(document, fieldName, tok.nextToken());
                }
            } else {
                addStringField(document, fieldName, fieldString);
            }
        }
    }
    return new CachedObjectWrapper<Document, Long>(document, versionIdentifier, System.currentTimeMillis());
}

/**
 * Adds a single string field value to {@code document}, honoring the schema when one
 * is configured. Fixes a latent NPE in the original: {@code schema.getFieldOrNull}
 * may return null for an unknown field, in which case we now fall back to a plain
 * analyzed stored field instead of dereferencing null.
 */
private void addStringField(Document document, String fieldName, String value) {
    SchemaField sfield = (schema == null) ? null : schema.getFieldOrNull(fieldName);
    if (sfield == null) {
        // no schema, or field not declared in it: index as a plain analyzed stored field
        document.add(new Field(fieldName, value, Store.YES, Index.ANALYZED));
    } else if (sfield.getType() instanceof EmbeddedIndexedIntField) {
        EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
        document.add(new EmbeddedSortField(fieldName, value, Field.Store.YES, Field.Index.NO, eiif.getFieldNumber()));
    } else {
        Field f = sfield.createField(value, 1.0f);
        if (f != null) {
            // null fields are not added
            document.add(f);
        }
    }
}
Use of org.apache.lucene.index.CorruptIndexException in the Solbase project (by Photobucket):
class IndexWriter, method addTermVector.
/**
 * Persists the serialized term-vector metadata for one (term, doc) pair into the
 * HBase term-vector table, using the row layout selected by
 * {@link TermDocMetadataLoader#storageType}.
 * <p>
 * Failures are deliberately non-fatal: any exception is logged (now with its stack
 * trace, which the original call dropped) and the method returns normally.
 *
 * @param termDocMeta the metadata to store (supplies key, doc id, and payload)
 * @param startDocId  unused here; kept for interface compatibility
 * @param endDocId    unused here; kept for interface compatibility
 * @throws CorruptIndexException declared for interface compatibility
 * @throws IOException declared for interface compatibility
 */
public void addTermVector(TermDocMetadata termDocMeta, int startDocId, int endDocId) throws CorruptIndexException, IOException {
    // getting termVector and doc tables
    HTableInterface termVectorTable = SolbaseUtil.getTermVectorTable();
    try {
        byte[] key = termDocMeta.getFieldTermKey();
        ByteBuffer buf = termDocMeta.serialize();
        int docNumber = termDocMeta.getDocId();
        Put put = null;
        switch (TermDocMetadataLoader.storageType) {
            case KEY_ONLY:
                // metadata is folded into the row key itself; the cell carries no payload
                put = new Put(Bytes.add(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber)), Bytes.toBytes(buf)));
                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(""));
                break;
            case WIDE_ROW:
                // many docs share one chunk row; the doc id becomes the column qualifier
                int chunkId = TermDocMetadataLoader.getChunkId(docNumber);
                put = new Put(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(chunkId)));
                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(docNumber), Bytes.toBytes(buf));
                break;
            case NARROW_ROW:
            default:
                // one row per (term, doc); the metadata is stored as the cell value
                put = new Put(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber)));
                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(buf));
        }
        termVectorTable.put(put);
    } catch (Exception e) {
        // best-effort: pass e so the stack trace is logged instead of being silently lost
        logger.error("failed to add term vector: " + termDocMeta.getTerm().toString() + " and docId: " + termDocMeta.docId, e);
    } finally {
        SolbaseUtil.releaseTable(termVectorTable);
    }
}
Use of org.apache.lucene.index.CorruptIndexException in the lucene-solr project (by apache):
class CodecUtil, method checksumEntireFile.
/**
 * Clones the provided input, checksums every byte of the file from the beginning,
 * and validates the result via {@link #checkFooter}.
 * <p>
 * Note that this method may be slow, as it must process the entire file.
 * If you just need to extract the checksum value, call {@link #retrieveChecksum}.
 */
public static long checksumEntireFile(IndexInput input) throws IOException {
    final IndexInput clone = input.clone();
    clone.seek(0);
    final ChecksumIndexInput checksummed = new BufferedChecksumIndexInput(clone);
    assert checksummed.getFilePointer() == 0;
    final long fileLength = checksummed.length();
    if (fileLength < footerLength()) {
        throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + fileLength + " but footerLength==" + footerLength(), input);
    }
    // seeking to the footer checksums everything before it; checkFooter then validates it
    checksummed.seek(fileLength - footerLength());
    return checkFooter(checksummed);
}
Use of org.apache.lucene.index.CorruptIndexException in the lucene-solr project (by apache):
class CodecUtil, method checkFooter.
/**
* Validates the codec footer previously written by {@link #writeFooter}, optionally
* passing an unexpected exception that has already occurred.
* <p>
* When a {@code priorException} is provided, this method will add a suppressed exception
* indicating whether the checksum for the stream passes, fails, or cannot be computed, and
* rethrow it. Otherwise it behaves the same as {@link #checkFooter(ChecksumIndexInput)}.
* <p>
* Example usage:
* <pre class="prettyprint">
* try (ChecksumIndexInput input = ...) {
* Throwable priorE = null;
* try {
* // ... read a bunch of stuff ...
* } catch (Throwable exception) {
* priorE = exception;
* } finally {
* CodecUtil.checkFooter(input, priorE);
* }
* }
* </pre>
*/
public static void checkFooter(ChecksumIndexInput in, Throwable priorException) throws IOException {
    // without a prior exception this is just the plain footer check
    if (priorException == null) {
        checkFooter(in);
        return;
    }
    // a prior exception exists: attach a suppressed exception describing the checksum
    // status (passed / failed / indeterminate), then always rethrow the prior one
    try {
        final long unread = in.length() - in.getFilePointer();
        if (unread < footerLength()) {
            // the reader already consumed part of the footer: the checksum cannot be verified
            priorException.addSuppressed(new CorruptIndexException("checksum status indeterminate: remaining=" + unread + ", please run checkindex for more details", in));
        } else {
            // skip whatever the reader left unconsumed, then validate the footer
            in.skipBytes(unread - footerLength());
            try {
                final long checksum = checkFooter(in);
                priorException.addSuppressed(new CorruptIndexException("checksum passed (" + Long.toHexString(checksum) + "). possibly transient resource issue, or a Lucene or JVM bug", in));
            } catch (CorruptIndexException t) {
                priorException.addSuppressed(t);
            }
        }
    } catch (Throwable t) {
        // catch-all for things that shouldn't go wrong (e.g. OOM during readInt) but could...
        priorException.addSuppressed(new CorruptIndexException("checksum status indeterminate: unexpected exception", in, t));
    }
    throw IOUtils.rethrowAlways(priorException);
}
Aggregations