Search in sources :

Example 16 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project elasticsearch by elastic.

the class Store method failIfCorrupted.

/**
 * Looks for corruption marker files in {@code directory} and reports what they contain.
 * <p>
 * Every file whose name starts with {@code CORRUPTED} is opened, its header is checked, and its
 * payload is turned into a {@link CorruptIndexException}:
 * <ul>
 *   <li>for {@code VERSION_WRITE_THROWABLE} the payload is a length-prefixed serialized exception;
 *       it is used as-is when it already is a {@code CorruptIndexException}, otherwise it is
 *       wrapped as the cause of a new one;</li>
 *   <li>for the other accepted versions the payload is a message string, followed by a second
 *       string when the version is {@code VERSION_STACK_TRACE}.</li>
 * </ul>
 * The footer of each marker is checked after its payload has been read. All collected exceptions
 * are finally handed to {@code ExceptionsHelper.rethrowAndSuppress}.
 *
 * @param directory the directory whose listing is scanned for marker files
 * @param shardId   prefix of the message built for the string-based marker formats
 * @throws IOException if listing the directory or reading/validating a marker fails
 */
private static void failIfCorrupted(Directory directory, ShardId shardId) throws IOException {
    final List<CorruptIndexException> failures = new ArrayList<>();
    for (String name : directory.listAll()) {
        if (name.startsWith(CORRUPTED) == false) {
            // not a corruption marker
            continue;
        }
        try (ChecksumIndexInput marker = directory.openChecksumInput(name, IOContext.READONCE)) {
            final int markerVersion = CodecUtil.checkHeader(marker, CODEC, VERSION_START, VERSION);
            if (markerVersion != VERSION_WRITE_THROWABLE) {
                assert markerVersion == VERSION_START || markerVersion == VERSION_STACK_TRACE;
                final String cause = marker.readString();
                final StringBuilder description = new StringBuilder(shardId.toString());
                description.append(" Preexisting corrupted index [").append(name).append("] caused by: ").append(cause);
                if (markerVersion == VERSION_STACK_TRACE) {
                    // this format stores a second string after the message
                    description.append(System.lineSeparator()).append(marker.readString());
                }
                failures.add(new CorruptIndexException(description.toString(), "preexisting_corruption"));
            } else {
                // payload: vInt length followed by that many bytes of a serialized exception
                final byte[] serialized = new byte[marker.readVInt()];
                marker.readBytes(serialized, 0, serialized.length);
                final Exception stored = StreamInput.wrap(serialized).readException();
                final CorruptIndexException failure;
                if (stored instanceof CorruptIndexException) {
                    failure = (CorruptIndexException) stored;
                } else {
                    failure = new CorruptIndexException(stored.getMessage(), "preexisting_corruption", stored);
                }
                failures.add(failure);
            }
            CodecUtil.checkFooter(marker);
        }
    }
    if (!failures.isEmpty()) {
        ExceptionsHelper.rethrowAndSuppress(failures);
    }
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) ArrayList(java.util.ArrayList) StreamInput(org.elasticsearch.common.io.stream.StreamInput) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IndexNotFoundException(org.apache.lucene.index.IndexNotFoundException) ElasticsearchException(org.elasticsearch.ElasticsearchException) NoSuchFileException(java.nio.file.NoSuchFileException) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) ShardLockObtainFailedException(org.elasticsearch.env.ShardLockObtainFailedException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) AccessDeniedException(java.nio.file.AccessDeniedException) IOException(java.io.IOException) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException)

Example 17 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project Solbase by Photobucket.

the class DocumentLoader method loadObject.

/**
 * Reads one document's stored fields from the document table and rebuilds a Lucene
 * {@link Document} from them.
 * <p>
 * The row key is {@code SolbaseUtil.randomize(docNum)}. Either the whole {@code "field"} column
 * family is fetched (when {@code fieldNames} is null or empty) or only the listed columns.
 * Each cell value carries a trailing type marker byte: {@link Byte#MAX_VALUE} marks a binary
 * value, {@link Byte#MIN_VALUE} marks a UTF-8 string. A string containing
 * {@code SolbaseUtil.delimiter} is split and every token is added as its own field.
 *
 * @param docNum document number used to derive the row key
 * @param start  not used by this method
 * @param end    not used by this method
 * @param cache  not used by this method
 * @return a wrapper around the rebuilt document (version identifier {@code 0}, timestamp = now),
 *         or {@code null} when the row is missing or empty
 * @throws CorruptIndexException if a cell value is empty or does not end in a known type marker
 * @throws IOException           if the table read fails
 */
public CachedObjectWrapper<Document, Long> loadObject(Integer docNum, int start, int end, LayeredCache<Integer, Document, Long, ParsedDoc> cache) throws IOException {
    Document document = new Document();
    Get documentGet = new Get(SolbaseUtil.randomize(docNum));
    if (fieldNames == null || fieldNames.size() == 0) {
        // get all columns ( except this skips meta info )
        documentGet.addFamily(Bytes.toBytes("field"));
    } else {
        for (byte[] fieldName : fieldNames) {
            documentGet.addColumn(Bytes.toBytes("field"), fieldName);
        }
    }
    Result documentResult = null;
    // if docTable is set up, reuse instance, otherwise create brand new one and close after done
    if (this.docTable == null) {
        HTableInterface docTable = null;
        try {
            docTable = SolbaseUtil.getDocTable();
            documentResult = docTable.get(documentGet);
        } finally {
            SolbaseUtil.releaseTable(docTable);
        }
    } else {
        documentResult = this.docTable.get(documentGet);
    }
    if (documentResult == null || documentResult.isEmpty()) {
        return null;
    }
    // TODO, get from result
    Long versionIdentifier = 0L;
    // loop-invariant: the multi-value separator as a String
    final String multiValueDelimiter = Bytes.toString(SolbaseUtil.delimiter);
    NavigableMap<byte[], byte[]> familyMap = documentResult.getFamilyMap(Bytes.toBytes("field"));
    for (Map.Entry<byte[], byte[]> fieldColumn : familyMap.entrySet()) {
        String fieldName = Bytes.toString(fieldColumn.getKey());
        byte[] encoded = fieldColumn.getValue();
        // an empty cell has no type marker at all; report it as corruption instead of
        // failing with an ArrayIndexOutOfBoundsException on encoded[-1]
        if (encoded == null || encoded.length == 0) {
            throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")");
        }
        byte typeMarker = encoded[encoded.length - 1];
        if (typeMarker != Byte.MAX_VALUE && typeMarker != Byte.MIN_VALUE) {
            throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")");
        }
        // payload is everything except the trailing marker byte
        byte[] value = new byte[encoded.length - 1];
        System.arraycopy(encoded, 0, value, 0, value.length);
        if (typeMarker == Byte.MAX_VALUE) {
            // Binary
            document.add(new Field(fieldName, value, Store.YES));
            continue;
        }
        // String
        String fieldString = new String(value, "UTF-8");
        // Check for multi-fields
        if (fieldString.indexOf(multiValueDelimiter) >= 0) {
            StringTokenizer tok = new StringTokenizer(fieldString, multiValueDelimiter);
            while (tok.hasMoreTokens()) {
                addDecodedStringField(document, fieldName, tok.nextToken());
            }
        } else {
            addDecodedStringField(document, fieldName, fieldString);
        }
    }
    return new CachedObjectWrapper<Document, Long>(document, versionIdentifier, System.currentTimeMillis());
}

/**
 * Adds one decoded string value to {@code document}, using the schema's field definition when
 * one is available and a plain stored, analyzed {@link Field} otherwise.
 */
private void addDecodedStringField(Document document, String fieldName, String text) {
    // getFieldOrNull may return null for a field the schema does not define; treat that
    // like the schema-less case instead of dereferencing null
    SchemaField sfield = (schema == null) ? null : schema.getFieldOrNull(fieldName);
    if (sfield == null) {
        document.add(new Field(fieldName, text, Store.YES, Index.ANALYZED));
        return;
    }
    if (sfield.getType() instanceof EmbeddedIndexedIntField) {
        EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
        EmbeddedSortField sf = new EmbeddedSortField(fieldName, text, Field.Store.YES, Field.Index.NO, eiif.getFieldNumber());
        document.add(sf);
    } else {
        Field f = sfield.createField(text, 1.0f);
        if (f != null) {
            // null fields are not added
            document.add(f);
        }
    }
}
Also used : CachedObjectWrapper(org.solbase.cache.CachedObjectWrapper) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) EmbeddedSortField(org.apache.lucene.document.EmbeddedSortField) Document(org.apache.lucene.document.Document) SolrInputDocument(org.apache.solr.common.SolrInputDocument) HTableInterface(org.apache.hadoop.hbase.client.HTableInterface) ByteBuffer(java.nio.ByteBuffer) Result(org.apache.hadoop.hbase.client.Result) SchemaField(org.apache.solr.schema.SchemaField) EmbeddedIndexedIntField(org.apache.solr.schema.EmbeddedIndexedIntField) SchemaField(org.apache.solr.schema.SchemaField) Field(org.apache.lucene.document.Field) EmbeddedSortField(org.apache.lucene.document.EmbeddedSortField) StringTokenizer(java.util.StringTokenizer) Get(org.apache.hadoop.hbase.client.Get) Map(java.util.Map) NavigableMap(java.util.NavigableMap) EmbeddedIndexedIntField(org.apache.solr.schema.EmbeddedIndexedIntField)

Example 18 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project Solbase by Photobucket.

the class IndexWriter method addTermVector.

/**
 * Writes one term/document entry to the term vector table.
 * <p>
 * The row layout depends on {@code TermDocMetadataLoader.storageType}:
 * <ul>
 *   <li>{@code KEY_ONLY}: key, delimiter, doc id and the serialized metadata all go into the
 *       row key; the cell qualifier and value are empty;</li>
 *   <li>{@code WIDE_ROW}: the row key is key + delimiter + chunk id, the qualifier is the doc id
 *       and the value is the serialized metadata;</li>
 *   <li>{@code NARROW_ROW} and any other value: the row key is key + delimiter + doc id, the
 *       qualifier is empty and the value is the serialized metadata.</li>
 * </ul>
 * Failures while building or writing the row are logged and not propagated. The table is always
 * released.
 *
 * @param termDocMeta metadata of the term occurrence to store
 * @param startDocId  not used by this method
 * @param endDocId    not used by this method
 * @throws CorruptIndexException declared for callers; not thrown by the visible code path
 * @throws IOException           if obtaining the term vector table fails
 */
public void addTermVector(TermDocMetadata termDocMeta, int startDocId, int endDocId) throws CorruptIndexException, IOException {
    // obtain the term vector table; it is released in the finally block below
    HTableInterface termVectorTable = SolbaseUtil.getTermVectorTable();
    try {
        byte[] key = termDocMeta.getFieldTermKey();
        ByteBuffer buf = termDocMeta.serialize();
        int docNumber = termDocMeta.getDocId();
        Put put = null;
        switch(TermDocMetadataLoader.storageType) {
            case KEY_ONLY:
                {
                    put = new Put(Bytes.add(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber)), Bytes.toBytes(buf)));
                    put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(""));
                }
                break;
            case WIDE_ROW:
                int chunkId = TermDocMetadataLoader.getChunkId(docNumber);
                put = new Put(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(chunkId)));
                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(docNumber), Bytes.toBytes(buf));
                break;
            case NARROW_ROW:
            default:
                {
                    put = new Put(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber)));
                    put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(buf));
                }
        }
        termVectorTable.put(put);
    } catch (Exception e) {
        // best-effort write: the failure is not propagated, but the exception is passed to the
        // logger so the cause and stack trace are not lost
        logger.error("failed to add term vector: " + termDocMeta.getTerm().toString() + " and docId: " + termDocMeta.docId, e);
    } finally {
        SolbaseUtil.releaseTable(termVectorTable);
    }
}
Also used : HTableInterface(org.apache.hadoop.hbase.client.HTableInterface) ByteBuffer(java.nio.ByteBuffer) Put(org.apache.hadoop.hbase.client.Put) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IOException(java.io.IOException) SolbaseException(org.solbase.common.SolbaseException)

Example 19 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.

the class CodecUtil method checksumEntireFile.

/**
 * Checks the codec footer of a file using a clone of {@code input}.
 * <p>
 * The clone is rewound to offset 0 and wrapped in a {@link BufferedChecksumIndexInput}, which is
 * then positioned {@code footerLength()} bytes before the end and passed to {@link #checkFooter}.
 * <p>
 * Note that this method may be slow, as it must process the entire file.
 * If you just need to extract the checksum value, call {@link #retrieveChecksum}.
 *
 * @param input the file to check; it is cloned, and the clone is what gets repositioned
 * @return the value returned by {@link #checkFooter}
 * @throws CorruptIndexException if the file is shorter than a footer
 * @throws IOException           if reading fails or the footer check fails
 */
public static long checksumEntireFile(IndexInput input) throws IOException {
    final IndexInput rewound = input.clone();
    rewound.seek(0);
    final ChecksumIndexInput checksummed = new BufferedChecksumIndexInput(rewound);
    assert checksummed.getFilePointer() == 0;
    final long fileLength = checksummed.length();
    final long footerSize = footerLength();
    if (fileLength < footerSize) {
        throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + fileLength + " but footerLength==" + footerSize, input);
    }
    checksummed.seek(fileLength - footerSize);
    return checkFooter(checksummed);
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Example 20 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.

the class CodecUtil method checkFooter.

/**
 * Checks the codec footer while taking into account an exception that may already have been
 * raised while reading the stream.
 * <p>
 * With a {@code null} {@code priorException} this simply delegates to
 * {@link #checkFooter(ChecksumIndexInput)}. Otherwise the prior exception is always rethrown,
 * after one suppressed {@link CorruptIndexException} has been attached to it describing the
 * checksum status:
 * <ul>
 *   <li>fewer than {@code footerLength()} bytes remain unread: status indeterminate;</li>
 *   <li>the footer check succeeds: checksum passed (the value is included in hex);</li>
 *   <li>the footer check throws {@code CorruptIndexException}: that exception is attached;</li>
 *   <li>anything else is thrown while doing the above: status indeterminate, with the
 *       unexpected throwable as cause.</li>
 * </ul>
 * <p>
 * Example usage:
 * <pre class="prettyprint">
 * try (ChecksumIndexInput input = ...) {
 *   Throwable priorE = null;
 *   try {
 *     // ... read a bunch of stuff ...
 *   } catch (Throwable exception) {
 *     priorE = exception;
 *   } finally {
 *     CodecUtil.checkFooter(input, priorE);
 *   }
 * }
 * </pre>
 *
 * @param in             the checksumming input, positioned wherever reading stopped
 * @param priorException an exception that already occurred, or {@code null} if there was none
 * @throws IOException if the footer check fails, or when rethrowing {@code priorException}
 */
public static void checkFooter(ChecksumIndexInput in, Throwable priorException) throws IOException {
    if (priorException == null) {
        // nothing went wrong earlier: plain footer validation
        checkFooter(in);
        return;
    }
    try {
        final long unread = in.length() - in.getFilePointer();
        if (unread >= footerLength()) {
            // skip whatever was left unread so the input sits exactly at the footer
            in.skipBytes(unread - footerLength());
            // now check the footer
            try {
                final long verified = checkFooter(in);
                priorException.addSuppressed(new CorruptIndexException("checksum passed (" + Long.toHexString(verified) + "). possibly transient resource issue, or a Lucene or JVM bug", in));
            } catch (CorruptIndexException mismatch) {
                priorException.addSuppressed(mismatch);
            }
        } else {
            // corruption caused us to read into the checksum footer already: we can't proceed
            priorException.addSuppressed(new CorruptIndexException("checksum status indeterminate: remaining=" + unread + ", please run checkindex for more details", in));
        }
    } catch (Throwable unexpected) {
        // catch-all for things that shouldn't go wrong (e.g. OOM during readInt) but could...
        priorException.addSuppressed(new CorruptIndexException("checksum status indeterminate: unexpected exception", in, unexpected));
    }
    throw IOUtils.rethrowAlways(priorException);
}
Also used : CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Aggregations

CorruptIndexException (org.apache.lucene.index.CorruptIndexException)64 ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput)19 IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException)17 IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException)17 Directory (org.apache.lucene.store.Directory)16 IndexInput (org.apache.lucene.store.IndexInput)16 IndexOutput (org.apache.lucene.store.IndexOutput)15 IOException (java.io.IOException)14 ArrayList (java.util.ArrayList)9 FileNotFoundException (java.io.FileNotFoundException)8 RAMDirectory (org.apache.lucene.store.RAMDirectory)8 BytesRef (org.apache.lucene.util.BytesRef)8 EOFException (java.io.EOFException)7 HashMap (java.util.HashMap)7 IOContext (org.apache.lucene.store.IOContext)7 NoSuchFileException (java.nio.file.NoSuchFileException)6 AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException)6 List (java.util.List)5 ElasticsearchException (org.elasticsearch.ElasticsearchException)5 AccessDeniedException (java.nio.file.AccessDeniedException)4