Search in sources :

Example 51 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project neo4j by neo4j.

the class LuceneSchemaIndexCorruptionTest method shouldRequestIndexPopulationIfTheIndexIsCorrupt.

/**
 * Checks that asking for the initial state of an index whose provider was built with a
 * {@link CorruptIndexException} yields {@link InternalIndexState#POPULATING} and that the
 * exception is logged at least once.
 */
@Test
public void shouldRequestIndexPopulationIfTheIndexIsCorrupt() throws Exception {
    // Given: a provider created by the "faulty" factory helper for this index id and error
    // (presumably it fails with that error when the index is opened; see the helper).
    final long corruptIndexId = 1;
    final CorruptIndexException corruption = new CorruptIndexException("It's broken.", "");
    final LuceneSchemaIndexProvider faultyProvider = newFaultySchemaIndexProvider(corruptIndexId, corruption);

    // When: the initial state of that index is requested
    final InternalIndexState observedState =
            faultyProvider.getInitialState(corruptIndexId, NewIndexDescriptorFactory.forLabel(1, 1));

    // Then: population is requested and the corruption was logged
    assertThat(observedState, equalTo(InternalIndexState.POPULATING));
    logProvider.assertAtLeastOnce(loggedException(corruption));
}
Also used : InternalIndexState(org.neo4j.kernel.api.index.InternalIndexState) NewIndexDescriptor(org.neo4j.kernel.api.schema_new.index.NewIndexDescriptor) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) Test(org.junit.Test)

Example 52 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.

the class CompressingStoredFieldsWriter method merge.

/**
 * Merges the stored fields of all readers in {@code mergeState} into this writer.
 *
 * <p>Three strategies are visible in this method:
 * <ul>
 *   <li>Index-sorted merge ({@code mergeState.needsIndexSort}): if every reader is a matching
 *       {@link CompressingStoredFieldsReader}, documents are pulled in merged order through a
 *       {@link DocIDMerger} and their serialized bytes are copied; otherwise the whole merge is
 *       delegated to {@code super.merge}.</li>
 *   <li>Naive merge: each live document is re-visited through a {@code MergeVisitor}.</li>
 *   <li>Compressed-reader merge: either whole chunks are copied verbatim (bulk merge) or each
 *       live document's serialized bytes are copied one at a time.</li>
 * </ul>
 *
 * @param mergeState the readers, doc maps, live docs and field infos taking part in the merge
 * @return the number of documents written
 * @throws CorruptIndexException if, during bulk copy, a chunk header disagrees with the expected
 *         doc id, a chunk runs past {@code maxDoc}, or the final file pointer is not the reader's
 *         max pointer
 * @throws IOException on any underlying I/O failure
 */
@Override
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    int numReaders = mergeState.maxDocs.length;
    MatchingReaders matching = new MatchingReaders(mergeState);
    if (mergeState.needsIndexSort) {
        // If all readers are compressed and they have the same fieldinfos then we can merge the
        // serialized documents directly.
        List<CompressingStoredFieldsMergeSub> subs = new ArrayList<>();
        for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
            if (matching.matchingReaders[i] && mergeState.storedFieldsReaders[i] instanceof CompressingStoredFieldsReader) {
                CompressingStoredFieldsReader storedFieldsReader = (CompressingStoredFieldsReader) mergeState.storedFieldsReaders[i];
                storedFieldsReader.checkIntegrity();
                subs.add(new CompressingStoredFieldsMergeSub(storedFieldsReader, mergeState.docMaps[i], mergeState.maxDocs[i]));
            } else {
                // a single non-matching or non-compressing reader sends the whole merge to the superclass
                return super.merge(mergeState);
            }
        }
        final DocIDMerger<CompressingStoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, true);
        // drain the merger; a null sub marks the end
        while (true) {
            CompressingStoredFieldsMergeSub sub = docIDMerger.next();
            if (sub == null) {
                break;
            }
            // documents are expected to arrive in consecutive mapped order
            assert sub.mappedDocID == docCount;
            SerializedDocument doc = sub.reader.document(sub.docID);
            startDocument();
            // copy the already-serialized document bytes without re-visiting its fields
            bufferedDocs.copyBytes(doc.in, doc.length);
            numStoredFieldsInDoc = doc.numStoredFields;
            finishDocument();
            ++docCount;
        }
        finish(mergeState.mergeFieldInfos, docCount);
        return docCount;
    }
    // no index sort: handle each reader in turn, picking the cheapest strategy that applies
    for (int readerIndex = 0; readerIndex < numReaders; readerIndex++) {
        MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
        CompressingStoredFieldsReader matchingFieldsReader = null;
        if (matching.matchingReaders[readerIndex]) {
            final StoredFieldsReader fieldsReader = mergeState.storedFieldsReaders[readerIndex];
            // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
            if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
                matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
            }
        }
        final int maxDoc = mergeState.maxDocs[readerIndex];
        final Bits liveDocs = mergeState.liveDocs[readerIndex];
        // if it's some other format, or an older version of this format, or safety switch:
        if (matchingFieldsReader == null || matchingFieldsReader.getVersion() != VERSION_CURRENT || BULK_MERGE_ENABLED == false) {
            // naive merge...
            StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[readerIndex];
            if (storedFieldsReader != null) {
                storedFieldsReader.checkIntegrity();
            }
            for (int docID = 0; docID < maxDoc; docID++) {
                // skip deleted documents (a null liveDocs means every document is kept)
                if (liveDocs != null && liveDocs.get(docID) == false) {
                    continue;
                }
                startDocument();
                // NOTE(review): storedFieldsReader is null-checked above but dereferenced
                // unconditionally here - confirm it cannot be null when maxDoc > 0.
                storedFieldsReader.visitDocument(docID, visitor);
                finishDocument();
                ++docCount;
            }
        } else if (matchingFieldsReader.getCompressionMode() == compressionMode && matchingFieldsReader.getChunkSize() == chunkSize && matchingFieldsReader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT && liveDocs == null && !tooDirty(matchingFieldsReader)) {
            // bulk merge: same compression mode, chunk size and packed-ints version, no deletions,
            // and the source is not "too dirty" - chunks can be copied byte-for-byte.
            // if the format is older, it's always handled by the naive merge case above
            assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
            matchingFieldsReader.checkIntegrity();
            // flush any pending chunks
            if (numBufferedDocs > 0) {
                flush();
                // incomplete: we had to force this flush
                numDirtyChunks++;
            }
            // iterate over each chunk. we use the stored fields index to find chunk boundaries,
            // read the docstart + doccount from the chunk header (we write a new header, since doc numbers will change),
            // and just copy the bytes directly.
            IndexInput rawDocs = matchingFieldsReader.getFieldsStream();
            CompressingStoredFieldsIndexReader index = matchingFieldsReader.getIndexReader();
            rawDocs.seek(index.getStartPointer(0));
            int docID = 0;
            while (docID < maxDoc) {
                // read header
                int base = rawDocs.readVInt();
                if (base != docID) {
                    throw new CorruptIndexException("invalid state: base=" + base + ", docID=" + docID, rawDocs);
                }
                int code = rawDocs.readVInt();
                // write a new index entry and new header for this chunk.
                // NOTE(review): this local int shadows the outer `bufferedDocs` buffer that the
                // other merge paths in this method call copyBytes on; here it is the doc count
                // taken from the upper bits of the header code.
                int bufferedDocs = code >>> 1;
                indexWriter.writeIndex(bufferedDocs, fieldsStream.getFilePointer());
                // rebase
                fieldsStream.writeVInt(docBase);
                fieldsStream.writeVInt(code);
                docID += bufferedDocs;
                docBase += bufferedDocs;
                docCount += bufferedDocs;
                if (docID > maxDoc) {
                    throw new CorruptIndexException("invalid state: base=" + base + ", count=" + bufferedDocs + ", maxDoc=" + maxDoc, rawDocs);
                }
                // copy bytes until the next chunk boundary (or end of chunk data).
                // using the stored fields index for this isn't the most efficient, but fast enough
                // and is a source of redundancy for detecting bad things.
                final long end;
                if (docID == maxDoc) {
                    end = matchingFieldsReader.getMaxPointer();
                } else {
                    end = index.getStartPointer(docID);
                }
                fieldsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
            }
            // after the last chunk the input must sit exactly at the reader's max pointer
            if (rawDocs.getFilePointer() != matchingFieldsReader.getMaxPointer()) {
                throw new CorruptIndexException("invalid state: pos=" + rawDocs.getFilePointer() + ", max=" + matchingFieldsReader.getMaxPointer(), rawDocs);
            }
            // since we bulk merged all chunks, we inherit any dirty ones from this segment.
            numChunks += matchingFieldsReader.getNumChunks();
            numDirtyChunks += matchingFieldsReader.getNumDirtyChunks();
        } else {
            // current-version compressing reader that cannot be bulk-copied: copy each live
            // document's serialized bytes individually.
            // if the format is older, it's always handled by the naive merge case above
            assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
            matchingFieldsReader.checkIntegrity();
            for (int docID = 0; docID < maxDoc; docID++) {
                // skip deleted documents
                if (liveDocs != null && liveDocs.get(docID) == false) {
                    continue;
                }
                SerializedDocument doc = matchingFieldsReader.document(docID);
                startDocument();
                bufferedDocs.copyBytes(doc.in, doc.length);
                numStoredFieldsInDoc = doc.numStoredFields;
                finishDocument();
                ++docCount;
            }
        }
    }
    finish(mergeState.mergeFieldInfos, docCount);
    return docCount;
}
Also used : SerializedDocument(org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.SerializedDocument) ArrayList(java.util.ArrayList) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) StoredFieldsReader(org.apache.lucene.codecs.StoredFieldsReader) Bits(org.apache.lucene.util.Bits) IndexInput(org.apache.lucene.store.IndexInput)

Example 53 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.

the class SimpleTextSegmentInfoFormat method read.

/**
 * Reads a segment-info file in the line-oriented SimpleText format and rebuilds the
 * {@link SegmentInfo} it describes.
 *
 * <p>The file is consumed strictly in order: version, min version, doc count, compound-file
 * flag, diagnostics, attributes, file list, segment id, index-sort fields, and finally the
 * footer check.
 *
 * @param directory   directory the segment-info file is opened from
 * @param segmentName name of the segment; used to derive the file name
 * @param segmentID   id the file is expected to carry
 * @param context     I/O context used to open the file
 * @return the segment info described by the file
 * @throws CorruptIndexException if a version string, sort type, or string missing-value cannot
 *         be parsed, or if the stored id differs from {@code segmentID}
 * @throws IOException on any underlying I/O failure
 */
@Override
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
    BytesRefBuilder scratch = new BytesRefBuilder();
    String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(segFileName, context)) {
        // --- version ---
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_VERSION);
        final Version version;
        try {
            version = Version.parse(readString(SI_VERSION.length, scratch));
        } catch (ParseException pe) {
            throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
        }

        // --- min version (the literal "null" means no min version) ---
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_MIN_VERSION);
        Version minVersion;
        try {
            String versionString = readString(SI_MIN_VERSION.length, scratch);
            if (versionString.equals("null")) {
                minVersion = null;
            } else {
                minVersion = Version.parse(versionString);
            }
        } catch (ParseException pe) {
            throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
        }

        // --- doc count and compound-file flag ---
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT);
        final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch));
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND);
        final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch));

        // --- diagnostics: a count followed by key/value line pairs ---
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG);
        int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch));
        Map<String, String> diagnostics = new HashMap<>();
        for (int i = 0; i < numDiag; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY);
            String key = readString(SI_DIAG_KEY.length, scratch);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE);
            String value = readString(SI_DIAG_VALUE.length, scratch);
            diagnostics.put(key, value);
        }

        // --- attributes: a count followed by key/value line pairs ---
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_NUM_ATT);
        int numAtt = Integer.parseInt(readString(SI_NUM_ATT.length, scratch));
        Map<String, String> attributes = new HashMap<>(numAtt);
        for (int i = 0; i < numAtt; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_ATT_KEY);
            String key = readString(SI_ATT_KEY.length, scratch);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_ATT_VALUE);
            String value = readString(SI_ATT_VALUE.length, scratch);
            attributes.put(key, value);
        }

        // --- files: a count followed by one file name per line ---
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES);
        int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
        Set<String> files = new HashSet<>();
        for (int i = 0; i < numFiles; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_FILE);
            String fileName = readString(SI_FILE.length, scratch);
            files.add(fileName);
        }

        // --- segment id: raw bytes after the prefix, which must match the caller's id ---
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_ID);
        final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
        if (!Arrays.equals(segmentID, id)) {
            throw new CorruptIndexException("file mismatch, expected: " + StringHelper.idToString(segmentID) + ", got: " + StringHelper.idToString(id), input);
        }

        // --- index sort: a count followed by (field, type[, selector], reverse, missing) groups ---
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), SI_SORT);
        final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch));
        SortField[] sortField = new SortField[numSortFields];
        for (int i = 0; i < numSortFields; ++i) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_SORT_FIELD);
            final String field = readString(SI_SORT_FIELD.length, scratch);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE);
            final String typeAsString = readString(SI_SORT_TYPE.length, scratch);
            final SortField.Type type;
            // at most one selector is set, and only for the "multi_valued_*" types
            SortedSetSelector.Type selectorSet = null;
            SortedNumericSelector.Type selectorNumeric = null;
            switch(typeAsString) {
                case "string":
                    type = SortField.Type.STRING;
                    break;
                case "long":
                    type = SortField.Type.LONG;
                    break;
                case "int":
                    type = SortField.Type.INT;
                    break;
                case "double":
                    type = SortField.Type.DOUBLE;
                    break;
                case "float":
                    type = SortField.Type.FLOAT;
                    break;
                case "multi_valued_string":
                    type = SortField.Type.STRING;
                    selectorSet = readSetSelector(input, scratch);
                    break;
                case "multi_valued_long":
                    type = SortField.Type.LONG;
                    selectorNumeric = readNumericSelector(input, scratch);
                    break;
                case "multi_valued_int":
                    type = SortField.Type.INT;
                    selectorNumeric = readNumericSelector(input, scratch);
                    break;
                case "multi_valued_double":
                    type = SortField.Type.DOUBLE;
                    selectorNumeric = readNumericSelector(input, scratch);
                    break;
                case "multi_valued_float":
                    type = SortField.Type.FLOAT;
                    selectorNumeric = readNumericSelector(input, scratch);
                    break;
                default:
                    throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
            }
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_SORT_REVERSE);
            final boolean reverse = Boolean.parseBoolean(readString(SI_SORT_REVERSE.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), SI_SORT_MISSING);
            final String missingLastAsString = readString(SI_SORT_MISSING.length, scratch);
            // the literal "null" means "no missing value" for every type
            final Object missingValue;
            switch(type) {
                case STRING:
                    switch(missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        case "first":
                            missingValue = SortField.STRING_FIRST;
                            break;
                        case "last":
                            missingValue = SortField.STRING_LAST;
                            break;
                        default:
                            // report the value that actually failed to parse, not the sort type
                            throw new CorruptIndexException("unable to parse missing string: " + missingLastAsString, input);
                    }
                    break;
                case LONG:
                    switch(missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        default:
                            missingValue = Long.parseLong(missingLastAsString);
                            break;
                    }
                    break;
                case INT:
                    switch(missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        default:
                            missingValue = Integer.parseInt(missingLastAsString);
                            break;
                    }
                    break;
                case DOUBLE:
                    switch(missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        default:
                            missingValue = Double.parseDouble(missingLastAsString);
                            break;
                    }
                    break;
                case FLOAT:
                    switch(missingLastAsString) {
                        case "null":
                            missingValue = null;
                            break;
                        default:
                            missingValue = Float.parseFloat(missingLastAsString);
                            break;
                    }
                    break;
                default:
                    throw new AssertionError();
            }
            // Pass the parsed selector through; without it the multi-valued sort fields would
            // silently fall back to their default selector regardless of what the file says.
            if (selectorSet != null) {
                sortField[i] = new SortedSetSortField(field, reverse, selectorSet);
            } else if (selectorNumeric != null) {
                sortField[i] = new SortedNumericSortField(field, type, reverse, selectorNumeric);
            } else {
                sortField[i] = new SortField(field, type, reverse);
            }
            if (missingValue != null) {
                sortField[i].setMissingValue(missingValue);
            }
        }
        // an empty sort-field list means the segment has no index sort
        Sort indexSort = sortField.length == 0 ? null : new Sort(sortField);
        SimpleTextUtil.checkFooter(input);
        SegmentInfo info = new SegmentInfo(directory, version, minVersion, segmentName, docCount, isCompoundFile, null, Collections.unmodifiableMap(diagnostics), id, Collections.unmodifiableMap(attributes), indexSort);
        info.setFiles(files);
        return info;
    }
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) HashMap(java.util.HashMap) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) SortedNumericSelector(org.apache.lucene.search.SortedNumericSelector) Version(org.apache.lucene.util.Version) Sort(org.apache.lucene.search.Sort) SortedSetSelector(org.apache.lucene.search.SortedSetSelector) HashSet(java.util.HashSet) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) SegmentInfo(org.apache.lucene.index.SegmentInfo) ParseException(java.text.ParseException)

Example 54 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.

the class Lucene50RWSegmentInfoFormat method read.

/**
 * Reads the segment-info file for {@code segment} from {@code dir} and builds a
 * {@link SegmentInfo} from its contents.
 *
 * <p>Any throwable raised while parsing is captured and handed to
 * {@code CodecUtil.checkFooter}, which always runs once the file has been opened.
 *
 * @param dir       directory the file is opened from
 * @param segment   segment name; used to derive the file name
 * @param segmentID id passed to the index-header check and stored in the result
 * @param context   I/O context used to open the file
 * @return the parsed segment info ({@code null} only if parsing failed and the footer check
 *         did not itself throw)
 * @throws IOException on any underlying I/O failure
 */
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
    final String siFileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
    try (ChecksumIndexInput in = dir.openChecksumInput(siFileName, context)) {
        SegmentInfo parsed = null;
        Throwable failure = null;
        try {
            CodecUtil.checkIndexHeader(
                    in,
                    Lucene50SegmentInfoFormat.CODEC_NAME,
                    Lucene50SegmentInfoFormat.VERSION_START,
                    Lucene50SegmentInfoFormat.VERSION_CURRENT,
                    segmentID,
                    "");

            // three consecutive ints, read in this order, make up the version
            final int firstVersionInt = in.readInt();
            final int secondVersionInt = in.readInt();
            final int thirdVersionInt = in.readInt();
            final Version segmentVersion = Version.fromBits(firstVersionInt, secondVersionInt, thirdVersionInt);

            final int maxDoc = in.readInt();
            if (maxDoc < 0) {
                throw new CorruptIndexException("invalid docCount: " + maxDoc, in);
            }

            final boolean compound = in.readByte() == SegmentInfo.YES;
            final Map<String, String> diagnosticsMap = in.readMapOfStrings();
            final Set<String> segmentFiles = in.readSetOfStrings();
            final Map<String, String> attributesMap = in.readMapOfStrings();

            parsed = new SegmentInfo(dir, segmentVersion, null, segment, maxDoc, compound, null, diagnosticsMap, segmentID, attributesMap, null);
            parsed.setFiles(segmentFiles);
        } catch (Throwable t) {
            // remember the failure so the footer check can take it into account
            failure = t;
        } finally {
            CodecUtil.checkFooter(in, failure);
        }
        return parsed;
    }
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) Version(org.apache.lucene.util.Version) SegmentInfo(org.apache.lucene.index.SegmentInfo) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Example 55 with CorruptIndexException

use of org.apache.lucene.index.CorruptIndexException in project lucene-solr by apache.

the class Lucene53NormsProducer method readFields.

/**
 * Reads per-field norms entries from {@code meta} until a field number of {@code -1} is read,
 * storing each entry in {@code norms} keyed by the field's number.
 *
 * @param meta  input positioned at the first field number
 * @param infos field infos used to resolve and validate each field number
 * @throws CorruptIndexException if a field number is unknown, the field has no norms, or the
 *         bytes-per-value is not one of 0, 1, 2, 4 or 8
 * @throws IOException on any underlying I/O failure
 */
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
    for (int number = meta.readVInt(); number != -1; number = meta.readVInt()) {
        final FieldInfo fieldInfo = infos.fieldInfo(number);
        if (fieldInfo == null) {
            throw new CorruptIndexException("Invalid field number: " + number, meta);
        }
        if (!fieldInfo.hasNorms()) {
            throw new CorruptIndexException("Invalid field: " + fieldInfo.name, meta);
        }

        final NormsEntry normsEntry = new NormsEntry();
        normsEntry.bytesPerValue = meta.readByte();
        // only these widths are accepted
        final boolean supportedWidth = normsEntry.bytesPerValue == 0
                || normsEntry.bytesPerValue == 1
                || normsEntry.bytesPerValue == 2
                || normsEntry.bytesPerValue == 4
                || normsEntry.bytesPerValue == 8;
        if (!supportedWidth) {
            throw new CorruptIndexException("Invalid bytesPerValue: " + normsEntry.bytesPerValue + ", field: " + fieldInfo.name, meta);
        }

        normsEntry.offset = meta.readLong();
        norms.put(fieldInfo.number, normsEntry);
    }
}
Also used : CorruptIndexException(org.apache.lucene.index.CorruptIndexException) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

CorruptIndexException (org.apache.lucene.index.CorruptIndexException)64 ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput)19 IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException)17 IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException)17 Directory (org.apache.lucene.store.Directory)16 IndexInput (org.apache.lucene.store.IndexInput)16 IndexOutput (org.apache.lucene.store.IndexOutput)15 IOException (java.io.IOException)14 ArrayList (java.util.ArrayList)9 FileNotFoundException (java.io.FileNotFoundException)8 RAMDirectory (org.apache.lucene.store.RAMDirectory)8 BytesRef (org.apache.lucene.util.BytesRef)8 EOFException (java.io.EOFException)7 HashMap (java.util.HashMap)7 IOContext (org.apache.lucene.store.IOContext)7 NoSuchFileException (java.nio.file.NoSuchFileException)6 AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException)6 List (java.util.List)5 ElasticsearchException (org.elasticsearch.ElasticsearchException)5 AccessDeniedException (java.nio.file.AccessDeniedException)4