Search in sources :

Example 6 with IndexOutput

use of org.apache.lucene.store.IndexOutput in project elasticsearch by elastic.

the class StoreTests method testCanReadOldCorruptionMarker.

/**
 * Checks how {@code Store.failIfCorrupted()} reacts to corruption marker files written
 * with different header versions:
 * <ul>
 *   <li>{@code Store.VERSION_STACK_TRACE}: message and stack trace are stored, and both must
 *       show up in the thrown {@link CorruptIndexException};</li>
 *   <li>{@code Store.VERSION_START}: only the message is stored, so the stack trace must be
 *       absent from the thrown exception;</li>
 *   <li>{@code Store.VERSION_START - 1}: must be rejected with {@link IndexFormatTooOldException};</li>
 *   <li>{@code Store.VERSION + 1}: must be rejected with {@link IndexFormatTooNewException}.</li>
 * </ul>
 * The store is closed in a {@code finally} block so that a failing assertion does not leave it open.
 *
 * @throws IOException if writing a marker or accessing the store fails
 */
public void testCanReadOldCorruptionMarker() throws IOException {
    final ShardId shardId = new ShardId("index", "_na_", 1);
    // I use ram dir to prevent that virusscanner being a PITA
    final Directory dir = new RAMDirectory();
    DirectoryService directoryService = new DirectoryService(shardId, INDEX_SETTINGS) {

        @Override
        public Directory newDirectory() throws IOException {
            return dir;
        }
    };
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    try {
        CorruptIndexException exception = new CorruptIndexException("foo", "bar");
        String uuid = Store.CORRUPTED + UUIDs.randomBase64UUID();

        // marker that carries both the message and the stack trace
        try (IndexOutput output = dir.createOutput(uuid, IOContext.DEFAULT)) {
            CodecUtil.writeHeader(output, Store.CODEC, Store.VERSION_STACK_TRACE);
            output.writeString(ExceptionsHelper.detailedMessage(exception));
            output.writeString(ExceptionsHelper.stackTrace(exception));
            CodecUtil.writeFooter(output);
        }
        try {
            store.failIfCorrupted();
            fail("should be corrupted");
        } catch (CorruptIndexException e) {
            assertTrue(e.getMessage().startsWith("[index][1] Preexisting corrupted index [" + uuid + "] caused by: CorruptIndexException[foo (resource=bar)]"));
            assertTrue(e.getMessage().contains(ExceptionsHelper.stackTrace(exception)));
        }
        store.removeCorruptionMarker();

        // marker that carries the message only
        try (IndexOutput output = dir.createOutput(uuid, IOContext.DEFAULT)) {
            CodecUtil.writeHeader(output, Store.CODEC, Store.VERSION_START);
            output.writeString(ExceptionsHelper.detailedMessage(exception));
            CodecUtil.writeFooter(output);
        }
        try {
            store.failIfCorrupted();
            fail("should be corrupted");
        } catch (CorruptIndexException e) {
            assertTrue(e.getMessage().startsWith("[index][1] Preexisting corrupted index [" + uuid + "] caused by: CorruptIndexException[foo (resource=bar)]"));
            assertFalse(e.getMessage().contains(ExceptionsHelper.stackTrace(exception)));
        }
        store.removeCorruptionMarker();

        // header version below the oldest one used above
        try (IndexOutput output = dir.createOutput(uuid, IOContext.DEFAULT)) {
            CodecUtil.writeHeader(output, Store.CODEC, Store.VERSION_START - 1);
            CodecUtil.writeFooter(output);
        }
        try {
            store.failIfCorrupted();
            fail("should be too old");
        } catch (IndexFormatTooOldException e) {
            // expected: the marker version is too old
        }
        store.removeCorruptionMarker();

        // header version above Store.VERSION
        try (IndexOutput output = dir.createOutput(uuid, IOContext.DEFAULT)) {
            CodecUtil.writeHeader(output, Store.CODEC, Store.VERSION + 1);
            CodecUtil.writeFooter(output);
        }
        try {
            store.failIfCorrupted();
            fail("should be too new");
        } catch (IndexFormatTooNewException e) {
            // expected: the marker version is too new
        }
    } finally {
        // release the store even when one of the assertions above fails
        store.close();
    }
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IndexOutput(org.apache.lucene.store.IndexOutput) DummyShardLock(org.elasticsearch.test.DummyShardLock) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory)

Example 7 with IndexOutput

use of org.apache.lucene.store.IndexOutput in project elasticsearch by elastic.

the class StoreTests method testChecksumCorrupted.

/**
 * Writes a file whose trailing checksum is deliberately wrong, then copies that file through a
 * {@code Store.LuceneVerifyingIndexOutput} that was told the correct checksum. Writing the
 * final 8 checksum bytes must fail with a corruption / format exception.
 *
 * @throws IOException if reading or writing the test files fails unexpectedly
 */
public void testChecksumCorrupted() throws IOException {
    Directory directory = newDirectory();
    IndexOutput source = directory.createOutput("foo.bar", IOContext.DEFAULT);
    final int chunkCount = scaledRandomIntBetween(10, 100);
    int chunksWritten = 0;
    while (chunksWritten < chunkCount) {
        BytesRef chunk = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
        source.writeBytes(chunk.bytes, chunk.offset, chunk.length);
        chunksWritten++;
    }
    // hand-written trailer: the footer magic, a zero int, then the checksum value
    source.writeInt(CodecUtil.FOOTER_MAGIC);
    source.writeInt(0);
    String expectedChecksum = Store.digestToString(source.getChecksum());
    // the checksum stored in the file is off by one on purpose
    source.writeLong(source.getChecksum() + 1);
    source.close();

    IndexInput in = directory.openInput("foo.bar", IOContext.DEFAULT);
    in.seek(0);
    BytesRef copyBuffer = new BytesRef(scaledRandomIntBetween(1, 1024));
    final long fileLength = in.length();
    IndexOutput verifyingOutput = new Store.LuceneVerifyingIndexOutput(new StoreFileMetaData("foo1.bar", fileLength, expectedChecksum), directory.createOutput("foo1.bar", IOContext.DEFAULT));

    // copy everything but the last 8 bytes; the checksum itself is written in the try block below
    for (long remaining = fileLength - 8; remaining > 0; ) {
        if (random().nextInt(10) != 0) {
            // bulk copy of at most one buffer
            int chunkSize = (int) Math.min(remaining, copyBuffer.bytes.length);
            in.readBytes(copyBuffer.bytes, copyBuffer.offset, chunkSize);
            verifyingOutput.writeBytes(copyBuffer.bytes, copyBuffer.offset, chunkSize);
            remaining -= chunkSize;
        } else {
            // occasionally copy a single byte instead
            verifyingOutput.writeByte(in.readByte());
            remaining--;
        }
    }

    try {
        byte[] storedChecksum = new byte[8];
        in.readBytes(storedChecksum, 0, storedChecksum.length);
        // feed the bad checksum either in one call or byte by byte
        if (randomBoolean()) {
            verifyingOutput.writeBytes(storedChecksum, 0, storedChecksum.length);
        } else {
            for (byte b : storedChecksum) {
                verifyingOutput.writeByte(b);
            }
        }
        fail("should be a corrupted index");
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
        // ok
    }
    IOUtils.close(in, verifyingOutput, directory);
}
Also used : CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IndexOutput(org.apache.lucene.store.IndexOutput) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory)

Example 8 with IndexOutput

use of org.apache.lucene.store.IndexOutput in project elasticsearch by elastic.

the class StoreTests method testVerifyingIndexOutputWithBogusInput.

/**
 * Pushes random bytes through a {@code Store.LuceneVerifyingIndexOutput} whose metadata declares
 * an empty checksum, and expects the output to reject the data before (or when) the declared
 * length has been written.
 *
 * @throws IOException if creating or closing the underlying output fails unexpectedly
 */
public void testVerifyingIndexOutputWithBogusInput() throws IOException {
    Directory directory = newDirectory();
    final int declaredLength = scaledRandomIntBetween(10, 1024);
    IndexOutput verifyingOutput = new Store.LuceneVerifyingIndexOutput(new StoreFileMetaData("foo1.bar", declaredLength, ""), directory.createOutput("foo1.bar", IOContext.DEFAULT));
    try {
        // write exactly as many random bytes as the metadata announces
        for (int remaining = declaredLength; remaining > 0; remaining--) {
            verifyingOutput.writeByte((byte) random().nextInt());
        }
        fail("should be a corrupted index");
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
        // ok
    }
    IOUtils.close(verifyingOutput, directory);
}
Also used : IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IndexOutput(org.apache.lucene.store.IndexOutput) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory)

Example 9 with IndexOutput

use of org.apache.lucene.store.IndexOutput in project elasticsearch by elastic.

the class XAnalyzingSuggester method build.

/**
 * Builds the suggester FST from the given input.
 * <p>
 * The method works in two phases. First every analyzed form of every surface form is written
 * to a temporary file as one record laid out as
 * {@code analyzedLength (short) | analyzed bytes | weight (int) | [surfaceLength (short)] | surface bytes | [payload bytes]},
 * where the bracketed parts are only present when the input has payloads. The file is then
 * sorted offline and read back in order to feed the FST builder, de-duplicating surface forms
 * per analyzed form and keeping at most {@code maxSurfaceFormsPerAnalyzedForm} of them.
 * <p>
 * Temporary files are deleted and the reader/writer are closed in all cases.
 *
 * @param iterator source of surface forms, weights and (optionally) payloads
 * @throws IOException if reading or writing the temporary files fails
 * @throws IllegalArgumentException if an analyzed form or (with payloads) a surface form is
 *         longer than {@code Short.MAX_VALUE - 2}, or if a surface form contains the reserved
 *         payload separator while payloads are enabled
 */
@Override
public void build(InputIterator iterator) throws IOException {
    String prefix = getClass().getSimpleName();
    Directory tempDir = getTempDir();
    // Must be refreshed before the comparator is created: the comparator receives the flag by
    // value and needs the same record layout that is written below.
    hasPayloads = iterator.hasPayloads();
    OfflineSorter sorter = new OfflineSorter(tempDir, prefix, new AnalyzingComparator(hasPayloads));
    IndexOutput tempInput = tempDir.createTempOutput(prefix, "input", IOContext.DEFAULT);
    OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
    OfflineSorter.ByteSequencesReader reader = null;
    BytesRefBuilder scratch = new BytesRefBuilder();
    TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
    String tempSortedFileName = null;
    count = 0;
    byte[] buffer = new byte[8];
    try {
        ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
        for (BytesRef surfaceForm; (surfaceForm = iterator.next()) != null; ) {
            LimitedFiniteStringsIterator finiteStrings = new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions);
            for (IntsRef string; (string = finiteStrings.next()) != null; count++) {
                Util.toBytesRef(string, scratch);
                // length of the analyzed text (FST input)
                if (scratch.length() > Short.MAX_VALUE - 2) {
                    throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE - 2) + " in length (got " + scratch.length() + ")");
                }
                short analyzedLength = (short) scratch.length();
                // compute the required length:
                // analyzed sequence + weight (4) + surface + analyzedLength (short)
                int requiredLength = analyzedLength + 4 + surfaceForm.length + 2;
                BytesRef payload;
                if (hasPayloads) {
                    if (surfaceForm.length > (Short.MAX_VALUE - 2)) {
                        throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE - 2) + " in length (got " + surfaceForm.length + ")");
                    }
                    payload = iterator.payload();
                    // payload + surfaceLength (short)
                    requiredLength += payload.length + 2;
                } else {
                    payload = null;
                }
                buffer = ArrayUtil.grow(buffer, requiredLength);
                output.reset(buffer);
                output.writeShort(analyzedLength);
                output.writeBytes(scratch.bytes(), 0, scratch.length());
                output.writeInt(encodeWeight(iterator.weight()));
                if (hasPayloads) {
                    // The separator is used below to glue surface form and payload together, so
                    // it may not occur in the surface form. Scan the valid slice only, i.e.
                    // starting at surfaceForm.offset.
                    for (int i = 0; i < surfaceForm.length; i++) {
                        if (surfaceForm.bytes[surfaceForm.offset + i] == payloadSep) {
                            throw new IllegalArgumentException("surface form cannot contain unit separator character U+001F; this character is reserved");
                        }
                    }
                    output.writeShort((short) surfaceForm.length);
                    output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
                    output.writeBytes(payload.bytes, payload.offset, payload.length);
                } else {
                    output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
                }
                assert output.getPosition() == requiredLength : output.getPosition() + " vs " + requiredLength;
                writer.write(buffer, 0, output.getPosition());
            }
            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size());
        }
        writer.close();
        // Sort all input/output pairs (required by FST.Builder):
        tempSortedFileName = sorter.sort(tempInput.getName());
        // Free disk space:
        tempDir.deleteFile(tempInput.getName());
        reader = new OfflineSorter.ByteSequencesReader(tempDir.openChecksumInput(tempSortedFileName, IOContext.READONCE), prefix);
        PairOutputs<Long, BytesRef> outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
        Builder<Pair<Long, BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
        // Build FST:
        BytesRefBuilder previousAnalyzed = null;
        BytesRefBuilder analyzed = new BytesRefBuilder();
        BytesRef surface = new BytesRef();
        IntsRefBuilder scratchInts = new IntsRefBuilder();
        ByteArrayDataInput input = new ByteArrayDataInput();
        // Used to remove duplicate surface forms (but we
        // still index the highest-weight one).  We clear
        // this when we see a new analyzed form, so it cannot
        // grow unbounded (at most 256 entries):
        Set<BytesRef> seenSurfaceForms = new HashSet<>();
        int dedup = 0;
        while (true) {
            BytesRef bytes = reader.next();
            if (bytes == null) {
                break;
            }
            // Decode one record in the layout written above.
            input.reset(bytes.bytes, bytes.offset, bytes.length);
            short analyzedLength = input.readShort();
            // +2 leaves room for the two bytes appended to the analyzed form further down
            analyzed.grow(analyzedLength + 2);
            input.readBytes(analyzed.bytes(), 0, analyzedLength);
            analyzed.setLength(analyzedLength);
            long cost = input.readInt();
            // surface points into the record buffer; it is deep-copied wherever it is retained
            surface.bytes = bytes.bytes;
            if (hasPayloads) {
                surface.length = input.readShort();
                surface.offset = input.getPosition();
            } else {
                surface.offset = input.getPosition();
                surface.length = bytes.length - surface.offset;
            }
            if (previousAnalyzed == null) {
                // very first record
                previousAnalyzed = new BytesRefBuilder();
                previousAnalyzed.copyBytes(analyzed);
                seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
            } else if (analyzed.get().equals(previousAnalyzed.get())) {
                // same analyzed form as the previous record
                dedup++;
                if (dedup >= maxSurfaceFormsPerAnalyzedForm) {
                    // dups: skip the rest:
                    continue;
                }
                if (seenSurfaceForms.contains(surface)) {
                    continue;
                }
                seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
            } else {
                // new analyzed form: restart de-duplication
                dedup = 0;
                previousAnalyzed.copyBytes(analyzed);
                seenSurfaceForms.clear();
                seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
            }
            // TODO: I think we can avoid the extra 2 bytes when
            // there is no dup (dedup==0), but we'd have to fix
            // the exactFirst logic ... which would be sort of
            // hairy because we'd need to special case the two
            // (dup/not dup)...
            // NOTE: must be byte 0 so we sort before whatever
            // is next
            analyzed.append((byte) 0);
            analyzed.append((byte) dedup);
            Util.toIntsRef(analyzed.get(), scratchInts);
            //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
            if (!hasPayloads) {
                builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
            } else {
                // FST output is: surface bytes, separator byte, payload bytes
                int payloadOffset = input.getPosition() + surface.length;
                int payloadLength = bytes.length - payloadOffset;
                BytesRef br = new BytesRef(surface.length + 1 + payloadLength);
                System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
                br.bytes[surface.length] = (byte) payloadSep;
                System.arraycopy(bytes.bytes, payloadOffset, br.bytes, surface.length + 1, payloadLength);
                br.length = br.bytes.length;
                builder.add(scratchInts.get(), outputs.newPair(cost, br));
            }
        }
        fst = builder.finish();
    //PrintWriter pw = new PrintWriter("/tmp/out.dot");
    //Util.toDot(fst, pw, true, true);
    //pw.close();
    } finally {
        // best-effort cleanup on both the success and the failure path
        IOUtils.closeWhileHandlingException(reader, writer);
        IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
    }
}
Also used : OfflineSorter(org.apache.lucene.util.OfflineSorter) Builder(org.apache.lucene.util.fst.Builder) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) PairOutputs(org.apache.lucene.util.fst.PairOutputs) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory) Pair(org.apache.lucene.util.fst.PairOutputs.Pair) HashSet(java.util.HashSet) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IndexOutput(org.apache.lucene.store.IndexOutput) LimitedFiniteStringsIterator(org.apache.lucene.util.automaton.LimitedFiniteStringsIterator) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) ByteArrayDataOutput(org.apache.lucene.store.ByteArrayDataOutput) TokenStreamToAutomaton(org.apache.lucene.analysis.TokenStreamToAutomaton)

Example 10 with IndexOutput

use of org.apache.lucene.store.IndexOutput in project elasticsearch by elastic.

the class InputStreamIndexInputTests method testSingleReadSingleByteLimit.

/**
 * Reads a six byte file (three 1s followed by three 2s) through a series of
 * {@code InputStreamIndexInput} wrappers that are each limited to a single byte, using the
 * single-byte {@code read()} call. Every wrapper must yield exactly one byte and then report
 * end of stream; once the underlying input is exhausted a new wrapper must report zero bytes.
 *
 * @throws IOException if the in-memory file cannot be written or read
 */
public void testSingleReadSingleByteLimit() throws IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexOutput out = directory.createOutput("test", IOContext.DEFAULT);
    // file content: 1 1 1 2 2 2
    for (byte value = 1; value <= 2; value++) {
        for (int repeat = 0; repeat < 3; repeat++) {
            out.writeByte(value);
        }
    }
    out.close();

    IndexInput input = directory.openInput("test", IOContext.DEFAULT);
    for (int expected = 1; expected <= 2; expected++) {
        for (int repeat = 0; repeat < 3; repeat++) {
            InputStreamIndexInput limited = new InputStreamIndexInput(input, 1);
            assertThat(input.getFilePointer(), lessThan(input.length()));
            assertThat(limited.actualSizeToRead(), equalTo(1L));
            assertThat(limited.read(), equalTo(expected));
            // the one-byte limit is used up
            assertThat(limited.read(), equalTo(-1));
        }
    }

    // the underlying input is fully consumed now
    assertThat(input.getFilePointer(), equalTo(input.length()));
    InputStreamIndexInput exhausted = new InputStreamIndexInput(input, 1);
    assertThat(exhausted.actualSizeToRead(), equalTo(0L));
    assertThat(exhausted.read(), equalTo(-1));
}
Also used : IndexInput(org.apache.lucene.store.IndexInput) IndexOutput(org.apache.lucene.store.IndexOutput) RAMDirectory(org.apache.lucene.store.RAMDirectory)

Aggregations

IndexOutput (org.apache.lucene.store.IndexOutput)182 Directory (org.apache.lucene.store.Directory)79 IndexInput (org.apache.lucene.store.IndexInput)76 RAMDirectory (org.apache.lucene.store.RAMDirectory)36 FilterDirectory (org.apache.lucene.store.FilterDirectory)34 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)27 ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput)27 BytesRef (org.apache.lucene.util.BytesRef)26 IOException (java.io.IOException)20 CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput)18 RAMFile (org.apache.lucene.store.RAMFile)16 RAMOutputStream (org.apache.lucene.store.RAMOutputStream)16 IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException)14 IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException)14 IOContext (org.apache.lucene.store.IOContext)13 ArrayList (java.util.ArrayList)11 BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput)11 RAMInputStream (org.apache.lucene.store.RAMInputStream)11 NIOFSDirectory (org.apache.lucene.store.NIOFSDirectory)10 NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)10