
Example 86 with IndexInput

Use of org.apache.lucene.store.IndexInput in project lucene-solr by apache.

Class BlockDirectoryTest, method assertInputsEquals.

private void assertInputsEquals(String name, Directory fsDir, Directory hdfs) throws IOException {
    int reads = random.nextInt(MAX_NUMBER_OF_READS);
    IndexInput fsInput = fsDir.openInput(name, new IOContext());
    IndexInput hdfsInput = hdfs.openInput(name, new IOContext());
    assertEquals(fsInput.length(), hdfsInput.length());
    int fileLength = (int) fsInput.length();
    for (int i = 0; i < reads; i++) {
        int rnd;
        if (fileLength == 0) {
            rnd = 0;
        } else {
            rnd = random.nextInt(Math.min(MAX_BUFFER_SIZE - MIN_BUFFER_SIZE, fileLength));
        }
        byte[] fsBuf = new byte[rnd + MIN_BUFFER_SIZE];
        byte[] hdfsBuf = new byte[fsBuf.length];
        int offset = random.nextInt(fsBuf.length);
        int length = random.nextInt(fsBuf.length - offset);
        int pos;
        if (fileLength == 0) {
            pos = 0;
        } else {
            pos = random.nextInt(fileLength - length);
        }
        fsInput.seek(pos);
        fsInput.readBytes(fsBuf, offset, length);
        hdfsInput.seek(pos);
        hdfsInput.readBytes(hdfsBuf, offset, length);
        // compare the bytes that were actually read into [offset, offset + length)
        for (int f = offset; f < offset + length; f++) {
            if (fsBuf[f] != hdfsBuf[f]) {
                fail("read [" + i + "]");
            }
        }
    }
    fsInput.close();
    hdfsInput.close();
}
Also used: IndexInput (org.apache.lucene.store.IndexInput), IOContext (org.apache.lucene.store.IOContext)
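
The test above drives the same random seek/read sequence against a filesystem-backed and an HDFS-backed Directory and checks that both return identical bytes. For reference, a minimal self-contained sketch of that open/seek/readBytes pattern is shown below; it uses an in-memory RAMDirectory and a made-up file name rather than the directories under test.

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class IndexInputReadSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            // write a small file so there is something to read back
            try (IndexOutput out = dir.createOutput("demo", IOContext.DEFAULT)) {
                byte[] data = { 1, 2, 3, 4, 5, 6, 7, 8 };
                out.writeBytes(data, data.length);
            }
            // open the file, seek to an absolute position and read a range into a buffer offset
            try (IndexInput in = dir.openInput("demo", IOContext.DEFAULT)) {
                byte[] buf = new byte[6];
                in.seek(2);                      // absolute position within the file
                in.readBytes(buf, 1, 4);         // fills buf[1..4] with file bytes 2..5
                System.out.println("length=" + in.length() + " pointer=" + in.getFilePointer());
            }
        }
    }
}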

Example 87 with IndexInput

Use of org.apache.lucene.store.IndexInput in project jackrabbit-oak by apache.

Class LuceneBlobCacheTest, method assertWrites.

byte[] assertWrites(Directory dir, int blobSize) throws IOException {
    byte[] data = randomBytes(blobSize);
    IndexOutput o = dir.createOutput("test", IOContext.DEFAULT);
    o.writeBytes(data, data.length);
    o.close();
    IndexInput i = dir.openInput("test", IOContext.DEFAULT);
    assertEquals(blobSize, i.length());
    byte[] result = new byte[blobSize];
    i.readBytes(result, 0, result.length);
    assertTrue(Arrays.equals(data, result));
    // Load again to see if it is cached
    i = dir.openInput("test", IOContext.DEFAULT);
    assertEquals(blobSize, i.length());
    result = new byte[blobSize];
    i.readBytes(result, 0, result.length);
    assertTrue(Arrays.equals(data, result));
    assertEquals(1, fileDataStore.count);
    return data;
}
Also used: IndexInput (org.apache.lucene.store.IndexInput), IndexOutput (org.apache.lucene.store.IndexOutput)
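
Setting aside the cache assertion (fileDataStore.count is part of the test fixture and not shown here), the core of assertWrites is an IndexOutput/IndexInput round trip. A self-contained sketch of that round trip against a plain RAMDirectory, with an arbitrary blob size, might look like this:

import java.util.Arrays;
import java.util.Random;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class RoundTripSketch {
    public static void main(String[] args) throws Exception {
        byte[] data = new byte[1024];
        new Random(42).nextBytes(data);
        try (Directory dir = new RAMDirectory()) {
            // write the blob under the name "test"
            try (IndexOutput out = dir.createOutput("test", IOContext.DEFAULT)) {
                out.writeBytes(data, data.length);
            }
            // read it back in full and compare against the original bytes
            try (IndexInput in = dir.openInput("test", IOContext.DEFAULT)) {
                byte[] result = new byte[(int) in.length()];
                in.readBytes(result, 0, result.length);
                System.out.println("round trip ok: " + Arrays.equals(data, result));
            }
        }
    }
}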

Example 88 with IndexInput

Use of org.apache.lucene.store.IndexInput in project jackrabbit-oak by apache.

Class OakDirectoryTest, method largeFile.

@Test
public void largeFile() throws Exception {
    FileStore store = FileStoreBuilder.fileStoreBuilder(tempFolder.getRoot()).withMemoryMapping(false).withBlobStore(new BlackHoleBlobStore()).build();
    SegmentNodeStore nodeStore = SegmentNodeStoreBuilders.builder(store).build();
    IndexDefinition defn = new IndexDefinition(INITIAL_CONTENT, EmptyNodeState.EMPTY_NODE, "/foo");
    Directory directory = new OakDirectory(nodeStore.getRoot().builder(), defn, false);
    long expectedSize = ONE_GB * 2 + ONE_MB;
    String fileName = "test";
    writeFile(directory, fileName, expectedSize);
    assertEquals(expectedSize, directory.fileLength(fileName));
    IndexInput input = directory.openInput(fileName, IOContext.DEFAULT);
    readInputToEnd(expectedSize, input);
    store.close();
}
Also used: FileStore (org.apache.jackrabbit.oak.segment.file.FileStore), IndexInput (org.apache.lucene.store.IndexInput), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), SegmentNodeStore (org.apache.jackrabbit.oak.segment.SegmentNodeStore), Directory (org.apache.lucene.store.Directory), Test (org.junit.Test)
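
writeFile and readInputToEnd are helpers defined elsewhere in OakDirectoryTest and are not reproduced on this page. The sketch below is a hypothetical stand-in for readInputToEnd only: it drains an IndexInput in fixed-size chunks and checks the byte count, without assuming anything about the Oak-specific implementation.

import java.io.IOException;
import org.apache.lucene.store.IndexInput;

final class ReadToEndSketch {
    // Hypothetical equivalent of the test's readInputToEnd helper:
    // read the whole input in 16 KB chunks and verify the reported size.
    static void readInputToEnd(long expectedSize, IndexInput input) throws IOException {
        if (input.length() != expectedSize) {
            throw new AssertionError("expected " + expectedSize + " bytes, file reports " + input.length());
        }
        byte[] buf = new byte[16 * 1024];
        long remaining = input.length();
        while (remaining > 0) {
            int chunk = (int) Math.min(buf.length, remaining);
            input.readBytes(buf, 0, chunk);   // advances the file pointer by chunk bytes
            remaining -= chunk;
        }
        input.close();
    }
}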

Example 89 with IndexInput

Use of org.apache.lucene.store.IndexInput in project lucene-solr by apache.

Class SimpleTextDocValuesReader, method getSortedSet.

@Override
public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
    final OneField field = fields.get(fieldInfo.name);
    // SegmentCoreReaders already verifies this field is valid:
    assert field != null;
    final IndexInput in = data.clone();
    final BytesRefBuilder scratch = new BytesRefBuilder();
    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
    return new SortedSetDocValues() {

        String[] currentOrds = new String[0];

        int currentIndex = 0;

        final BytesRefBuilder term = new BytesRefBuilder();

        int doc = -1;

        @Override
        public int nextDoc() throws IOException {
            return advance(doc + 1);
        }

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public long cost() {
            return maxDoc;
        }

        @Override
        public int advance(int target) throws IOException {
            for (int i = target; i < maxDoc; ++i) {
                // seek past the fixed-size per-value entries to the ord line for document i
                in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + i * (1 + field.ordPattern.length()));
                SimpleTextUtil.readLine(in, scratch);
                String ordList = scratch.get().utf8ToString().trim();
                if (ordList.isEmpty() == false) {
                    currentOrds = ordList.split(",");
                    currentIndex = 0;
                    return doc = i;
                }
            }
            return doc = NO_MORE_DOCS;
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
            in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + target * (1 + field.ordPattern.length()));
            SimpleTextUtil.readLine(in, scratch);
            String ordList = scratch.get().utf8ToString().trim();
            doc = target;
            if (ordList.isEmpty() == false) {
                currentOrds = ordList.split(",");
                currentIndex = 0;
                return true;
            }
            return false;
        }

        @Override
        public long nextOrd() throws IOException {
            if (currentIndex == currentOrds.length) {
                return NO_MORE_ORDS;
            } else {
                return Long.parseLong(currentOrds[currentIndex++]);
            }
        }

        @Override
        public BytesRef lookupOrd(long ord) throws IOException {
            if (ord < 0 || ord >= field.numValues) {
                throw new IndexOutOfBoundsException("ord must be 0 .. " + (field.numValues - 1) + "; got " + ord);
            }
            in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength));
            SimpleTextUtil.readLine(in, scratch);
            assert StringHelper.startsWith(scratch.get(), LENGTH) : "got " + scratch.get().utf8ToString() + " in=" + in;
            int len;
            try {
                len = decoder.parse(new String(scratch.bytes(), LENGTH.length, scratch.length() - LENGTH.length, StandardCharsets.UTF_8)).intValue();
            } catch (ParseException pe) {
                throw new CorruptIndexException("failed to parse int length", in, pe);
            }
            term.grow(len);
            term.setLength(len);
            in.readBytes(term.bytes(), 0, len);
            return term.get();
        }

        @Override
        public long getValueCount() {
            return field.numValues;
        }
    };
}
Also used: BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder), DecimalFormatSymbols (java.text.DecimalFormatSymbols), DecimalFormat (java.text.DecimalFormat), ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput), BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput), IndexInput (org.apache.lucene.store.IndexInput), ParseException (java.text.ParseException)
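
The reader above decodes ords and terms lazily from the SimpleText data file on each call, using the per-field layout (pattern, maxLength, ordPattern) to compute file offsets. On the consuming side, a SortedSetDocValues instance produced this way is iterated per document and then per ord, roughly as in the sketch below; the leaf reader and the field name "category" are assumptions for illustration.

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

final class SortedSetScanSketch {
    // leafReader is assumed to come from an open IndexReader; "category" is a hypothetical field.
    static void dump(LeafReader leafReader) throws IOException {
        SortedSetDocValues values = DocValues.getSortedSet(leafReader, "category");
        for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
            long ord;
            while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                BytesRef term = values.lookupOrd(ord);   // resolve the ord back to its term bytes
                System.out.println("doc " + doc + " -> " + term.utf8ToString());
            }
        }
    }
}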

Example 90 with IndexInput

Use of org.apache.lucene.store.IndexInput in project lucene-solr by apache.

Class CompressingStoredFieldsWriter, method merge.

@Override
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    int numReaders = mergeState.maxDocs.length;
    MatchingReaders matching = new MatchingReaders(mergeState);
    if (mergeState.needsIndexSort) {
        /**
       * If all readers are compressed and they have the same fieldinfos then we can merge the serialized document
       * directly.
       */
        List<CompressingStoredFieldsMergeSub> subs = new ArrayList<>();
        for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
            if (matching.matchingReaders[i] && mergeState.storedFieldsReaders[i] instanceof CompressingStoredFieldsReader) {
                CompressingStoredFieldsReader storedFieldsReader = (CompressingStoredFieldsReader) mergeState.storedFieldsReaders[i];
                storedFieldsReader.checkIntegrity();
                subs.add(new CompressingStoredFieldsMergeSub(storedFieldsReader, mergeState.docMaps[i], mergeState.maxDocs[i]));
            } else {
                return super.merge(mergeState);
            }
        }
        final DocIDMerger<CompressingStoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, true);
        while (true) {
            CompressingStoredFieldsMergeSub sub = docIDMerger.next();
            if (sub == null) {
                break;
            }
            assert sub.mappedDocID == docCount;
            SerializedDocument doc = sub.reader.document(sub.docID);
            startDocument();
            bufferedDocs.copyBytes(doc.in, doc.length);
            numStoredFieldsInDoc = doc.numStoredFields;
            finishDocument();
            ++docCount;
        }
        finish(mergeState.mergeFieldInfos, docCount);
        return docCount;
    }
    for (int readerIndex = 0; readerIndex < numReaders; readerIndex++) {
        MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
        CompressingStoredFieldsReader matchingFieldsReader = null;
        if (matching.matchingReaders[readerIndex]) {
            final StoredFieldsReader fieldsReader = mergeState.storedFieldsReaders[readerIndex];
            // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
            if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
                matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
            }
        }
        final int maxDoc = mergeState.maxDocs[readerIndex];
        final Bits liveDocs = mergeState.liveDocs[readerIndex];
        // if it's some other format, or an older version of this format, or safety switch:
        if (matchingFieldsReader == null || matchingFieldsReader.getVersion() != VERSION_CURRENT || BULK_MERGE_ENABLED == false) {
            // naive merge...
            StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[readerIndex];
            if (storedFieldsReader != null) {
                storedFieldsReader.checkIntegrity();
            }
            for (int docID = 0; docID < maxDoc; docID++) {
                if (liveDocs != null && liveDocs.get(docID) == false) {
                    continue;
                }
                startDocument();
                storedFieldsReader.visitDocument(docID, visitor);
                finishDocument();
                ++docCount;
            }
        } else if (matchingFieldsReader.getCompressionMode() == compressionMode && matchingFieldsReader.getChunkSize() == chunkSize && matchingFieldsReader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT && liveDocs == null && !tooDirty(matchingFieldsReader)) {
            // if the format is older, it's always handled by the naive merge case above
            assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
            matchingFieldsReader.checkIntegrity();
            // flush any pending chunks
            if (numBufferedDocs > 0) {
                flush();
                // incomplete: we had to force this flush
                numDirtyChunks++;
            }
            // iterate over each chunk. we use the stored fields index to find chunk boundaries,
            // read the docstart + doccount from the chunk header (we write a new header, since doc numbers will change),
            // and just copy the bytes directly.
            IndexInput rawDocs = matchingFieldsReader.getFieldsStream();
            CompressingStoredFieldsIndexReader index = matchingFieldsReader.getIndexReader();
            rawDocs.seek(index.getStartPointer(0));
            int docID = 0;
            while (docID < maxDoc) {
                // read header
                int base = rawDocs.readVInt();
                if (base != docID) {
                    throw new CorruptIndexException("invalid state: base=" + base + ", docID=" + docID, rawDocs);
                }
                int code = rawDocs.readVInt();
                // write a new index entry and new header for this chunk.
                int bufferedDocs = code >>> 1;
                indexWriter.writeIndex(bufferedDocs, fieldsStream.getFilePointer());
                // rebase
                fieldsStream.writeVInt(docBase);
                fieldsStream.writeVInt(code);
                docID += bufferedDocs;
                docBase += bufferedDocs;
                docCount += bufferedDocs;
                if (docID > maxDoc) {
                    throw new CorruptIndexException("invalid state: base=" + base + ", count=" + bufferedDocs + ", maxDoc=" + maxDoc, rawDocs);
                }
                // copy bytes until the next chunk boundary (or end of chunk data).
                // using the stored fields index for this isn't the most efficient, but fast enough
                // and is a source of redundancy for detecting bad things.
                final long end;
                if (docID == maxDoc) {
                    end = matchingFieldsReader.getMaxPointer();
                } else {
                    end = index.getStartPointer(docID);
                }
                fieldsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
            }
            if (rawDocs.getFilePointer() != matchingFieldsReader.getMaxPointer()) {
                throw new CorruptIndexException("invalid state: pos=" + rawDocs.getFilePointer() + ", max=" + matchingFieldsReader.getMaxPointer(), rawDocs);
            }
            // since we bulk merged all chunks, we inherit any dirty ones from this segment.
            numChunks += matchingFieldsReader.getNumChunks();
            numDirtyChunks += matchingFieldsReader.getNumDirtyChunks();
        } else {
            // if the format is older, it's always handled by the naive merge case above
            assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
            matchingFieldsReader.checkIntegrity();
            for (int docID = 0; docID < maxDoc; docID++) {
                if (liveDocs != null && liveDocs.get(docID) == false) {
                    continue;
                }
                SerializedDocument doc = matchingFieldsReader.document(docID);
                startDocument();
                bufferedDocs.copyBytes(doc.in, doc.length);
                numStoredFieldsInDoc = doc.numStoredFields;
                finishDocument();
                ++docCount;
            }
        }
    }
    finish(mergeState.mergeFieldInfos, docCount);
    return docCount;
}
Also used: SerializedDocument (org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.SerializedDocument), ArrayList (java.util.ArrayList), CorruptIndexException (org.apache.lucene.index.CorruptIndexException), StoredFieldsReader (org.apache.lucene.codecs.StoredFieldsReader), Bits (org.apache.lucene.util.Bits), IndexInput (org.apache.lucene.store.IndexInput)
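
The bulk path above never decompresses the stored fields: it seeks the matching reader's raw fields stream to a chunk boundary, rewrites only the chunk header and index entry, and copies the compressed payload verbatim. The copy primitive it relies on, DataOutput.copyBytes, is shown in isolation below; the directory and file names are made up for the example.

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

final class CopyBytesSketch {
    // Copy the byte range [start, end) of "src.bin" into a new file "dst.bin"
    // without interpreting the data, the same primitive the bulk merge uses per chunk.
    static void copyRange(Directory dir, long start, long end) throws IOException {
        try (IndexInput in = dir.openInput("src.bin", IOContext.DEFAULT);
             IndexOutput out = dir.createOutput("dst.bin", IOContext.DEFAULT)) {
            in.seek(start);
            out.copyBytes(in, end - start);   // raw byte copy, no decode/re-encode
        }
    }
}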

Aggregations

IndexInput (org.apache.lucene.store.IndexInput): 150
IndexOutput (org.apache.lucene.store.IndexOutput): 69
Directory (org.apache.lucene.store.Directory): 62
ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 41
IOException (java.io.IOException): 21
RAMDirectory (org.apache.lucene.store.RAMDirectory): 21
FilterDirectory (org.apache.lucene.store.FilterDirectory): 19
BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput): 17
CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 14
ArrayList (java.util.ArrayList): 13
BytesRef (org.apache.lucene.util.BytesRef): 13
BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 13
CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput): 10
IOContext (org.apache.lucene.store.IOContext): 10
NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory): 10
IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor): 9
Relation (org.apache.lucene.index.PointValues.Relation): 9
Test (org.junit.Test): 8
FileNotFoundException (java.io.FileNotFoundException): 7
BaseDirectoryWrapper (org.apache.lucene.store.BaseDirectoryWrapper): 7