Search in sources:

Example 1 with StoredFieldsReader

Use of org.apache.lucene.codecs.StoredFieldsReader in the lucene-solr project by Apache.

From the class SortingStoredFieldsConsumer, the method flush.

@Override
void flush(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
    super.flush(state, sortMap);
    if (sortMap == null) {
        // we're lucky: the index is already sorted, so just rename the temporary files and return
        for (Map.Entry<String, String> entry : tmpDirectory.getTemporaryFiles().entrySet()) {
            tmpDirectory.rename(entry.getValue(), entry.getKey());
        }
        return;
    }
    StoredFieldsReader reader = docWriter.codec.storedFieldsFormat().fieldsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
    StoredFieldsReader mergeReader = reader.getMergeInstance();
    StoredFieldsWriter sortWriter = docWriter.codec.storedFieldsFormat().fieldsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
    try {
        reader.checkIntegrity();
        CopyVisitor visitor = new CopyVisitor(sortWriter);
        for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
            sortWriter.startDocument();
            mergeReader.visitDocument(sortMap.newToOld(docID), visitor);
            sortWriter.finishDocument();
        }
        sortWriter.finish(state.fieldInfos, state.segmentInfo.maxDoc());
    } finally {
        IOUtils.close(reader, sortWriter);
        IOUtils.deleteFiles(tmpDirectory, tmpDirectory.getTemporaryFiles().values());
    }
}
Also used: StoredFieldsReader(org.apache.lucene.codecs.StoredFieldsReader) Map(java.util.Map) StoredFieldsWriter(org.apache.lucene.codecs.StoredFieldsWriter)
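The CopyVisitor used in the loop above is not included in this listing. The sketch below is an illustration only, not the actual SortingStoredFieldsConsumer.CopyVisitor (the real one implements IndexableField to avoid allocating a StoredField per value); the class name SimpleCopyVisitor is invented, and the stringField signature assumes the byte[]-based StoredFieldVisitor API of this lucene-solr snapshot (older releases pass a String instead).

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;

// Illustrative sketch only: forwards every stored value it visits to a StoredFieldsWriter.
class SimpleCopyVisitor extends StoredFieldVisitor {

    private final StoredFieldsWriter writer;

    SimpleCopyVisitor(StoredFieldsWriter writer) {
        this.writer = writer;
    }

    @Override
    public Status needsField(FieldInfo fieldInfo) {
        // copy every stored field of the visited document
        return Status.YES;
    }

    @Override
    public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
        writer.writeField(fieldInfo, new StoredField(fieldInfo.name, value));
    }

    @Override
    public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
        // assumption: the byte[]-based signature; decode as UTF-8 before re-writing
        writer.writeField(fieldInfo, new StoredField(fieldInfo.name, new String(value, StandardCharsets.UTF_8)));
    }

    @Override
    public void intField(FieldInfo fieldInfo, int value) throws IOException {
        writer.writeField(fieldInfo, new StoredField(fieldInfo.name, value));
    }

    @Override
    public void longField(FieldInfo fieldInfo, long value) throws IOException {
        writer.writeField(fieldInfo, new StoredField(fieldInfo.name, value));
    }

    @Override
    public void floatField(FieldInfo fieldInfo, float value) throws IOException {
        writer.writeField(fieldInfo, new StoredField(fieldInfo.name, value));
    }

    @Override
    public void doubleField(FieldInfo fieldInfo, double value) throws IOException {
        writer.writeField(fieldInfo, new StoredField(fieldInfo.name, value));
    }
}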

Example 2 with StoredFieldsReader

Use of org.apache.lucene.codecs.StoredFieldsReader in the lucene-solr project by Apache.

From the class CheckIndex, the method testStoredFields.

/**
   * Test stored fields.
   * @lucene.experimental
   */
public static Status.StoredFieldStatus testStoredFields(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    long startNS = System.nanoTime();
    final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
    try {
        if (infoStream != null) {
            infoStream.print("    test: stored fields.......");
        }
        // Scan stored fields for all documents
        final Bits liveDocs = reader.getLiveDocs();
        StoredFieldsReader storedFields = reader.getFieldsReader().getMergeInstance();
        for (int j = 0; j < reader.maxDoc(); ++j) {
            // Intentionally pull even deleted documents to
            // make sure they too are not corrupt:
            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
            storedFields.visitDocument(j, visitor);
            Document doc = visitor.getDocument();
            if (liveDocs == null || liveDocs.get(j)) {
                status.docCount++;
                status.totFields += doc.getFields().size();
            }
        }
        // Validate docCount
        if (status.docCount != reader.numDocs()) {
            throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
        }
        msg(infoStream, String.format(Locale.ROOT, "OK [%d total field count; avg %.1f fields per doc] [took %.3f sec]", status.totFields, (((float) status.totFields) / status.docCount), nsToSec(System.nanoTime() - startNS)));
    } catch (Throwable e) {
        if (failFast) {
            throw IOUtils.rethrowAlways(e);
        }
        msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
        status.error = e;
        if (infoStream != null) {
            e.printStackTrace(infoStream);
        }
    }
    return status;
}
Also used: DocValuesStatus(org.apache.lucene.index.CheckIndex.Status.DocValuesStatus) StoredFieldsReader(org.apache.lucene.codecs.StoredFieldsReader) DocumentStoredFieldVisitor(org.apache.lucene.document.DocumentStoredFieldVisitor) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document)
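Because testStoredFields is public and static, it can also be run directly against the segments of an existing on-disk index. The driver below is a hedged sketch: the class name StoredFieldsCheckDriver is invented, the index path comes from a placeholder command-line argument, and it assumes every leaf of the DirectoryReader is a SegmentReader (and therefore a CodecReader), which holds for a regular on-disk index.

import java.nio.file.Paths;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class StoredFieldsCheckDriver {

    public static void main(String[] args) throws Exception {
        // args[0] is a placeholder: point it at a real index directory
        try (Directory dir = FSDirectory.open(Paths.get(args[0]));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            for (LeafReaderContext ctx : reader.leaves()) {
                // leaves of an on-disk index are SegmentReaders, which extend CodecReader
                CodecReader codecReader = (CodecReader) ctx.reader();
                CheckIndex.Status.StoredFieldStatus status =
                        CheckIndex.testStoredFields(codecReader, System.out, false);
                if (status.error != null) {
                    System.out.println("segment " + ctx.ord + " has a stored fields problem: " + status.error);
                }
            }
        }
    }
}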

Example 3 with StoredFieldsReader

Use of org.apache.lucene.codecs.StoredFieldsReader in the lucene-solr project by Apache.

From the class BaseIndexFileFormatTestCase, the method testMultiClose.

/** Calls close multiple times on closeable codec apis */
public void testMultiClose() throws IOException {
    // first make a one doc index
    Directory oneDocIndex = applyCreatedVersionMajor(newDirectory());
    IndexWriter iw = new IndexWriter(oneDocIndex, new IndexWriterConfig(new MockAnalyzer(random())));
    Document oneDoc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    Field customField = new Field("field", "contents", customType);
    oneDoc.add(customField);
    oneDoc.add(new NumericDocValuesField("field", 5));
    iw.addDocument(oneDoc);
    LeafReader oneDocReader = getOnlyLeafReader(DirectoryReader.open(iw));
    iw.close();
    // now feed to codec apis manually
    // we use an FSDirectory; things like RAMDirectory are not guaranteed to fail if you write to them after close(), etc.
    Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
    Codec codec = getCodec();
    SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
    FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(), proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(), proto.getPointDimensionCount(), proto.getPointNumBytes());
    FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { field });
    SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, fieldInfos, null, new IOContext(new FlushInfo(1, 20)));
    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ);
    // PostingsFormat
    try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
        consumer.write(oneDocReader.fields());
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (FieldsProducer producer = codec.postingsFormat().fieldsProducer(readState)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // DocValuesFormat
    try (DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(writeState)) {
        consumer.addNumericField(field, new EmptyDocValuesProducer() {

            @Override
            public NumericDocValues getNumeric(FieldInfo field) {
                return new NumericDocValues() {

                    int docID = -1;

                    @Override
                    public int docID() {
                        return docID;
                    }

                    @Override
                    public int nextDoc() {
                        docID++;
                        if (docID == 1) {
                            docID = NO_MORE_DOCS;
                        }
                        return docID;
                    }

                    @Override
                    public int advance(int target) {
                        if (docID <= 0 && target == 0) {
                            docID = 0;
                        } else {
                            docID = NO_MORE_DOCS;
                        }
                        return docID;
                    }

                    @Override
                    public boolean advanceExact(int target) throws IOException {
                        docID = target;
                        return target == 0;
                    }

                    @Override
                    public long cost() {
                        return 1;
                    }

                    @Override
                    public long longValue() {
                        return 5;
                    }
                };
            }
        });
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (DocValuesProducer producer = codec.docValuesFormat().fieldsProducer(readState)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // NormsFormat
    try (NormsConsumer consumer = codec.normsFormat().normsConsumer(writeState)) {
        consumer.addNormsField(field, new NormsProducer() {

            @Override
            public NumericDocValues getNorms(FieldInfo field) {
                return new NumericDocValues() {

                    int docID = -1;

                    @Override
                    public int docID() {
                        return docID;
                    }

                    @Override
                    public int nextDoc() {
                        docID++;
                        if (docID == 1) {
                            docID = NO_MORE_DOCS;
                        }
                        return docID;
                    }

                    @Override
                    public int advance(int target) {
                        if (docID <= 0 && target == 0) {
                            docID = 0;
                        } else {
                            docID = NO_MORE_DOCS;
                        }
                        return docID;
                    }

                    @Override
                    public boolean advanceExact(int target) throws IOException {
                        docID = target;
                        return target == 0;
                    }

                    @Override
                    public long cost() {
                        return 1;
                    }

                    @Override
                    public long longValue() {
                        return 5;
                    }
                };
            }

            @Override
            public void checkIntegrity() {
            }

            @Override
            public void close() {
            }

            @Override
            public long ramBytesUsed() {
                return 0;
            }
        });
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (NormsProducer producer = codec.normsFormat().normsProducer(readState)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // TermVectorsFormat
    try (TermVectorsWriter consumer = codec.termVectorsFormat().vectorsWriter(dir, segmentInfo, writeState.context)) {
        consumer.startDocument(1);
        consumer.startField(field, 1, false, false, false);
        consumer.startTerm(new BytesRef("testing"), 2);
        consumer.finishTerm();
        consumer.finishField();
        consumer.finishDocument();
        consumer.finish(fieldInfos, 1);
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (TermVectorsReader producer = codec.termVectorsFormat().vectorsReader(dir, segmentInfo, fieldInfos, readState.context)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // StoredFieldsFormat
    try (StoredFieldsWriter consumer = codec.storedFieldsFormat().fieldsWriter(dir, segmentInfo, writeState.context)) {
        consumer.startDocument();
        consumer.writeField(field, customField);
        consumer.finishDocument();
        consumer.finish(fieldInfos, 1);
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (StoredFieldsReader producer = codec.storedFieldsFormat().fieldsReader(dir, segmentInfo, fieldInfos, readState.context)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    IOUtils.close(oneDocReader, oneDocIndex, dir);
}
Also used: FieldsConsumer(org.apache.lucene.codecs.FieldsConsumer) Document(org.apache.lucene.document.Document) TermVectorsReader(org.apache.lucene.codecs.TermVectorsReader) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Codec(org.apache.lucene.codecs.Codec) StoredFieldsReader(org.apache.lucene.codecs.StoredFieldsReader) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DocValuesProducer(org.apache.lucene.codecs.DocValuesProducer) IOContext(org.apache.lucene.store.IOContext) NormsProducer(org.apache.lucene.codecs.NormsProducer) BytesRef(org.apache.lucene.util.BytesRef) StoredFieldsWriter(org.apache.lucene.codecs.StoredFieldsWriter) Directory(org.apache.lucene.store.Directory) DocValuesConsumer(org.apache.lucene.codecs.DocValuesConsumer) FieldsProducer(org.apache.lucene.codecs.FieldsProducer) IOException(java.io.IOException) FieldType(org.apache.lucene.document.FieldType) FlushInfo(org.apache.lucene.store.FlushInfo) TermVectorsWriter(org.apache.lucene.codecs.TermVectorsWriter) NormsConsumer(org.apache.lucene.codecs.NormsConsumer)
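What this test exercises is that every codec reader and writer tolerates repeated close() calls, as the Closeable contract requires: each try-with-resources block issues two explicit IOUtils.close calls plus the implicit close at the end of the block, so every instance is closed three times. Below is a minimal sketch of the guard pattern an implementation might use; the class name IdempotentWriterExample is invented for illustration and is not taken from any particular codec class.

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.store.IndexOutput;

// Illustrative guard pattern: makes close() a no-op after the first call,
// which is the behavior testMultiClose verifies for the real codec classes.
final class IdempotentWriterExample implements Closeable {

    private final IndexOutput out; // hypothetical underlying resource
    private boolean closed;

    IdempotentWriterExample(IndexOutput out) {
        this.out = out;
    }

    @Override
    public void close() throws IOException {
        if (closed) {
            return; // second and later calls do nothing, per the Closeable contract
        }
        closed = true;
        out.close();
    }
}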

Example 4 with StoredFieldsReader

Use of org.apache.lucene.codecs.StoredFieldsReader in the lucene-solr project by Apache.

From the class CompressingStoredFieldsWriter, the method merge.

@Override
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    int numReaders = mergeState.maxDocs.length;
    MatchingReaders matching = new MatchingReaders(mergeState);
    if (mergeState.needsIndexSort) {
        /*
         * If all readers are compressed and share the same FieldInfos, then we can merge the
         * serialized documents directly.
         */
        List<CompressingStoredFieldsMergeSub> subs = new ArrayList<>();
        for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
            if (matching.matchingReaders[i] && mergeState.storedFieldsReaders[i] instanceof CompressingStoredFieldsReader) {
                CompressingStoredFieldsReader storedFieldsReader = (CompressingStoredFieldsReader) mergeState.storedFieldsReaders[i];
                storedFieldsReader.checkIntegrity();
                subs.add(new CompressingStoredFieldsMergeSub(storedFieldsReader, mergeState.docMaps[i], mergeState.maxDocs[i]));
            } else {
                return super.merge(mergeState);
            }
        }
        final DocIDMerger<CompressingStoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, true);
        while (true) {
            CompressingStoredFieldsMergeSub sub = docIDMerger.next();
            if (sub == null) {
                break;
            }
            assert sub.mappedDocID == docCount;
            SerializedDocument doc = sub.reader.document(sub.docID);
            startDocument();
            bufferedDocs.copyBytes(doc.in, doc.length);
            numStoredFieldsInDoc = doc.numStoredFields;
            finishDocument();
            ++docCount;
        }
        finish(mergeState.mergeFieldInfos, docCount);
        return docCount;
    }
    for (int readerIndex = 0; readerIndex < numReaders; readerIndex++) {
        MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
        CompressingStoredFieldsReader matchingFieldsReader = null;
        if (matching.matchingReaders[readerIndex]) {
            final StoredFieldsReader fieldsReader = mergeState.storedFieldsReaders[readerIndex];
            // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
            if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
                matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
            }
        }
        final int maxDoc = mergeState.maxDocs[readerIndex];
        final Bits liveDocs = mergeState.liveDocs[readerIndex];
        // if it's some other format, an older version of this format, or bulk merging is disabled via the safety switch:
        if (matchingFieldsReader == null || matchingFieldsReader.getVersion() != VERSION_CURRENT || BULK_MERGE_ENABLED == false) {
            // naive merge...
            StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[readerIndex];
            if (storedFieldsReader != null) {
                storedFieldsReader.checkIntegrity();
            }
            for (int docID = 0; docID < maxDoc; docID++) {
                if (liveDocs != null && liveDocs.get(docID) == false) {
                    continue;
                }
                startDocument();
                storedFieldsReader.visitDocument(docID, visitor);
                finishDocument();
                ++docCount;
            }
        } else if (matchingFieldsReader.getCompressionMode() == compressionMode && matchingFieldsReader.getChunkSize() == chunkSize && matchingFieldsReader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT && liveDocs == null && !tooDirty(matchingFieldsReader)) {
            // if the format is older, it's always handled by the naive merge case above
            assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
            matchingFieldsReader.checkIntegrity();
            // flush any pending chunks
            if (numBufferedDocs > 0) {
                flush();
                // incomplete: we had to force this flush
                numDirtyChunks++;
            }
            // iterate over each chunk. we use the stored fields index to find chunk boundaries,
            // read the docstart + doccount from the chunk header (we write a new header, since doc numbers will change),
            // and just copy the bytes directly.
            IndexInput rawDocs = matchingFieldsReader.getFieldsStream();
            CompressingStoredFieldsIndexReader index = matchingFieldsReader.getIndexReader();
            rawDocs.seek(index.getStartPointer(0));
            int docID = 0;
            while (docID < maxDoc) {
                // read header
                int base = rawDocs.readVInt();
                if (base != docID) {
                    throw new CorruptIndexException("invalid state: base=" + base + ", docID=" + docID, rawDocs);
                }
                int code = rawDocs.readVInt();
                // write a new index entry and new header for this chunk.
                int bufferedDocs = code >>> 1;
                indexWriter.writeIndex(bufferedDocs, fieldsStream.getFilePointer());
                // rebase
                fieldsStream.writeVInt(docBase);
                fieldsStream.writeVInt(code);
                docID += bufferedDocs;
                docBase += bufferedDocs;
                docCount += bufferedDocs;
                if (docID > maxDoc) {
                    throw new CorruptIndexException("invalid state: base=" + base + ", count=" + bufferedDocs + ", maxDoc=" + maxDoc, rawDocs);
                }
                // copy bytes until the next chunk boundary (or end of chunk data).
                // using the stored fields index for this isn't the most efficient, but fast enough
                // and is a source of redundancy for detecting bad things.
                final long end;
                if (docID == maxDoc) {
                    end = matchingFieldsReader.getMaxPointer();
                } else {
                    end = index.getStartPointer(docID);
                }
                fieldsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
            }
            if (rawDocs.getFilePointer() != matchingFieldsReader.getMaxPointer()) {
                throw new CorruptIndexException("invalid state: pos=" + rawDocs.getFilePointer() + ", max=" + matchingFieldsReader.getMaxPointer(), rawDocs);
            }
            // since we bulk merged all chunks, we inherit any dirty ones from this segment.
            numChunks += matchingFieldsReader.getNumChunks();
            numDirtyChunks += matchingFieldsReader.getNumDirtyChunks();
        } else {
            // if the format is older, it's always handled by the naive merge case above
            assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
            matchingFieldsReader.checkIntegrity();
            for (int docID = 0; docID < maxDoc; docID++) {
                if (liveDocs != null && liveDocs.get(docID) == false) {
                    continue;
                }
                SerializedDocument doc = matchingFieldsReader.document(docID);
                startDocument();
                bufferedDocs.copyBytes(doc.in, doc.length);
                numStoredFieldsInDoc = doc.numStoredFields;
                finishDocument();
                ++docCount;
            }
        }
    }
    finish(mergeState.mergeFieldInfos, docCount);
    return docCount;
}
Also used: SerializedDocument(org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.SerializedDocument) ArrayList(java.util.ArrayList) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) StoredFieldsReader(org.apache.lucene.codecs.StoredFieldsReader) Bits(org.apache.lucene.util.Bits) IndexInput(org.apache.lucene.store.IndexInput)
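The bulk-copy branch above relies on the chunk-header layout of the compressing stored fields format: each chunk starts with a vInt docBase followed by a vInt whose low bit is a "sliced" flag and whose remaining bits carry the chunk's document count, which is why the loop computes bufferedDocs as code >>> 1 and can rewrite docBase while copying code verbatim. The sketch below decodes one such header in isolation; the ChunkHeader helper is invented for illustration and is not part of the Lucene codebase.

import java.io.IOException;
import org.apache.lucene.store.IndexInput;

// Hedged sketch of the decoding step used by the bulk-copy loop in merge() above.
final class ChunkHeader {

    final int docBase;    // first (pre-remap) docID of the chunk
    final int chunkDocs;  // number of documents stored in the chunk
    final boolean sliced; // whether the compressed payload is split into slices

    private ChunkHeader(int docBase, int chunkDocs, boolean sliced) {
        this.docBase = docBase;
        this.chunkDocs = chunkDocs;
        this.sliced = sliced;
    }

    // "in" must be positioned at a chunk boundary in the stored fields data stream,
    // e.g. via the stored fields index as in merge() above.
    static ChunkHeader read(IndexInput in) throws IOException {
        int docBase = in.readVInt();
        int code = in.readVInt();
        return new ChunkHeader(docBase, code >>> 1, (code & 1) != 0);
    }
}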

Aggregations

StoredFieldsReader (org.apache.lucene.codecs.StoredFieldsReader): 4 examples
StoredFieldsWriter (org.apache.lucene.codecs.StoredFieldsWriter): 2 examples
Document (org.apache.lucene.document.Document): 2 examples
Bits (org.apache.lucene.util.Bits): 2 examples
IOException (java.io.IOException): 1 example
ArrayList (java.util.ArrayList): 1 example
Map (java.util.Map): 1 example
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1 example
Codec (org.apache.lucene.codecs.Codec): 1 example
DocValuesConsumer (org.apache.lucene.codecs.DocValuesConsumer): 1 example
DocValuesProducer (org.apache.lucene.codecs.DocValuesProducer): 1 example
FieldsConsumer (org.apache.lucene.codecs.FieldsConsumer): 1 example
FieldsProducer (org.apache.lucene.codecs.FieldsProducer): 1 example
NormsConsumer (org.apache.lucene.codecs.NormsConsumer): 1 example
NormsProducer (org.apache.lucene.codecs.NormsProducer): 1 example
TermVectorsReader (org.apache.lucene.codecs.TermVectorsReader): 1 example
TermVectorsWriter (org.apache.lucene.codecs.TermVectorsWriter): 1 example
SerializedDocument (org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.SerializedDocument): 1 example
DocumentStoredFieldVisitor (org.apache.lucene.document.DocumentStoredFieldVisitor): 1 example
Field (org.apache.lucene.document.Field): 1 example