Use of org.apache.lucene.codecs.StoredFieldsReader in the lucene-solr project (Apache).
From the class SortingStoredFieldsConsumer, method flush():
@Override
void flush(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
super.flush(state, sortMap);
if (sortMap == null) {
// We got lucky: the index is already sorted, so just rename the temporary files and return
for (Map.Entry<String, String> entry : tmpDirectory.getTemporaryFiles().entrySet()) {
tmpDirectory.rename(entry.getValue(), entry.getKey());
}
return;
}
StoredFieldsReader reader = docWriter.codec.storedFieldsFormat().fieldsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
StoredFieldsReader mergeReader = reader.getMergeInstance();
StoredFieldsWriter sortWriter = docWriter.codec.storedFieldsFormat().fieldsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
try {
reader.checkIntegrity();
CopyVisitor visitor = new CopyVisitor(sortWriter);
for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
sortWriter.startDocument();
mergeReader.visitDocument(sortMap.newToOld(docID), visitor);
sortWriter.finishDocument();
}
sortWriter.finish(state.fieldInfos, state.segmentInfo.maxDoc());
} finally {
IOUtils.close(reader, sortWriter);
IOUtils.deleteFiles(tmpDirectory, tmpDirectory.getTemporaryFiles().values());
}
}
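The flush() above re-reads the stored fields that were spilled to a temporary directory and rewrites them in index-sort order: for each new docID it asks the Sorter.DocMap for the document's old position and copies that document through a visitor. A minimal conceptual sketch of that new-to-old remapping (plain Java, not the Lucene API; the newToOld array and the String documents are hypothetical stand-ins for Sorter.DocMap and the stored documents):

/** Conceptual sketch only: how a new->old doc map drives the copy loop in flush(). */
public class DocMapSketch {
  public static void main(String[] args) {
    int[] newToOld = {2, 0, 1};            // hypothetical mapping for a 3-doc segment
    String[] unsorted = {"b", "c", "a"};   // stored documents in their pre-sort order
    String[] sorted = new String[unsorted.length];
    for (int newDocID = 0; newDocID < newToOld.length; newDocID++) {
      // mirrors: mergeReader.visitDocument(sortMap.newToOld(docID), visitor)
      sorted[newDocID] = unsorted[newToOld[newDocID]];
    }
    System.out.println(String.join(",", sorted)); // prints a,b,c
  }
}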
Use of org.apache.lucene.codecs.StoredFieldsReader in the lucene-solr project (Apache).
From the class CheckIndex, method testStoredFields():
/**
* Test stored fields.
* @lucene.experimental
*/
public static Status.StoredFieldStatus testStoredFields(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime();
final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
try {
if (infoStream != null) {
infoStream.print(" test: stored fields.......");
}
// Scan stored fields for all documents
final Bits liveDocs = reader.getLiveDocs();
StoredFieldsReader storedFields = reader.getFieldsReader().getMergeInstance();
for (int j = 0; j < reader.maxDoc(); ++j) {
// Intentionally pull even deleted documents to
// make sure they too are not corrupt:
DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
storedFields.visitDocument(j, visitor);
Document doc = visitor.getDocument();
if (liveDocs == null || liveDocs.get(j)) {
status.docCount++;
status.totFields += doc.getFields().size();
}
}
// Validate docCount
if (status.docCount != reader.numDocs()) {
throw new RuntimeException("docCount=" + status.docCount + " but saw " + reader.numDocs() + " undeleted docs");
}
msg(infoStream, String.format(Locale.ROOT, "OK [%d total field count; avg %.1f fields per doc] [took %.3f sec]", status.totFields, (((float) status.totFields) / status.docCount), nsToSec(System.nanoTime() - startNS)));
} catch (Throwable e) {
if (failFast) {
throw IOUtils.rethrowAlways(e);
}
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
status.error = e;
if (infoStream != null) {
e.printStackTrace(infoStream);
}
}
return status;
}
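testStoredFields() visits every document, deleted or not, through a DocumentStoredFieldVisitor and only counts fields for live documents. The same scan can be done at the application level without touching StoredFieldsReader directly; a minimal sketch, assuming the same Lucene version as the snippets above (where MultiFields is available) and a hypothetical index path passed as args[0]:

import java.nio.file.Paths;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class StoredFieldScan {
  public static void main(String[] args) throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(args[0])))) {
      Bits liveDocs = MultiFields.getLiveDocs(reader); // null when nothing is deleted
      long docCount = 0, totFields = 0;
      for (int docID = 0; docID < reader.maxDoc(); docID++) {
        // fetch stored fields for every docID, mirroring the CheckIndex loop
        Document doc = reader.document(docID);
        if (liveDocs == null || liveDocs.get(docID)) {
          docCount++;
          totFields += doc.getFields().size();
        }
      }
      System.out.println(docCount + " live docs, " + totFields + " stored fields");
    }
  }
}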
Use of org.apache.lucene.codecs.StoredFieldsReader in the lucene-solr project (Apache).
From the class BaseIndexFileFormatTestCase, method testMultiClose():
/** Calls close() multiple times on closeable codec APIs. */
public void testMultiClose() throws IOException {
// first make a one doc index
Directory oneDocIndex = applyCreatedVersionMajor(newDirectory());
IndexWriter iw = new IndexWriter(oneDocIndex, new IndexWriterConfig(new MockAnalyzer(random())));
Document oneDoc = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
Field customField = new Field("field", "contents", customType);
oneDoc.add(customField);
oneDoc.add(new NumericDocValuesField("field", 5));
iw.addDocument(oneDoc);
LeafReader oneDocReader = getOnlyLeafReader(DirectoryReader.open(iw));
iw.close();
// now feed to codec apis manually
// use an FSDirectory: things like RAMDirectory are not guaranteed to fail if you write to them after close(), etc.
Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
Codec codec = getCodec();
SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(), proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(), proto.getPointDimensionCount(), proto.getPointNumBytes());
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { field });
SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, fieldInfos, null, new IOContext(new FlushInfo(1, 20)));
SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ);
// PostingsFormat
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
consumer.write(oneDocReader.fields());
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (FieldsProducer producer = codec.postingsFormat().fieldsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// DocValuesFormat
try (DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(writeState)) {
consumer.addNumericField(field, new EmptyDocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo field) {
return new NumericDocValues() {
int docID = -1;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() {
docID++;
if (docID == 1) {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public int advance(int target) {
if (docID <= 0 && target == 0) {
docID = 0;
} else {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return target == 0;
}
@Override
public long cost() {
return 1;
}
@Override
public long longValue() {
return 5;
}
};
}
});
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (DocValuesProducer producer = codec.docValuesFormat().fieldsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// NormsFormat
try (NormsConsumer consumer = codec.normsFormat().normsConsumer(writeState)) {
consumer.addNormsField(field, new NormsProducer() {
@Override
public NumericDocValues getNorms(FieldInfo field) {
return new NumericDocValues() {
int docID = -1;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() {
docID++;
if (docID == 1) {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public int advance(int target) {
if (docID <= 0 && target == 0) {
docID = 0;
} else {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return target == 0;
}
@Override
public long cost() {
return 1;
}
@Override
public long longValue() {
return 5;
}
};
}
@Override
public void checkIntegrity() {
}
@Override
public void close() {
}
@Override
public long ramBytesUsed() {
return 0;
}
});
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (NormsProducer producer = codec.normsFormat().normsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// TermVectorsFormat
try (TermVectorsWriter consumer = codec.termVectorsFormat().vectorsWriter(dir, segmentInfo, writeState.context)) {
consumer.startDocument(1);
consumer.startField(field, 1, false, false, false);
consumer.startTerm(new BytesRef("testing"), 2);
consumer.finishTerm();
consumer.finishField();
consumer.finishDocument();
consumer.finish(fieldInfos, 1);
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (TermVectorsReader producer = codec.termVectorsFormat().vectorsReader(dir, segmentInfo, fieldInfos, readState.context)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// StoredFieldsFormat
try (StoredFieldsWriter consumer = codec.storedFieldsFormat().fieldsWriter(dir, segmentInfo, writeState.context)) {
consumer.startDocument();
consumer.writeField(field, customField);
consumer.finishDocument();
consumer.finish(fieldInfos, 1);
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (StoredFieldsReader producer = codec.storedFieldsFormat().fieldsReader(dir, segmentInfo, fieldInfos, readState.context)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
IOUtils.close(oneDocReader, oneDocIndex, dir);
}
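The point of testMultiClose() is the contract it exercises: every codec reader and writer must tolerate close() being called more than once (here via repeated IOUtils.close calls plus try-with-resources). A minimal sketch of that idempotent-close pattern in plain Java:

import java.io.Closeable;
import java.io.IOException;

public class IdempotentCloseSketch implements Closeable {
  private boolean closed;

  @Override
  public void close() throws IOException {
    if (closed) {
      return; // already closed: a no-op, like the codec writers/readers above
    }
    closed = true;
    // release resources exactly once here
  }

  public static void main(String[] args) throws IOException {
    try (IdempotentCloseSketch c = new IdempotentCloseSketch()) {
      c.close(); // explicit close inside try-with-resources ...
      c.close(); // ... plus the implicit one at the end: all safe when close() is idempotent
    }
  }
}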
Use of org.apache.lucene.codecs.StoredFieldsReader in the lucene-solr project (Apache).
From the class CompressingStoredFieldsWriter, method merge():
@Override
public int merge(MergeState mergeState) throws IOException {
int docCount = 0;
int numReaders = mergeState.maxDocs.length;
MatchingReaders matching = new MatchingReaders(mergeState);
if (mergeState.needsIndexSort) {
/*
 * If all readers are compressed and they have the same field infos, we can merge the serialized documents
 * directly.
 */
List<CompressingStoredFieldsMergeSub> subs = new ArrayList<>();
for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
if (matching.matchingReaders[i] && mergeState.storedFieldsReaders[i] instanceof CompressingStoredFieldsReader) {
CompressingStoredFieldsReader storedFieldsReader = (CompressingStoredFieldsReader) mergeState.storedFieldsReaders[i];
storedFieldsReader.checkIntegrity();
subs.add(new CompressingStoredFieldsMergeSub(storedFieldsReader, mergeState.docMaps[i], mergeState.maxDocs[i]));
} else {
return super.merge(mergeState);
}
}
final DocIDMerger<CompressingStoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, true);
while (true) {
CompressingStoredFieldsMergeSub sub = docIDMerger.next();
if (sub == null) {
break;
}
assert sub.mappedDocID == docCount;
SerializedDocument doc = sub.reader.document(sub.docID);
startDocument();
bufferedDocs.copyBytes(doc.in, doc.length);
numStoredFieldsInDoc = doc.numStoredFields;
finishDocument();
++docCount;
}
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
}
for (int readerIndex = 0; readerIndex < numReaders; readerIndex++) {
MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
CompressingStoredFieldsReader matchingFieldsReader = null;
if (matching.matchingReaders[readerIndex]) {
final StoredFieldsReader fieldsReader = mergeState.storedFieldsReaders[readerIndex];
// we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
}
}
final int maxDoc = mergeState.maxDocs[readerIndex];
final Bits liveDocs = mergeState.liveDocs[readerIndex];
// if it's some other format, an older version of this format, or the bulk-merge safety switch is disabled:
if (matchingFieldsReader == null || matchingFieldsReader.getVersion() != VERSION_CURRENT || BULK_MERGE_ENABLED == false) {
// naive merge...
StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[readerIndex];
if (storedFieldsReader != null) {
storedFieldsReader.checkIntegrity();
}
for (int docID = 0; docID < maxDoc; docID++) {
if (liveDocs != null && liveDocs.get(docID) == false) {
continue;
}
startDocument();
storedFieldsReader.visitDocument(docID, visitor);
finishDocument();
++docCount;
}
} else if (matchingFieldsReader.getCompressionMode() == compressionMode && matchingFieldsReader.getChunkSize() == chunkSize && matchingFieldsReader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT && liveDocs == null && !tooDirty(matchingFieldsReader)) {
// if the format is older, it's always handled by the naive merge case above
assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
matchingFieldsReader.checkIntegrity();
// flush any pending chunks
if (numBufferedDocs > 0) {
flush();
// incomplete: we had to force this flush
numDirtyChunks++;
}
// iterate over each chunk. we use the stored fields index to find chunk boundaries,
// read the docstart + doccount from the chunk header (we write a new header, since doc numbers will change),
// and just copy the bytes directly.
IndexInput rawDocs = matchingFieldsReader.getFieldsStream();
CompressingStoredFieldsIndexReader index = matchingFieldsReader.getIndexReader();
rawDocs.seek(index.getStartPointer(0));
int docID = 0;
while (docID < maxDoc) {
// read header
int base = rawDocs.readVInt();
if (base != docID) {
throw new CorruptIndexException("invalid state: base=" + base + ", docID=" + docID, rawDocs);
}
int code = rawDocs.readVInt();
// write a new index entry and new header for this chunk.
int bufferedDocs = code >>> 1;
indexWriter.writeIndex(bufferedDocs, fieldsStream.getFilePointer());
// rebase
fieldsStream.writeVInt(docBase);
fieldsStream.writeVInt(code);
docID += bufferedDocs;
docBase += bufferedDocs;
docCount += bufferedDocs;
if (docID > maxDoc) {
throw new CorruptIndexException("invalid state: base=" + base + ", count=" + bufferedDocs + ", maxDoc=" + maxDoc, rawDocs);
}
// copy bytes until the next chunk boundary (or end of chunk data).
// using the stored fields index for this isn't the most efficient, but fast enough
// and is a source of redundancy for detecting bad things.
final long end;
if (docID == maxDoc) {
end = matchingFieldsReader.getMaxPointer();
} else {
end = index.getStartPointer(docID);
}
fieldsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
}
if (rawDocs.getFilePointer() != matchingFieldsReader.getMaxPointer()) {
throw new CorruptIndexException("invalid state: pos=" + rawDocs.getFilePointer() + ", max=" + matchingFieldsReader.getMaxPointer(), rawDocs);
}
// since we bulk merged all chunks, we inherit any dirty ones from this segment.
numChunks += matchingFieldsReader.getNumChunks();
numDirtyChunks += matchingFieldsReader.getNumDirtyChunks();
} else {
// if the format is older, it's always handled by the naive merge case above
assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
matchingFieldsReader.checkIntegrity();
for (int docID = 0; docID < maxDoc; docID++) {
if (liveDocs != null && liveDocs.get(docID) == false) {
continue;
}
SerializedDocument doc = matchingFieldsReader.document(docID);
startDocument();
bufferedDocs.copyBytes(doc.in, doc.length);
numStoredFieldsInDoc = doc.numStoredFields;
finishDocument();
++docCount;
}
}
}
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
}
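merge() above prefers a bulk chunk copy when the source is a CompressingStoredFieldsReader with the same version, compression mode and chunk size, no deletions, and not too many dirty chunks; otherwise it falls back to the naive per-document visitor merge. In the bulk path each copied chunk keeps its compressed contents but gets a rewritten header so document numbers line up with the destination segment. A rough illustration of that rebase step (plain Java, hypothetical numbers, not the Lucene API):

/** Conceptual sketch only: how chunk headers are rebased during the bulk copy in merge(). */
public class ChunkRebaseSketch {
  public static void main(String[] args) {
    int[][] sourceChunks = {{0, 128}, {128, 128}, {256, 64}}; // {base, docCount} per chunk (hypothetical)
    int docBase = 1000;                                       // docs already written to the destination segment
    for (int[] chunk : sourceChunks) {
      int base = chunk[0];
      int count = chunk[1];
      // mirrors: fieldsStream.writeVInt(docBase); fieldsStream.writeVInt(code);
      System.out.println("chunk base " + base + " -> rebased to " + docBase + ", " + count + " docs");
      docBase += count;
    }
  }
}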