use of org.apache.lucene.codecs.DocValuesConsumer in project lucene-solr by apache.
the class ReadersAndUpdates method handleNumericDVUpdates.
@SuppressWarnings("synthetic-access")
private void handleNumericDVUpdates(FieldInfos infos, Map<String, NumericDocValuesFieldUpdates> updates, Directory dir, DocValuesFormat dvFormat, final SegmentReader reader, Map<Integer, Set<String>> fieldFiles) throws IOException {
for (Entry<String, NumericDocValuesFieldUpdates> e : updates.entrySet()) {
final String field = e.getKey();
final NumericDocValuesFieldUpdates fieldUpdates = e.getValue();
final long nextDocValuesGen = info.getNextDocValuesGen();
final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX);
final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc();
final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize));
final FieldInfo fieldInfo = infos.fieldInfo(field);
assert fieldInfo != null;
fieldInfo.setDocValuesGen(nextDocValuesGen);
final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo });
// separately also track which files were created for this gen
final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
// write the numeric updates to a new gen'd docvalues file
fieldsConsumer.addNumericField(fieldInfo, new EmptyDocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo fieldInfoIn) throws IOException {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
final int maxDoc = reader.maxDoc();
final NumericDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator();
final NumericDocValues currentValues = reader.getNumericDocValues(field);
updatesIter.reset();
// Merge sort of the original doc values with updated doc values:
return new NumericDocValues() {
// merged docID
private int docIDOut = -1;
// docID from our original doc values
private int docIDIn = -1;
// docID from our updates
private int updateDocID = -1;
private long value;
@Override
public int docID() {
return docIDOut;
}
@Override
public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
// TODO
return 0;
}
@Override
public long longValue() {
return value;
}
@Override
public int nextDoc() throws IOException {
if (docIDIn == docIDOut) {
if (currentValues == null) {
docIDIn = NO_MORE_DOCS;
} else {
docIDIn = currentValues.nextDoc();
}
}
if (updateDocID == docIDOut) {
updateDocID = updatesIter.nextDoc();
}
if (docIDIn < updateDocID) {
// no update to this doc
docIDOut = docIDIn;
value = currentValues.longValue();
} else {
docIDOut = updateDocID;
if (docIDOut != NO_MORE_DOCS) {
value = updatesIter.value();
}
}
return docIDOut;
}
};
}
});
}
info.advanceDocValuesGen();
assert !fieldFiles.containsKey(fieldInfo.number);
fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles());
}
}
use of org.apache.lucene.codecs.DocValuesConsumer in project lucene-solr by apache.
the class DefaultIndexingChain method writeDocValues.
/** Writes all buffered doc values (called from {@link #flush}). */
private void writeDocValues(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
int maxDoc = state.segmentInfo.maxDoc();
DocValuesConsumer dvConsumer = null;
boolean success = false;
try {
for (int i = 0; i < fieldHash.length; i++) {
PerField perField = fieldHash[i];
while (perField != null) {
if (perField.docValuesWriter != null) {
if (perField.fieldInfo.getDocValuesType() == DocValuesType.NONE) {
// BUG
throw new AssertionError("segment=" + state.segmentInfo + ": field=\"" + perField.fieldInfo.name + "\" has no docValues but wrote them");
}
if (dvConsumer == null) {
// lazy init
DocValuesFormat fmt = state.segmentInfo.getCodec().docValuesFormat();
dvConsumer = fmt.fieldsConsumer(state);
}
if (finishedDocValues.contains(perField.fieldInfo.name) == false) {
perField.docValuesWriter.finish(maxDoc);
}
perField.docValuesWriter.flush(state, sortMap, dvConsumer);
perField.docValuesWriter = null;
} else if (perField.fieldInfo.getDocValuesType() != DocValuesType.NONE) {
// BUG
throw new AssertionError("segment=" + state.segmentInfo + ": field=\"" + perField.fieldInfo.name + "\" has docValues but did not write them");
}
perField = perField.next;
}
}
// TODO: catch missing DV fields here? else we have
// null/"" depending on how docs landed in segments?
// but we can't detect all cases, and we should leave
// this behavior undefined. dv is not "schemaless": it's column-stride.
success = true;
} finally {
if (success) {
IOUtils.close(dvConsumer);
} else {
IOUtils.closeWhileHandlingException(dvConsumer);
}
}
if (state.fieldInfos.hasDocValues() == false) {
if (dvConsumer != null) {
// BUG
throw new AssertionError("segment=" + state.segmentInfo + ": fieldInfos has no docValues but wrote them");
}
} else if (dvConsumer == null) {
// BUG
throw new AssertionError("segment=" + state.segmentInfo + ": fieldInfos has docValues but did not wrote them");
}
}
use of org.apache.lucene.codecs.DocValuesConsumer in project lucene-solr by apache.
the class BaseIndexFileFormatTestCase method testMultiClose.
/** Calls close multiple times on closeable codec apis */
public void testMultiClose() throws IOException {
// first make a one doc index
Directory oneDocIndex = applyCreatedVersionMajor(newDirectory());
IndexWriter iw = new IndexWriter(oneDocIndex, new IndexWriterConfig(new MockAnalyzer(random())));
Document oneDoc = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
Field customField = new Field("field", "contents", customType);
oneDoc.add(customField);
oneDoc.add(new NumericDocValuesField("field", 5));
iw.addDocument(oneDoc);
LeafReader oneDocReader = getOnlyLeafReader(DirectoryReader.open(iw));
iw.close();
// now feed to codec apis manually
// we use FSDir, things like ramdir are not guaranteed to cause fails if you write to them after close(), etc
Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
Codec codec = getCodec();
SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(), proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(), proto.getPointDimensionCount(), proto.getPointNumBytes());
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { field });
SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, fieldInfos, null, new IOContext(new FlushInfo(1, 20)));
SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ);
// PostingsFormat
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
consumer.write(oneDocReader.fields());
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (FieldsProducer producer = codec.postingsFormat().fieldsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// DocValuesFormat
try (DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(writeState)) {
consumer.addNumericField(field, new EmptyDocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo field) {
return new NumericDocValues() {
int docID = -1;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() {
docID++;
if (docID == 1) {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public int advance(int target) {
if (docID <= 0 && target == 0) {
docID = 0;
} else {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return target == 0;
}
@Override
public long cost() {
return 1;
}
@Override
public long longValue() {
return 5;
}
};
}
});
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (DocValuesProducer producer = codec.docValuesFormat().fieldsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// NormsFormat
try (NormsConsumer consumer = codec.normsFormat().normsConsumer(writeState)) {
consumer.addNormsField(field, new NormsProducer() {
@Override
public NumericDocValues getNorms(FieldInfo field) {
return new NumericDocValues() {
int docID = -1;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() {
docID++;
if (docID == 1) {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public int advance(int target) {
if (docID <= 0 && target == 0) {
docID = 0;
} else {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return target == 0;
}
@Override
public long cost() {
return 1;
}
@Override
public long longValue() {
return 5;
}
};
}
@Override
public void checkIntegrity() {
}
@Override
public void close() {
}
@Override
public long ramBytesUsed() {
return 0;
}
});
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (NormsProducer producer = codec.normsFormat().normsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// TermVectorsFormat
try (TermVectorsWriter consumer = codec.termVectorsFormat().vectorsWriter(dir, segmentInfo, writeState.context)) {
consumer.startDocument(1);
consumer.startField(field, 1, false, false, false);
consumer.startTerm(new BytesRef("testing"), 2);
consumer.finishTerm();
consumer.finishField();
consumer.finishDocument();
consumer.finish(fieldInfos, 1);
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (TermVectorsReader producer = codec.termVectorsFormat().vectorsReader(dir, segmentInfo, fieldInfos, readState.context)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// StoredFieldsFormat
try (StoredFieldsWriter consumer = codec.storedFieldsFormat().fieldsWriter(dir, segmentInfo, writeState.context)) {
consumer.startDocument();
consumer.writeField(field, customField);
consumer.finishDocument();
consumer.finish(fieldInfos, 1);
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (StoredFieldsReader producer = codec.storedFieldsFormat().fieldsReader(dir, segmentInfo, fieldInfos, readState.context)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
IOUtils.close(oneDocReader, oneDocIndex, dir);
}
use of org.apache.lucene.codecs.DocValuesConsumer in project lucene-solr by apache.
the class ReadersAndUpdates method handleBinaryDVUpdates.
@SuppressWarnings("synthetic-access")
private void handleBinaryDVUpdates(FieldInfos infos, Map<String, BinaryDocValuesFieldUpdates> updates, TrackingDirectoryWrapper dir, DocValuesFormat dvFormat, final SegmentReader reader, Map<Integer, Set<String>> fieldFiles) throws IOException {
for (Entry<String, BinaryDocValuesFieldUpdates> e : updates.entrySet()) {
final String field = e.getKey();
final BinaryDocValuesFieldUpdates fieldUpdates = e.getValue();
final long nextDocValuesGen = info.getNextDocValuesGen();
final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX);
final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc();
final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize));
final FieldInfo fieldInfo = infos.fieldInfo(field);
assert fieldInfo != null;
fieldInfo.setDocValuesGen(nextDocValuesGen);
final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo });
// separately also track which files were created for this gen
final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
// write the binary updates to a new gen'd docvalues file
fieldsConsumer.addBinaryField(fieldInfo, new EmptyDocValuesProducer() {
@Override
public BinaryDocValues getBinary(FieldInfo fieldInfoIn) throws IOException {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
final int maxDoc = reader.maxDoc();
final BinaryDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator();
updatesIter.reset();
final BinaryDocValues currentValues = reader.getBinaryDocValues(field);
// Merge sort of the original doc values with updated doc values:
return new BinaryDocValues() {
// merged docID
private int docIDOut = -1;
// docID from our original doc values
private int docIDIn = -1;
// docID from our updates
private int updateDocID = -1;
private BytesRef value;
@Override
public int docID() {
return docIDOut;
}
@Override
public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return currentValues.cost();
}
@Override
public BytesRef binaryValue() {
return value;
}
@Override
public int nextDoc() throws IOException {
if (docIDIn == docIDOut) {
if (currentValues == null) {
docIDIn = NO_MORE_DOCS;
} else {
docIDIn = currentValues.nextDoc();
}
}
if (updateDocID == docIDOut) {
updateDocID = updatesIter.nextDoc();
}
if (docIDIn < updateDocID) {
// no update to this doc
docIDOut = docIDIn;
value = currentValues.binaryValue();
} else {
docIDOut = updateDocID;
if (docIDOut != NO_MORE_DOCS) {
value = updatesIter.value();
}
}
return docIDOut;
}
};
}
});
}
info.advanceDocValuesGen();
assert !fieldFiles.containsKey(fieldInfo.number);
fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles());
}
}
Aggregations