use of org.apache.lucene.codecs.TermVectorsWriter in project lucene-solr by apache.
the class TermVectorsConsumerPerField method finishDocument.
void finishDocument() throws IOException {
if (doVectors == false) {
return;
}
doVectors = false;
final int numPostings = bytesHash.size();
final BytesRef flushTerm = termsWriter.flushTerm;
assert numPostings >= 0;
// This is called once, after inverting all occurrences
// of a given field in the doc. At this point we flush
// our hash into the DocWriter.
TermVectorsPostingsArray postings = termVectorsPostingsArray;
final TermVectorsWriter tv = termsWriter.writer;
final int[] termIDs = sortPostings();
tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets, hasPayloads);
final ByteSliceReader posReader = doVectorPositions ? termsWriter.vectorSliceReaderPos : null;
final ByteSliceReader offReader = doVectorOffsets ? termsWriter.vectorSliceReaderOff : null;
for (int j = 0; j < numPostings; j++) {
final int termID = termIDs[j];
final int freq = postings.freqs[termID];
// Get BytesRef
termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);
tv.startTerm(flushTerm, freq);
if (doVectorPositions || doVectorOffsets) {
if (posReader != null) {
initReader(posReader, termID, 0);
}
if (offReader != null) {
initReader(offReader, termID, 1);
}
tv.addProx(freq, posReader, offReader);
}
tv.finishTerm();
}
tv.finishField();
reset();
fieldInfo.setStoreTermVectors();
}
use of org.apache.lucene.codecs.TermVectorsWriter in project lucene-solr by apache.
the class BaseIndexFileFormatTestCase method testMultiClose.
/** Calls close multiple times on closeable codec apis */
public void testMultiClose() throws IOException {
// first make a one doc index
Directory oneDocIndex = applyCreatedVersionMajor(newDirectory());
IndexWriter iw = new IndexWriter(oneDocIndex, new IndexWriterConfig(new MockAnalyzer(random())));
Document oneDoc = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
Field customField = new Field("field", "contents", customType);
oneDoc.add(customField);
oneDoc.add(new NumericDocValuesField("field", 5));
iw.addDocument(oneDoc);
LeafReader oneDocReader = getOnlyLeafReader(DirectoryReader.open(iw));
iw.close();
// now feed to codec apis manually
// we use FSDir, things like ramdir are not guaranteed to cause fails if you write to them after close(), etc
Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
Codec codec = getCodec();
SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(), proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(), proto.getPointDimensionCount(), proto.getPointNumBytes());
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { field });
SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, fieldInfos, null, new IOContext(new FlushInfo(1, 20)));
SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ);
// PostingsFormat
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
consumer.write(oneDocReader.fields());
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (FieldsProducer producer = codec.postingsFormat().fieldsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// DocValuesFormat
try (DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(writeState)) {
consumer.addNumericField(field, new EmptyDocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo field) {
return new NumericDocValues() {
int docID = -1;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() {
docID++;
if (docID == 1) {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public int advance(int target) {
if (docID <= 0 && target == 0) {
docID = 0;
} else {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return target == 0;
}
@Override
public long cost() {
return 1;
}
@Override
public long longValue() {
return 5;
}
};
}
});
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (DocValuesProducer producer = codec.docValuesFormat().fieldsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// NormsFormat
try (NormsConsumer consumer = codec.normsFormat().normsConsumer(writeState)) {
consumer.addNormsField(field, new NormsProducer() {
@Override
public NumericDocValues getNorms(FieldInfo field) {
return new NumericDocValues() {
int docID = -1;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() {
docID++;
if (docID == 1) {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public int advance(int target) {
if (docID <= 0 && target == 0) {
docID = 0;
} else {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return target == 0;
}
@Override
public long cost() {
return 1;
}
@Override
public long longValue() {
return 5;
}
};
}
@Override
public void checkIntegrity() {
}
@Override
public void close() {
}
@Override
public long ramBytesUsed() {
return 0;
}
});
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (NormsProducer producer = codec.normsFormat().normsProducer(readState)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// TermVectorsFormat
try (TermVectorsWriter consumer = codec.termVectorsFormat().vectorsWriter(dir, segmentInfo, writeState.context)) {
consumer.startDocument(1);
consumer.startField(field, 1, false, false, false);
consumer.startTerm(new BytesRef("testing"), 2);
consumer.finishTerm();
consumer.finishField();
consumer.finishDocument();
consumer.finish(fieldInfos, 1);
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (TermVectorsReader producer = codec.termVectorsFormat().vectorsReader(dir, segmentInfo, fieldInfos, readState.context)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
// StoredFieldsFormat
try (StoredFieldsWriter consumer = codec.storedFieldsFormat().fieldsWriter(dir, segmentInfo, writeState.context)) {
consumer.startDocument();
consumer.writeField(field, customField);
consumer.finishDocument();
consumer.finish(fieldInfos, 1);
IOUtils.close(consumer);
IOUtils.close(consumer);
}
try (StoredFieldsReader producer = codec.storedFieldsFormat().fieldsReader(dir, segmentInfo, fieldInfos, readState.context)) {
IOUtils.close(producer);
IOUtils.close(producer);
}
IOUtils.close(oneDocReader, oneDocIndex, dir);
}
use of org.apache.lucene.codecs.TermVectorsWriter in project lucene-solr by apache.
the class SortingTermVectorsConsumer method flush.
@Override
void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
super.flush(fieldsToFlush, state, sortMap);
if (tmpDirectory != null) {
if (sortMap == null) {
// we're lucky the index is already sorted, just rename the temporary file and return
for (Map.Entry<String, String> entry : tmpDirectory.getTemporaryFiles().entrySet()) {
tmpDirectory.rename(entry.getValue(), entry.getKey());
}
return;
}
TermVectorsReader reader = docWriter.codec.termVectorsFormat().vectorsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
TermVectorsReader mergeReader = reader.getMergeInstance();
TermVectorsWriter writer = docWriter.codec.termVectorsFormat().vectorsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
try {
reader.checkIntegrity();
for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
Fields vectors = mergeReader.get(sortMap.newToOld(docID));
writeTermVectors(writer, vectors, state.fieldInfos);
}
writer.finish(state.fieldInfos, state.segmentInfo.maxDoc());
} finally {
IOUtils.close(reader, writer);
IOUtils.deleteFiles(tmpDirectory, tmpDirectory.getTemporaryFiles().values());
}
}
}
Aggregations