Use of org.apache.lucene.codecs.TermVectorsReader in project lucene-solr by apache.
From the class BaseIndexFileFormatTestCase, method testMultiClose:
/** Calls close multiple times on closeable codec apis */
public void testMultiClose() throws IOException {
  // first make a one doc index
  Directory oneDocIndex = applyCreatedVersionMajor(newDirectory());
  IndexWriter iw = new IndexWriter(oneDocIndex, new IndexWriterConfig(new MockAnalyzer(random())));
  Document oneDoc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  Field customField = new Field("field", "contents", customType);
  oneDoc.add(customField);
  oneDoc.add(new NumericDocValuesField("field", 5));
  iw.addDocument(oneDoc);
  LeafReader oneDocReader = getOnlyLeafReader(DirectoryReader.open(iw));
  iw.close();

  // now feed to codec apis manually
  // we use an FSDirectory: RAM-based directories are not guaranteed to fail
  // if you write to them after close(), etc.
  Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
  Codec codec = getCodec();

  SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
  FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
  FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(),
      proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(),
      proto.getPointDimensionCount(), proto.getPointNumBytes());
  FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { field });

  SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, fieldInfos, null, new IOContext(new FlushInfo(1, 20)));
  SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ);
  // PostingsFormat: each try-with-resources block closes once, and the two
  // extra IOUtils.close calls verify that close() is idempotent
  try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
    consumer.write(oneDocReader.fields());
    IOUtils.close(consumer);
    IOUtils.close(consumer);
  }
  try (FieldsProducer producer = codec.postingsFormat().fieldsProducer(readState)) {
    IOUtils.close(producer);
    IOUtils.close(producer);
  }
  // DocValuesFormat
  try (DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(writeState)) {
    consumer.addNumericField(field, new EmptyDocValuesProducer() {
      @Override
      public NumericDocValues getNumeric(FieldInfo field) {
        return new NumericDocValues() {
          int docID = -1;

          @Override
          public int docID() {
            return docID;
          }

          @Override
          public int nextDoc() {
            docID++;
            if (docID == 1) {
              docID = NO_MORE_DOCS;
            }
            return docID;
          }

          @Override
          public int advance(int target) {
            if (docID <= 0 && target == 0) {
              docID = 0;
            } else {
              docID = NO_MORE_DOCS;
            }
            return docID;
          }

          @Override
          public boolean advanceExact(int target) throws IOException {
            docID = target;
            return target == 0;
          }

          @Override
          public long cost() {
            return 1;
          }

          @Override
          public long longValue() {
            return 5;
          }
        };
      }
    });
    IOUtils.close(consumer);
    IOUtils.close(consumer);
  }
  try (DocValuesProducer producer = codec.docValuesFormat().fieldsProducer(readState)) {
    IOUtils.close(producer);
    IOUtils.close(producer);
  }
  // NormsFormat
  try (NormsConsumer consumer = codec.normsFormat().normsConsumer(writeState)) {
    consumer.addNormsField(field, new NormsProducer() {
      @Override
      public NumericDocValues getNorms(FieldInfo field) {
        return new NumericDocValues() {
          int docID = -1;

          @Override
          public int docID() {
            return docID;
          }

          @Override
          public int nextDoc() {
            docID++;
            if (docID == 1) {
              docID = NO_MORE_DOCS;
            }
            return docID;
          }

          @Override
          public int advance(int target) {
            if (docID <= 0 && target == 0) {
              docID = 0;
            } else {
              docID = NO_MORE_DOCS;
            }
            return docID;
          }

          @Override
          public boolean advanceExact(int target) throws IOException {
            docID = target;
            return target == 0;
          }

          @Override
          public long cost() {
            return 1;
          }

          @Override
          public long longValue() {
            return 5;
          }
        };
      }

      @Override
      public void checkIntegrity() {
      }

      @Override
      public void close() {
      }

      @Override
      public long ramBytesUsed() {
        return 0;
      }
    });
    IOUtils.close(consumer);
    IOUtils.close(consumer);
  }
  try (NormsProducer producer = codec.normsFormat().normsProducer(readState)) {
    IOUtils.close(producer);
    IOUtils.close(producer);
  }
  // TermVectorsFormat
  try (TermVectorsWriter consumer = codec.termVectorsFormat().vectorsWriter(dir, segmentInfo, writeState.context)) {
    consumer.startDocument(1);
    consumer.startField(field, 1, false, false, false);
    consumer.startTerm(new BytesRef("testing"), 2);
    consumer.finishTerm();
    consumer.finishField();
    consumer.finishDocument();
    consumer.finish(fieldInfos, 1);
    IOUtils.close(consumer);
    IOUtils.close(consumer);
  }
  try (TermVectorsReader producer = codec.termVectorsFormat().vectorsReader(dir, segmentInfo, fieldInfos, readState.context)) {
    IOUtils.close(producer);
    IOUtils.close(producer);
  }
  // StoredFieldsFormat
  try (StoredFieldsWriter consumer = codec.storedFieldsFormat().fieldsWriter(dir, segmentInfo, writeState.context)) {
    consumer.startDocument();
    consumer.writeField(field, customField);
    consumer.finishDocument();
    consumer.finish(fieldInfos, 1);
    IOUtils.close(consumer);
    IOUtils.close(consumer);
  }
  try (StoredFieldsReader producer = codec.storedFieldsFormat().fieldsReader(dir, segmentInfo, fieldInfos, readState.context)) {
    IOUtils.close(producer);
    IOUtils.close(producer);
  }
  IOUtils.close(oneDocReader, oneDocIndex, dir);
}
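Every consumer and producer above ends up closed three times in total: once by try-with-resources and twice explicitly. A minimal sketch of the idempotent-close idiom such codec components typically follow (DummyCodecWriter and its output field are hypothetical, for illustration only):

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;

// Hypothetical writer illustrating the idempotent-close idiom this test exercises.
class DummyCodecWriter implements Closeable {
  private final IndexOutput output; // opened elsewhere, e.g. by the owning format
  private boolean closed;

  DummyCodecWriter(IndexOutput output) {
    this.output = output;
  }

  @Override
  public void close() throws IOException {
    if (closed) {
      return; // second and later calls are no-ops
    }
    closed = true;
    IOUtils.close(output); // release the underlying file handle exactly once
  }
}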
Use of org.apache.lucene.codecs.TermVectorsReader in project lucene-solr by apache.
From the class TestTermVectorsReader, method testDocsEnum:
public void testDocsEnum() throws IOException {
  TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random()));
  for (int j = 0; j < 5; j++) {
    Terms vector = reader.get(j).terms(testFields[0]);
    assertNotNull(vector);
    assertEquals(testTerms.length, vector.size());
    TermsEnum termsEnum = vector.iterator();
    PostingsEnum postingsEnum = null;
    for (int i = 0; i < testTerms.length; i++) {
      final BytesRef text = termsEnum.next();
      assertNotNull(text);
      String term = text.utf8ToString();
      //System.out.println("Term: " + term);
      assertEquals(testTerms[i], term);
      postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE);
      assertNotNull(postingsEnum);
      int doc = postingsEnum.docID();
      assertEquals(-1, doc);
      assertTrue(postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
    }
    assertNull(termsEnum.next());
  }
  reader.close();
}
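The same enumeration can be driven through the public reader API instead of a raw TermVectorsReader. A minimal sketch, assuming an IndexReader named ir over an index whose "field" was indexed with term vectors (ir and the field name are assumptions, not part of the test above):

// Term vectors for doc 0; getTermVectors returns null if the doc has none.
Fields vectors = ir.getTermVectors(0);
if (vectors != null) {
  Terms terms = vectors.terms("field");
  TermsEnum termsEnum = terms.iterator();
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
    // a term-vector PostingsEnum addresses exactly one document
    if (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      System.out.println(term.utf8ToString() + " freq=" + postings.freq());
    }
  }
}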
Use of org.apache.lucene.codecs.TermVectorsReader in project lucene-solr by apache.
From the class TestTermVectorsReader, method testPositionReader:
public void testPositionReader() throws IOException {
  TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random()));
  BytesRef[] terms;
  Terms vector = reader.get(0).terms(testFields[0]);
  assertNotNull(vector);
  assertEquals(testTerms.length, vector.size());
  TermsEnum termsEnum = vector.iterator();
  PostingsEnum dpEnum = null;
  for (int i = 0; i < testTerms.length; i++) {
    final BytesRef text = termsEnum.next();
    assertNotNull(text);
    String term = text.utf8ToString();
    //System.out.println("Term: " + term);
    assertEquals(testTerms[i], term);

    dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
    assertNotNull(dpEnum);
    int doc = dpEnum.docID();
    assertEquals(-1, doc);
    assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(dpEnum.freq(), positions[i].length);
    for (int j = 0; j < positions[i].length; j++) {
      assertEquals(positions[i][j], dpEnum.nextPosition());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

    dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
    doc = dpEnum.docID();
    assertEquals(-1, doc);
    assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertNotNull(dpEnum);
    assertEquals(dpEnum.freq(), positions[i].length);
    for (int j = 0; j < positions[i].length; j++) {
      assertEquals(positions[i][j], dpEnum.nextPosition());
      assertEquals(j * 10, dpEnum.startOffset());
      assertEquals(j * 10 + testTerms[i].length(), dpEnum.endOffset());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
  }

  //no pos, no offset
  Terms freqVector = reader.get(0).terms(testFields[1]);
  assertNotNull(freqVector);
  assertEquals(testTerms.length, freqVector.size());
  termsEnum = freqVector.iterator();
  assertNotNull(termsEnum);
  for (int i = 0; i < testTerms.length; i++) {
    final BytesRef text = termsEnum.next();
    assertNotNull(text);
    String term = text.utf8ToString();
    //System.out.println("Term: " + term);
    assertEquals(testTerms[i], term);
    assertNotNull(termsEnum.postings(null));
    assertNotNull(termsEnum.postings(null, PostingsEnum.ALL));
  }
  reader.close();
}
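In isolation, the per-occurrence read loop from the first half of the test looks as follows. A minimal sketch, assuming termsEnum is positioned on a term of a vector that stored positions and offsets; when they were not stored, these accessors typically report -1, which is why the second field above is only checked for non-null enums:

PostingsEnum dp = termsEnum.postings(null, PostingsEnum.ALL);
if (dp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
  for (int k = 0; k < dp.freq(); k++) {
    int position = dp.nextPosition(); // one call per occurrence, at most freq() calls
    int start = dp.startOffset();
    int end = dp.endOffset();
    System.out.println("pos=" + position + " offsets=[" + start + "," + end + "]");
  }
}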
Use of org.apache.lucene.codecs.TermVectorsReader in project lucene-solr by apache.
From the class CodecReader, method getTermVectors:
@Override
public final Fields getTermVectors(int docID) throws IOException {
  TermVectorsReader termVectorsReader = getTermVectorsReader();
  if (termVectorsReader == null) {
    return null;
  }
  checkBounds(docID);
  return termVectorsReader.get(docID);
}
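Note the two distinct null cases a caller must handle: the method returns null when the segment has no TermVectorsReader at all (no field stored vectors), and even a non-null Fields may lack a particular field. A minimal hedged usage sketch (codecReader, docID, and the "body" field name are assumptions for illustration):

// getTermVectors returns null when the segment stores no term vectors at all;
// Fields.terms returns null when this particular field was not vectored.
Fields fields = codecReader.getTermVectors(docID);
Terms terms = (fields == null) ? null : fields.terms("body");
if (terms == null) {
  // fall back, e.g. re-analyze the stored content for highlighting
}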