Search in sources :

Example 1 with TermVectorsWriter

use of org.apache.lucene.codecs.TermVectorsWriter in project lucene-solr by apache.

The example is the method finishDocument of the class TermVectorsConsumerPerField.

/**
 * Writes the term vectors buffered for this field in the current document to
 * the {@code TermVectorsWriter} held by {@code termsWriter}, then resets this
 * per-field state. Returns immediately when {@code doVectors} is not set.
 *
 * @throws IOException propagated from the writer calls
 */
void finishDocument() throws IOException {
    if (!doVectors) {
        return;
    }
    doVectors = false;

    final int termCount = bytesHash.size();
    final BytesRef scratchTerm = termsWriter.flushTerm;
    assert termCount >= 0;

    // Invoked once per document, after every occurrence of this field has been
    // inverted; the buffered hash is now drained into the writer.
    final TermVectorsPostingsArray postingsArray = termVectorsPostingsArray;
    final TermVectorsWriter vectorsWriter = termsWriter.writer;
    final int[] sortedTermIDs = sortPostings();

    vectorsWriter.startField(fieldInfo, termCount, doVectorPositions, doVectorOffsets, hasPayloads);

    // A reader is only picked up for the kinds of data this field records.
    ByteSliceReader positions = null;
    if (doVectorPositions) {
        positions = termsWriter.vectorSliceReaderPos;
    }
    ByteSliceReader offsets = null;
    if (doVectorOffsets) {
        offsets = termsWriter.vectorSliceReaderOff;
    }

    // Iterate by count, not by array length: only the first termCount entries are consumed.
    int idx = 0;
    while (idx < termCount) {
        final int termID = sortedTermIDs[idx];
        idx++;

        final int termFreq = postingsArray.freqs[termID];
        // Point the shared scratch BytesRef at this term's bytes.
        termBytePool.setBytesRef(scratchTerm, postingsArray.textStarts[termID]);
        vectorsWriter.startTerm(scratchTerm, termFreq);

        if (doVectorPositions || doVectorOffsets) {
            // Stream 0 is used for positions and stream 1 for offsets.
            if (positions != null) {
                initReader(positions, termID, 0);
            }
            if (offsets != null) {
                initReader(offsets, termID, 1);
            }
            vectorsWriter.addProx(termFreq, positions, offsets);
        }

        vectorsWriter.finishTerm();
    }

    vectorsWriter.finishField();
    reset();
    fieldInfo.setStoreTermVectors();
}
Also used : TermVectorsWriter(org.apache.lucene.codecs.TermVectorsWriter) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with TermVectorsWriter

use of org.apache.lucene.codecs.TermVectorsWriter in project lucene-solr by apache.

The example is the method testMultiClose of the class BaseIndexFileFormatTestCase.

/**
 * Calls close multiple times on closeable codec apis.
 *
 * <p>A one-document index is built first to obtain a reader and a {@code FieldInfo}.
 * Then, for each codec format in turn (postings, doc values, norms, term vectors,
 * stored fields), a writer-side and a reader-side object is opened in a
 * try-with-resources block and passed to {@code IOUtils.close} twice inside that
 * block. Together with the implicit close at the end of the block, every object is
 * therefore closed three times.
 *
 * @throws IOException propagated from any of the index or codec calls
 */
public void testMultiClose() throws IOException {
    // first make a one doc index
    Directory oneDocIndex = applyCreatedVersionMajor(newDirectory());
    IndexWriter iw = new IndexWriter(oneDocIndex, new IndexWriterConfig(new MockAnalyzer(random())));
    Document oneDoc = new Document();
    // "field" is a stored text field with term vectors enabled ...
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    Field customField = new Field("field", "contents", customType);
    oneDoc.add(customField);
    // ... and the same field name also carries a numeric doc value of 5.
    oneDoc.add(new NumericDocValuesField("field", 5));
    iw.addDocument(oneDoc);
    // The reader is opened from the writer before the writer is closed.
    LeafReader oneDocReader = getOnlyLeafReader(DirectoryReader.open(iw));
    iw.close();
    // now feed to codec apis manually
    // we use FSDir, things like ramdir are not guaranteed to cause fails if you write to them after close(), etc
    Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
    Codec codec = getCodec();
    SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    // Rebuild the field's FieldInfo from the indexed one, substituting a fresh HashMap for its map argument.
    FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
    FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(), proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(), proto.getPointDimensionCount(), proto.getPointNumBytes());
    FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { field });
    // Write and read states shared by all of the format checks below.
    SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, fieldInfos, null, new IOContext(new FlushInfo(1, 20)));
    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ);
    // PostingsFormat
    try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
        consumer.write(oneDocReader.fields());
        // Two explicit closes; try-with-resources adds a third on exit.
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (FieldsProducer producer = codec.postingsFormat().fieldsProducer(readState)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // DocValuesFormat
    try (DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(writeState)) {
        consumer.addNumericField(field, new EmptyDocValuesProducer() {

            @Override
            public NumericDocValues getNumeric(FieldInfo field) {
                // Hand-rolled iterator over a single document (doc 0) whose value is 5.
                return new NumericDocValues() {

                    // -1 means iteration has not started yet.
                    int docID = -1;

                    @Override
                    public int docID() {
                        return docID;
                    }

                    @Override
                    public int nextDoc() {
                        // The first call moves from -1 to doc 0; stepping to 1 is reported as exhausted.
                        docID++;
                        if (docID == 1) {
                            docID = NO_MORE_DOCS;
                        }
                        return docID;
                    }

                    @Override
                    public int advance(int target) {
                        // Only doc 0 can be reached, and only when not already past it.
                        if (docID <= 0 && target == 0) {
                            docID = 0;
                        } else {
                            docID = NO_MORE_DOCS;
                        }
                        return docID;
                    }

                    @Override
                    public boolean advanceExact(int target) throws IOException {
                        // Only doc 0 has a value.
                        docID = target;
                        return target == 0;
                    }

                    @Override
                    public long cost() {
                        return 1;
                    }

                    @Override
                    public long longValue() {
                        // Same value as the NumericDocValuesField added to the one-doc index above.
                        return 5;
                    }
                };
            }
        });
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (DocValuesProducer producer = codec.docValuesFormat().fieldsProducer(readState)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // NormsFormat
    try (NormsConsumer consumer = codec.normsFormat().normsConsumer(writeState)) {
        consumer.addNormsField(field, new NormsProducer() {

            @Override
            public NumericDocValues getNorms(FieldInfo field) {
                // Same single-document iterator shape as the doc-values one above: doc 0 only, constant value 5.
                return new NumericDocValues() {

                    // -1 means iteration has not started yet.
                    int docID = -1;

                    @Override
                    public int docID() {
                        return docID;
                    }

                    @Override
                    public int nextDoc() {
                        // The first call moves from -1 to doc 0; stepping to 1 is reported as exhausted.
                        docID++;
                        if (docID == 1) {
                            docID = NO_MORE_DOCS;
                        }
                        return docID;
                    }

                    @Override
                    public int advance(int target) {
                        // Only doc 0 can be reached, and only when not already past it.
                        if (docID <= 0 && target == 0) {
                            docID = 0;
                        } else {
                            docID = NO_MORE_DOCS;
                        }
                        return docID;
                    }

                    @Override
                    public boolean advanceExact(int target) throws IOException {
                        // Only doc 0 has a value.
                        docID = target;
                        return target == 0;
                    }

                    @Override
                    public long cost() {
                        return 1;
                    }

                    @Override
                    public long longValue() {
                        return 5;
                    }
                };
            }

            // The remaining NormsProducer methods are no-op stubs for this in-memory producer.
            @Override
            public void checkIntegrity() {
            }

            @Override
            public void close() {
            }

            @Override
            public long ramBytesUsed() {
                return 0;
            }
        });
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (NormsProducer producer = codec.normsFormat().normsProducer(readState)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // TermVectorsFormat
    try (TermVectorsWriter consumer = codec.termVectorsFormat().vectorsWriter(dir, segmentInfo, writeState.context)) {
        // Write one document with one field and one term, with positions, offsets and payloads all disabled.
        consumer.startDocument(1);
        consumer.startField(field, 1, false, false, false);
        consumer.startTerm(new BytesRef("testing"), 2);
        consumer.finishTerm();
        consumer.finishField();
        consumer.finishDocument();
        consumer.finish(fieldInfos, 1);
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (TermVectorsReader producer = codec.termVectorsFormat().vectorsReader(dir, segmentInfo, fieldInfos, readState.context)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // StoredFieldsFormat
    try (StoredFieldsWriter consumer = codec.storedFieldsFormat().fieldsWriter(dir, segmentInfo, writeState.context)) {
        // Write one document holding the stored text field created at the top of the test.
        consumer.startDocument();
        consumer.writeField(field, customField);
        consumer.finishDocument();
        consumer.finish(fieldInfos, 1);
        IOUtils.close(consumer);
        IOUtils.close(consumer);
    }
    try (StoredFieldsReader producer = codec.storedFieldsFormat().fieldsReader(dir, segmentInfo, fieldInfos, readState.context)) {
        IOUtils.close(producer);
        IOUtils.close(producer);
    }
    // Release the reader and both directories.
    IOUtils.close(oneDocReader, oneDocIndex, dir);
}
Also used : FieldsConsumer(org.apache.lucene.codecs.FieldsConsumer) Document(org.apache.lucene.document.Document) TermVectorsReader(org.apache.lucene.codecs.TermVectorsReader) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Codec(org.apache.lucene.codecs.Codec) StoredFieldsReader(org.apache.lucene.codecs.StoredFieldsReader) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DocValuesProducer(org.apache.lucene.codecs.DocValuesProducer) IOContext(org.apache.lucene.store.IOContext) NormsProducer(org.apache.lucene.codecs.NormsProducer) BytesRef(org.apache.lucene.util.BytesRef) StoredFieldsWriter(org.apache.lucene.codecs.StoredFieldsWriter) Directory(org.apache.lucene.store.Directory) DocValuesConsumer(org.apache.lucene.codecs.DocValuesConsumer) FieldsProducer(org.apache.lucene.codecs.FieldsProducer) IOException(java.io.IOException) FieldType(org.apache.lucene.document.FieldType) FlushInfo(org.apache.lucene.store.FlushInfo) TermVectorsWriter(org.apache.lucene.codecs.TermVectorsWriter) NormsConsumer(org.apache.lucene.codecs.NormsConsumer)

Example 3 with TermVectorsWriter

use of org.apache.lucene.codecs.TermVectorsWriter in project lucene-solr by apache.

The example is the method flush of the class SortingTermVectorsConsumer.

/**
 * Flushes via the superclass and then, if term vectors were buffered in
 * {@code tmpDirectory}, moves them into the segment's directory.
 *
 * <p>When {@code sortMap} is {@code null} the temporary files are simply renamed to
 * their final names. Otherwise every document's vectors are read back from the
 * temporary files and rewritten in the order given by {@code sortMap.newToOld},
 * after which the temporary files are deleted.
 *
 * @param fieldsToFlush per-field state, passed through to the superclass flush
 * @param state         write state of the segment being flushed
 * @param sortMap       document reordering to apply, or {@code null} if none is needed
 * @throws IOException if flushing, reading, rewriting, or cleanup fails
 */
@Override
void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
    super.flush(fieldsToFlush, state, sortMap);
    if (tmpDirectory != null) {
        if (sortMap == null) {
            // we're lucky the index is already sorted, just rename the temporary file and return
            for (Map.Entry<String, String> entry : tmpDirectory.getTemporaryFiles().entrySet()) {
                tmpDirectory.rename(entry.getValue(), entry.getKey());
            }
            return;
        }
        // Both resources are opened inside the try block so that a failure while opening
        // the second one (or while obtaining the merge instance) still closes the first
        // and still removes the temporary files.
        TermVectorsReader reader = null;
        TermVectorsWriter writer = null;
        try {
            reader = docWriter.codec.termVectorsFormat().vectorsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
            TermVectorsReader mergeReader = reader.getMergeInstance();
            writer = docWriter.codec.termVectorsFormat().vectorsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
            reader.checkIntegrity();
            // Emit documents in their new order, pulling each one from its old position.
            for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
                Fields vectors = mergeReader.get(sortMap.newToOld(docID));
                writeTermVectors(writer, vectors, state.fieldInfos);
            }
            writer.finish(state.fieldInfos, state.segmentInfo.maxDoc());
        } finally {
            // IOUtils.close skips null arguments, so a partially initialised pair is safe here.
            IOUtils.close(reader, writer);
            IOUtils.deleteFiles(tmpDirectory, tmpDirectory.getTemporaryFiles().values());
        }
    }
}
Also used : TermVectorsWriter(org.apache.lucene.codecs.TermVectorsWriter) Map(java.util.Map) TermVectorsReader(org.apache.lucene.codecs.TermVectorsReader)

Aggregations

TermVectorsWriter (org.apache.lucene.codecs.TermVectorsWriter): 3, TermVectorsReader (org.apache.lucene.codecs.TermVectorsReader): 2, BytesRef (org.apache.lucene.util.BytesRef): 2, IOException (java.io.IOException): 1, Map (java.util.Map): 1, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1, Codec (org.apache.lucene.codecs.Codec): 1, DocValuesConsumer (org.apache.lucene.codecs.DocValuesConsumer): 1, DocValuesProducer (org.apache.lucene.codecs.DocValuesProducer): 1, FieldsConsumer (org.apache.lucene.codecs.FieldsConsumer): 1, FieldsProducer (org.apache.lucene.codecs.FieldsProducer): 1, NormsConsumer (org.apache.lucene.codecs.NormsConsumer): 1, NormsProducer (org.apache.lucene.codecs.NormsProducer): 1, StoredFieldsReader (org.apache.lucene.codecs.StoredFieldsReader): 1, StoredFieldsWriter (org.apache.lucene.codecs.StoredFieldsWriter): 1, Document (org.apache.lucene.document.Document): 1, Field (org.apache.lucene.document.Field): 1, FieldType (org.apache.lucene.document.FieldType): 1, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 1, TextField (org.apache.lucene.document.TextField): 1