Search in sources :

Example 41 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class TermVectorsWriter method addAllDocVectors.

/**
 * Safe (but slowish) default method to write every vector field in the document.
 *
 * <p>As excerpted here, the method walks each field name in {@code vectors}, resolves its
 * {@code FieldInfo} through {@code mergeState.mergeFieldInfos}, and then calls
 * {@code startField} once per field, {@code startTerm} once per term and {@code addPosition}
 * once per position.
 *
 * <p>NOTE(review): this excerpt is missing closing braces and several statements (early
 * returns, counter increments, finish calls); consult the upstream source before relying on it.
 *
 * @param vectors the per-document term vector fields to write
 * @param mergeState merge state whose {@code mergeFieldInfos} supplies the {@code FieldInfo} for each field name
 * @throws IOException declared by the signature
 */
protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
    if (vectors == null) {
    int numFields = vectors.size();
    if (numFields == -1) {
        // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
        numFields = 0;
        for (final Iterator<String> it = vectors.iterator(); it.hasNext(); ) {
    String lastFieldName = null;
    TermsEnum termsEnum = null;
    PostingsEnum docsAndPositionsEnum = null;
    int fieldCount = 0;
    for (String fieldName : vectors) {
        final FieldInfo fieldInfo = mergeState.mergeFieldInfos.fieldInfo(fieldName);
        // Field names must arrive in strictly increasing order.
        assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0 : "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
        lastFieldName = fieldName;
        final Terms terms = vectors.terms(fieldName);
        if (terms == null) {
            // FieldsEnum shouldn't lie...
        final boolean hasPositions = terms.hasPositions();
        final boolean hasOffsets = terms.hasOffsets();
        final boolean hasPayloads = terms.hasPayloads();
        // Payloads are only allowed on fields that also have positions.
        assert !hasPayloads || hasPositions;
        int numTerms = (int) terms.size();
        if (numTerms == -1) {
            // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
            numTerms = 0;
            termsEnum = terms.iterator();
            // NOTE(review): the left operand of this condition is missing in the excerpt
            // (presumably a call that advances termsEnum) - confirm against upstream.
            while ( != null) {
        startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
        // A fresh iterator is obtained for the writing pass.
        termsEnum = terms.iterator();
        int termCount = 0;
        // NOTE(review): same truncated loop condition as in the counting pass above.
        while ( != null) {
            final int freq = (int) termsEnum.totalTermFreq();
            startTerm(termsEnum.term(), freq);
            if (hasPositions || hasOffsets) {
                // The previous enum instance is passed back in to termsEnum.postings.
                docsAndPositionsEnum = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS | PostingsEnum.PAYLOADS);
                assert docsAndPositionsEnum != null;
                final int docID = docsAndPositionsEnum.nextDoc();
                assert docID != DocIdSetIterator.NO_MORE_DOCS;
                assert docsAndPositionsEnum.freq() == freq;
                // One addPosition call per occurrence of the term.
                for (int posUpto = 0; posUpto < freq; posUpto++) {
                    final int pos = docsAndPositionsEnum.nextPosition();
                    final int startOffset = docsAndPositionsEnum.startOffset();
                    final int endOffset = docsAndPositionsEnum.endOffset();
                    final BytesRef payload = docsAndPositionsEnum.getPayload();
                    assert !hasPositions || pos >= 0;
                    addPosition(pos, startOffset, endOffset, payload);
        // Sanity checks: the number of terms/fields written must match the counts announced up front.
        assert termCount == numTerms;
    assert fieldCount == numFields;
Also used : Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 42 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class SimpleTextFieldsWriter method write.

/**
 * Iterates every field in {@code fields}, then every term of the field, every document of the
 * term and, where available, every position of the term within the document.
 *
 * <p>NOTE(review): this excerpt is missing closing braces and the statements that actually
 * emit output; only the traversal and its assertions are visible here.
 *
 * @param fieldInfos source of the {@code FieldInfo} for each field name
 * @param fields the fields whose terms and postings are traversed
 * @throws IOException declared by the signature
 */
public void write(FieldInfos fieldInfos, Fields fields) throws IOException {
    // for each field
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            // Annoyingly, this can happen!
        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
        boolean wroteField = false;
        boolean hasPositions = terms.hasPositions();
        boolean hasFreqs = terms.hasFreqs();
        // Unlike the other capability flags, payload presence is read from fieldInfo, not from terms.
        boolean hasPayloads = fieldInfo.hasPayloads();
        boolean hasOffsets = terms.hasOffsets();
        // Build the PostingsEnum flags: POSITIONS (plus PAYLOADS/OFFSETS when present) if the
        // field has positions, otherwise FREQS if it has frequencies, otherwise 0.
        int flags = 0;
        if (hasPositions) {
            flags = PostingsEnum.POSITIONS;
            if (hasPayloads) {
                flags = flags | PostingsEnum.PAYLOADS;
            if (hasOffsets) {
                flags = flags | PostingsEnum.OFFSETS;
        } else {
            if (hasFreqs) {
                flags = flags | PostingsEnum.FREQS;
        TermsEnum termsEnum = terms.iterator();
        PostingsEnum postingsEnum = null;
        // for each term in field
        while (true) {
            // NOTE(review): the right-hand side of this assignment is missing in the excerpt
            // (presumably a call that advances termsEnum) - confirm against upstream.
            BytesRef term =;
            if (term == null) {
            // The previous enum instance is passed back in to termsEnum.postings.
            postingsEnum = termsEnum.postings(postingsEnum, flags);
            assert postingsEnum != null : "termsEnum=" + termsEnum + " hasPos=" + hasPositions + " flags=" + flags;
            boolean wroteTerm = false;
            // for each doc in field+term
            while (true) {
                int doc = postingsEnum.nextDoc();
                if (doc == PostingsEnum.NO_MORE_DOCS) {
                if (!wroteTerm) {
                    if (!wroteField) {
                        // we lazily do this, in case the field had
                        // no terms
                        wroteField = true;
                    // we lazily do this, in case the term had
                    // zero docs
                    wroteTerm = true;
                if (hasFreqs) {
                    int freq = postingsEnum.freq();
                    if (hasPositions) {
                        // for assert:
                        int lastStartOffset = 0;
                        // for each pos in field+term+doc
                        for (int i = 0; i < freq; i++) {
                            int position = postingsEnum.nextPosition();
                            if (hasOffsets) {
                                int startOffset = postingsEnum.startOffset();
                                int endOffset = postingsEnum.endOffset();
                                // Offsets must be well-formed and non-decreasing within a document.
                                assert endOffset >= startOffset;
                                assert startOffset >= lastStartOffset : "startOffset=" + startOffset + " lastStartOffset=" + lastStartOffset;
                                lastStartOffset = startOffset;
                            BytesRef payload = postingsEnum.getPayload();
                            // Only non-empty payloads are handled.
                            if (payload != null && payload.length > 0) {
                                assert payload.length != 0;
Also used : Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 43 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class SimpleTextStoredFieldsReader method visitDocument.

/**
 * Reads the stored fields of one document and offers each one to {@code visitor}.
 *
 * <p>For each field the visible code parses a field number, looks up its {@code FieldInfo},
 * matches the type marker in {@code scratch} against the known {@code TYPE_*} constants, and
 * then dispatches on {@code visitor.needsField(fieldInfo)}.
 *
 * <p>NOTE(review): this excerpt is missing closing braces, the statements that advance the
 * input between checks, and any {@code break}/{@code return} in the switch; the tail of the
 * signature line is also garbled. Consult the upstream source before relying on it.
 *
 * @param n the document to visit (presumably a document number - its use is not visible here)
 * @param visitor consulted per field via {@code needsField}; handed to {@code readField} in the {@code YES} case
 * @throws IOException declared by the signature
 */
public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {[n]);
    while (true) {
        if (StringHelper.startsWith(scratch.get(), FIELD) == false) {
        // The field number is parsed starting at offset FIELD.length.
        int fieldNumber = parseIntAt(FIELD.length);
        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
        assert StringHelper.startsWith(scratch.get(), NAME);
        assert StringHelper.startsWith(scratch.get(), TYPE);
        // Resolve the type marker found at offset TYPE.length to one of the known type constants.
        final BytesRef type;
        if (equalsAt(TYPE_STRING, scratch.get(), TYPE.length)) {
            type = TYPE_STRING;
        } else if (equalsAt(TYPE_BINARY, scratch.get(), TYPE.length)) {
            type = TYPE_BINARY;
        } else if (equalsAt(TYPE_INT, scratch.get(), TYPE.length)) {
            type = TYPE_INT;
        } else if (equalsAt(TYPE_LONG, scratch.get(), TYPE.length)) {
            type = TYPE_LONG;
        } else if (equalsAt(TYPE_FLOAT, scratch.get(), TYPE.length)) {
            type = TYPE_FLOAT;
        } else if (equalsAt(TYPE_DOUBLE, scratch.get(), TYPE.length)) {
            type = TYPE_DOUBLE;
        } else {
            throw new RuntimeException("unknown field type");
        // Let the visitor decide what happens with this field.
        switch(visitor.needsField(fieldInfo)) {
            case YES:
                readField(type, fieldInfo, visitor);
            case NO:
                assert StringHelper.startsWith(scratch.get(), VALUE);
            case STOP:
Also used : FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef)

Example 44 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class PointsWriter method mergeOneField.

/**
 * Default naive merge implementation for one field: it just re-indexes all the values
 * from the incoming segment.  The default codec overrides this for 1D fields and uses
 * a faster but more complex implementation.
 *
 * <p>The first loop sums {@code values.size()} and {@code values.getDocCount()} across
 * {@code mergeState.pointsReaders} for readers whose field info reports a point dimension
 * count above zero. Those totals are what the anonymous {@code PointValues} handed to
 * {@code writeField} returns from {@code size()} and {@code getDocCount()}.
 *
 * <p>NOTE(review): this excerpt is missing closing braces, {@code continue} statements and
 * several call arguments; consult the upstream source before relying on it.
 *
 * @param mergeState supplies the per-segment points readers, field infos and doc maps
 * @param fieldInfo the field being merged; passed through to {@code writeField}
 * @throws IOException declared by the signature
 */
protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException {
    long maxPointCount = 0;
    int docCount = 0;
    for (int i = 0; i < mergeState.pointsReaders.length; i++) {
        PointsReader pointsReader = mergeState.pointsReaders[i];
        if (pointsReader != null) {
            // NOTE(review): the argument of fieldInfo( is missing in the excerpt - confirm against upstream.
            FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(;
            if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                // NOTE(review): the argument of getValues( is missing in the excerpt - confirm against upstream.
                PointValues values = pointsReader.getValues(;
                if (values != null) {
                    maxPointCount += values.size();
                    docCount += values.getDocCount();
    // Final copies so the anonymous classes below can capture the totals.
    final long finalMaxPointCount = maxPointCount;
    final int finalDocCount = docCount;
    writeField(fieldInfo, new PointsReader() {

        public long ramBytesUsed() {
            return 0;

        public void close() throws IOException {

        public PointValues getValues(String fieldName) {
            // NOTE(review): the argument of equals( is missing in the excerpt - confirm against upstream.
            if (fieldName.equals( == false) {
                throw new IllegalArgumentException("field name must match the field being merged");
            return new PointValues() {

                // Visits every segment's values for fieldName, remapping doc IDs through the segment's DocMap.
                public void intersect(IntersectVisitor mergedVisitor) throws IOException {
                    for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                        PointsReader pointsReader = mergeState.pointsReaders[i];
                        if (pointsReader == null) {
                            // This segment has no points
                        FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldName);
                        if (readerFieldInfo == null) {
                            // This segment never saw this field
                        if (readerFieldInfo.getPointDimensionCount() == 0) {
                            // This segment saw this field, but the field did not index points in it:
                        PointValues values = pointsReader.getValues(fieldName);
                        if (values == null) {
                        MergeState.DocMap docMap = mergeState.docMaps[i];
                        values.intersect(new IntersectVisitor() {

                            public void visit(int docID) {
                                // Should never be called because our compare method never returns Relation.CELL_INSIDE_QUERY
                                throw new IllegalStateException();

                            public void visit(int docID, byte[] packedValue) throws IOException {
                                // docMap.get returns -1 for documents that were deleted.
                                int newDocID = docMap.get(docID);
                                if (newDocID != -1) {
                                    // Not deleted:
                                    mergedVisitor.visit(newDocID, packedValue);

                            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                                // Forces this segment's PointsReader to always visit all docs + values:
                                return Relation.CELL_CROSSES_QUERY;

                // The accessors below are not supported by this merge-only view.
                public long estimatePointCount(IntersectVisitor visitor) {
                    throw new UnsupportedOperationException();

                public byte[] getMinPackedValue() {
                    throw new UnsupportedOperationException();

                public byte[] getMaxPackedValue() {
                    throw new UnsupportedOperationException();

                public int getNumDimensions() {
                    throw new UnsupportedOperationException();

                public int getBytesPerDimension() {
                    throw new UnsupportedOperationException();

                // Returns the total summed over the incoming readers before writeField was called.
                public long size() {
                    return finalMaxPointCount;

                // Returns the total summed over the incoming readers before writeField was called.
                public int getDocCount() {
                    return finalDocCount;

        public void checkIntegrity() throws IOException {
            throw new UnsupportedOperationException();
Also used : IOException(java.io.IOException) PointValues(org.apache.lucene.index.PointValues) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 45 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class DocValuesConsumer method mergeSortedField.

/**
   * Merges the sorted docvalues from <code>toMerge</code>.
   * <p>
   * The default implementation calls {@link #addSortedField}, passing
   * an Iterable that merges ordinals and values and filters deleted documents.
   */
public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState) throws IOException {
    // Overview of the visible code: collect one SortedDocValues per incoming segment, work out which
    // terms are still referenced by live documents, build an OrdinalMap over them, and pass
    // addSortedField a producer that yields the merged, remapped values.
    // NOTE(review): this excerpt is missing closing braces and several statements and
    // expressions (flagged individually below); consult the upstream source before relying on it.
    List<SortedDocValues> toMerge = new ArrayList<>();
    for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
            // NOTE(review): the argument of fieldInfo( is missing in the excerpt - confirm against upstream.
            FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(;
            if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
                values = docValuesProducer.getSorted(fieldInfo);
        // Segments without sorted doc values for this field contribute an empty instance.
        if (values == null) {
            values = DocValues.emptySorted();
    // NOTE(review): no toMerge.add(...) call is visible in this excerpt; presumably each
    // segment's values are appended inside the loop above - confirm against upstream.
    final int numReaders = toMerge.size();
    final SortedDocValues[] dvs = toMerge.toArray(new SortedDocValues[numReaders]);
    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum[] liveTerms = new TermsEnum[dvs.length];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < numReaders; sub++) {
        SortedDocValues dv = dvs[sub];
        Bits liveDocs = mergeState.liveDocs[sub];
        if (liveDocs == null) {
            // No liveDocs bitset for this segment: use every term, weighted by the value count.
            liveTerms[sub] = dv.termsEnum();
            weights[sub] = dv.getValueCount();
        } else {
            LongBitSet bitset = new LongBitSet(dv.getValueCount());
            int docID;
            while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
                if (liveDocs.get(docID)) {
                    int ord = dv.ordValue();
                    // NOTE(review): the body of this branch is missing in the excerpt;
                    // presumably it records ord in bitset - confirm against upstream.
                    if (ord >= 0) {
            // Only terms whose bit is set in bitset are exposed; the weight is the number of set bits.
            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
            weights[sub] = bitset.cardinality();
    // step 2: create ordinal map (this conceptually does the "merging")
    // NOTE(review): the callee and first argument of this expression are missing in the excerpt - confirm against upstream.
    final OrdinalMap map =, liveTerms, weights, PackedInts.COMPACT);
    // step 3: add field
    addSortedField(fieldInfo, new EmptyDocValuesProducer() {

        public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException {
            // Identity comparison: only the exact FieldInfo instance being merged is accepted.
            if (fieldInfoIn != fieldInfo) {
                throw new IllegalArgumentException("wrong FieldInfo");
            // We must make new iterators + DocIDMerger for each iterator:
            List<SortedDocValuesSub> subs = new ArrayList<>();
            long cost = 0;
            for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
                SortedDocValues values = null;
                DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
                if (docValuesProducer != null) {
                    // NOTE(review): the argument of fieldInfo( is missing in the excerpt - confirm against upstream.
                    FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(;
                    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
                        values = docValuesProducer.getSorted(readerFieldInfo);
                if (values == null) {
                    values = DocValues.emptySorted();
                // The merged iterator's cost is the sum of the per-segment costs.
                cost += values.cost();
                subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
            final long finalCost = cost;
            final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
            return new SortedDocValues() {

                // Current merged doc ID; -1 until nextDoc() is first called.
                private int docID = -1;

                // Ordinal for the current document, assigned in nextDoc().
                private int ord;

                public int docID() {
                    return docID;

                public int nextDoc() throws IOException {
                    // NOTE(review): the right-hand side of this assignment is missing in the excerpt
                    // (presumably it pulls the next sub from docIDMerger) - confirm against upstream.
                    SortedDocValuesSub sub =;
                    if (sub == null) {
                        return docID = NO_MORE_DOCS;
                    int subOrd = sub.values.ordValue();
                    assert subOrd != -1;
                    // NOTE(review): the expression being cast is missing in the excerpt
                    // (presumably a lookup that maps subOrd to a merged ordinal) - confirm against upstream.
                    ord = (int);
                    docID = sub.mappedDocID;
                    return docID;

                public int ordValue() {
                    return ord;

                // Random access is not supported; the merged view is only iterated with nextDoc().
                public int advance(int target) {
                    throw new UnsupportedOperationException();

                public boolean advanceExact(int target) throws IOException {
                    throw new UnsupportedOperationException();

                public long cost() {
                    return finalCost;

                public int getValueCount() {
                    return (int) map.getValueCount();

                public BytesRef lookupOrd(int ord) throws IOException {
                    // Resolve the merged ordinal to a segment and that segment's ordinal, then delegate the lookup.
                    int segmentNumber = map.getFirstSegmentNumber(ord);
                    int segmentOrd = (int) map.getFirstSegmentOrd(ord);
                    return dvs[segmentNumber].lookupOrd(segmentOrd);
Also used : ArrayList(java.util.ArrayList) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) LongBitSet(org.apache.lucene.util.LongBitSet) IOException(java.io.IOException) SortedDocValues(org.apache.lucene.index.SortedDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) TermsEnum(org.apache.lucene.index.TermsEnum) FilteredTermsEnum(org.apache.lucene.index.FilteredTermsEnum) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) DocIDMerger(org.apache.lucene.index.DocIDMerger) Bits(org.apache.lucene.util.Bits) ArrayList(java.util.ArrayList) List(java.util.List) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef)


FieldInfo (org.apache.lucene.index.FieldInfo)53 BytesRef (org.apache.lucene.util.BytesRef)13 LeafReader (org.apache.lucene.index.LeafReader)12 ArrayList (java.util.ArrayList)10 Terms (org.apache.lucene.index.Terms)9 TermsEnum (org.apache.lucene.index.TermsEnum)9 IOException (java.io.IOException) FieldInfos (org.apache.lucene.index.FieldInfos)8 HashMap (java.util.HashMap)7 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 DocValuesType (org.apache.lucene.index.DocValuesType)6 PointValues (org.apache.lucene.index.PointValues)6 IndexOutput (org.apache.lucene.store.IndexOutput) CorruptIndexException (org.apache.lucene.index.CorruptIndexException)5 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)5 StoredFieldVisitor (org.apache.lucene.index.StoredFieldVisitor)5 Map (java.util.Map)4 Document (org.apache.lucene.document.Document)4 EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer)4 IndexReader (org.apache.lucene.index.IndexReader)4