Example 31 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

In the class FieldCacheImpl, method getTerms:

public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
    BinaryDocValues valuesIn = reader.getBinaryDocValues(field);
    if (valuesIn == null) {
        valuesIn = reader.getSortedDocValues(field);
    }
    if (valuesIn != null) {
        // Not cached here by FieldCacheImpl (cached instead per-thread by SegmentReader):
        return valuesIn;
    }
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (info == null) {
        return DocValues.emptyBinary();
    } else if (info.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    } else if (info.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptyBinary();
    }
    BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
    return impl.iterator();
}
Also used: BinaryDocValues (org.apache.lucene.index.BinaryDocValues), FieldInfo (org.apache.lucene.index.FieldInfo)
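
The interesting part of this example is the fall-through logic: doc values are preferred, and the FieldInfo is consulted only to decide whether uninverting the postings is even legal for the field. A minimal, hypothetical helper (not part of lucene-solr; class and method names are illustrative only) that restates those FieldInfo checks could look like this:

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;

// Hypothetical helper mirroring the FieldInfo checks getTerms() performs
// before falling back to the uninverting cache.
public final class UninvertPrecheck {

    private UninvertPrecheck() {}

    /** Returns true if the field has no doc values but is indexed, i.e. the
     *  only way to get per-document terms is to uninvert the postings. */
    public static boolean needsUninverting(LeafReader reader, String field) {
        FieldInfo info = reader.getFieldInfos().fieldInfo(field);
        if (info == null) {
            return false; // field does not exist in this segment
        }
        if (info.getDocValuesType() != DocValuesType.NONE) {
            // real doc values exist; asking the cache would be a type mismatch
            return false;
        }
        // only indexed fields can be uninverted from their postings
        return info.getIndexOptions() != IndexOptions.NONE;
    }
}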

Example 32 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

In the class FieldCacheImpl, method getDocTermOrds:

// TODO: this if DocTermsIndex was already created, we
// should share it...
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException {
    // not a general purpose filtering mechanism...
    assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;
    SortedSetDocValues dv = reader.getSortedSetDocValues(field);
    if (dv != null) {
        return dv;
    }
    SortedDocValues sdv = reader.getSortedDocValues(field);
    if (sdv != null) {
        return DocValues.singleton(sdv);
    }
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (info == null) {
        return DocValues.emptySortedSet();
    } else if (info.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    } else if (info.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptySortedSet();
    }
    // ok we need to uninvert. check if we can optimize a bit.
    Terms terms = reader.terms(field);
    if (terms == null) {
        return DocValues.emptySortedSet();
    } else {
        // if #postings = #docswithfield we know that the field is "single valued enough".
        // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency.
        // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf
        long numPostings = terms.getSumDocFreq();
        if (numPostings != -1 && numPostings == terms.getDocCount()) {
            return DocValues.singleton(getTermsIndex(reader, field));
        }
    }
    DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix));
    return dto.iterator(reader);
}
Also used: SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues), Terms (org.apache.lucene.index.Terms), SortedDocValues (org.apache.lucene.index.SortedDocValues), FieldInfo (org.apache.lucene.index.FieldInfo)
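
The "single valued enough" shortcut above compares the total number of postings with the number of documents that have the field: if they match, no document can hold two distinct terms, so the SORTED view built by getTermsIndex is sufficient. A small, hypothetical sketch of that test (names are illustrative, not part of lucene-solr):

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;

// Hypothetical restatement of the optimization check in getDocTermOrds().
public final class SingleValuedCheck {

    private SingleValuedCheck() {}

    public static boolean isEffectivelySingleValued(LeafReader reader, String field) throws IOException {
        Terms terms = reader.terms(field);
        if (terms == null) {
            return false; // nothing indexed for this field in this segment
        }
        long sumDocFreq = terms.getSumDocFreq(); // total number of postings for the field
        int docCount = terms.getDocCount();      // number of documents containing the field
        return sumDocFreq != -1 && sumDocFreq == docCount;
    }
}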

Example 33 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

In the class FieldCacheImpl, method getTermsIndex:

public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
    SortedDocValues valuesIn = reader.getSortedDocValues(field);
    if (valuesIn != null) {
        // Not cached here by FieldCacheImpl (cached instead per-thread by SegmentReader):
        return valuesIn;
    } else {
        final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
        if (info == null) {
            return DocValues.emptySorted();
        } else if (info.getDocValuesType() != DocValuesType.NONE) {
            // we don't try to build a sorted instance from numeric/binary doc values because dedup can be very costly
            throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
        } else if (info.getIndexOptions() == IndexOptions.NONE) {
            return DocValues.emptySorted();
        }
        SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
        return impl.iterator();
    }
}
Also used: SortedDocValues (org.apache.lucene.index.SortedDocValues), FieldInfo (org.apache.lucene.index.FieldInfo)
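
Since Lucene 7 the doc-values classes are iterators, so the SortedDocValues returned here (whether native or uninverted) is consumed by advancing document by document. A short, hypothetical consumer, assuming an already obtained SortedDocValues instance:

import java.io.IOException;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

// Hypothetical consumer of the SortedDocValues returned by getTermsIndex().
public final class TermsIndexDump {

    private TermsIndexDump() {}

    public static void dump(SortedDocValues values) throws IOException {
        for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
            int ord = values.ordValue();           // per-document ordinal
            BytesRef term = values.lookupOrd(ord); // term shared by all docs with this ordinal
            System.out.println(doc + " -> " + term.utf8ToString());
        }
    }
}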

Example 34 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

In the class Lucene70DocValuesConsumer, method addSortedSetField:

@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeInt(field.number);
    meta.writeByte(Lucene70DocValuesFormat.SORTED_SET);
    SortedSetDocValues values = valuesProducer.getSortedSet(field);
    int numDocsWithField = 0;
    long numOrds = 0;
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        numDocsWithField++;
        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
            numOrds++;
        }
    }
    if (numDocsWithField == numOrds) {
        meta.writeByte((byte) 0);
        doAddSortedField(field, new EmptyDocValuesProducer() {

            @Override
            public SortedDocValues getSorted(FieldInfo field) throws IOException {
                return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
            }
        });
        return;
    }
    meta.writeByte((byte) 1);
    assert numDocsWithField != 0;
    if (numDocsWithField == maxDoc) {
        meta.writeLong(-1);
        meta.writeLong(0L);
    } else {
        long offset = data.getFilePointer();
        meta.writeLong(offset);
        values = valuesProducer.getSortedSet(field);
        IndexedDISI.writeBitSet(values, data);
        meta.writeLong(data.getFilePointer() - offset);
    }
    int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
    meta.writeByte((byte) numberOfBitsPerOrd);
    long start = data.getFilePointer();
    meta.writeLong(start);
    DirectWriter writer = DirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
            writer.add(ord);
        }
    }
    writer.finish();
    meta.writeLong(data.getFilePointer() - start);
    meta.writeInt(numDocsWithField);
    start = data.getFilePointer();
    meta.writeLong(start);
    meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
    final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
    long addr = 0;
    addressesWriter.add(addr);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        values.nextOrd();
        addr++;
        while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
            addr++;
        }
        addressesWriter.add(addr);
    }
    addressesWriter.finish();
    meta.writeLong(data.getFilePointer() - start);
    addTermsDict(values);
}
Also used: SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues), EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer), DirectWriter (org.apache.lucene.util.packed.DirectWriter), IOException (java.io.IOException), DirectMonotonicWriter (org.apache.lucene.util.packed.DirectMonotonicWriter), FieldInfo (org.apache.lucene.index.FieldInfo), SortedDocValues (org.apache.lucene.index.SortedDocValues)
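
The first loop in addSortedSetField decides the on-disk layout: when the number of documents with the field equals the total number of ordinals, every document holds exactly one value and the field is delegated to doAddSortedField as a plain SORTED field. A hypothetical standalone sketch of that counting pass (not part of the codec; note that it exhausts the iterator, which is why the consumer repeatedly pulls fresh iterators from the DocValuesProducer):

import java.io.IOException;

import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;

// Hypothetical illustration of the first pass in addSortedSetField().
public final class SortedSetStats {

    private SortedSetStats() {}

    public static boolean isSingleValued(SortedSetDocValues values) throws IOException {
        long numDocsWithField = 0;
        long numOrds = 0;
        for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
            numDocsWithField++;
            for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
                numOrds++;
            }
        }
        return numDocsWithField == numOrds; // equal counts mean one ordinal per document
    }
}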

Example 35 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

In the class Lucene60PointsWriter, method merge:

@Override
public void merge(MergeState mergeState) throws IOException {
    /**
     * If indexSort is activated and some of the leaves are not sorted the next test will catch that and the non-optimized merge will run.
     * If the readers are all sorted then it's safe to perform a bulk merge of the points.
     **/
    for (PointsReader reader : mergeState.pointsReaders) {
        if (reader instanceof Lucene60PointsReader == false) {
            // We can only bulk merge when all to-be-merged segments use our format:
            super.merge(mergeState);
            return;
        }
    }
    for (PointsReader reader : mergeState.pointsReaders) {
        if (reader != null) {
            reader.checkIntegrity();
        }
    }
    for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
        if (fieldInfo.getPointDimensionCount() != 0) {
            if (fieldInfo.getPointDimensionCount() == 1) {
                boolean singleValuePerDoc = true;
                // Worst case total maximum size (if none of the points are deleted):
                long totMaxSize = 0;
                for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                    PointsReader reader = mergeState.pointsReaders[i];
                    if (reader != null) {
                        FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                        FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                        if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                            PointValues values = reader.getValues(fieldInfo.name);
                            if (values != null) {
                                totMaxSize += values.size();
                                singleValuePerDoc &= values.size() == values.getDocCount();
                            }
                        }
                    }
                }
                // Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
                // already sorted incoming segments, instead of trying to sort all points again as if
                // we were simply reindexing them:
                try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, totMaxSize, singleValuePerDoc)) {
                    List<BKDReader> bkdReaders = new ArrayList<>();
                    List<MergeState.DocMap> docMaps = new ArrayList<>();
                    for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                        PointsReader reader = mergeState.pointsReaders[i];
                        if (reader != null) {
                            // we confirmed this up above
                            assert reader instanceof Lucene60PointsReader;
                            Lucene60PointsReader reader60 = (Lucene60PointsReader) reader;
                            // NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to this
                            // reader's FieldInfo as we do below) because field numbers can easily be different
                            // when addIndexes(Directory...) copies over segments from another index:
                            FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                            FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                            if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                                BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                                if (bkdReader != null) {
                                    bkdReaders.add(bkdReader);
                                    docMaps.add(mergeState.docMaps[i]);
                                }
                            }
                        }
                    }
                    long fp = writer.merge(dataOut, docMaps, bkdReaders);
                    if (fp != -1) {
                        indexFPs.put(fieldInfo.name, fp);
                    }
                }
            } else {
                mergeOneField(mergeState, fieldInfo);
            }
        }
    }
    finish();
}
Also used: ArrayList (java.util.ArrayList), FieldInfos (org.apache.lucene.index.FieldInfos), BKDReader (org.apache.lucene.util.bkd.BKDReader), MutablePointValues (org.apache.lucene.codecs.MutablePointValues), PointValues (org.apache.lucene.index.PointValues), PointsReader (org.apache.lucene.codecs.PointsReader), BKDWriter (org.apache.lucene.util.bkd.BKDWriter), FieldInfo (org.apache.lucene.index.FieldInfo)
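
The guard at the top of merge is what enables the optimized path: a bulk BKD merge is only attempted when every to-be-merged segment uses this same points format. A hypothetical restatement of that guard, assuming the Lucene60PointsReader class from the lucene60 codec package (helper names chosen for illustration):

import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.index.MergeState;

// Hypothetical helper restating the format check that precedes the bulk merge.
public final class BulkMergeCheck {

    private BulkMergeCheck() {}

    public static boolean canBulkMerge(MergeState mergeState) {
        for (PointsReader reader : mergeState.pointsReaders) {
            // a null reader (segment without points) also fails this check and
            // forces the generic, non-optimized merge
            if (reader instanceof Lucene60PointsReader == false) {
                return false;
            }
        }
        return true;
    }
}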

Aggregations

FieldInfo (org.apache.lucene.index.FieldInfo): 53 usages
BytesRef (org.apache.lucene.util.BytesRef): 13 usages
LeafReader (org.apache.lucene.index.LeafReader): 12 usages
ArrayList (java.util.ArrayList): 10 usages
Terms (org.apache.lucene.index.Terms): 9 usages
TermsEnum (org.apache.lucene.index.TermsEnum): 9 usages
IOException (java.io.IOException): 8 usages
FieldInfos (org.apache.lucene.index.FieldInfos): 8 usages
HashMap (java.util.HashMap): 7 usages
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 7 usages
DocValuesType (org.apache.lucene.index.DocValuesType): 6 usages
PointValues (org.apache.lucene.index.PointValues): 6 usages
IndexOutput (org.apache.lucene.store.IndexOutput): 6 usages
CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 5 usages
SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 5 usages
StoredFieldVisitor (org.apache.lucene.index.StoredFieldVisitor): 5 usages
Map (java.util.Map): 4 usages
Document (org.apache.lucene.document.Document): 4 usages
EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer): 4 usages
IndexReader (org.apache.lucene.index.IndexReader): 4 usages