Search in sources :

Example 41 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class TestFieldCache method testNonexistantFields.

public void testNonexistantFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, floats.nextDoc());
    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, doubles.nextDoc());
    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
    assertEquals(NO_MORE_DOCS, binaries.nextDoc());
    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(NO_MORE_DOCS, sorted.nextDoc());
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));
    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 42 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class TestFieldCache method testNonIndexedFields.

public void testNonIndexedFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new StoredField("bogusbytes", "bogus"));
    doc.add(new StoredField("bogusshorts", "bogus"));
    doc.add(new StoredField("bogusints", "bogus"));
    doc.add(new StoredField("boguslongs", "bogus"));
    doc.add(new StoredField("bogusfloats", "bogus"));
    doc.add(new StoredField("bogusdoubles", "bogus"));
    doc.add(new StoredField("bogusterms", "bogus"));
    doc.add(new StoredField("bogustermsindex", "bogus"));
    doc.add(new StoredField("bogusmultivalued", "bogus"));
    doc.add(new StoredField("bogusbits", "bogus"));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, floats.nextDoc());
    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, doubles.nextDoc());
    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
    assertEquals(NO_MORE_DOCS, binaries.nextDoc());
    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(NO_MORE_DOCS, sorted.nextDoc());
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));
    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) StoredField(org.apache.lucene.document.StoredField) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 43 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class UninvertDocValuesMergePolicyTest method testIndexAndAddDocValues.

public void testIndexAndAddDocValues() throws Exception {
    Random rand = random();
    for (int i = 0; i < 100; i++) {
        assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
        if (rand.nextBoolean()) {
            assertU(commit());
        }
    }
    assertU(commit());
    // Assert everything has been indexed and there are no docvalues
    withNewRawReader(h, topReader -> {
        assertEquals(100, topReader.numDocs());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    });
    addDocValuesTo(h, TEST_FIELD);
    // Add some more documents with doc values turned on including updating some
    for (int i = 90; i < 110; i++) {
        assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
        if (rand.nextBoolean()) {
            assertU(commit());
        }
    }
    assertU(commit());
    withNewRawReader(h, topReader -> {
        assertEquals(110, topReader.numDocs());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    });
    int optimizeSegments = 1;
    assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));
    // Assert all docs have the right docvalues
    withNewRawReader(h, topReader -> {
        assertEquals(110, topReader.numDocs());
        assertEquals(optimizeSegments, topReader.leaves().size());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
        for (LeafReaderContext ctx : topReader.leaves()) {
            LeafReader r = ctx.reader();
            SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
            for (int i = 0; i < r.numDocs(); ++i) {
                Document doc = r.document(i);
                String v = doc.getField(TEST_FIELD).stringValue();
                String id = doc.getField(ID_FIELD).stringValue();
                assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
                assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
                assertEquals(v, id);
                docvalues.nextDoc();
                assertEquals(v, docvalues.binaryValue().utf8ToString());
            }
        }
    });
}
Also used : FieldInfos(org.apache.lucene.index.FieldInfos) Random(java.util.Random) LeafReader(org.apache.lucene.index.LeafReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 44 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class LegacyDocValuesIterables method sortedOrdIterable.

/** Converts {@link SortedDocValues} into the ord for each document as an {@code Iterable&lt;Number&gt;}.
   *
   * @deprecated Consume {@link SortedDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedOrdIterable(final DocValuesProducer valuesProducer, FieldInfo fieldInfo, int maxDoc) {
    return new Iterable<Number>() {

        @Override
        public Iterator<Number> iterator() {
            final SortedDocValues values;
            try {
                values = valuesProducer.getSorted(fieldInfo);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<Number>() {

                private int nextDocID;

                @Override
                public boolean hasNext() {
                    return nextDocID < maxDoc;
                }

                @Override
                public Number next() {
                    try {
                        if (nextDocID > values.docID()) {
                            values.nextDoc();
                        }
                        int result;
                        if (nextDocID == values.docID()) {
                            result = values.ordValue();
                        } else {
                            result = -1;
                        }
                        nextDocID++;
                        return result;
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }
            };
        }
    };
}
Also used : Iterator(java.util.Iterator) IOException(java.io.IOException) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 45 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class DocValuesConsumer method mergeSortedField.

/**
   * Merges the sorted docvalues from <code>toMerge</code>.
   * <p>
   * The default implementation calls {@link #addSortedField}, passing
   * an Iterable that merges ordinals and values and filters deleted documents .
   */
public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState) throws IOException {
    List<SortedDocValues> toMerge = new ArrayList<>();
    for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
            FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
            if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
                values = docValuesProducer.getSorted(fieldInfo);
            }
        }
        if (values == null) {
            values = DocValues.emptySorted();
        }
        toMerge.add(values);
    }
    final int numReaders = toMerge.size();
    final SortedDocValues[] dvs = toMerge.toArray(new SortedDocValues[numReaders]);
    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum[] liveTerms = new TermsEnum[dvs.length];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < numReaders; sub++) {
        SortedDocValues dv = dvs[sub];
        Bits liveDocs = mergeState.liveDocs[sub];
        if (liveDocs == null) {
            liveTerms[sub] = dv.termsEnum();
            weights[sub] = dv.getValueCount();
        } else {
            LongBitSet bitset = new LongBitSet(dv.getValueCount());
            int docID;
            while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
                if (liveDocs.get(docID)) {
                    int ord = dv.ordValue();
                    if (ord >= 0) {
                        bitset.set(ord);
                    }
                }
            }
            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
            weights[sub] = bitset.cardinality();
        }
    }
    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);
    // step 3: add field
    addSortedField(fieldInfo, new EmptyDocValuesProducer() {

        @Override
        public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException {
            if (fieldInfoIn != fieldInfo) {
                throw new IllegalArgumentException("wrong FieldInfo");
            }
            // We must make new iterators + DocIDMerger for each iterator:
            List<SortedDocValuesSub> subs = new ArrayList<>();
            long cost = 0;
            for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
                SortedDocValues values = null;
                DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
                if (docValuesProducer != null) {
                    FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
                    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
                        values = docValuesProducer.getSorted(readerFieldInfo);
                    }
                }
                if (values == null) {
                    values = DocValues.emptySorted();
                }
                cost += values.cost();
                subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
            }
            final long finalCost = cost;
            final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
            return new SortedDocValues() {

                private int docID = -1;

                private int ord;

                @Override
                public int docID() {
                    return docID;
                }

                @Override
                public int nextDoc() throws IOException {
                    SortedDocValuesSub sub = docIDMerger.next();
                    if (sub == null) {
                        return docID = NO_MORE_DOCS;
                    }
                    int subOrd = sub.values.ordValue();
                    assert subOrd != -1;
                    ord = (int) sub.map.get(subOrd);
                    docID = sub.mappedDocID;
                    return docID;
                }

                @Override
                public int ordValue() {
                    return ord;
                }

                @Override
                public int advance(int target) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public long cost() {
                    return finalCost;
                }

                @Override
                public int getValueCount() {
                    return (int) map.getValueCount();
                }

                @Override
                public BytesRef lookupOrd(int ord) throws IOException {
                    int segmentNumber = map.getFirstSegmentNumber(ord);
                    int segmentOrd = (int) map.getFirstSegmentOrd(ord);
                    return dvs[segmentNumber].lookupOrd(segmentOrd);
                }
            };
        }
    });
}
Also used : ArrayList(java.util.ArrayList) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) LongBitSet(org.apache.lucene.util.LongBitSet) IOException(java.io.IOException) SortedDocValues(org.apache.lucene.index.SortedDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) TermsEnum(org.apache.lucene.index.TermsEnum) FilteredTermsEnum(org.apache.lucene.index.FilteredTermsEnum) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) DocIDMerger(org.apache.lucene.index.DocIDMerger) Bits(org.apache.lucene.util.Bits) ArrayList(java.util.ArrayList) List(java.util.List) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

SortedDocValues (org.apache.lucene.index.SortedDocValues)66 BytesRef (org.apache.lucene.util.BytesRef)32 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)27 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)23 LeafReader (org.apache.lucene.index.LeafReader)22 Document (org.apache.lucene.document.Document)21 NumericDocValues (org.apache.lucene.index.NumericDocValues)15 Directory (org.apache.lucene.store.Directory)15 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)14 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)13 IOException (java.io.IOException)12 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)12 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)10 MultiDocValues (org.apache.lucene.index.MultiDocValues)10 ArrayList (java.util.ArrayList)9 IndexReader (org.apache.lucene.index.IndexReader)9 OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)9 DoublePoint (org.apache.lucene.document.DoublePoint)8 FloatPoint (org.apache.lucene.document.FloatPoint)8 IntPoint (org.apache.lucene.document.IntPoint)8