Examples with SortedDocValues - org.apache.lucene.index.SortedDocValues

Example 36 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.

the class BytesRefFieldComparatorSource method newComparator.

@Override
public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
    assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
    final boolean sortMissingLast = sortMissingLast(missingValue) ^ reversed;
    final BytesRef missingBytes = (BytesRef) missingObject(missingValue, reversed);
    if (indexFieldData instanceof IndexOrdinalsFieldData) {
        return new FieldComparator.TermOrdValComparator(numHits, null, sortMissingLast) {

            @Override
            protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
                final RandomAccessOrds values = ((IndexOrdinalsFieldData) indexFieldData).load(context).getOrdinalsValues();
                final SortedDocValues selectedValues;
                if (nested == null) {
                    selectedValues = sortMode.select(values);
                } else {
                    final BitSet rootDocs = nested.rootDocs(context);
                    final DocIdSetIterator innerDocs = nested.innerDocs(context);
                    selectedValues = sortMode.select(values, rootDocs, innerDocs);
                }
                if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
                    return selectedValues;
                } else {
                    return new ReplaceMissing(selectedValues, missingBytes);
                }
            }

            @Override
            public void setScorer(Scorer scorer) {
                BytesRefFieldComparatorSource.this.setScorer(scorer);
            }
        };
    }
    final BytesRef nullPlaceHolder = new BytesRef();
    final BytesRef nonNullMissingBytes = missingBytes == null ? nullPlaceHolder : missingBytes;
    return new FieldComparator.TermValComparator(numHits, null, sortMissingLast) {

        @Override
        protected BinaryDocValues getBinaryDocValues(LeafReaderContext context, String field) throws IOException {
            final SortedBinaryDocValues values = getValues(context);
            final BinaryDocValues selectedValues;
            if (nested == null) {
                selectedValues = sortMode.select(values, nonNullMissingBytes);
            } else {
                final BitSet rootDocs = nested.rootDocs(context);
                final DocIdSetIterator innerDocs = nested.innerDocs(context);
                selectedValues = sortMode.select(values, nonNullMissingBytes, rootDocs, innerDocs, context.reader().maxDoc());
            }
            return selectedValues;
        }

        @Override
        protected Bits getDocsWithField(LeafReaderContext context, String field) throws IOException {
            return new Bits.MatchAllBits(context.reader().maxDoc());
        }

        @Override
        protected boolean isNull(int doc, BytesRef term) {
            return term == nullPlaceHolder;
        }

        @Override
        public void setScorer(Scorer scorer) {
            BytesRefFieldComparatorSource.this.setScorer(scorer);
        }
    };
}

Also used : RandomAccessOrds(org.apache.lucene.index.RandomAccessOrds) BitSet(org.apache.lucene.util.BitSet) Scorer(org.apache.lucene.search.Scorer) SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues) IndexOrdinalsFieldData(org.elasticsearch.index.fielddata.IndexOrdinalsFieldData) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) BytesRef(org.apache.lucene.util.BytesRef)

Example 37 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.

the class AbstractAtomicParentChildFieldData method getBytesValues.

@Override
public final SortedBinaryDocValues getBytesValues() {
    return new SortedBinaryDocValues() {

        private final BytesRef[] terms = new BytesRef[2];

        private int count;

        @Override
        public void setDocument(int docId) {
            count = 0;
            for (String type : types()) {
                final SortedDocValues values = getOrdinalsValues(type);
                final int ord = values.getOrd(docId);
                if (ord >= 0) {
                    terms[count++] = values.lookupOrd(ord);
                }
            }
            assert count <= 2 : "A single doc can potentially be both parent and child, so the maximum allowed values is 2";
            if (count > 1) {
                int cmp = terms[0].compareTo(terms[1]);
                if (cmp > 0) {
                    ArrayUtil.swap(terms, 0, 1);
                } else if (cmp == 0) {
                    // If the id is the same between types the only omit one. For example: a doc has parent#1 in _uid field and has grand_parent#1 in _parent field.
                    count = 1;
                }
            }
        }

        @Override
        public int count() {
            return count;
        }

        @Override
        public BytesRef valueAt(int index) {
            return terms[index];
        }
    };
}

Also used : SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues)

Example 38 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.

the class MultiValueModeTests method testSingleValuedOrds.

public void testSingleValuedOrds() throws Exception {
    final int numDocs = scaledRandomIntBetween(1, 100);
    final int[] array = new int[numDocs];
    for (int i = 0; i < array.length; ++i) {
        if (randomBoolean()) {
            array[i] = randomInt(1000);
        } else {
            array[i] = -1;
        }
    }
    final SortedDocValues singleValues = new SortedDocValues() {

        @Override
        public int getOrd(int docID) {
            return array[docID];
        }

        @Override
        public BytesRef lookupOrd(int ord) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getValueCount() {
            return 1 << 20;
        }
    };
    final RandomAccessOrds multiValues = (RandomAccessOrds) DocValues.singleton(singleValues);
    verify(multiValues, numDocs);
    final FixedBitSet rootDocs = randomRootDocs(numDocs);
    final FixedBitSet innerDocs = randomInnerDocs(rootDocs);
    verify(multiValues, numDocs, rootDocs, innerDocs);
}

Also used : RandomAccessOrds(org.apache.lucene.index.RandomAccessOrds) FixedBitSet(org.apache.lucene.util.FixedBitSet) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 39 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.

the class MultiValueModeTests method verify.

private void verify(RandomAccessOrds values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException {
    for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX }) {
        final SortedDocValues selected = mode.select(values, rootDocs, new BitSetIterator(innerDocs, 0L));
        int prevRoot = -1;
        for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {
            final int actual = selected.getOrd(root);
            int expected = -1;
            for (int child = innerDocs.nextSetBit(prevRoot + 1); child != -1 && child < root; child = innerDocs.nextSetBit(child + 1)) {
                values.setDocument(child);
                for (int j = 0; j < values.cardinality(); ++j) {
                    if (expected == -1) {
                        expected = (int) values.ordAt(j);
                    } else {
                        if (mode == MultiValueMode.MIN) {
                            expected = Math.min(expected, (int) values.ordAt(j));
                        } else if (mode == MultiValueMode.MAX) {
                            expected = Math.max(expected, (int) values.ordAt(j));
                        }
                    }
                }
            }
            assertEquals(mode.toString() + " docId=" + root, expected, actual);
            prevRoot = root;
        }
    }
}

Also used : BitSetIterator(org.apache.lucene.util.BitSetIterator) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 40 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class TestFieldCache method test.

public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, doubles.nextDoc());
        assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
    }
    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, longs.nextDoc());
        assertEquals(Long.MAX_VALUE - i, longs.longValue());
    }
    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, ints.nextDoc());
        assertEquals(Integer.MAX_VALUE - i, ints.longValue());
    }
    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, floats.nextDoc());
        assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
    }
    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertTrue(docsWithField.get(i));
    }
    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertEquals(i % 2 == 0, docsWithField.get(i));
    }
    // getTermsIndex
    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        final String s;
        if (i > termsIndex.docID()) {
            termsIndex.advance(i);
        }
        if (i == termsIndex.docID()) {
            s = termsIndex.binaryValue().utf8ToString();
        } else {
            s = null;
        }
        assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }
    int nTerms = termsIndex.getValueCount();
    TermsEnum tenum = termsIndex.termsEnum();
    for (int i = 0; i < nTerms; i++) {
        BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
        final BytesRef val = termsIndex.lookupOrd(i);
        // System.out.println("i="+i);
        assertEquals(val, val1);
    }
    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
        int k = random().nextInt(nTerms);
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }
    for (int i = 0; i < nTerms; i++) {
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }
    // test bad field
    termsIndex = cache.getTermsIndex(reader, "bogusfield");
    // getTerms
    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        if (terms.docID() < i) {
            terms.nextDoc();
        }
        if (terms.docID() == i) {
            assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
        } else {
            assertNull(unicodeStrings[i]);
        }
    }
    // test bad field
    terms = cache.getTerms(reader, "bogusfield");
    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    assertEquals(numEntries, cache.getCacheEntries().length);
    for (int i = 0; i < NUM_DOCS; i++) {
        // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
        for (BytesRef v : values) {
            if (v == null) {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            if (i > termOrds.docID()) {
                assertEquals(i, termOrds.nextDoc());
            }
            long ord = termOrds.nextOrd();
            assert ord != SortedSetDocValues.NO_MORE_ORDS;
            BytesRef scratch = termOrds.lookupOrd(ord);
            assertEquals(v, scratch);
        }
        if (i == termOrds.docID()) {
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
        }
    }
    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
    assertTrue(termOrds.getValueCount() == 0);
    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheHelper().getKey());
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) ArrayList(java.util.ArrayList) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Bits(org.apache.lucene.util.Bits) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

SortedDocValues (org.apache.lucene.index.SortedDocValues)66 BytesRef (org.apache.lucene.util.BytesRef)32 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)27 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)23 LeafReader (org.apache.lucene.index.LeafReader)22 Document (org.apache.lucene.document.Document)21 NumericDocValues (org.apache.lucene.index.NumericDocValues)15 Directory (org.apache.lucene.store.Directory)15 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)14 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)13 IOException (java.io.IOException)12 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)12 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)10 MultiDocValues (org.apache.lucene.index.MultiDocValues)10 ArrayList (java.util.ArrayList)9 IndexReader (org.apache.lucene.index.IndexReader)9 OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)9 DoublePoint (org.apache.lucene.document.DoublePoint)8 FloatPoint (org.apache.lucene.document.FloatPoint)8 IntPoint (org.apache.lucene.document.IntPoint)8