Examples with BinaryDocValues - org.apache.lucene.index.BinaryDocValues

Example 16 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project elasticsearch by elastic.

the class BytesRefFieldComparatorSource method newComparator.

@Override
public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
    assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
    final boolean sortMissingLast = sortMissingLast(missingValue) ^ reversed;
    final BytesRef missingBytes = (BytesRef) missingObject(missingValue, reversed);
    if (indexFieldData instanceof IndexOrdinalsFieldData) {
        return new FieldComparator.TermOrdValComparator(numHits, null, sortMissingLast) {

            @Override
            protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
                final RandomAccessOrds values = ((IndexOrdinalsFieldData) indexFieldData).load(context).getOrdinalsValues();
                final SortedDocValues selectedValues;
                if (nested == null) {
                    selectedValues = sortMode.select(values);
                } else {
                    final BitSet rootDocs = nested.rootDocs(context);
                    final DocIdSetIterator innerDocs = nested.innerDocs(context);
                    selectedValues = sortMode.select(values, rootDocs, innerDocs);
                }
                if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
                    return selectedValues;
                } else {
                    return new ReplaceMissing(selectedValues, missingBytes);
                }
            }

            @Override
            public void setScorer(Scorer scorer) {
                BytesRefFieldComparatorSource.this.setScorer(scorer);
            }
        };
    }
    final BytesRef nullPlaceHolder = new BytesRef();
    final BytesRef nonNullMissingBytes = missingBytes == null ? nullPlaceHolder : missingBytes;
    return new FieldComparator.TermValComparator(numHits, null, sortMissingLast) {

        @Override
        protected BinaryDocValues getBinaryDocValues(LeafReaderContext context, String field) throws IOException {
            final SortedBinaryDocValues values = getValues(context);
            final BinaryDocValues selectedValues;
            if (nested == null) {
                selectedValues = sortMode.select(values, nonNullMissingBytes);
            } else {
                final BitSet rootDocs = nested.rootDocs(context);
                final DocIdSetIterator innerDocs = nested.innerDocs(context);
                selectedValues = sortMode.select(values, nonNullMissingBytes, rootDocs, innerDocs, context.reader().maxDoc());
            }
            return selectedValues;
        }

        @Override
        protected Bits getDocsWithField(LeafReaderContext context, String field) throws IOException {
            return new Bits.MatchAllBits(context.reader().maxDoc());
        }

        @Override
        protected boolean isNull(int doc, BytesRef term) {
            return term == nullPlaceHolder;
        }

        @Override
        public void setScorer(Scorer scorer) {
            BytesRefFieldComparatorSource.this.setScorer(scorer);
        }
    };
}

Also used : RandomAccessOrds(org.apache.lucene.index.RandomAccessOrds) BitSet(org.apache.lucene.util.BitSet) Scorer(org.apache.lucene.search.Scorer) SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues) IndexOrdinalsFieldData(org.elasticsearch.index.fielddata.IndexOrdinalsFieldData) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) BytesRef(org.apache.lucene.util.BytesRef)

Example 17 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestFieldCache method test.

public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, doubles.nextDoc());
        assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
    }
    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, longs.nextDoc());
        assertEquals(Long.MAX_VALUE - i, longs.longValue());
    }
    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, ints.nextDoc());
        assertEquals(Integer.MAX_VALUE - i, ints.longValue());
    }
    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, floats.nextDoc());
        assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
    }
    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertTrue(docsWithField.get(i));
    }
    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertEquals(i % 2 == 0, docsWithField.get(i));
    }
    // getTermsIndex
    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        final String s;
        if (i > termsIndex.docID()) {
            termsIndex.advance(i);
        }
        if (i == termsIndex.docID()) {
            s = termsIndex.binaryValue().utf8ToString();
        } else {
            s = null;
        }
        assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }
    int nTerms = termsIndex.getValueCount();
    TermsEnum tenum = termsIndex.termsEnum();
    for (int i = 0; i < nTerms; i++) {
        BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
        final BytesRef val = termsIndex.lookupOrd(i);
        // System.out.println("i="+i);
        assertEquals(val, val1);
    }
    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
        int k = random().nextInt(nTerms);
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }
    for (int i = 0; i < nTerms; i++) {
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }
    // test bad field
    termsIndex = cache.getTermsIndex(reader, "bogusfield");
    // getTerms
    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        if (terms.docID() < i) {
            terms.nextDoc();
        }
        if (terms.docID() == i) {
            assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
        } else {
            assertNull(unicodeStrings[i]);
        }
    }
    // test bad field
    terms = cache.getTerms(reader, "bogusfield");
    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    assertEquals(numEntries, cache.getCacheEntries().length);
    for (int i = 0; i < NUM_DOCS; i++) {
        // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
        for (BytesRef v : values) {
            if (v == null) {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            if (i > termOrds.docID()) {
                assertEquals(i, termOrds.nextDoc());
            }
            long ord = termOrds.nextOrd();
            assert ord != SortedSetDocValues.NO_MORE_ORDS;
            BytesRef scratch = termOrds.lookupOrd(ord);
            assertEquals(v, scratch);
        }
        if (i == termOrds.docID()) {
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
        }
    }
    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
    assertTrue(termOrds.getValueCount() == 0);
    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheHelper().getKey());
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) ArrayList(java.util.ArrayList) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Bits(org.apache.lucene.util.Bits) BytesRef(org.apache.lucene.util.BytesRef)

Example 18 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestFieldCache method testNonexistantFields.

public void testNonexistantFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, floats.nextDoc());
    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, doubles.nextDoc());
    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
    assertEquals(NO_MORE_DOCS, binaries.nextDoc());
    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(NO_MORE_DOCS, sorted.nextDoc());
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));
    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 19 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestFieldCache method testNonIndexedFields.

public void testNonIndexedFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new StoredField("bogusbytes", "bogus"));
    doc.add(new StoredField("bogusshorts", "bogus"));
    doc.add(new StoredField("bogusints", "bogus"));
    doc.add(new StoredField("boguslongs", "bogus"));
    doc.add(new StoredField("bogusfloats", "bogus"));
    doc.add(new StoredField("bogusdoubles", "bogus"));
    doc.add(new StoredField("bogusterms", "bogus"));
    doc.add(new StoredField("bogustermsindex", "bogus"));
    doc.add(new StoredField("bogusmultivalued", "bogus"));
    doc.add(new StoredField("bogusbits", "bogus"));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, floats.nextDoc());
    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, doubles.nextDoc());
    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
    assertEquals(NO_MORE_DOCS, binaries.nextDoc());
    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(NO_MORE_DOCS, sorted.nextDoc());
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));
    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) StoredField(org.apache.lucene.document.StoredField) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 20 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class SimpleTextDocValuesWriter method doAddBinaryField.

private void doAddBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    int maxLength = 0;
    BinaryDocValues values = valuesProducer.getBinary(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        maxLength = Math.max(maxLength, values.binaryValue().length);
    }
    writeFieldEntry(field, DocValuesType.BINARY);
    // write maxLength
    SimpleTextUtil.write(data, MAXLENGTH);
    SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
    SimpleTextUtil.writeNewline(data);
    int maxBytesLength = Long.toString(maxLength).length();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < maxBytesLength; i++) {
        sb.append('0');
    }
    // write our pattern for encoding lengths
    SimpleTextUtil.write(data, PATTERN);
    SimpleTextUtil.write(data, sb.toString(), scratch);
    SimpleTextUtil.writeNewline(data);
    final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
    values = valuesProducer.getBinary(field);
    int numDocsWritten = 0;
    for (int i = 0; i < numDocs; ++i) {
        if (values.docID() < i) {
            values.nextDoc();
            assert values.docID() >= i;
        }
        // write length
        final int length = values.docID() != i ? 0 : values.binaryValue().length;
        SimpleTextUtil.write(data, LENGTH);
        SimpleTextUtil.write(data, encoder.format(length), scratch);
        SimpleTextUtil.writeNewline(data);
        // because it escapes:
        if (values.docID() == i) {
            BytesRef value = values.binaryValue();
            data.writeBytes(value.bytes, value.offset, value.length);
        }
        // pad to fit
        for (int j = length; j < maxLength; j++) {
            data.writeByte((byte) ' ');
        }
        SimpleTextUtil.writeNewline(data);
        if (values.docID() != i) {
            SimpleTextUtil.write(data, "F", scratch);
        } else {
            SimpleTextUtil.write(data, "T", scratch);
        }
        SimpleTextUtil.writeNewline(data);
        numDocsWritten++;
    }
    assert numDocs == numDocsWritten;
}

Also used : DecimalFormatSymbols(java.text.DecimalFormatSymbols) DecimalFormat(java.text.DecimalFormat) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

BinaryDocValues (org.apache.lucene.index.BinaryDocValues)37 BytesRef (org.apache.lucene.util.BytesRef)29 Document (org.apache.lucene.document.Document)13 LeafReader (org.apache.lucene.index.LeafReader)12 SortedDocValues (org.apache.lucene.index.SortedDocValues)12 NumericDocValues (org.apache.lucene.index.NumericDocValues)11 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)11 Directory (org.apache.lucene.store.Directory)10 ArrayList (java.util.ArrayList)9 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)9 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)9 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)7 DirectoryReader (org.apache.lucene.index.DirectoryReader)7 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)6 Bits (org.apache.lucene.util.Bits)6 IOException (java.io.IOException)5 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)5 IndexReader (org.apache.lucene.index.IndexReader)5 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)5