Search in sources :

Example 26 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestFieldCacheVsDocValues method testHugeBinaryValueLimit.

// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
/**
 * Indexes binary doc values of at most {@code LARGE_BINARY_FIELD_LENGTH} bytes and verifies
 * that every value read back through {@code FieldCache.DEFAULT.getTerms} equals the bytes
 * that were indexed for the same stored {@code id}.
 *
 * <p>The test is skipped when {@code codecAcceptsHugeBinaryValues("field")} returns true.
 *
 * @throws Exception if indexing or reading fails
 */
public void testHugeBinaryValueLimit() throws Exception {
    // Only doc-values formats that enforce a length limit are exercised here
    assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
    Analyzer analyzer = new MockAnalyzer(random());
    // An FSDirectory is used because SimpleText consumes gobs of
    // space when it stores big binary values:
    Directory dir = newFSDirectory(createTempDir("hugeBinaryValues"));
    // Sometimes give every value the same length, since some
    // codecs take a different code path in that case:
    final boolean fixedSize = random().nextBoolean();
    final int docCount = fixedSize ? TestUtil.nextInt(random(), 10, 20) : TestUtil.nextInt(random(), 100, 200);
    final int fixedLength = fixedSize ? LARGE_BINARY_FIELD_LENGTH : 0;
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    List<byte[]> expectedValues = new ArrayList<>();
    // Stop adding documents once this many value bytes have been generated
    final long byteBudget = 5 * 1024 * 1024;
    long bytesSoFar = 0;
    // A plain IndexWriter is used on purpose: RandomIndexWriter might add
    // more docvalues than we expect.
    for (int id = 0; id < docCount; id++) {
        final int size;
        if (fixedSize) {
            size = fixedLength;
        } else if (id == 0 || random().nextInt(5) == 3) {
            // The first document, and roughly one in five afterwards, uses the maximum length
            size = LARGE_BINARY_FIELD_LENGTH;
        } else {
            size = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
        }
        bytesSoFar += size;
        if (bytesSoFar > byteBudget) {
            break;
        }
        byte[] payload = new byte[size];
        random().nextBytes(payload);
        expectedValues.add(payload);
        BytesRef ref = new BytesRef(payload);
        ref.length = payload.length;
        Document doc = new Document();
        doc.add(new BinaryDocValuesField("field", ref));
        // The stored id lets the verification loop look up the expected bytes
        doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
        writer.addDocument(doc);
    }
    DirectoryReader reader = DirectoryReader.open(writer);
    writer.close();
    LeafReader leaf = SlowCompositeReaderWrapper.wrap(reader);
    TestUtil.checkReader(leaf);
    BinaryDocValues dv = FieldCache.DEFAULT.getTerms(leaf, "field");
    final int indexedCount = expectedValues.size();
    for (int id = 0; id < indexedCount; id++) {
        assertEquals(id, dv.nextDoc());
        Document stored = leaf.document(id);
        BytesRef actual = dv.binaryValue();
        byte[] expected = expectedValues.get(Integer.parseInt(stored.get("id")));
        assertEquals(expected.length, actual.length);
        assertEquals(new BytesRef(expected), actual);
    }
    leaf.close();
    dir.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) StringField(org.apache.lucene.document.StringField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 27 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestFieldCacheVsDocValues method testHugeBinaryValues.

// LUCENE-4853
/**
 * Indexes binary doc values larger than 64KB and verifies that they either round-trip
 * through {@code FieldCache.DEFAULT.getTerms} unchanged, or are rejected with an
 * {@link IllegalArgumentException} whose message contains "is too large".
 *
 * <p>A rejection is only accepted when {@code codecAcceptsHugeBinaryValues("field")} is
 * false; a successful round trip asserts that it is true.
 *
 * @throws Exception if indexing or reading fails for any other reason
 */
public void testHugeBinaryValues() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    // An FSDirectory is used because SimpleText consumes gobs of
    // space when it stores big binary values:
    Directory dir = newFSDirectory(createTempDir("hugeBinaryValues"));
    // Sometimes give every value the same length, since some
    // codecs take a different code path in that case:
    final boolean fixedSize = random().nextBoolean();
    final int docCount;
    final int fixedLength;
    if (fixedSize) {
        docCount = TestUtil.nextInt(random(), 10, 20);
        fixedLength = TestUtil.nextInt(random(), 65537, 256 * 1024);
    } else {
        docCount = TestUtil.nextInt(random(), 100, 200);
        fixedLength = 0;
    }
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    List<byte[]> expectedValues = new ArrayList<>();
    // Stop adding documents once this many value bytes have been generated
    final long byteBudget = 5 * 1024 * 1024;
    long bytesSoFar = 0;
    // A plain IndexWriter is used on purpose: RandomIndexWriter might add
    // more docvalues than we expect.
    for (int id = 0; id < docCount; id++) {
        final int size;
        if (fixedSize) {
            size = fixedLength;
        } else if (id == 0 || random().nextInt(5) == 3) {
            // Must be > 64KB in size to ensure more than 2 pages in
            // PagedBytes would be needed:
            size = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024);
        } else {
            size = TestUtil.nextInt(random(), 1, 1024 * 1024);
        }
        bytesSoFar += size;
        if (bytesSoFar > byteBudget) {
            break;
        }
        byte[] payload = new byte[size];
        random().nextBytes(payload);
        expectedValues.add(payload);
        BytesRef ref = new BytesRef(payload);
        ref.length = payload.length;
        Document doc = new Document();
        doc.add(new BinaryDocValuesField("field", ref));
        // The stored id lets the verification loop look up the expected bytes
        doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
        try {
            writer.addDocument(doc);
        } catch (IllegalArgumentException iae) {
            if (iae.getMessage().contains("is too large")) {
                // OK: some codecs can't handle binary DV > 32K
                assertFalse(codecAcceptsHugeBinaryValues("field"));
                writer.rollback();
                dir.close();
                return;
            }
            throw iae;
        }
    }
    final DirectoryReader reader;
    try {
        reader = DirectoryReader.open(writer);
    } catch (IllegalArgumentException iae) {
        if (iae.getMessage().contains("is too large")) {
            // OK: some codecs can't handle binary DV > 32K
            assertFalse(codecAcceptsHugeBinaryValues("field"));
            writer.rollback();
            dir.close();
            return;
        }
        throw iae;
    }
    writer.close();
    LeafReader leaf = SlowCompositeReaderWrapper.wrap(reader);
    TestUtil.checkReader(leaf);
    BinaryDocValues dv = FieldCache.DEFAULT.getTerms(leaf, "field");
    final int indexedCount = expectedValues.size();
    for (int id = 0; id < indexedCount; id++) {
        Document stored = leaf.document(id);
        assertEquals(id, dv.nextDoc());
        BytesRef actual = dv.binaryValue();
        byte[] expected = expectedValues.get(Integer.parseInt(stored.get("id")));
        assertEquals(expected.length, actual.length);
        assertEquals(new BytesRef(expected), actual);
    }
    // Everything was indexed and read back, so the codec must accept huge values
    assertTrue(codecAcceptsHugeBinaryValues("field"));
    leaf.close();
    dir.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) StringField(org.apache.lucene.document.StringField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 28 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class DocValuesOrdinalsReader method getReader.

/**
 * Returns a per-segment reader that decodes ordinals from the binary doc values of
 * {@code field} in the given leaf.
 *
 * <p>If the leaf has no binary doc values for the field, {@code DocValues.emptyBinary()}
 * is used instead. The returned reader must be called with non-decreasing doc IDs.
 *
 * @param context the leaf whose doc values are read
 * @return a reader that fills an {@code IntsRef} with the ordinals of a document
 * @throws IOException if the doc values cannot be obtained
 */
@Override
public OrdinalsSegmentReader getReader(LeafReaderContext context) throws IOException {
    final BinaryDocValues fromLeaf = context.reader().getBinaryDocValues(field);
    // Substitute an empty instance so the reader below never needs a null check
    final BinaryDocValues values = fromLeaf != null ? fromLeaf : DocValues.emptyBinary();
    return new OrdinalsSegmentReader() {

        // Highest doc ID requested so far; used to reject out-of-order calls
        private int previousDocID;

        @Override
        public void get(int docID, IntsRef ordinals) throws IOException {
            if (previousDocID > docID) {
                throw new AssertionError("docs out of order: lastDocID=" + previousDocID + " vs docID=" + docID);
            }
            previousDocID = docID;
            // Only move the iterator forward when it is behind the requested doc
            if (values.docID() < docID) {
                values.advance(docID);
            }
            // A document without a value decodes from an empty byte sequence
            final BytesRef bytes = values.docID() == docID
                ? values.binaryValue()
                : new BytesRef(BytesRef.EMPTY_BYTES);
            decode(bytes, ordinals);
        }
    };
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) BytesRef(org.apache.lucene.util.BytesRef)

Example 29 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class FastTaxonomyFacetCounts method count.

/**
 * Increments {@code values[ord]} for every ordinal encoded in the binary doc values of
 * each matching document, then calls {@code rollup()}.
 *
 * <p>Each value is decoded as a variable-length integer: bytes with the high bit set
 * contribute their low 7 bits, and a byte with the high bit clear ends the integer. Every
 * decoded integer is added to the previous ordinal of the same document to get the next
 * ordinal.
 *
 * @param matchingDocs per-segment hits to count
 * @throws IOException if reading doc values fails
 */
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    for (MatchingDocs segmentHits : matchingDocs) {
        BinaryDocValues ordinalsDV = segmentHits.context.reader().getBinaryDocValues(indexFieldName);
        // A null result means this reader has no DocValues for the requested category list
        if (ordinalsDV != null) {
            // Visit only documents that are both hits and have a doc value
            DocIdSetIterator matches = ConjunctionDISI.intersectIterators(Arrays.asList(segmentHits.bits.iterator(), ordinalsDV));
            while (matches.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                final BytesRef encoded = ordinalsDV.binaryValue();
                final byte[] buffer = encoded.bytes;
                final int limit = encoded.offset + encoded.length;
                int pending = 0;
                int runningOrd = 0;
                for (int pos = encoded.offset; pos < limit; pos++) {
                    final byte b = buffer[pos];
                    if (b < 0) {
                        // Continuation byte: accumulate its low 7 bits
                        pending = (pending << 7) | (b & 0x7F);
                    } else {
                        // Final byte: complete the delta and apply it to the previous ordinal
                        runningOrd += (pending << 7) | b;
                        ++values[runningOrd];
                        pending = 0;
                    }
                }
            }
        }
    }
    rollup();
}
Also used : MatchingDocs(org.apache.lucene.facet.FacetsCollector.MatchingDocs) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) BytesRef(org.apache.lucene.util.BytesRef)

Example 30 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TaxonomyFacetSumFloatAssociations method sumValues.

/**
 * Adds the float association of every (ordinal, value) pair found in the binary doc
 * values of each matching document into {@code values[ordinal]}.
 *
 * <p>The doc value is read as consecutive 8-byte entries: a 4-byte big-endian ordinal
 * followed by a 4-byte big-endian int that is converted with
 * {@link Float#intBitsToFloat(int)}.
 *
 * @param matchingDocs per-segment hits to aggregate
 * @throws IOException if reading doc values fails
 */
private final void sumValues(List<MatchingDocs> matchingDocs) throws IOException {
    for (MatchingDocs segmentHits : matchingDocs) {
        BinaryDocValues associations = segmentHits.context.reader().getBinaryDocValues(indexFieldName);
        if (associations == null) {
            // this reader does not have DocValues for the requested category list
            continue;
        }
        DocIdSetIterator hitDocs = segmentHits.bits.iterator();
        for (int doc = hitDocs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = hitDocs.nextDoc()) {
            // Catch the doc values iterator up to the current hit if it is behind
            if (associations.docID() < doc) {
                associations.advance(doc);
            }
            if (associations.docID() != doc) {
                // This hit has no associations
                continue;
            }
            final BytesRef encoded = associations.binaryValue();
            final byte[] buffer = encoded.bytes;
            final int limit = encoded.offset + encoded.length;
            for (int pos = encoded.offset; pos < limit; pos += 8) {
                final int ord = ((buffer[pos] & 0xFF) << 24)
                    | ((buffer[pos + 1] & 0xFF) << 16)
                    | ((buffer[pos + 2] & 0xFF) << 8)
                    | (buffer[pos + 3] & 0xFF);
                final int floatBits = ((buffer[pos + 4] & 0xFF) << 24)
                    | ((buffer[pos + 5] & 0xFF) << 16)
                    | ((buffer[pos + 6] & 0xFF) << 8)
                    | (buffer[pos + 7] & 0xFF);
                values[ord] += Float.intBitsToFloat(floatBits);
            }
        }
    }
}
Also used : MatchingDocs(org.apache.lucene.facet.FacetsCollector.MatchingDocs) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

BinaryDocValues (org.apache.lucene.index.BinaryDocValues): 37; BytesRef (org.apache.lucene.util.BytesRef): 29; Document (org.apache.lucene.document.Document): 13; LeafReader (org.apache.lucene.index.LeafReader): 12; SortedDocValues (org.apache.lucene.index.SortedDocValues): 12; NumericDocValues (org.apache.lucene.index.NumericDocValues): 11; SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 11; Directory (org.apache.lucene.store.Directory): 10; ArrayList (java.util.ArrayList): 9; BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField): 9; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 9; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 7; DirectoryReader (org.apache.lucene.index.DirectoryReader): 7; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 7; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 6; Bits (org.apache.lucene.util.Bits): 6; IOException (java.io.IOException): 5; SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 5; IndexReader (org.apache.lucene.index.IndexReader): 5; SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues): 5