Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache: the class TestFieldCacheVsDocValues, method testHugeBinaryValueLimit.
// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
public void testHugeBinaryValueLimit() throws Exception {
  // We only test DVFormats that have a limit
  assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
  Analyzer analyzer = new MockAnalyzer(random());
  // FSDirectory because SimpleText will consume gobs of
  // space when storing big binary values:
  Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
  boolean doFixed = random().nextBoolean();
  int numDocs;
  int fixedLength = 0;
  if (doFixed) {
    // Sometimes make all values fixed length, since some
    // codecs have different code paths for this:
    numDocs = TestUtil.nextInt(random(), 10, 20);
    fixedLength = LARGE_BINARY_FIELD_LENGTH;
  } else {
    numDocs = TestUtil.nextInt(random(), 100, 200);
  }
  IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
  List<byte[]> docBytes = new ArrayList<>();
  long totalBytes = 0;
  for (int docID = 0; docID < numDocs; docID++) {
    // We don't use RandomIndexWriter because it might add
    // more doc values than we expect!
    // Must be > 64KB in size to ensure more than 2 pages in
    // PagedBytes would be needed:
    int numBytes;
    if (doFixed) {
      numBytes = fixedLength;
    } else if (docID == 0 || random().nextInt(5) == 3) {
      numBytes = LARGE_BINARY_FIELD_LENGTH;
    } else {
      numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
    }
    totalBytes += numBytes;
    if (totalBytes > 5 * 1024 * 1024) {
      break;
    }
    byte[] bytes = new byte[numBytes];
    random().nextBytes(bytes);
    docBytes.add(bytes);
    Document doc = new Document();
    BytesRef b = new BytesRef(bytes);
    b.length = bytes.length;
    doc.add(new BinaryDocValuesField("field", b));
    doc.add(new StringField("id", "" + docID, Field.Store.YES));
    w.addDocument(doc);
  }
  DirectoryReader r = DirectoryReader.open(w);
  w.close();
  LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
  TestUtil.checkReader(ar);
  BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
  for (int docID = 0; docID < docBytes.size(); docID++) {
    assertEquals(docID, s.nextDoc());
    Document doc = ar.document(docID);
    BytesRef bytes = s.binaryValue();
    byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
    assertEquals(expected.length, bytes.length);
    assertEquals(new BytesRef(expected), bytes);
  }
  ar.close();
  d.close();
}
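The verification loop above uses the iterator-style BinaryDocValues API (Lucene 7 and later), where the caller must position the iterator with nextDoc() or advance() before asking for binaryValue(). A minimal sketch of that read pattern outside a test, assuming an open LeafReader named reader and a binary doc-values field named "field":

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

// Sketch: visit every document that has a value for "field".
BinaryDocValues dv = DocValues.getBinary(reader, "field");
for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
  BytesRef value = dv.binaryValue(); // only valid while positioned on doc
  // ... consume value before the next nextDoc() call ...
}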
Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache: the class TestFieldCacheVsDocValues, method testHugeBinaryValues.
// LUCENE-4853
public void testHugeBinaryValues() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // FSDirectory because SimpleText will consume gobs of
  // space when storing big binary values:
  Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
  boolean doFixed = random().nextBoolean();
  int numDocs;
  int fixedLength = 0;
  if (doFixed) {
    // Sometimes make all values fixed length, since some
    // codecs have different code paths for this:
    numDocs = TestUtil.nextInt(random(), 10, 20);
    fixedLength = TestUtil.nextInt(random(), 65537, 256 * 1024);
  } else {
    numDocs = TestUtil.nextInt(random(), 100, 200);
  }
  IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
  List<byte[]> docBytes = new ArrayList<>();
  long totalBytes = 0;
  for (int docID = 0; docID < numDocs; docID++) {
    // We don't use RandomIndexWriter because it might add
    // more doc values than we expect!
    // Must be > 64KB in size to ensure more than 2 pages in
    // PagedBytes would be needed:
    int numBytes;
    if (doFixed) {
      numBytes = fixedLength;
    } else if (docID == 0 || random().nextInt(5) == 3) {
      numBytes = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024);
    } else {
      numBytes = TestUtil.nextInt(random(), 1, 1024 * 1024);
    }
    totalBytes += numBytes;
    if (totalBytes > 5 * 1024 * 1024) {
      break;
    }
    byte[] bytes = new byte[numBytes];
    random().nextBytes(bytes);
    docBytes.add(bytes);
    Document doc = new Document();
    BytesRef b = new BytesRef(bytes);
    b.length = bytes.length;
    doc.add(new BinaryDocValuesField("field", b));
    doc.add(new StringField("id", "" + docID, Field.Store.YES));
    try {
      w.addDocument(doc);
    } catch (IllegalArgumentException iae) {
      if (iae.getMessage().indexOf("is too large") == -1) {
        throw iae;
      } else {
        // OK: some codecs can't handle binary DV > 32K
        assertFalse(codecAcceptsHugeBinaryValues("field"));
        w.rollback();
        d.close();
        return;
      }
    }
  }
  DirectoryReader r;
  try {
    r = DirectoryReader.open(w);
  } catch (IllegalArgumentException iae) {
    if (iae.getMessage().indexOf("is too large") == -1) {
      throw iae;
    } else {
      // OK: some codecs can't handle binary DV > 32K
      assertFalse(codecAcceptsHugeBinaryValues("field"));
      w.rollback();
      d.close();
      return;
    }
  }
  w.close();
  LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
  TestUtil.checkReader(ar);
  BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
  for (int docID = 0; docID < docBytes.size(); docID++) {
    Document doc = ar.document(docID);
    assertEquals(docID, s.nextDoc());
    BytesRef bytes = s.binaryValue();
    byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
    assertEquals(expected.length, bytes.length);
    assertEquals(new BytesRef(expected), bytes);
  }
  assertTrue(codecAcceptsHugeBinaryValues("field"));
  ar.close();
  d.close();
}
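The try/catch blocks above treat an IllegalArgumentException whose message contains "is too large" as an expected codec limit rather than a test failure. A hedged sketch of that same probe in isolation, assuming an open IndexWriter named writer:

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.util.BytesRef;

// Sketch: probe whether the active codec accepts a binary doc value above 32K.
Document doc = new Document();
doc.add(new BinaryDocValuesField("field", new BytesRef(new byte[64 * 1024])));
try {
  writer.addDocument(doc);
  // codec accepted the oversized value
} catch (IllegalArgumentException iae) {
  if (iae.getMessage().indexOf("is too large") == -1) {
    throw iae; // some unrelated problem
  }
  // codec enforces a maximum binary field length (e.g. the 4.x formats)
}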
Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache: the class DocValuesOrdinalsReader, method getReader.
@Override
public OrdinalsSegmentReader getReader(LeafReaderContext context) throws IOException {
  BinaryDocValues values0 = context.reader().getBinaryDocValues(field);
  if (values0 == null) {
    values0 = DocValues.emptyBinary();
  }
  final BinaryDocValues values = values0;
  return new OrdinalsSegmentReader() {
    private int lastDocID;

    @Override
    public void get(int docID, IntsRef ordinals) throws IOException {
      if (docID < lastDocID) {
        throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
      }
      lastDocID = docID;
      if (docID > values.docID()) {
        values.advance(docID);
      }
      final BytesRef bytes;
      if (values.docID() == docID) {
        bytes = values.binaryValue();
      } else {
        bytes = new BytesRef(BytesRef.EMPTY_BYTES);
      }
      decode(bytes, ordinals);
    }
  };
}
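Callers drive the returned OrdinalsSegmentReader with ascending doc IDs, which is what the lastDocID assertion enforces. A minimal usage sketch, assuming a LeafReaderContext named context and the default facet field name "$facets":

import org.apache.lucene.facet.taxonomy.DocValuesOrdinalsReader;
import org.apache.lucene.facet.taxonomy.OrdinalsReader;
import org.apache.lucene.util.IntsRef;

// Sketch: read the category ordinals of every document in one segment.
OrdinalsReader ordsReader = new DocValuesOrdinalsReader("$facets");
OrdinalsReader.OrdinalsSegmentReader segReader = ordsReader.getReader(context);
IntsRef ords = new IntsRef(32);
for (int docID = 0; docID < context.reader().maxDoc(); docID++) { // ascending order required
  segReader.get(docID, ords);
  for (int i = 0; i < ords.length; i++) {
    int ord = ords.ints[ords.offset + i];
    // ... aggregate ord ...
  }
}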
Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache: the class FastTaxonomyFacetCounts, method count.
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
  for (MatchingDocs hits : matchingDocs) {
    BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
    if (dv == null) {
      // this reader does not have DocValues for the requested category list
      continue;
    }
    DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      final BytesRef bytesRef = dv.binaryValue();
      byte[] bytes = bytesRef.bytes;
      int end = bytesRef.offset + bytesRef.length;
      int ord = 0;
      int offset = bytesRef.offset;
      int prev = 0;
      while (offset < end) {
        byte b = bytes[offset++];
        if (b >= 0) {
          // high bit clear: final byte of this vint; add the delta to the previous ordinal
          prev = ord = ((ord << 7) | b) + prev;
          ++values[ord];
          ord = 0;
        } else {
          // high bit set: continuation byte, accumulate 7 more bits
          ord = (ord << 7) | (b & 0x7F);
        }
      }
    }
  }
  rollup();
}
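The inner while loop decodes ordinals stored as delta-encoded, most-significant-first variable-length integers: continuation bytes carry the high bit, and the final byte of each vint (high bit clear) triggers adding the accumulated delta to the previous ordinal. A hedged sketch of an encoder producing this byte layout (an illustrative helper, not the actual Lucene writer):

import java.io.ByteArrayOutputStream;

// Sketch: delta + MSB-first vint encoding matching the decode loop above.
// Ordinals must be sorted ascending so that every delta is non-negative.
static byte[] encodeOrdinals(int[] sortedOrds) {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  int prev = 0;
  for (int ord : sortedOrds) {
    int delta = ord - prev;
    prev = ord;
    int shift = 28; // a 32-bit int needs at most five 7-bit groups
    while (shift > 0 && (delta >>> shift) == 0) {
      shift -= 7;
    }
    while (shift > 0) {
      out.write(((delta >>> shift) & 0x7F) | 0x80); // continuation byte: high bit set
      shift -= 7;
    }
    out.write(delta & 0x7F); // final byte: high bit clear
  }
  return out.toByteArray();
}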
Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache: the class TaxonomyFacetSumFloatAssociations, method sumValues.
private final void sumValues(List<MatchingDocs> matchingDocs) throws IOException {
  //System.out.println("count matchingDocs=" + matchingDocs + " facetsField=" + facetsFieldName);
  for (MatchingDocs hits : matchingDocs) {
    BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
    if (dv == null) {
      // this reader does not have DocValues for the requested category list
      continue;
    }
    DocIdSetIterator docs = hits.bits.iterator();
    int doc;
    while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      // BytesRef getAssociation()?
      if (dv.docID() < doc) {
        dv.advance(doc);
      }
      if (dv.docID() == doc) {
        final BytesRef bytesRef = dv.binaryValue();
        byte[] bytes = bytesRef.bytes;
        int end = bytesRef.offset + bytesRef.length;
        int offset = bytesRef.offset;
        while (offset < end) {
          // four-byte big-endian category ordinal
          int ord = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16) | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
          offset += 4;
          // four-byte big-endian float bit pattern
          int value = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16) | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
          offset += 4;
          values[ord] += Float.intBitsToFloat(value);
        }
      }
    }
  }
}
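Each association here is a fixed eight-byte record: a four-byte big-endian category ordinal followed by the four-byte IEEE 754 bit pattern of the float value. A hedged sketch of producing that layout (the helper name is illustrative):

import java.nio.ByteBuffer;

// Sketch: pack (ordinal, float) pairs in the big-endian layout the loop above reads.
static byte[] encodeFloatAssociations(int[] ords, float[] vals) {
  ByteBuffer buf = ByteBuffer.allocate(ords.length * 8); // big-endian by default
  for (int i = 0; i < ords.length; i++) {
    buf.putInt(ords[i]);   // four-byte ordinal
    buf.putFloat(vals[i]); // four-byte float bits, read back via Float.intBitsToFloat
  }
  return buf.array();
}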