use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class FuzzySet method downsize.
/**
 * Attempts to re-project the bits of this set into a smaller bitset.
 *
 * <p>The entries of {@code usableBitSetSizes} are examined in array order and the first one whose
 * projected saturation (number of bits currently set divided by the candidate size) does not
 * exceed {@code targetMaxSaturation} is selected.
 *
 * @param targetMaxSaturation A number between 0 and 1 describing the % of bits that would ideally
 *     be set in the result. Lower values have better accuracy but require more space.
 * @return a smaller FuzzySet, or null when the selected size is not smaller than the current
 *     {@code bloomSize} (for instance because the current set is already over-saturated)
 */
public FuzzySet downsize(float targetMaxSaturation) {
  final int setBitCount = filter.cardinality();

  // Start from the current size; replace it with the first candidate that
  // keeps the projected saturation within the requested limit.
  int targetSize = bloomSize;
  for (int candidateSize : usableBitSetSizes) {
    final float projectedSaturation = (float) setBitCount / (float) candidateSize;
    if (projectedSaturation <= targetMaxSaturation) {
      targetSize = candidateSize;
      break;
    }
  }

  // Nothing smaller was selected: there is no downsized set to build.
  if (targetSize >= bloomSize) {
    return null;
  }

  // The new bitset holds indices 0..targetSize inclusive, hence the +1.
  final FixedBitSet shrunk = new FixedBitSet(targetSize + 1);

  // Walk every set bit of the current filter and fold it into the smaller space.
  int cursor = 0;
  do {
    final int hit = filter.nextSetBit(cursor);
    if (hit == DocIdSetIterator.NO_MORE_DOCS) {
      cursor = hit;
    } else {
      // The target size doubles as a bit mask (acts as a modulo, assuming sizes
      // are of the form 2^n - 1 -- TODO confirm against usableBitSetSizes).
      shrunk.set(hit & targetSize);
      cursor = hit + 1;
    }
  } while (cursor >= 0 && cursor <= bloomSize);

  return new FuzzySet(shrunk, targetSize, hashFunction);
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class FuzzySet method deserialize.
/**
 * Reads a FuzzySet from the given input.
 *
 * <p>Values are consumed in this order: an int version (followed by a string that is read and
 * discarded when the version equals {@code VERSION_SPI}), an int bloom size, an int giving the
 * number of longs, and then that many longs which back the bitset.
 *
 * @param in the input to read from
 * @return a FuzzySet built from the bits, bloom size and version-derived hash function
 * @throws IOException if any read from {@code in} fails
 */
public static FuzzySet deserialize(DataInput in) throws IOException {
  final int formatVersion = in.readInt();
  if (formatVersion == VERSION_SPI) {
    // This version carries an extra string; it is consumed and ignored here.
    in.readString();
  }
  final HashFunction hashFunction = hashFunctionForVersion(formatVersion);

  final int bloomSize = in.readInt();
  final long[] words = new long[in.readInt()];
  int idx = 0;
  while (idx < words.length) {
    words[idx++] = in.readLong();
  }

  // The bitset covers indices 0..bloomSize inclusive, hence bloomSize + 1 bits.
  return new FuzzySet(new FixedBitSet(words, bloomSize + 1), bloomSize, hashFunction);
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class DirectDocValuesProducer method getMissingBits.
/**
 * Returns the bits stored for the given field, loading and caching them on first use.
 *
 * <p>When {@code offset} is -1 a {@code Bits.MatchAllBits} over {@code maxDoc} is returned.
 * Otherwise {@code length / 8} longs are read from a clone of {@code data} starting at
 * {@code offset} and wrapped in a {@code FixedBitSet} of {@code maxDoc} bits. Unless
 * {@code merging} is set, the instance is cached under the field name and its size is added to
 * {@code ramBytesUsed}.
 *
 * @param field the field whose bits are requested; its name is the cache key
 * @param offset position of the bits in {@code data}, or -1 for "match all"
 * @param length number of bytes occupied by the bits; expected to be a multiple of 8
 * @return the bits for the field
 * @throws IOException if seeking or reading the data fails
 * @throws ArithmeticException if {@code length / 8} does not fit in an int
 */
private Bits getMissingBits(FieldInfo field, final long offset, final long length) throws IOException {
  if (offset == -1) {
    return new Bits.MatchAllBits(maxDoc);
  }
  FixedBitSet instance;
  // Lookup, load and cache happen under one lock on this producer.
  synchronized (this) {
    instance = docsWithFieldInstances.get(field.name);
    if (instance == null) {
      IndexInput data = this.data.clone();
      data.seek(offset);
      assert length % 8 == 0;
      // Shift the long first and only then narrow: "(int) length >> 3" casts before
      // shifting, which silently truncates lengths that do not fit in an int.
      long[] bits = new long[Math.toIntExact(length >> 3)];
      for (int i = 0; i < bits.length; i++) {
        bits[i] = data.readLong();
      }
      instance = new FixedBitSet(bits, maxDoc);
      if (!merging) {
        docsWithFieldInstances.put(field.name, instance);
        ramBytesUsed.addAndGet(instance.ramBytesUsed());
      }
    }
  }
  return instance;
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class MemoryDocValuesProducer method getMissingBits.
/**
 * Returns the bits stored for the given field, loading and caching them on first use.
 *
 * <p>When {@code offset} is -1 a {@code Bits.MatchAllBits} over {@code maxDoc} is returned.
 * Otherwise {@code length / 8} longs are read from a clone of {@code data} starting at
 * {@code offset} and wrapped in a {@code FixedBitSet} of {@code maxDoc} bits. Unless
 * {@code merging} is set, the instance is cached under the field name and its size is added to
 * {@code ramBytesUsed}.
 *
 * @param field the field whose bits are requested; its name is the cache key
 * @param offset position of the bits in {@code data}, or -1 for "match all"
 * @param length number of bytes occupied by the bits; expected to be a multiple of 8
 * @return the bits for the field
 * @throws IOException if seeking or reading the data fails
 * @throws ArithmeticException if {@code length / 8} does not fit in an int
 */
private Bits getMissingBits(FieldInfo field, final long offset, final long length) throws IOException {
  if (offset == -1) {
    return new Bits.MatchAllBits(maxDoc);
  }
  FixedBitSet instance;
  // Lookup, load and cache happen under one lock on this producer.
  synchronized (this) {
    instance = docsWithFieldInstances.get(field.name);
    if (instance == null) {
      IndexInput data = this.data.clone();
      data.seek(offset);
      assert length % 8 == 0;
      // Shift the long first and only then narrow: "(int) length >> 3" casts before
      // shifting, which silently truncates lengths that do not fit in an int.
      long[] bits = new long[Math.toIntExact(length >> 3)];
      for (int i = 0; i < bits.length; i++) {
        bits[i] = data.readLong();
      }
      instance = new FixedBitSet(bits, maxDoc);
      if (!merging) {
        docsWithFieldInstances.put(field.name, instance);
        ramBytesUsed.addAndGet(instance.ramBytesUsed());
      }
    }
  }
  return instance;
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class FSTOrdTermsWriter method write.
/**
 * Writes every field that has terms: each term is handed to the postings writer, and the
 * per-field totals are passed to the field's {@code TermsWriter} when the field is finished.
 *
 * @param fields the fields to write; fields whose terms are null are skipped
 * @throws IOException if writing fails
 */
@Override
public void write(Fields fields) throws IOException {
  for (String fieldName : fields) {
    final Terms terms = fields.terms(fieldName);
    if (terms == null) {
      continue;
    }

    final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
    // True when the field's index options are DOCS_AND_FREQS or anything ordered after it.
    final boolean hasFreq =
        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    final TermsEnum termsEnum = terms.iterator();
    final TermsWriter termsWriter = new TermsWriter(fieldInfo);

    long totalTermFreqSum = 0;
    long docFreqSum = 0;
    // Passed to the postings writer for every term; its cardinality is reported at the end.
    final FixedBitSet docsSeen = new FixedBitSet(maxDoc);

    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      final BlockTermState state = postingsWriter.writeTerm(term, termsEnum, docsSeen);
      if (state == null) {
        // The postings writer produced no state for this term; nothing to record.
        continue;
      }
      termsWriter.finishTerm(term, state);
      totalTermFreqSum += state.totalTermFreq;
      docFreqSum += state.docFreq;
    }

    // Without frequencies the total term frequency is reported as -1.
    final long reportedTotalTermFreq = hasFreq ? totalTermFreqSum : -1;
    termsWriter.finish(reportedTotalTermFreq, docFreqSum, docsSeen.cardinality());
  }
}
Aggregations