Use of org.apache.lucene.util.LongsRef in the Apache lucene-solr project.
Class CompressingTermVectorsReader, method get.
/**
 * Reads the term vectors of document {@code doc} from the chunk of {@code vectorsStream}
 * that contains it.
 *
 * <p>The chunk is decoded strictly in stream order: chunk header (docBase, chunkDocs),
 * per-document field counts, distinct field numbers, per-field field-number offsets and
 * flags, per-field term counts, term prefix and suffix lengths, term frequencies,
 * positions, offsets, payload lengths, and finally the decompressed term/payload bytes.
 * Metadata of the other documents of the chunk is read (or skipped) as well, because the
 * totals over the whole chunk are needed to size the following sections.
 *
 * @param doc the ID of the document whose term vectors are requested
 * @return a {@code TVFields} built from the decoded data, or {@code null} when the
 *         document has no field with term vectors ({@code numFields == 0})
 * @throws CorruptIndexException if the chunk header does not contain {@code doc} or
 *         reaches past {@code numDocs}
 * @throws IOException declared for the underlying stream reads
 */
@Override
public Fields get(int doc) throws IOException {
ensureOpen();
// seek to the right place
{
// start of the chunk that holds doc, as reported by the index reader
final long startPointer = indexReader.getStartPointer(doc);
vectorsStream.seek(startPointer);
}
// decode
// - docBase: first doc ID of the chunk
// - chunkDocs: number of docs of the chunk
final int docBase = vectorsStream.readVInt();
final int chunkDocs = vectorsStream.readVInt();
// the chunk must contain doc and must not extend past the last document
if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc, vectorsStream);
}
// number of fields to skip
final int skip;
// number of fields of the document we're looking for
final int numFields;
// total number of fields of the chunk (sum for all docs)
final int totalFields;
if (chunkDocs == 1) {
// single-document chunk: the field count is stored as one VInt and nothing precedes it
skip = 0;
numFields = totalFields = vectorsStream.readVInt();
} else {
// multi-document chunk: one field count per document, read through the shared reader
reader.reset(vectorsStream, chunkDocs);
int sum = 0;
// field counts of the documents that come before doc in this chunk
for (int i = docBase; i < doc; ++i) {
sum += reader.next();
}
skip = sum;
numFields = (int) reader.next();
sum += numFields;
// field counts of the documents that come after doc in this chunk
for (int i = doc + 1; i < docBase + chunkDocs; ++i) {
sum += reader.next();
}
totalFields = sum;
}
if (numFields == 0) {
// no vectors
return null;
}
// read field numbers that have term vectors
final int[] fieldNums;
{
// header byte: low 5 bits = bits per field number, high 3 bits = (distinct field count - 1)
final int token = vectorsStream.readByte() & 0xFF;
// means no term vectors, cannot happen since we checked for numFields == 0
assert token != 0;
final int bitsPerFieldNum = token & 0x1F;
int totalDistinctFields = token >>> 5;
// 0x07 is the escape value: the remainder of the count follows as a VInt
if (totalDistinctFields == 0x07) {
totalDistinctFields += vectorsStream.readVInt();
}
// the stored value is the count minus one
++totalDistinctFields;
final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
fieldNums = new int[totalDistinctFields];
for (int i = 0; i < totalDistinctFields; ++i) {
fieldNums[i] = (int) it.next();
}
}
// read field numbers and flags
// fieldNumOffs[i] is the index into fieldNums of the i-th field of the requested document
final int[] fieldNumOffs = new int[numFields];
// one flags value per field of the chunk (totalFields entries)
final PackedInts.Reader flags;
{
final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
// offsets into fieldNums for every field of the chunk
final PackedInts.Reader allFieldNumOffs = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
switch(vectorsStream.readVInt()) {
case 0:
// flags are stored once per distinct field number; expand them to one entry per field
final PackedInts.Reader fieldFlags = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, fieldNums.length, FLAGS_BITS);
PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
for (int i = 0; i < totalFields; ++i) {
final int fieldNumOff = (int) allFieldNumOffs.get(i);
assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
final int fgs = (int) fieldFlags.get(fieldNumOff);
f.set(i, fgs);
}
flags = f;
break;
case 1:
// flags are stored directly, one entry per field of the chunk
flags = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, FLAGS_BITS);
break;
default:
// any other selector value is not handled
throw new AssertionError();
}
// keep only the offsets of the requested document's fields
for (int i = 0; i < numFields; ++i) {
fieldNumOffs[i] = (int) allFieldNumOffs.get(skip + i);
}
}
// number of terms per field for all fields
final PackedInts.Reader numTerms;
// number of terms of the whole chunk (sum of numTerms over all fields)
final int totalTerms;
{
final int bitsRequired = vectorsStream.readVInt();
numTerms = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
int sum = 0;
for (int i = 0; i < totalFields; ++i) {
sum += numTerms.get(i);
}
totalTerms = sum;
}
// term lengths
// docOff: summed suffix lengths of the fields before the document
// docLen: summed suffix lengths of the document's own fields
// totalLen: summed suffix lengths of all fields of the chunk
int docOff = 0, docLen = 0, totalLen;
final int[] fieldLengths = new int[numFields];
final int[][] prefixLengths = new int[numFields][];
final int[][] suffixLengths = new int[numFields][];
{
// first pass over totalTerms values: prefix lengths
reader.reset(vectorsStream, totalTerms);
// skip
int toSkip = 0;
for (int i = 0; i < skip; ++i) {
toSkip += numTerms.get(i);
}
reader.skip(toSkip);
// read prefix lengths
for (int i = 0; i < numFields; ++i) {
final int termCount = (int) numTerms.get(skip + i);
final int[] fieldPrefixLengths = new int[termCount];
prefixLengths[i] = fieldPrefixLengths;
// reader.next(n) hands back a batch of values; keep pulling batches until the field is full
for (int j = 0; j < termCount; ) {
final LongsRef next = reader.next(termCount - j);
for (int k = 0; k < next.length; ++k) {
fieldPrefixLengths[j++] = (int) next.longs[next.offset + k];
}
}
}
// skip the prefix lengths of the fields after the document
// (presumably reader.ord() is the number of values consumed so far -- confirm against the reader class)
reader.skip(totalTerms - reader.ord());
// second pass over totalTerms values: suffix lengths
reader.reset(vectorsStream, totalTerms);
// skip
// NOTE(review): toSkip is reset here but not read again in this block
toSkip = 0;
// suffix lengths of preceding fields are not skipped but summed: they give the byte offset of the document
for (int i = 0; i < skip; ++i) {
for (int j = 0; j < numTerms.get(i); ++j) {
docOff += reader.next();
}
}
// read suffix lengths of the document's fields
for (int i = 0; i < numFields; ++i) {
final int termCount = (int) numTerms.get(skip + i);
final int[] fieldSuffixLengths = new int[termCount];
suffixLengths[i] = fieldSuffixLengths;
for (int j = 0; j < termCount; ) {
final LongsRef next = reader.next(termCount - j);
for (int k = 0; k < next.length; ++k) {
fieldSuffixLengths[j++] = (int) next.longs[next.offset + k];
}
}
// sum(...) is a helper defined outside this block; presumably it adds up the array elements
fieldLengths[i] = sum(suffixLengths[i]);
docLen += fieldLengths[i];
}
totalLen = docOff + docLen;
// add the suffix lengths of the fields after the document
for (int i = skip + numFields; i < totalFields; ++i) {
for (int j = 0; j < numTerms.get(i); ++j) {
totalLen += reader.next();
}
}
}
// term freqs
// one entry per term of the whole chunk; the stream stores (freq - 1)
final int[] termFreqs = new int[totalTerms];
{
reader.reset(vectorsStream, totalTerms);
for (int i = 0; i < totalTerms; ) {
final LongsRef next = reader.next(totalTerms - i);
for (int k = 0; k < next.length; ++k) {
termFreqs[i++] = 1 + (int) next.longs[next.offset + k];
}
}
}
// total number of positions, offsets and payloads
// each total is the sum of term freqs over the fields of the chunk whose flags have the matching bit
int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
for (int i = 0, termIndex = 0; i < totalFields; ++i) {
final int f = (int) flags.get(i);
final int termCount = (int) numTerms.get(i);
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex++];
if ((f & POSITIONS) != 0) {
totalPositions += freq;
}
if ((f & OFFSETS) != 0) {
totalOffsets += freq;
}
if ((f & PAYLOADS) != 0) {
totalPayloads += freq;
}
}
// after the last field every term must have been visited
assert i != totalFields - 1 || termIndex == totalTerms : termIndex + " " + totalTerms;
}
// positionIndex(...) is defined outside this block; as used below, positionIndex[i][j] .. positionIndex[i][j + 1]
// delimits the entries of term j of field i, and positionIndex[i][termCount] is the field's entry count
final int[][] positionIndex = positionIndex(skip, numFields, numTerms, termFreqs);
final int[][] positions, startOffsets, lengths;
if (totalPositions > 0) {
positions = readPositions(skip, numFields, flags, numTerms, termFreqs, POSITIONS, totalPositions, positionIndex);
} else {
// no positions anywhere in the chunk: every per-field entry stays null
positions = new int[numFields][];
}
if (totalOffsets > 0) {
// average number of chars per term
// one float per distinct field number, stored as raw int bits
final float[] charsPerTerm = new float[fieldNums.length];
for (int i = 0; i < charsPerTerm.length; ++i) {
charsPerTerm[i] = Float.intBitsToFloat(vectorsStream.readInt());
}
// start offsets and lengths are two consecutive sections with the same layout (OFFSETS flag, totalOffsets values)
startOffsets = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
lengths = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
for (int i = 0; i < numFields; ++i) {
final int[] fStartOffsets = startOffsets[i];
final int[] fPositions = positions[i];
// patch offsets from positions
// positions have not been delta-decoded yet at this point (that happens further down),
// so the raw stored position values are used here
if (fStartOffsets != null && fPositions != null) {
final float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
for (int j = 0; j < startOffsets[i].length; ++j) {
fStartOffsets[j] += (int) (fieldCharsPerTerm * fPositions[j]);
}
}
if (fStartOffsets != null) {
final int[] fPrefixLengths = prefixLengths[i];
final int[] fSuffixLengths = suffixLengths[i];
final int[] fLengths = lengths[i];
for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
// delta-decode start offsets and patch lengths using term lengths
final int termLength = fPrefixLengths[j] + fSuffixLengths[j];
// first occurrence of the term: only the length needs patching
lengths[i][positionIndex[i][j]] += termLength;
// remaining occurrences: accumulate start offsets and patch lengths
for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
fStartOffsets[k] += fStartOffsets[k - 1];
fLengths[k] += termLength;
}
}
}
}
} else {
// no offsets anywhere in the chunk: both variables share one array of null entries
startOffsets = lengths = new int[numFields][];
}
if (totalPositions > 0) {
// delta-decode positions
for (int i = 0; i < numFields; ++i) {
final int[] fPositions = positions[i];
final int[] fpositionIndex = positionIndex[i];
if (fPositions != null) {
for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
// accumulate the position deltas within each term
for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
fPositions[k] += fPositions[k - 1];
}
}
}
}
}
// payload lengths
// payloadIndex[i][p] .. payloadIndex[i][p + 1] delimits the payload of the p-th position of field i
final int[][] payloadIndex = new int[numFields][];
// payload bytes of the whole chunk / before the document / of the document itself
int totalPayloadLength = 0;
int payloadOff = 0;
int payloadLen = 0;
if (totalPayloads > 0) {
reader.reset(vectorsStream, totalPayloads);
// skip
// payload lengths of preceding fields are summed to find where the document's payloads start
int termIndex = 0;
for (int i = 0; i < skip; ++i) {
final int f = (int) flags.get(i);
final int termCount = (int) numTerms.get(i);
if ((f & PAYLOADS) != 0) {
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k) {
final int l = (int) reader.next();
payloadOff += l;
}
}
}
termIndex += termCount;
}
totalPayloadLength = payloadOff;
// read doc payload lengths
for (int i = 0; i < numFields; ++i) {
final int f = (int) flags.get(skip + i);
final int termCount = (int) numTerms.get(skip + i);
if ((f & PAYLOADS) != 0) {
final int totalFreq = positionIndex[i][termCount];
payloadIndex[i] = new int[totalFreq + 1];
int posIdx = 0;
// running end offsets; payloadLen keeps growing across fields, so the index is relative to the document
payloadIndex[i][posIdx] = payloadLen;
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k) {
final int payloadLength = (int) reader.next();
payloadLen += payloadLength;
payloadIndex[i][posIdx + 1] = payloadLen;
++posIdx;
}
}
assert posIdx == totalFreq;
}
termIndex += termCount;
}
totalPayloadLength += payloadLen;
// payload lengths of the fields after the document only contribute to the chunk total
for (int i = skip + numFields; i < totalFields; ++i) {
final int f = (int) flags.get(i);
final int termCount = (int) numTerms.get(i);
if ((f & PAYLOADS) != 0) {
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k) {
totalPayloadLength += reader.next();
}
}
}
termIndex += termCount;
}
assert termIndex == totalTerms : termIndex + " " + totalTerms;
}
// decompress data
// the arguments after the stream are presumably (total decompressed length, offset of the wanted slice,
// length of the wanted slice) -- confirm against the decompressor's contract
final BytesRef suffixBytes = new BytesRef();
decompressor.decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
// the first docLen bytes of the slice are term suffixes, the following payloadLen bytes are payloads;
// payloadBytes shares the backing array of suffixBytes
suffixBytes.length = docLen;
final BytesRef payloadBytes = new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);
// copy the per-field metadata of the requested document out of the chunk-wide readers
final int[] fieldFlags = new int[numFields];
for (int i = 0; i < numFields; ++i) {
fieldFlags[i] = (int) flags.get(skip + i);
}
final int[] fieldNumTerms = new int[numFields];
for (int i = 0; i < numFields; ++i) {
fieldNumTerms[i] = (int) numTerms.get(skip + i);
}
final int[][] fieldTermFreqs = new int[numFields][];
{
// index of the document's first term inside the chunk-wide termFreqs array
int termIdx = 0;
for (int i = 0; i < skip; ++i) {
termIdx += numTerms.get(i);
}
for (int i = 0; i < numFields; ++i) {
final int termCount = (int) numTerms.get(skip + i);
fieldTermFreqs[i] = new int[termCount];
for (int j = 0; j < termCount; ++j) {
fieldTermFreqs[i][j] = termFreqs[termIdx++];
}
}
}
assert sum(fieldLengths) == docLen : sum(fieldLengths) + " != " + docLen;
return new TVFields(fieldNums, fieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes);
}
Use of org.apache.lucene.util.LongsRef in the Apache lucene-solr project.
Class CompressingTermVectorsReader, method readPositions.
/**
 * Reads one per-occurrence section of the current chunk (one value per term occurrence)
 * and returns the values that belong to the fields of the requested document.
 *
 * @param skip number of fields of the chunk that precede the document's first field
 * @param numFields number of fields of the document
 * @param flags flags of every field of the chunk
 * @param numTerms term count of every field of the chunk
 * @param termFreqs frequency of every term of the chunk
 * @param flag flag bit a field must carry to have values in this section
 * @param totalPositions number of values stored in this section for the whole chunk
 * @param positionIndex per-field index whose entry at {@code [field][termCount]} is used
 *        as the number of values of that field
 * @return one array per field of the document; the entry is {@code null} for a field
 *         whose flags do not contain {@code flag}
 * @throws IOException declared for the underlying stream reads
 */
private int[][] readPositions(int skip, int numFields, PackedInts.Reader flags, PackedInts.Reader numTerms, int[] termFreqs, int flag, final int totalPositions, int[][] positionIndex) throws IOException {
  final int[][] result = new int[numFields][];
  reader.reset(vectorsStream, totalPositions);

  // Count the values owned by the fields that come before the document, then jump over them.
  int valuesBeforeDoc = 0;
  int termCursor = 0;
  for (int field = 0; field < skip; ++field) {
    final int fieldFlags = (int) flags.get(field);
    final int fieldTermCount = (int) numTerms.get(field);
    if ((fieldFlags & flag) != 0) {
      final int termEnd = termCursor + fieldTermCount;
      for (int term = termCursor; term < termEnd; ++term) {
        valuesBeforeDoc += termFreqs[term];
      }
    }
    termCursor += fieldTermCount;
  }
  reader.skip(valuesBeforeDoc);

  // Pull the values of each flagged field of the document, batch by batch.
  for (int field = 0; field < numFields; ++field) {
    final int fieldFlags = (int) flags.get(skip + field);
    final int fieldTermCount = (int) numTerms.get(skip + field);
    if ((fieldFlags & flag) == 0) {
      continue;
    }
    final int valueCount = positionIndex[field][fieldTermCount];
    final int[] fieldValues = new int[valueCount];
    result[field] = fieldValues;
    int filled = 0;
    while (filled < valueCount) {
      final LongsRef batch = reader.next(valueCount - filled);
      for (int k = 0; k < batch.length; ++k) {
        fieldValues[filled++] = (int) batch.longs[batch.offset + k];
      }
    }
  }

  // Leave the stream positioned after this section by consuming what is left of it.
  reader.skip(totalPositions - reader.ord());
  return result;
}
Use of org.apache.lucene.util.LongsRef in the Apache lucene-solr project.
Class Lucene54DocValuesConsumer, method writeDictionary.
/**
 * Writes the dictionary of value sets to {@code meta}: the total number of values, every
 * value of every set in iteration order, the number of sets, and then the length of each set.
 *
 * @param uniqueValueSets the value sets to write, in the order they are iterated
 * @throws IOException declared for the writes to {@code meta}
 */
private void writeDictionary(SortedSet<LongsRef> uniqueValueSets) throws IOException {
  // Header: number of longs that follow.
  int totalValues = 0;
  for (LongsRef set : uniqueValueSets) {
    totalValues += set.length;
  }
  meta.writeInt(totalValues);

  // Body: the values of all sets, back to back.
  for (LongsRef set : uniqueValueSets) {
    final int end = set.offset + set.length;
    for (int pos = set.offset; pos < end; ++pos) {
      meta.writeLong(set.longs[pos]);
    }
  }

  // Trailer: number of sets, then the size of each one.
  meta.writeInt(uniqueValueSets.size());
  for (LongsRef set : uniqueValueSets) {
    meta.writeInt(set.length);
  }
}
Use of org.apache.lucene.util.LongsRef in the Apache lucene-solr project.
Class Lucene54DocValuesConsumer, method uniqueValueSets.
/**
 * Collects the distinct per-document value sets, giving up when they get too large.
 *
 * @param docToValueCount number of values of each document, in document order
 * @param values the values of all documents, concatenated in the same order
 * @return the distinct value sets as a sorted set, or {@code null} if a single document
 *         has more than 256 values or the distinct sets together hold more than 256 values
 */
private SortedSet<LongsRef> uniqueValueSets(Iterable<Number> docToValueCount, Iterable<Number> values) {
  final Set<LongsRef> distinctSets = new HashSet<>();
  // Reused buffer holding the values of the document currently being examined.
  final LongsRef scratch = new LongsRef(256);

  final Iterator<Number> counts = docToValueCount.iterator();
  final Iterator<Number> valueSource = values.iterator();
  int dictionarySize = 0;
  while (counts.hasNext()) {
    final int count = counts.next().intValue();
    scratch.length = count;
    if (count > 256) {
      return null;
    }
    for (int slot = 0; slot < count; ++slot) {
      scratch.longs[slot] = valueSource.next().longValue();
    }
    if (!distinctSets.contains(scratch)) {
      dictionarySize += count;
      if (dictionarySize > 256) {
        return null;
      }
      // Store a private copy: the scratch buffer is overwritten on the next iteration.
      distinctSets.add(new LongsRef(Arrays.copyOf(scratch.longs, count), 0, count));
    }
  }
  assert valueSource.hasNext() == false;
  return new TreeSet<>(distinctSets);
}
Use of org.apache.lucene.util.LongsRef in the Apache lucene-solr project.
Class Lucene54DocValuesConsumer, method docToSetId.
/**
 * Maps every document to the ID of its value set, where the ID of a set is its rank in
 * the iteration order of {@code uniqueValueSets}.
 *
 * @param uniqueValueSets the distinct value sets; each document's values must match one
 * @param docToValueCount number of values of each document, in document order
 * @param values the values of all documents, concatenated in the same order
 * @return a lazily evaluated view; each call to {@code iterator()} re-reads both inputs
 *         from their start and yields one set ID per document
 */
private Iterable<Number> docToSetId(SortedSet<LongsRef> uniqueValueSets, Iterable<Number> docToValueCount, Iterable<Number> values) {
  final Map<LongsRef, Integer> idBySet = new HashMap<>();
  int nextId = 0;
  for (LongsRef valueSet : uniqueValueSets) {
    idBySet.put(valueSet, nextId);
    ++nextId;
  }
  assert nextId == uniqueValueSets.size();

  return () -> {
    final Iterator<Number> counts = docToValueCount.iterator();
    final Iterator<Number> valueSource = values.iterator();
    // Reused lookup key, refilled with the values of each document in turn.
    final LongsRef scratch = new LongsRef(256);
    return new Iterator<Number>() {
      @Override
      public boolean hasNext() {
        return counts.hasNext();
      }

      @Override
      public Number next() {
        scratch.length = counts.next().intValue();
        for (int slot = 0; slot < scratch.length; ++slot) {
          scratch.longs[slot] = valueSource.next().longValue();
        }
        final Integer id = idBySet.get(scratch);
        assert id != null;
        return id;
      }
    };
  };
}
Aggregations