Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.
The class OrdsSegmentTermsEnum, method printSeekState:
@SuppressWarnings("unused")
private void printSeekState(PrintStream out) throws IOException {
  if (currentFrame == staticFrame) {
    out.println(" no prior seek");
  } else {
    out.println(" prior seek state:");
    int ord = 0;
    boolean isSeekFrame = true;
    while (true) {
      OrdsSegmentTermsEnumFrame f = getFrame(ord);
      assert f != null;
      final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
      if (f.nextEnt == -1) {
        out.println(" frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) + (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd() + " termOrd=" + f.termOrd);
      } else {
        out.println(" frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) + (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd() + " termOrd=" + f.termOrd);
      }
      if (fr.index != null) {
        assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
        if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) {
          out.println(" broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix - 1) & 0xFF));
          throw new RuntimeException("seek state is broken");
        }
        Output output = Util.get(fr.index, prefix);
        if (output == null) {
          out.println(" broken seek state: prefix is not final in index");
          throw new RuntimeException("seek state is broken");
        } else if (isSeekFrame && !f.isFloor) {
          final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes.bytes, output.bytes.offset, output.bytes.length);
          final long codeOrig = reader.readVLong();
          final long code = (f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) | (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0);
          if (codeOrig != code) {
            out.println(" broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
            throw new RuntimeException("seek state is broken");
          }
        }
      }
      if (f == currentFrame) {
        break;
      }
      if (f.prefix == validIndexPrefix) {
        isSeekFrame = false;
      }
      ord++;
    }
  }
}
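When isSeekFrame is true and the frame is not a floor block, the method re-reads the packed frame code from the FST output bytes with a ByteArrayDataInput and compares it to the code recomputed from the frame's fp, hasTerms and isFloor fields. Below is a minimal sketch of that pack/decode round trip; the class name and the local flag constants are hypothetical stand-ins whose values are only assumed to mirror OrdsBlockTreeTermsWriter's.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class FrameCodeRoundTrip {
  // Hypothetical local stand-ins; the real constants live in OrdsBlockTreeTermsWriter.
  static final int OUTPUT_FLAGS_NUM_BITS = 2;
  static final int OUTPUT_FLAG_HAS_TERMS = 0x2;
  static final int OUTPUT_FLAG_IS_FLOOR = 0x1;

  public static void main(String[] args) throws Exception {
    long fp = 123456;        // block file pointer
    boolean hasTerms = true;
    boolean isFloor = false;

    // Pack the file pointer plus the two flag bits into a single vLong, as the writer does.
    long code = (fp << OUTPUT_FLAGS_NUM_BITS)
        | (hasTerms ? OUTPUT_FLAG_HAS_TERMS : 0)
        | (isFloor ? OUTPUT_FLAG_IS_FLOOR : 0);
    byte[] scratch = new byte[10];
    ByteArrayDataOutput out = new ByteArrayDataOutput(scratch);
    out.writeVLong(code);

    // printSeekState does the reverse: wrap the output bytes and read the vLong back.
    ByteArrayDataInput reader = new ByteArrayDataInput(scratch, 0, out.getPosition());
    long decoded = reader.readVLong();
    assert (decoded >>> OUTPUT_FLAGS_NUM_BITS) == fp;
    assert ((decoded & OUTPUT_FLAG_HAS_TERMS) != 0) == hasTerms;
    assert ((decoded & OUTPUT_FLAG_IS_FLOOR) != 0) == isFloor;
  }
}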
Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.
The class OrdsSegmentTermsEnumFrame, method loadBlock:
/* Does initial decode of next block of terms; this
   doesn't actually decode the docFreq, totalTermFreq,
   postings details (frq/prx offset, etc.) metadata;
   it just loads them as byte[] blobs which are then
   decoded on-demand if the metadata is ever requested
   for any term in this block. This enables terms-only
   intensive consumes (eg certain MTQs, respelling) to
   not pay the price of decoding metadata they won't
   use. */
void loadBlock() throws IOException {
  // Clone the IndexInput lazily, so that consumers
  // that just pull a TermsEnum to
  // seekExact(TermState) don't pay this cost:
  ste.initIndexInput();
  if (nextEnt != -1) {
    // Already loaded
    return;
  }
  // System.out.println("loadBlock ord=" + ord + " termOrdOrig=" + termOrdOrig + " termOrd=" + termOrd + " fp=" + fp);
  ste.in.seek(fp);
  int code = ste.in.readVInt();
  entCount = code >>> 1;
  assert entCount > 0;
  isLastInFloor = (code & 1) != 0;
  assert arc == null || (isLastInFloor || isFloor);
  // TODO: if suffixes were stored in random-access
  // array structure, then we could do binary search
  // instead of linear scan to find target term; eg
  // we could have simple array of offsets
  // term suffixes:
  code = ste.in.readVInt();
  isLeafBlock = (code & 1) != 0;
  int numBytes = code >>> 1;
  if (suffixBytes.length < numBytes) {
    suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  ste.in.readBytes(suffixBytes, 0, numBytes);
  suffixesReader.reset(suffixBytes, 0, numBytes);
  /*if (DEBUG) {
    if (arc == null) {
      System.out.println(" loadBlock (next) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
    } else {
      System.out.println(" loadBlock (seek) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
    }
  }*/
  // stats
  numBytes = ste.in.readVInt();
  if (statBytes.length < numBytes) {
    statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  // System.out.println("READ stats numBytes=" + numBytes + " fp=" + ste.in.getFilePointer());
  ste.in.readBytes(statBytes, 0, numBytes);
  statsReader.reset(statBytes, 0, numBytes);
  metaDataUpto = 0;
  state.termBlockOrd = 0;
  nextEnt = 0;
  lastSubFP = -1;
  // TODO: we could skip this if !hasTerms; but
  // that's rare so won't help much
  // metadata
  numBytes = ste.in.readVInt();
  if (bytes == null) {
    bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    bytesReader = new ByteArrayDataInput();
  } else if (bytes.length < numBytes) {
    bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  ste.in.readBytes(bytes, 0, numBytes);
  bytesReader.reset(bytes, 0, numBytes);
  // Sub-blocks of a single floor block are always
  // written one after another -- tail recurse:
  fpEnd = ste.in.getFilePointer();
  // if (DEBUG) {
  //   System.out.println(" fpEnd=" + fpEnd);
  // }
}
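Each section of the block (suffixes, stats, metadata) follows the same pattern: read a vInt length prefix, grow a reusable byte[] with ArrayUtil.oversize if it is too small, bulk-read the blob, and reset a ByteArrayDataInput onto it so nothing is decoded until a term actually needs it. Below is a hedged sketch of just that pattern with invented names, assuming only the public DataInput/ByteArrayDataInput API.

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.ArrayUtil;

/** Illustrative reader that mirrors loadBlock's blob-loading pattern (names are made up). */
class LazyBlobReader {
  private byte[] bytes = new byte[16];
  private final ByteArrayDataInput bytesReader = new ByteArrayDataInput();

  /** Reads one length-prefixed blob and points the reusable reader at it; decoding happens later. */
  void loadBlob(DataInput in) throws IOException {
    int numBytes = in.readVInt();  // length prefix written ahead of the blob
    if (bytes.length < numBytes) {
      // Grow, never shrink, so the buffer can be reused across many blocks.
      bytes = new byte[ArrayUtil.oversize(numBytes, Byte.BYTES)];
    }
    in.readBytes(bytes, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);  // cheap: no copy, just re-points the reader
  }

  ByteArrayDataInput reader() {
    return bytesReader;
  }
}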
Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.
The class SortedInputIterator, method next:
@Override
public BytesRef next() throws IOException {
  boolean success = false;
  if (done) {
    return null;
  }
  try {
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef bytes = reader.next();
    if (bytes != null) {
      weight = decode(bytes, input);
      if (hasPayloads) {
        payload = decodePayload(bytes, input);
      }
      if (hasContexts) {
        contexts = decodeContexts(bytes, input);
      }
      success = true;
      return bytes;
    }
    close();
    success = done = true;
    return null;
  } finally {
    if (!success) {
      done = true;
      close();
    }
  }
}
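The decode, decodePayload and decodeContexts helpers (not shown here) use the shared ByteArrayDataInput to pull the weight and optional fields back out of the entry that was written before sorting. Below is a simplified sketch of that idea with made-up names, assuming the weight is stored as a fixed 8-byte suffix after the term bytes; the real codec also handles payloads and contexts.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;

/** Sketch in the spirit of SortedInputIterator's decode(): the weight rides at the end of the entry. */
public class WeightSuffixCodec {

  /** Appends an 8-byte weight after the term bytes (simplified). */
  static BytesRef encode(byte[] term, long weight) throws IOException {
    byte[] buffer = new byte[term.length + Long.BYTES];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    out.writeBytes(term, 0, term.length);
    out.writeLong(weight);
    return new BytesRef(buffer, 0, out.getPosition());
  }

  /** Reads the trailing weight and shrinks the BytesRef back to the bare term, reusing the caller's reader. */
  static long decode(BytesRef scratch, ByteArrayDataInput tmpInput) throws IOException {
    tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
    tmpInput.setPosition(scratch.offset + scratch.length - Long.BYTES);  // jump to the weight suffix
    long weight = tmpInput.readLong();
    scratch.length -= Long.BYTES;  // expose only the term bytes
    return weight;
  }

  public static void main(String[] args) throws Exception {
    BytesRef entry = encode("lucene".getBytes(StandardCharsets.UTF_8), 42L);
    long weight = decode(entry, new ByteArrayDataInput());
    assert weight == 42L && entry.utf8ToString().equals("lucene");
  }
}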
Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.
The class TestCompressingStoredFieldsFormat, method testZDouble:
public void testZDouble() throws Exception {
  // we never need more than 9 bytes
  byte[] buffer = new byte[9];
  ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
  ByteArrayDataInput in = new ByteArrayDataInput(buffer);
  // round-trip small integer values
  for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
    double x = (double) i;
    CompressingStoredFieldsWriter.writeZDouble(out, x);
    in.reset(buffer, 0, out.getPosition());
    double y = CompressingStoredFieldsReader.readZDouble(in);
    assertTrue(in.eof());
    assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
    // check that compression actually works
    if (i >= -1 && i <= 124) {
      // single byte compression
      assertEquals(1, out.getPosition());
    }
    out.reset(buffer);
  }
  // round-trip special values
  double[] special = { -0.0d, +0.0d, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.MIN_VALUE, Double.MAX_VALUE, Double.NaN };
  for (double x : special) {
    CompressingStoredFieldsWriter.writeZDouble(out, x);
    in.reset(buffer, 0, out.getPosition());
    double y = CompressingStoredFieldsReader.readZDouble(in);
    assertTrue(in.eof());
    assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
    out.reset(buffer);
  }
  // round-trip random values
  Random r = random();
  for (int i = 0; i < 100000; i++) {
    double x = r.nextDouble() * (random().nextInt(100) - 50);
    CompressingStoredFieldsWriter.writeZDouble(out, x);
    assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= (x < 0 ? 9 : 8));
    in.reset(buffer, 0, out.getPosition());
    double y = CompressingStoredFieldsReader.readZDouble(in);
    assertTrue(in.eof());
    assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
    out.reset(buffer);
  }
  // same with floats
  for (int i = 0; i < 100000; i++) {
    double x = (double) (r.nextFloat() * (random().nextInt(100) - 50));
    CompressingStoredFieldsWriter.writeZDouble(out, x);
    assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= 5);
    in.reset(buffer, 0, out.getPosition());
    double y = CompressingStoredFieldsReader.readZDouble(in);
    assertTrue(in.eof());
    assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
    out.reset(buffer);
  }
}
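The test drives every iteration through one scratch buffer, resetting the ByteArrayDataOutput before each write and re-pointing the ByteArrayDataInput at exactly the bytes just written, then using eof() to prove the reader consumed them all. Below is a minimal sketch of that reuse idiom using plain vInts rather than the ZDouble codec itself; the class name is made up.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ScratchBufferRoundTrip {
  public static void main(String[] args) throws Exception {
    byte[] buffer = new byte[5];  // a vInt needs at most 5 bytes
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);

    for (int value : new int[] {0, 1, 127, 128, Integer.MAX_VALUE}) {
      out.writeVInt(value);
      // getPosition() is the number of bytes just written; values 0..127 fit in one byte.
      in.reset(buffer, 0, out.getPosition());
      int back = in.readVInt();
      assert back == value;
      assert in.eof();     // every written byte was consumed
      out.reset(buffer);   // rewind the output for the next iteration
    }
  }
}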
Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.
The class TestCompressingStoredFieldsFormat, method testTLong:
public void testTLong() throws Exception {
  // we never need more than 10 bytes
  byte[] buffer = new byte[10];
  ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
  ByteArrayDataInput in = new ByteArrayDataInput(buffer);
  // round-trip small integer values
  for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
    for (long mul : new long[] { SECOND, HOUR, DAY }) {
      long l1 = (long) i * mul;
      CompressingStoredFieldsWriter.writeTLong(out, l1);
      in.reset(buffer, 0, out.getPosition());
      long l2 = CompressingStoredFieldsReader.readTLong(in);
      assertTrue(in.eof());
      assertEquals(l1, l2);
      // check that compression actually works
      if (i >= -16 && i <= 15) {
        // single byte compression
        assertEquals(1, out.getPosition());
      }
      out.reset(buffer);
    }
  }
  // round-trip random values
  Random r = random();
  for (int i = 0; i < 100000; i++) {
    final int numBits = r.nextInt(65);
    long l1 = r.nextLong() & ((1L << numBits) - 1);
    switch (r.nextInt(4)) {
      case 0:
        l1 *= SECOND;
        break;
      case 1:
        l1 *= HOUR;
        break;
      case 2:
        l1 *= DAY;
        break;
      default:
        break;
    }
    CompressingStoredFieldsWriter.writeTLong(out, l1);
    in.reset(buffer, 0, out.getPosition());
    long l2 = CompressingStoredFieldsReader.readTLong(in);
    assertTrue(in.eof());
    assertEquals(l1, l2);
    out.reset(buffer);
  }
}
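The values fed to writeTLong above include negative multiples of SECOND, HOUR and DAY. The sketch below is not the TLong codec itself; it only shows how such signed values round-trip compactly through the same scratch-buffer setup, assuming the zig-zag helpers writeZLong/readZLong on DataOutput/DataInput. Class name and chosen values are illustrative.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ZLongRoundTrip {
  public static void main(String[] args) throws Exception {
    byte[] buffer = new byte[10];  // a zig-zag vLong needs at most 10 bytes
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);

    for (long value : new long[] {0L, -1L, 1L, -3_600_000L, 86_400_000L, Long.MIN_VALUE}) {
      out.writeZLong(value);  // zig-zag keeps small negative and positive values short
      in.reset(buffer, 0, out.getPosition());
      long back = in.readZLong();
      assert back == value;
      assert in.eof();
      out.reset(buffer);
    }
  }
}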