Search in sources :

Example 16 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

the class OrdsSegmentTermsEnum method printSeekState.

/**
 * Debug helper that writes a description of the current seek state to {@code out}.
 *
 * <p>When {@code currentFrame} is {@code staticFrame} a single no-prior-seek line is
 * printed. Otherwise frames are fetched with {@code getFrame(ord)} starting at ord 0
 * and one line is printed per frame. When a terms index is present
 * ({@code fr.index != null}) each frame is also checked against it.
 *
 * <p>NOTE(review): this listing looks truncated by extraction. Closing braces are
 * missing, and no statement that advances {@code ord} or leaves the
 * {@code while (true)} loop is visible. Restore from the original source before use.
 *
 * @param out stream that receives the diagnostic lines
 * @throws IOException declared by the method; presumably raised by the index lookup
 * @throws RuntimeException when a frame disagrees with the terms index
 */
private void printSeekState(PrintStream out) throws IOException {
    if (currentFrame == staticFrame) {
        out.println("  no prior seek");
    } else {
        out.println("  prior seek state:");
        // Walk the frames from ord 0. Frames are labelled as seek frames until
        // isSeekFrame is cleared further down.
        int ord = 0;
        boolean isSeekFrame = true;
        while (true) {
            OrdsSegmentTermsEnumFrame f = getFrame(ord);
            assert f != null;
            // The frame prefix is the leading f.prefix bytes of the current term.
            final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
            // A frame with nextEnt == -1 is printed without the loaded tag and
            // without the nextEnt and lastSubFP fields.
            if (f.nextEnt == -1) {
                out.println("    frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) + (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd() + " termOrd=" + f.termOrd);
            } else {
                out.println("    frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) + (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd() + " termOrd=" + f.termOrd);
            // Consistency checks against the terms index, only when one is present.
            if (fr.index != null) {
                assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
                // For a seek frame, the arc label must equal the last byte of the prefix.
                if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) {
                    out.println("      broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix - 1) & 0xFF));
                    throw new RuntimeException("seek state is broken");
                // The prefix must resolve to an output in the index.
                Output output = Util.get(fr.index, prefix);
                if (output == null) {
                    out.println("      broken seek state: prefix is not final in index");
                    throw new RuntimeException("seek state is broken");
                } else if (isSeekFrame && !f.isFloor) {
                    // Read the leading vLong of the index output and compare it with
                    // the code rebuilt from the fields of this frame.
                    final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes.bytes, output.bytes.offset, output.bytes.length);
                    final long codeOrig = reader.readVLong();
                    final long code = (f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) | (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0);
                    if (codeOrig != code) {
                        out.println("      broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
                        throw new RuntimeException("seek state is broken");
            // NOTE(review): the body of the next check is missing from this listing;
            // presumably it ends the walk at the current frame. Confirm against the source.
            if (f == currentFrame) {
            // Frames past the valid index prefix are no longer labelled as seek frames.
            if (f.prefix == validIndexPrefix) {
                isSeekFrame = false;
Also used : Output(org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesRef(org.apache.lucene.util.BytesRef)

Example 17 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

the class OrdsSegmentTermsEnumFrame method loadBlock.

/* Does initial decode of next block of terms; this
     doesn't actually decode the docFreq, totalTermFreq,
     postings details (frq/prx offset, etc.) metadata;
     it just loads them as byte[] blobs which are then
     decoded on-demand if the metadata is ever requested
     for any term in this block.  This enables terms-only
     intensive consumers (eg certain MTQs, respelling) to
     not pay the price of decoding metadata they won't
     use.

     NOTE(review): this listing looks truncated by extraction.
     The right-hand sides of several assignments (code, numBytes,
     fpEnd), the read calls whose argument tails (0, numBytes)
     remain, and a number of closing braces are missing, and the
     DEBUG comment further down is never closed. Restore from the
     original source before compiling. */
void loadBlock() throws IOException {
    // Clone the IndexInput lazily, so that consumers
    // that just pull a TermsEnum to
    // seekExact(TermState) don't pay this cost:
    // nextEnt is set to 0 at the end of a load, so any value other than -1
    // here means this frame has been loaded before.
    if (nextEnt != -1) {
        // Already loaded
    // System.out.println("loadBlock ord=" + ord + " termOrdOrig=" + termOrdOrig + " termOrd=" + termOrd + " fp=" + fp);;
    int code =;
    // The low bit of code sets isLastInFloor; the remaining bits are the entry count.
    entCount = code >>> 1;
    assert entCount > 0;
    isLastInFloor = (code & 1) != 0;
    assert arc == null || (isLastInFloor || isFloor);
    // TODO: if suffixes were stored in random-access
    // array structure, then we could do binary search
    // instead of linear scan to find target term; eg
    // we could have simple array of offsets
    // term suffixes:
    code =;
    // Same packing for the suffix section: the low bit sets isLeafBlock and
    // the remaining bits are the byte length of the suffix data.
    isLeafBlock = (code & 1) != 0;
    int numBytes = code >>> 1;
    // Reallocate the suffix buffer only when the existing one is too small.
    if (suffixBytes.length < numBytes) {
        suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }, 0, numBytes);
    suffixesReader.reset(suffixBytes, 0, numBytes);
    /*if (DEBUG) {
      if (arc == null) {
      System.out.println("    loadBlock (next) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
      } else {
      System.out.println("    loadBlock (seek) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
    // stats
    numBytes =;
    if (statBytes.length < numBytes) {
        statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    // System.out.println("READ stats numBytes=" + numBytes + " fp=" +;, 0, numBytes);
    statsReader.reset(statBytes, 0, numBytes);
    metaDataUpto = 0;
    state.termBlockOrd = 0;
    nextEnt = 0;
    lastSubFP = -1;
    // TODO: we could skip this if !hasTerms; but
    // that's rare so won't help much
    // metadata
    numBytes =;
    if (bytes == null) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
        bytesReader = new ByteArrayDataInput();
    } else if (bytes.length < numBytes) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);
    // Sub-blocks of a single floor block are always
    // written one after another -- tail recurse:
    fpEnd =;
// if (DEBUG) {
//   System.out.println("      fpEnd=" + fpEnd);
// }
Also used : ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput)

Example 18 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

the class SortedInputIterator method next.

/**
 * Returns the next entry, or {@code null} once the iterator is done.
 *
 * <p>For a non-null entry the method sets {@code weight} from {@code decode}, and,
 * when {@code hasPayloads} or {@code hasContexts} is set, also sets {@code payload}
 * and {@code contexts} from the matching decode helpers.
 *
 * <p>NOTE(review): this listing looks truncated by extraction. The initializer of
 * {@code bytes} and several closing braces are missing. Restore from the original
 * source before compiling.
 *
 * @return the entry bytes, or {@code null} when there are no further entries
 * @throws IOException declared by the method; presumably raised while reading the entry
 */
public BytesRef next() throws IOException {
    // success stays false if anything below throws, which lets the finally
    // block mark the iterator as done.
    boolean success = false;
    if (done) {
        return null;
    try {
        // One reader instance is handed to each decode helper for this entry.
        ByteArrayDataInput input = new ByteArrayDataInput();
        BytesRef bytes =;
        if (bytes != null) {
            weight = decode(bytes, input);
            if (hasPayloads) {
                payload = decodePayload(bytes, input);
            if (hasContexts) {
                contexts = decodeContexts(bytes, input);
            success = true;
            return bytes;
        // No entry was returned: mark the iterator as finished.
        success = done = true;
        return null;
    } finally {
        if (!success) {
            done = true;
Also used : ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesRef(org.apache.lucene.util.BytesRef)

Example 19 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

the class TestCompressingStoredFieldsFormat method testZDouble.

/**
 * Round-trips doubles through {@code CompressingStoredFieldsWriter.writeZDouble} and
 * {@code CompressingStoredFieldsReader.readZDouble} over a shared 9-byte buffer.
 *
 * <p>Four groups of values are covered: every integer in the short range (upper bound
 * exclusive), a fixed list of special values, random doubles, and random floats
 * widened to double. Values are compared through {@code Double.doubleToLongBits}, so
 * {@code -0.0d} and {@code +0.0d} count as different.
 *
 * <p>NOTE(review): this listing looks truncated by extraction. Closing braces are
 * missing and no call that rewinds {@code out} between writes is visible. Confirm
 * against the original source.
 */
public void testZDouble() throws Exception {
    // we never need more than 9 bytes
    byte[] buffer = new byte[9];
    // Writer and reader share the same backing array.
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
        double x = (double) i;
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        // Limit the reader to the bytes written so far.
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        // check that compression actually works
        if (i >= -1 && i <= 124) {
            // single byte compression
            assertEquals(1, out.getPosition());
    // round-trip special values
    double[] special = { -0.0d, +0.0d, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.MIN_VALUE, Double.MAX_VALUE, Double.NaN };
    for (double x : special) {
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
        double x = r.nextDouble() * (random().nextInt(100) - 50);
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        // The encoded length may be 9 bytes for negative values and 8 otherwise.
        assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= (x < 0 ? 9 : 8));
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
    // same with floats
    for (int i = 0; i < 100000; i++) {
        double x = (double) (r.nextFloat() * (random().nextInt(100) - 50));
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        // Values that started as floats must encode in at most 5 bytes.
        assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= 5);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
Also used : Random(java.util.Random) ByteArrayDataOutput(org.apache.lucene.store.ByteArrayDataOutput) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) IntPoint(org.apache.lucene.document.IntPoint)

Example 20 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

the class TestCompressingStoredFieldsFormat method testTLong.

/**
 * Round-trips longs through {@code CompressingStoredFieldsWriter.writeTLong} and
 * {@code CompressingStoredFieldsReader.readTLong} over a shared 10-byte buffer.
 *
 * <p>The first phase multiplies every integer in the short range (upper bound
 * exclusive) by each of {@code SECOND}, {@code HOUR} and {@code DAY}. The second
 * phase uses random values masked to a random bit width.
 *
 * <p>NOTE(review): this listing looks truncated by extraction. Closing braces are
 * missing, no call that rewinds {@code out} between writes is visible, and no
 * {@code break} is visible between the switch cases. Confirm against the original
 * source whether fall-through is intended.
 */
public void testTLong() throws Exception {
    // we never need more than 10 bytes
    byte[] buffer = new byte[10];
    // Writer and reader share the same backing array.
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
        for (long mul : new long[] { SECOND, HOUR, DAY }) {
            // Widen to long before multiplying.
            long l1 = (long) i * mul;
            CompressingStoredFieldsWriter.writeTLong(out, l1);
            // Limit the reader to the bytes written so far.
            in.reset(buffer, 0, out.getPosition());
            long l2 = CompressingStoredFieldsReader.readTLong(in);
            assertEquals(l1, l2);
            // check that compression actually works
            if (i >= -16 && i <= 15) {
                // single byte compression
                assertEquals(1, out.getPosition());
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
        // NOTE(review): numBits ranges over 0..64. A long shift by 64 uses only the
        // low six bits of the distance, so for numBits == 64 the mask below is 0
        // rather than all ones. Confirm this is intended.
        final int numBits = r.nextInt(65);
        long l1 = r.nextLong() & ((1L << numBits) - 1);
        // Scale by a time unit in three cases out of four; a selector of 3 matches no case.
        switch(r.nextInt(4)) {
            case 0:
                l1 *= SECOND;
            case 1:
                l1 *= HOUR;
            case 2:
                l1 *= DAY;
        CompressingStoredFieldsWriter.writeTLong(out, l1);
        in.reset(buffer, 0, out.getPosition());
        long l2 = CompressingStoredFieldsReader.readTLong(in);
        assertEquals(l1, l2);
Also used : Random(java.util.Random) ByteArrayDataOutput(org.apache.lucene.store.ByteArrayDataOutput) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) IntPoint(org.apache.lucene.document.IntPoint)


ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput) BytesRef (org.apache.lucene.util.BytesRef)16 ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput) IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)5 IndexOutput (org.apache.lucene.store.IndexOutput) BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)4 IOException (java.io.IOException) HashSet (java.util.HashSet)3 Random (java.util.Random)3 IntPoint (org.apache.lucene.document.IntPoint)3 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)3 IntsRef (org.apache.lucene.util.IntsRef)3 OfflineSorter (org.apache.lucene.util.OfflineSorter)3 Pair (org.apache.lucene.util.fst.PairOutputs.Pair)3 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)2 TokenStreamToAutomaton (org.apache.lucene.analysis.TokenStreamToAutomaton)2 Directory (org.apache.lucene.store.Directory) IndexInput (org.apache.lucene.store.IndexInput) CharsRef (org.apache.lucene.util.CharsRef)2 LimitedFiniteStringsIterator (org.apache.lucene.util.automaton.LimitedFiniteStringsIterator)2