
Example 16 with ByteArrayDataInput

Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

The class OrdsSegmentTermsEnum, method printSeekState.

@SuppressWarnings("unused")
private void printSeekState(PrintStream out) throws IOException {
    if (currentFrame == staticFrame) {
        out.println("  no prior seek");
    } else {
        out.println("  prior seek state:");
        int ord = 0;
        boolean isSeekFrame = true;
        while (true) {
            OrdsSegmentTermsEnumFrame f = getFrame(ord);
            assert f != null;
            final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
            if (f.nextEnt == -1) {
                out.println("    frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) + (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd() + " termOrd=" + f.termOrd);
            } else {
                out.println("    frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) + (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd() + " termOrd=" + f.termOrd);
            }
            if (fr.index != null) {
                assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
                if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) {
                    out.println("      broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix - 1) & 0xFF));
                    throw new RuntimeException("seek state is broken");
                }
                Output output = Util.get(fr.index, prefix);
                if (output == null) {
                    out.println("      broken seek state: prefix is not final in index");
                    throw new RuntimeException("seek state is broken");
                } else if (isSeekFrame && !f.isFloor) {
                    final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes.bytes, output.bytes.offset, output.bytes.length);
                    final long codeOrig = reader.readVLong();
                    final long code = (f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0) | (f.isFloor ? OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0);
                    if (codeOrig != code) {
                        out.println("      broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
                        throw new RuntimeException("seek state is broken");
                    }
                }
            }
            if (f == currentFrame) {
                break;
            }
            if (f.prefix == validIndexPrefix) {
                isSeekFrame = false;
            }
            ord++;
        }
    }
}
Also used: Output (org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output), ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput), BytesRef (org.apache.lucene.util.BytesRef)
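
The ByteArrayDataInput use worth noting here is the consistency check: printSeekState wraps the FST output's raw bytes in a ByteArrayDataInput, re-reads the vLong-encoded frame code, and compares it against the code recomputed from the frame's own fields. Below is a minimal standalone sketch of that round trip; the class name, fp value, and flag layout are illustrative stand-ins, not values from a real index.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class VerifyCodeSketch {
    public static void main(String[] args) throws Exception {
        // Illustrative frame code: a file pointer shifted past two flag bits,
        // with the low bit set as a stand-in for one of the flags.
        long expectedCode = (1234L << 2) | 1;
        byte[] scratch = new byte[10]; // a vLong occupies at most 10 bytes
        ByteArrayDataOutput out = new ByteArrayDataOutput(scratch);
        out.writeVLong(expectedCode);
        // The check from printSeekState: wrap the raw bytes and re-read the code.
        ByteArrayDataInput reader = new ByteArrayDataInput(scratch, 0, out.getPosition());
        long codeOrig = reader.readVLong();
        if (codeOrig != expectedCode) {
            throw new RuntimeException("seek state is broken");
        }
        System.out.println("code verified: " + codeOrig);
    }
}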

Example 17 with ByteArrayDataInput

Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

The class OrdsSegmentTermsEnumFrame, method loadBlock.

/* Does the initial decode of the next block of terms; this
   doesn't actually decode the docFreq, totalTermFreq,
   postings details (frq/prx offset, etc.) metadata;
   it just loads them as byte[] blobs which are then
   decoded on demand if the metadata is ever requested
   for any term in this block. This enables terms-only
   intensive consumers (e.g. certain MTQs, respelling) to
   avoid paying the price of decoding metadata they won't
   use. */
void loadBlock() throws IOException {
    // Clone the IndexInput lazily, so that consumers
    // that just pull a TermsEnum to
    // seekExact(TermState) don't pay this cost:
    ste.initIndexInput();
    if (nextEnt != -1) {
        // Already loaded
        return;
    }
    // System.out.println("loadBlock ord=" + ord + " termOrdOrig=" + termOrdOrig + " termOrd=" + termOrd + " fp=" + fp);
    ste.in.seek(fp);
    int code = ste.in.readVInt();
    entCount = code >>> 1;
    assert entCount > 0;
    isLastInFloor = (code & 1) != 0;
    assert arc == null || (isLastInFloor || isFloor);
    // TODO: if suffixes were stored in a random-access
    // array structure, then we could do a binary search
    // instead of a linear scan to find the target term; e.g.
    // we could have a simple array of offsets
    // term suffixes:
    code = ste.in.readVInt();
    isLeafBlock = (code & 1) != 0;
    int numBytes = code >>> 1;
    if (suffixBytes.length < numBytes) {
        suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ste.in.readBytes(suffixBytes, 0, numBytes);
    suffixesReader.reset(suffixBytes, 0, numBytes);
    /*if (DEBUG) {
      if (arc == null) {
        System.out.println("    loadBlock (next) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
      } else {
        System.out.println("    loadBlock (seek) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
      }
    }*/
    // stats
    numBytes = ste.in.readVInt();
    if (statBytes.length < numBytes) {
        statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    // System.out.println("READ stats numBytes=" + numBytes + " fp=" + ste.in.getFilePointer());
    ste.in.readBytes(statBytes, 0, numBytes);
    statsReader.reset(statBytes, 0, numBytes);
    metaDataUpto = 0;
    state.termBlockOrd = 0;
    nextEnt = 0;
    lastSubFP = -1;
    // TODO: we could skip this if !hasTerms; but
    // that's rare so won't help much
    // metadata
    numBytes = ste.in.readVInt();
    if (bytes == null) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
        bytesReader = new ByteArrayDataInput();
    } else if (bytes.length < numBytes) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ste.in.readBytes(bytes, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);
    // Sub-blocks of a single floor block are always
    // written one after another -- tail recurse:
    fpEnd = ste.in.getFilePointer();
// if (DEBUG) {
//   System.out.println("      fpEnd=" + fpEnd);
// }
}
Also used: ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput)
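
loadBlock reads three length-prefixed regions (suffixes, stats, metadata) with the same idiom each time: read a vInt length, grow a reusable scratch buffer only when it is too small, fill it from the input, and re-point a reusable ByteArrayDataInput at the filled region. A minimal sketch of that idiom, using a ByteArrayDataInput as a stand-in for the real IndexInput; the class and the readRegion helper are hypothetical names for illustration.

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.ArrayUtil;

public class ReuseBufferSketch {
    private byte[] bytes = new byte[0]; // scratch buffer, reused across blocks
    private final ByteArrayDataInput bytesReader = new ByteArrayDataInput();

    // Mirrors the loadBlock idiom: read a vInt length prefix, grow the scratch
    // buffer only when it is too small (ArrayUtil.oversize over-allocates to
    // amortize growth), then re-point the reusable reader at the filled region.
    void readRegion(ByteArrayDataInput in) throws IOException {
        int numBytes = in.readVInt();
        if (bytes.length < numBytes) {
            bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
        }
        in.readBytes(bytes, 0, numBytes);
        bytesReader.reset(bytes, 0, numBytes);
    }

    public static void main(String[] args) throws Exception {
        byte[] buffer = new byte[16];
        ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
        out.writeVInt(3); // length prefix
        out.writeBytes(new byte[] { 1, 2, 3 }, 0, 3);
        ReuseBufferSketch sketch = new ReuseBufferSketch();
        sketch.readRegion(new ByteArrayDataInput(buffer, 0, out.getPosition()));
        System.out.println(sketch.bytesReader.readByte()); // prints 1
    }
}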

Example 18 with ByteArrayDataInput

Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

The class SortedInputIterator, method next.

@Override
public BytesRef next() throws IOException {
    boolean success = false;
    if (done) {
        return null;
    }
    try {
        ByteArrayDataInput input = new ByteArrayDataInput();
        BytesRef bytes = reader.next();
        if (bytes != null) {
            weight = decode(bytes, input);
            if (hasPayloads) {
                payload = decodePayload(bytes, input);
            }
            if (hasContexts) {
                contexts = decodeContexts(bytes, input);
            }
            success = true;
            return bytes;
        }
        close();
        success = done = true;
        return null;
    } finally {
        if (!success) {
            done = true;
            close();
        }
    }
}
Also used: ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput), BytesRef (org.apache.lucene.util.BytesRef)
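
next() leans on decode, decodePayload, and decodeContexts to pull fields out of a single serialized entry through one shared ByteArrayDataInput. Below is a sketch of what the decode side of such a scheme can look like, assuming, purely for illustration, that the trailing 8 bytes of each entry hold the weight; the real layout is defined by SortedInputIterator's own encode, and the class name here is hypothetical.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;

public class DecodeSketch {
    // Illustrative layout only: the trailing 8 bytes of each entry hold the
    // weight. decode() reads that tail through the shared ByteArrayDataInput
    // and shrinks the BytesRef so callers see only the leading bytes.
    static long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
        tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
        tmpInput.skipBytes(scratch.length - 8); // jump to the trailing weight
        long weight = tmpInput.readLong();
        scratch.length -= 8; // hide the weight from the caller
        return weight;
    }

    public static void main(String[] args) {
        byte[] entry = { 'a', 'b', 0, 0, 0, 0, 0, 0, 0, 42 }; // "ab" + weight 42
        BytesRef scratch = new BytesRef(entry);
        long weight = decode(scratch, new ByteArrayDataInput());
        System.out.println(scratch.utf8ToString() + " weight=" + weight); // ab weight=42
    }
}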

Example 19 with ByteArrayDataInput

Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

The class TestCompressingStoredFieldsFormat, method testZDouble.

public void testZDouble() throws Exception {
    // we never need more than 9 bytes
    byte[] buffer = new byte[9];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
        double x = (double) i;
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        // check that compression actually works
        if (i >= -1 && i <= 124) {
            // single byte compression
            assertEquals(1, out.getPosition());
        }
        out.reset(buffer);
    }
    // round-trip special values
    double[] special = { -0.0d, +0.0d, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.MIN_VALUE, Double.MAX_VALUE, Double.NaN };
    for (double x : special) {
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
        double x = r.nextDouble() * (random().nextInt(100) - 50);
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= (x < 0 ? 9 : 8));
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
    // same with floats
    for (int i = 0; i < 100000; i++) {
        double x = (double) (r.nextFloat() * (random().nextInt(100) - 50));
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= 5);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
}
Also used: Random (java.util.Random), ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput), ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput), IntPoint (org.apache.lucene.document.IntPoint)
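
The write, reset, read, assert-eof loop in this test is a reusable harness for any DataOutput encoding, not just ZDouble. A minimal sketch of the same loop with plain vLongs; the class name and test values are illustrative.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class RoundTripSketch {
    public static void main(String[] args) throws Exception {
        // A vLong occupies at most 10 bytes, so a 10-byte scratch buffer suffices.
        byte[] buffer = new byte[10];
        ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
        ByteArrayDataInput in = new ByteArrayDataInput(buffer);
        for (long value : new long[] { 0, 1, 127, 128, Long.MAX_VALUE }) {
            out.writeVLong(value);
            in.reset(buffer, 0, out.getPosition()); // read exactly what was written
            long read = in.readVLong();
            if (!in.eof() || read != value) {
                throw new AssertionError("round trip failed for " + value);
            }
            out.reset(buffer); // rewind the output for the next value
        }
        System.out.println("all values round-tripped");
    }
}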

Example 20 with ByteArrayDataInput

Use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

The class TestCompressingStoredFieldsFormat, method testTLong.

public void testTLong() throws Exception {
    // we never need more than 10 bytes
    byte[] buffer = new byte[10];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
        for (long mul : new long[] { SECOND, HOUR, DAY }) {
            long l1 = (long) i * mul;
            CompressingStoredFieldsWriter.writeTLong(out, l1);
            in.reset(buffer, 0, out.getPosition());
            long l2 = CompressingStoredFieldsReader.readTLong(in);
            assertTrue(in.eof());
            assertEquals(l1, l2);
            // check that compression actually works
            if (i >= -16 && i <= 15) {
                // single byte compression
                assertEquals(1, out.getPosition());
            }
            out.reset(buffer);
        }
    }
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
        final int numBits = r.nextInt(65);
        long l1 = r.nextLong() & ((1L << numBits) - 1);
        switch(r.nextInt(4)) {
            case 0:
                l1 *= SECOND;
                break;
            case 1:
                l1 *= HOUR;
                break;
            case 2:
                l1 *= DAY;
                break;
            default:
                break;
        }
        CompressingStoredFieldsWriter.writeTLong(out, l1);
        in.reset(buffer, 0, out.getPosition());
        long l2 = CompressingStoredFieldsReader.readTLong(in);
        assertTrue(in.eof());
        assertEquals(l1, l2);
        out.reset(buffer);
    }
}
Also used: Random (java.util.Random), ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput), ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput), IntPoint (org.apache.lucene.document.IntPoint)
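
writeTLong gets its compression wins by factoring out common timestamp multiples before variable-length encoding, which is why the test multiplies its inputs by SECOND, HOUR, and DAY. Below is a hypothetical re-creation of that idea; the 2-bit tag scheme and class name are invented for illustration and are not Lucene's on-disk format.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class TimestampSketch {
    static final long SECOND = 1000L, HOUR = 60 * 60 * SECOND, DAY = 24 * HOUR;

    public static void main(String[] args) throws Exception {
        long millis = 42 * DAY; // exactly divisible by DAY
        byte[] buffer = new byte[10];
        ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
        // Invented scheme: pick the largest unit that divides the value, tag it
        // in the low 2 bits, and vLong-encode the much smaller quotient.
        long tag, quotient;
        if (millis % DAY == 0) {
            tag = 3; quotient = millis / DAY;
        } else if (millis % HOUR == 0) {
            tag = 2; quotient = millis / HOUR;
        } else if (millis % SECOND == 0) {
            tag = 1; quotient = millis / SECOND;
        } else {
            tag = 0; quotient = millis;
        }
        out.writeVLong((quotient << 2) | tag);
        // Decode: strip the tag, then multiply the quotient back up.
        ByteArrayDataInput in = new ByteArrayDataInput(buffer, 0, out.getPosition());
        long code = in.readVLong();
        long[] units = { 1, SECOND, HOUR, DAY };
        long decoded = (code >>> 2) * units[(int) (code & 3)];
        System.out.println(decoded == millis); // true: 2 bytes here vs 5 for the raw vLong
    }
}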

Aggregations

ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput): 26
BytesRef (org.apache.lucene.util.BytesRef): 16
ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput): 8
IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder): 5
IndexOutput (org.apache.lucene.store.IndexOutput): 4
BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 4
IOException (java.io.IOException): 3
HashSet (java.util.HashSet): 3
Random (java.util.Random): 3
IntPoint (org.apache.lucene.document.IntPoint): 3
CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder): 3
IntsRef (org.apache.lucene.util.IntsRef): 3
OfflineSorter (org.apache.lucene.util.OfflineSorter): 3
Pair (org.apache.lucene.util.fst.PairOutputs.Pair): 3
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 2
TokenStreamToAutomaton (org.apache.lucene.analysis.TokenStreamToAutomaton): 2
Directory (org.apache.lucene.store.Directory): 2
IndexInput (org.apache.lucene.store.IndexInput): 2
CharsRef (org.apache.lucene.util.CharsRef): 2
LimitedFiniteStringsIterator (org.apache.lucene.util.automaton.LimitedFiniteStringsIterator): 2