Search in sources :

Example 11 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

the class TestCompressingStoredFieldsFormat method testZFloat.

/**
 * Round-trips floats through {@code writeZFloat}/{@code readZFloat} over a shared 5-byte scratch
 * buffer. Every case checks that the reader consumed exactly what was written and that the
 * decoded bit pattern equals the encoded one; some cases also check the encoded length.
 */
public void testZFloat() throws Exception {
    // we never need more than 5 bytes
    final byte[] scratch = new byte[5];
    final ByteArrayDataOutput writer = new ByteArrayDataOutput(scratch);
    final ByteArrayDataInput reader = new ByteArrayDataInput(scratch);

    // Phase 1: integral values in [Short.MIN_VALUE, Short.MAX_VALUE)
    for (int value = Short.MIN_VALUE; value < Short.MAX_VALUE; value++) {
        final float expected = (float) value;
        CompressingStoredFieldsWriter.writeZFloat(writer, expected);
        reader.reset(scratch, 0, writer.getPosition());
        final float actual = CompressingStoredFieldsReader.readZFloat(reader);
        assertTrue(reader.eof());
        assertEquals(Float.floatToIntBits(expected), Float.floatToIntBits(actual));
        // values in [-1, 123] are expected to be encoded on a single byte
        final boolean expectSingleByte = value >= -1 && value <= 123;
        if (expectSingleByte) {
            assertEquals(1, writer.getPosition());
        }
        writer.reset(scratch);
    }

    // Phase 2: signed zeros, infinities, extremes and NaN
    final float[] specialValues = {
        -0.0f,
        +0.0f,
        Float.NEGATIVE_INFINITY,
        Float.POSITIVE_INFINITY,
        Float.MIN_VALUE,
        Float.MAX_VALUE,
        Float.NaN
    };
    for (final float expected : specialValues) {
        CompressingStoredFieldsWriter.writeZFloat(writer, expected);
        reader.reset(scratch, 0, writer.getPosition());
        final float actual = CompressingStoredFieldsReader.readZFloat(reader);
        assertTrue(reader.eof());
        assertEquals(Float.floatToIntBits(expected), Float.floatToIntBits(actual));
        writer.reset(scratch);
    }

    // Phase 3: random values; the sign bit decides the allowed encoded length (5 if set, else 4)
    final Random rnd = random();
    int iteration = 0;
    while (iteration < 100000) {
        final float fraction = rnd.nextFloat();
        final int scale = random().nextInt(100) - 50;
        final float expected = fraction * scale;
        CompressingStoredFieldsWriter.writeZFloat(writer, expected);
        final int maxLength = (Float.floatToIntBits(expected) >>> 31) == 1 ? 5 : 4;
        assertTrue("length=" + writer.getPosition() + ", f=" + expected, writer.getPosition() <= maxLength);
        reader.reset(scratch, 0, writer.getPosition());
        final float actual = CompressingStoredFieldsReader.readZFloat(reader);
        assertTrue(reader.eof());
        assertEquals(Float.floatToIntBits(expected), Float.floatToIntBits(actual));
        writer.reset(scratch);
        iteration++;
    }
}
Also used : Random(java.util.Random) ByteArrayDataOutput(org.apache.lucene.store.ByteArrayDataOutput) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) IntPoint(org.apache.lucene.document.IntPoint)

Example 12 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project elasticsearch by elastic.

the class BytesBinaryDVAtomicFieldData method getBytesValues.

/**
 * Returns a per-document view that decodes each binary value as a vInt count followed by
 * {@code count} entries, each made of a vInt length and that many bytes.
 */
@Override
public SortedBinaryDocValues getBytesValues() {
    return new SortedBinaryDocValues() {

        // number of values decoded for the current document
        int count;

        // reusable value buffers; the array only ever grows
        BytesRefBuilder[] refs = new BytesRefBuilder[0];

        final ByteArrayDataInput in = new ByteArrayDataInput();

        @Override
        public void setDocument(int docId) {
            final BytesRef encoded = values.get(docId);
            in.reset(encoded.bytes, encoded.offset, encoded.length);
            if (encoded.length == 0) {
                // an empty payload means the document has no values
                count = 0;
                return;
            }
            count = in.readVInt();
            ensureRefs(count);
            int slot = 0;
            while (slot < count) {
                final int valueLength = in.readVInt();
                final BytesRefBuilder target = refs[slot];
                target.grow(valueLength);
                in.readBytes(target.bytes(), 0, valueLength);
                target.setLength(valueLength);
                ++slot;
            }
        }

        // Grows refs to hold at least `needed` builders, allocating builders for the new slots only.
        private void ensureRefs(int needed) {
            if (needed <= refs.length) {
                return;
            }
            final int oldLength = refs.length;
            refs = Arrays.copyOf(refs, ArrayUtil.oversize(needed, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
            for (int slot = oldLength; slot < refs.length; ++slot) {
                refs[slot] = new BytesRefBuilder();
            }
        }

        @Override
        public int count() {
            return count;
        }

        @Override
        public BytesRef valueAt(int index) {
            final BytesRefBuilder builder = refs[index];
            return builder.get();
        }
    };
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues)

Example 13 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project elasticsearch by elastic.

the class ByteUtilsTests method testVLong.

/**
 * Writes a random mix of longs (zero, extremes, small positives, arbitrary) with
 * {@code ByteUtils.writeVLong} and checks they read back unchanged, in order. Negative values
 * must additionally occupy exactly {@code ByteUtils.MAX_BYTES_VLONG} bytes.
 */
public void testVLong() throws IOException {
    final int size = scaledRandomIntBetween(1000, 10000);
    final long[] data = new long[size];
    for (int idx = 0; idx < data.length; ++idx) {
        final long picked;
        switch(randomInt(4)) {
            case 0:
                picked = 0;
                break;
            case 1:
                picked = Long.MAX_VALUE;
                break;
            case 2:
                picked = Long.MIN_VALUE;
                break;
            case 3:
                picked = randomInt(1 << randomIntBetween(2, 30));
                break;
            case 4:
                picked = randomLong();
                break;
            default:
                throw new AssertionError();
        }
        data[idx] = picked;
    }

    // worst case: every value takes the maximum encoded width
    final byte[] encoded = new byte[ByteUtils.MAX_BYTES_VLONG * data.length];
    final ByteArrayDataOutput out = new ByteArrayDataOutput(encoded);
    for (final long value : data) {
        final int start = out.getPosition();
        ByteUtils.writeVLong(out, value);
        if (value < 0) {
            assertEquals(ByteUtils.MAX_BYTES_VLONG, out.getPosition() - start);
        }
    }

    final ByteArrayDataInput in = new ByteArrayDataInput(encoded);
    for (final long expected : data) {
        assertEquals(expected, ByteUtils.readVLong(in));
    }
}
Also used : ByteArrayDataOutput(org.apache.lucene.store.ByteArrayDataOutput) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput)

Example 14 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

the class BaseSynonymParserTestCase method assertEntryEquals.

/**
 * Asserts that the synonym map holds the expected entry for a word.
 *
 * <p>The entry is looked up in the map's FST and decoded as a vInt header followed by one vInt
 * word ordinal per synonym. In the header, a cleared low bit means the original is kept and the
 * remaining bits hold the synonym count.
 *
 * @param synonynMap  the synonym map produced by parsing
 * @param word        word (phrase) whose synonyms are validated, as emitted by the analyzer;
 *                    spaces are replaced by the word separator before lookup
 * @param includeOrig whether the entry is expected to keep the original
 * @param synonyms    synonyms the entry may contain; word separators in the stored synonyms are
 *                    replaced by a single space before comparison
 */
public static void assertEntryEquals(SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms) throws Exception {
    final String lookupKey = word.replace(' ', SynonymMap.WORD_SEPARATOR);
    final BytesRef entry = Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(lookupKey), new IntsRefBuilder()));
    assertNotNull("No synonyms found for: " + lookupKey, entry);

    final ByteArrayDataInput entryReader = new ByteArrayDataInput(entry.bytes, entry.offset, entry.length);
    final int header = entryReader.readVInt();

    final boolean keepOrig = (header & 0x1) == 0;
    assertEquals("Include original different than expected. Expected " + includeOrig + " was " + keepOrig, includeOrig, keepOrig);

    final int count = header >>> 1;
    assertEquals("Invalid synonym count. Expected " + synonyms.length + " was " + count, synonyms.length, count);

    final Set<String> expected = new HashSet<>(Arrays.asList(synonyms));
    final BytesRef wordBytes = new BytesRef();
    int remaining = count;
    while (remaining > 0) {
        final int ordinal = entryReader.readVInt();
        synonynMap.words.get(ordinal, wordBytes);
        final String synonym = wordBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
        assertTrue("Unexpected synonym found: " + synonym, expected.contains(synonym));
        remaining--;
    }
}
Also used : IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesRef(org.apache.lucene.util.BytesRef) CharsRef(org.apache.lucene.util.CharsRef) HashSet(java.util.HashSet)

Example 15 with ByteArrayDataInput

use of org.apache.lucene.store.ByteArrayDataInput in project lucene-solr by apache.

the class OrdsIntersectTermsEnumFrame method load.

/**
 * Loads the block at file pointer {@code fp} into this frame.
 *
 * <p>First, if {@code output} carries bytes and this frame has transitions, those bytes are
 * copied into {@code floorData} and decoded; when they describe a floor block, {@code fp} and
 * {@code termOrd} may be advanced past leading floor sub-blocks. Then the block header and its
 * suffix, stats and metadata byte sections are read from {@code ite.in} into reusable buffers,
 * and the per-block cursors are reset.
 *
 * @param output index output for this frame; consulted only when it is non-null, its
 *               {@code bytes} are non-null, and {@code transitionCount != 0}
 * @throws IOException propagated from reads on {@code ite.in}
 */
void load(Output output) throws IOException {
    if (output != null && output.bytes != null && transitionCount != 0) {
        BytesRef frameIndexData = output.bytes;
        // Floor frame
        // Copy the index bytes into our own (reused, grown on demand) buffer before decoding.
        if (floorData.length < frameIndexData.length) {
            this.floorData = new byte[ArrayUtil.oversize(frameIndexData.length, 1)];
        }
        System.arraycopy(frameIndexData.bytes, frameIndexData.offset, floorData, 0, frameIndexData.length);
        floorDataReader.reset(floorData, 0, frameIndexData.length);
        final long code = floorDataReader.readVLong();
        if ((code & OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0) {
            // Floor header: number of follow-on blocks, label of the next one, and a term-ord delta.
            numFollowFloorBlocks = floorDataReader.readVInt();
            nextFloorLabel = floorDataReader.readByte() & 0xff;
            termOrd = termOrdOrig + floorDataReader.readVLong();
            // If the current state is accepting, floor blocks are not skipped: the
            // first block must be processed in case it has empty suffix:
            if (!ite.runAutomaton.isAccept(state)) {
                // Maybe skip floor blocks:
                assert transitionIndex == 0 : "transitionIndex=" + transitionIndex;
                // Advance while the next floor block starts at or before the transition's minimum label.
                while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
                    // The stored value's low bit is dropped; the rest is an offset from fpOrig.
                    fp = fpOrig + (floorDataReader.readVLong() >>> 1);
                    numFollowFloorBlocks--;
                    // if (DEBUG) System.out.println("    skip floor block!  nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
                    if (numFollowFloorBlocks != 0) {
                        nextFloorLabel = floorDataReader.readByte() & 0xff;
                        termOrd += floorDataReader.readVLong();
                    } else {
                        // 256 is above every possible label (labels are masked to 0..255),
                        // presumably marking "no further floor block" -- TODO confirm with callers.
                        nextFloorLabel = 256;
                    }
                }
            }
        }
    }
    ite.in.seek(fp);
    // Block header: low bit = last block in floor, remaining bits = entry count.
    int code = ite.in.readVInt();
    entCount = code >>> 1;
    assert entCount > 0;
    isLastInFloor = (code & 1) != 0;
    // term suffixes:
    // low bit = leaf block flag, remaining bits = number of suffix bytes that follow.
    code = ite.in.readVInt();
    isLeafBlock = (code & 1) != 0;
    int numBytes = code >>> 1;
    // if (DEBUG) System.out.println("      entCount=" + entCount + " lastInFloor?=" + isLastInFloor + " leafBlock?=" + isLeafBlock + " numSuffixBytes=" + numBytes);
    if (suffixBytes.length < numBytes) {
        suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(suffixBytes, 0, numBytes);
    suffixesReader.reset(suffixBytes, 0, numBytes);
    // stats
    numBytes = ite.in.readVInt();
    if (statBytes.length < numBytes) {
        statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(statBytes, 0, numBytes);
    statsReader.reset(statBytes, 0, numBytes);
    // Reset the per-block cursors now that a new block has been loaded.
    metaDataUpto = 0;
    termState.termBlockOrd = 0;
    nextEnt = 0;
    // metadata
    numBytes = ite.in.readVInt();
    // The metadata buffer and its reader are allocated lazily on the first load.
    if (bytes == null) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
        bytesReader = new ByteArrayDataInput();
    } else if (bytes.length < numBytes) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(bytes, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);
    if (!isLastInFloor) {
        // Sub-blocks of a single floor block are always
        // written one after another -- tail recurse:
        // Remember the position right after this block (presumably where the next sub-block starts).
        fpEnd = ite.in.getFilePointer();
    }
}
Also used : ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput): 26, BytesRef (org.apache.lucene.util.BytesRef): 16, ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput): 8, IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder): 5, IndexOutput (org.apache.lucene.store.IndexOutput): 4, BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 4, IOException (java.io.IOException): 3, HashSet (java.util.HashSet): 3, Random (java.util.Random): 3, IntPoint (org.apache.lucene.document.IntPoint): 3, CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder): 3, IntsRef (org.apache.lucene.util.IntsRef): 3, OfflineSorter (org.apache.lucene.util.OfflineSorter): 3, Pair (org.apache.lucene.util.fst.PairOutputs.Pair): 3, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 2, TokenStreamToAutomaton (org.apache.lucene.analysis.TokenStreamToAutomaton): 2, Directory (org.apache.lucene.store.Directory): 2, IndexInput (org.apache.lucene.store.IndexInput): 2, CharsRef (org.apache.lucene.util.CharsRef): 2, LimitedFiniteStringsIterator (org.apache.lucene.util.automaton.LimitedFiniteStringsIterator): 2