Search in sources:

Example 1 with RandomAccessInputView

use of org.apache.flink.runtime.io.disk.RandomAccessInputView in project flink by apache.

the class BinaryRowDataTest method testHashAndCopy.

/**
 * Serializes a two-string row into paged memory, maps it back, and checks that equality,
 * hash codes and copies agree between the original row and the mapped row.
 */
@Test
public void testHashAndCopy() throws IOException {
    // Three 64-byte pages back both the output view and the input view.
    final int pageSize = 64;
    final MemorySegment[] pages = new MemorySegment[3];
    for (int idx = 0; idx < 3; idx++) {
        pages[idx] = MemorySegmentFactory.wrap(new byte[pageSize]);
    }
    final RandomAccessOutputView pagedOut = new RandomAccessOutputView(pages, pageSize);
    final BinaryRowDataSerializer rowSerializer = new BinaryRowDataSerializer(2);

    // The two field values have the same length and differ only in their last character.
    final BinaryRowData original = new BinaryRowData(2);
    final BinaryRowWriter rowWriter = new BinaryRowWriter(original);
    rowWriter.writeString(0, fromString("hahahahahahahahahahahahahahahahahahahhahahahahahahahahah"));
    rowWriter.writeString(1, fromString("hahahahahahahahahahahahahahahahahahahhahahahahahahahahaa"));
    rowWriter.complete();
    rowSerializer.serializeToPages(original, pagedOut);

    // Map the row back from the very same pages.
    final ArrayList<MemorySegment> pageList = new ArrayList<>(Arrays.asList(pages));
    final RandomAccessInputView pagedIn = new RandomAccessInputView(pageList, pageSize, pageSize);
    final BinaryRowData restored = rowSerializer.mapFromPages(rowSerializer.createInstance(), pagedIn);

    // contents must survive the round trip
    assertEquals(original, restored);
    assertEquals(original.getString(0), restored.getString(0));
    assertEquals(original.getString(1), restored.getString(1));
    assertNotEquals(original.getString(0), restored.getString(1));

    // hash codes must be the same before and after serialization
    assertEquals(original.hashCode(), restored.hashCode());
    assertEquals(original.getString(0).hashCode(), restored.getString(0).hashCode());
    assertEquals(original.getString(1).hashCode(), restored.getString(1).hashCode());

    // copies of the row and of its string fields must be equal as well
    assertEquals(original.copy(), restored.copy());
    assertEquals(
            ((BinaryStringData) original.getString(0)).copy(),
            ((BinaryStringData) restored.getString(0)).copy());
    assertEquals(
            ((BinaryStringData) original.getString(1)).copy(),
            ((BinaryStringData) restored.getString(1)).copy());
}
Also used : RandomAccessInputView(org.apache.flink.runtime.io.disk.RandomAccessInputView) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) ArrayList(java.util.ArrayList) RandomAccessOutputView(org.apache.flink.runtime.io.disk.RandomAccessOutputView) MemorySegment(org.apache.flink.core.memory.MemorySegment) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) Test(org.junit.Test)

Example 2 with RandomAccessInputView

use of org.apache.flink.runtime.io.disk.RandomAccessInputView in project flink by apache.

the class BinaryRowDataTest method testGenericObject.

/**
 * Writes one int and three raw {@code MyObj} values into a row, verifies the row, then
 * round-trips it through paged memory and verifies the mapped row the same way.
 */
@Test
public void testGenericObject() throws Exception {
    // Serializers for the generic MyObj payload that is stored as raw values.
    GenericTypeInfo<MyObj> typeInfo = new GenericTypeInfo<>(MyObj.class);
    TypeSerializer<MyObj> objSerializer = typeInfo.createSerializer(new ExecutionConfig());
    RawValueDataSerializer<MyObj> rawSerializer = new RawValueDataSerializer<>(objSerializer);

    final int arity = 4;
    BinaryRowData source = new BinaryRowData(arity);
    BinaryRowWriter rowWriter = new BinaryRowWriter(source);
    rowWriter.writeInt(0, 0);
    RawValueData<MyObj> first = RawValueData.fromObject(new MyObj(0, 1));
    rowWriter.writeRawValue(1, first, rawSerializer);
    RawValueData<MyObj> second = RawValueData.fromObject(new MyObj(123, 5.0));
    rowWriter.writeRawValue(2, second, rawSerializer);
    RawValueData<MyObj> third = RawValueData.fromObject(new MyObj(1, 1));
    rowWriter.writeRawValue(3, third, rawSerializer);
    rowWriter.complete();
    assertTestGenericObjectRow(source, objSerializer);

    // getBytes from var-length memorySegments: round-trip the row through three 64-byte pages.
    final int pageSize = 64;
    final int pageCount = 3;
    BinaryRowDataSerializer rowSerializer = new BinaryRowDataSerializer(arity);
    MemorySegment[] pages = new MemorySegment[pageCount];
    ArrayList<MemorySegment> pageList = new ArrayList<>();
    for (int idx = 0; idx < pageCount; idx++) {
        MemorySegment page = MemorySegmentFactory.wrap(new byte[pageSize]);
        pages[idx] = page;
        pageList.add(page);
    }
    RandomAccessOutputView pagedOut = new RandomAccessOutputView(pages, pageSize);
    rowSerializer.serializeToPages(source, pagedOut);

    BinaryRowData restored = rowSerializer.createInstance();
    RandomAccessInputView pagedIn = new RandomAccessInputView(pageList, pageSize);
    restored = rowSerializer.mapFromPages(restored, pagedIn);
    assertTestGenericObjectRow(restored, objSerializer);
}
Also used : RandomAccessInputView(org.apache.flink.runtime.io.disk.RandomAccessInputView) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) RandomAccessOutputView(org.apache.flink.runtime.io.disk.RandomAccessOutputView) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) MemorySegment(org.apache.flink.core.memory.MemorySegment) RawValueDataSerializer(org.apache.flink.table.runtime.typeutils.RawValueDataSerializer) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) MyObj(org.apache.flink.table.data.util.DataFormatTestUtil.MyObj) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) Test(org.junit.Test)

Example 3 with RandomAccessInputView

use of org.apache.flink.runtime.io.disk.RandomAccessInputView in project flink by apache.

the class BinaryRowDataTest method testSerStringToKryo.

/**
 * Serializes a {@code BinaryStringData} with a Kryo serializer into a paged output view and
 * checks that deserializing from the written segment yields an equal string.
 */
@Test
public void testSerStringToKryo() throws IOException {
    KryoSerializer<BinaryStringData> kryo =
            new KryoSerializer<>(BinaryStringData.class, new ExecutionConfig());
    BinaryStringData expected = BinaryStringData.fromString("hahahahaha");

    // A single 1024-byte segment backs the output view, which is created with a segment size of 64.
    MemorySegment[] backing = new MemorySegment[] { MemorySegmentFactory.wrap(new byte[1024]) };
    RandomAccessOutputView pagedOut = new RandomAccessOutputView(backing, 64);
    kryo.serialize(expected, pagedOut);

    // Read back from the segment the output view is currently positioned in.
    ArrayList<MemorySegment> written =
            new ArrayList<>(Collections.singletonList(pagedOut.getCurrentSegment()));
    RandomAccessInputView pagedIn = new RandomAccessInputView(written, 64, 64);
    StringData actual = kryo.deserialize(pagedIn);
    assertEquals(expected, actual);
}
Also used : RandomAccessInputView(org.apache.flink.runtime.io.disk.RandomAccessInputView) BinaryStringData(org.apache.flink.table.data.binary.BinaryStringData) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) RandomAccessOutputView(org.apache.flink.runtime.io.disk.RandomAccessOutputView) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) Test(org.junit.Test)

Example 4 with RandomAccessInputView

use of org.apache.flink.runtime.io.disk.RandomAccessInputView in project flink by apache.

the class BytesHashMapTestBase method testResetAndOutput.

/**
 * Appends {@code NUM_ENTRIES} keys to a bytes hash map. Whenever appending throws, the records
 * currently held in the record area are read out, the map is reset and the append is retried.
 * At the end the collected values must equal the expected values.
 */
@Test
public void testResetAndOutput() throws Exception {
    final Random random = new Random(RANDOM_SEED);
    final int segmentCount = 64;
    final int memoryBudget = segmentCount * PAGE_SIZE;
    MemoryManager manager = MemoryManagerBuilder.newBuilder().setMemorySize(memoryBudget).build();
    AbstractBytesHashMap<K> map = createBytesHashMap(manager, memoryBudget, KEY_TYPES, VALUE_TYPES);

    K[] randomKeys = generateRandomKeys(NUM_ENTRIES);
    List<BinaryRowData> expected = new ArrayList<>(NUM_ENTRIES);
    List<BinaryRowData> actualValues = new ArrayList<>(NUM_ENTRIES);
    List<K> actualKeys = new ArrayList<>(NUM_ENTRIES);

    for (int keyIdx = 0; keyIdx < NUM_ENTRIES; keyIdx++) {
        K groupKey = randomKeys[keyIdx];
        // every key is new, so the lookup must miss before we insert
        BytesMap.LookupInfo<K, BinaryRowData> lookupInfo = map.lookup(groupKey);
        Assert.assertFalse(lookupInfo.isFound());
        try {
            BinaryRowData appended = map.append(lookupInfo, defaultValue);
            Assert.assertNotNull(appended);
            // mock multiple updates
            for (int rewrite = 0; rewrite < NUM_REWRITES; rewrite++) {
                updateOutputBuffer(appended, random);
            }
            expected.add(appended.copy());
        } catch (Exception e) {
            // Drain what is stored so far straight from the record area pages.
            ArrayList<MemorySegment> recordPages = map.getRecordAreaMemorySegments();
            RandomAccessInputView recordView =
                    new RandomAccessInputView(recordPages, recordPages.get(0).size());
            K keyReuse = keySerializer.createInstance();
            BinaryRowData valueReuse = valueSerializer.createInstance();
            for (int element = 0; element < map.getNumElements(); element++) {
                keyReuse = keySerializer.mapFromPages(keyReuse, recordView);
                valueReuse = valueSerializer.mapFromPages(valueReuse, recordView);
                actualKeys.add(keySerializer.copy(keyReuse));
                actualValues.add(valueReuse.copy());
            }
            map.reset();

            // retry the failed append against the freshly reset map
            lookupInfo = map.lookup(groupKey);
            BinaryRowData retried = map.append(lookupInfo, defaultValue);
            Assert.assertNotNull(retried);
            // mock multiple updates
            for (int rewrite = 0; rewrite < NUM_REWRITES; rewrite++) {
                updateOutputBuffer(retried, random);
            }
            expected.add(retried.copy());
        }
    }

    // Collect whatever is still held by the map after the last reset.
    KeyValueIterator<K, BinaryRowData> remaining = map.getEntryIterator(false);
    while (remaining.advanceNext()) {
        actualKeys.add(keySerializer.copy(remaining.getKey()));
        actualValues.add(remaining.getValue().copy());
    }

    Assert.assertEquals(NUM_ENTRIES, expected.size());
    Assert.assertEquals(NUM_ENTRIES, actualKeys.size());
    Assert.assertEquals(NUM_ENTRIES, actualValues.size());
    Assert.assertEquals(expected, actualValues);
    map.free();
}
Also used : RandomAccessInputView(org.apache.flink.runtime.io.disk.RandomAccessInputView) ArrayList(java.util.ArrayList) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) Random(java.util.Random) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) Test(org.junit.Test)

Example 5 with RandomAccessInputView

use of org.apache.flink.runtime.io.disk.RandomAccessInputView in project flink by apache.

the class BinaryHashBucketArea method findFirstSameBuildRow.

/**
 * For distinct build: walks the bucket starting at {@code bucketInSegmentOffset} and any
 * chained overflow buckets, looking for a stored build row whose hash code is
 * {@code searchHashCode} and which equals {@code buildRowToInsert}.
 *
 * @param bucket segment holding the first bucket to inspect
 * @param searchHashCode hash code that stored entries are compared against
 * @param bucketInSegmentOffset offset of the bucket inside {@code bucket}
 * @param buildRowToInsert row that candidates with a matching hash code are compared to
 * @return {@code true} if an equal row is found, {@code false} otherwise
 * @throws RuntimeException if reading a candidate row from the build state view fails
 */
private boolean findFirstSameBuildRow(MemorySegment bucket, int searchHashCode, int bucketInSegmentOffset, BinaryRowData buildRowToInsert) {
    int entriesInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
    final RandomAccessInputView buildView = partition.getBuildStateInputView();

    while (entriesInBucket != 0) {
        // hash codes are stored back to back right after the bucket header
        final int hashCodesStart = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
        for (int slot = 0; slot < entriesInBucket; slot++) {
            final int storedCode = bucket.getInt(hashCodesStart + slot * HASH_CODE_LEN);
            if (storedCode != searchHashCode) {
                continue;
            }
            // hash matches: load the row behind this slot's pointer and compare it
            final int pointer = bucket.getInt(bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (slot * POINTER_LEN));
            try {
                buildView.setReadPosition(pointer);
                BinaryRowData candidate = table.binaryBuildSideSerializer.mapFromPages(table.reuseBuildRow, buildView);
                if (buildRowToInsert.equals(candidate)) {
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException("Error deserializing key or value from the hashtable: " + e.getMessage(), e);
            }
        }

        // this bucket is exhausted; follow the forward pointer to a chained bucket, if any
        final int forwardPointer = bucket.getInt(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            return false;
        }
        bucket = overflowSegments[forwardPointer >>> table.segmentSizeBits];
        bucketInSegmentOffset = forwardPointer & table.segmentSizeMask;
        entriesInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
    }
    return false;
}
Also used : RandomAccessInputView(org.apache.flink.runtime.io.disk.RandomAccessInputView) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) IOException(java.io.IOException)

Aggregations

RandomAccessInputView (org.apache.flink.runtime.io.disk.RandomAccessInputView)7 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)6 ArrayList (java.util.ArrayList)5 RandomAccessOutputView (org.apache.flink.runtime.io.disk.RandomAccessOutputView)5 Test (org.junit.Test)5 MemorySegment (org.apache.flink.core.memory.MemorySegment)4 BinaryRowWriter (org.apache.flink.table.data.writer.BinaryRowWriter)3 BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)3 IOException (java.io.IOException)2 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)2 Random (java.util.Random)1 GenericTypeInfo (org.apache.flink.api.java.typeutils.GenericTypeInfo)1 KryoSerializer (org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer)1 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)1 StringData.fromString (org.apache.flink.table.data.StringData.fromString)1 BinaryStringData (org.apache.flink.table.data.binary.BinaryStringData)1 MyObj (org.apache.flink.table.data.util.DataFormatTestUtil.MyObj)1 RawValueDataSerializer (org.apache.flink.table.runtime.typeutils.RawValueDataSerializer)1