Example 66 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

The class BinaryRowDataTest, method testRawValueData.

@Test
public void testRawValueData() {
    BinaryRowData row = new BinaryRowData(3);
    BinaryRowWriter writer = new BinaryRowWriter(row);
    RawValueDataSerializer<String> binarySerializer = new RawValueDataSerializer<>(StringSerializer.INSTANCE);
    RawValueData<String> hahah = RawValueData.fromObject("hahah");
    // write the same raw value into fields 0 and 2, leaving field 1 null
    writer.writeRawValue(0, hahah, binarySerializer);
    writer.setNullAt(1);
    writer.writeRawValue(2, hahah, binarySerializer);
    writer.complete();
    // read the fields back and check that the raw values round-trip
    RawValueData<String> generic0 = row.getRawValue(0);
    assertThat(generic0, equivalent(hahah, binarySerializer));
    assertTrue(row.isNullAt(1));
    RawValueData<String> generic2 = row.getRawValue(2);
    assertThat(generic2, equivalent(hahah, binarySerializer));
}
Also used: RawValueDataSerializer(org.apache.flink.table.runtime.typeutils.RawValueDataSerializer), BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData), BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter), StringData.fromString(org.apache.flink.table.data.StringData.fromString), Test(org.junit.Test)
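
The equivalent matcher used above is not a stock Hamcrest matcher; it is a helper defined in BinaryRowDataTest. A minimal sketch of such a matcher, assuming it compares the two RawValueData values after materializing them with the inner StringSerializer (the body below is an assumption for illustration, not the Flink implementation; the serializer parameter only mirrors the call site, and the sketch needs org.hamcrest.Matcher, org.hamcrest.TypeSafeMatcher, org.hamcrest.Description, and java.util.Objects):

private static Matcher<RawValueData<String>> equivalent(
        RawValueData<String> expected, RawValueDataSerializer<String> serializer) {
    return new TypeSafeMatcher<RawValueData<String>>() {
        @Override
        protected boolean matchesSafely(RawValueData<String> actual) {
            // RawValueData has no value-based equals, so compare the materialized objects
            return Objects.equals(
                    expected.toObject(StringSerializer.INSTANCE),
                    actual.toObject(StringSerializer.INSTANCE));
        }

        @Override
        public void describeTo(Description description) {
            description.appendText("a RawValueData equivalent to ").appendValue(expected);
        }
    };
}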

Example 67 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

The class BinaryHashTableTest, method testSpillingHashJoinWithTwoRecursions.

/*
 * This test is basically identical to the "testSpillingHashJoinWithMassiveCollisions" test, except that the
 * number of repeated values (causing bucket collisions) is large enough to ensure that their target partition
 * no longer fits into memory by itself and needs to be repartitioned again in the recursion.
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
    // the following two values are known to have a hash-code collision on the first
    // recursion level; we use them to make sure one partition grows disproportionately large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCountBuild = 200000;
    final int repeatedValueCountProbe = 5;
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;
    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // 400k pairs with two colliding keys
    MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
    MutableObjectIterator<BinaryRowData> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);
    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
    MutableObjectIterator<BinaryRowData> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);
    // create the map for validating the results
    HashMap<Integer, Long> map = new HashMap<>(numKeys);
    // ----------------------------------------------------------------------------------------
    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
    final BinaryHashTable table = newBinaryHashTable(this.buildSideSerializer, this.probeSideSerializer, new MyProjection(), new MyProjection(), memManager, 896 * PAGE_SIZE, ioManager);
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }
    while (table.nextMatching()) {
        testJoin(table, map);
    }
    table.close();
    Assert.assertEquals("Wrong number of keys", numKeys, map.size());
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        long expected =
                (key == repeatedValue1 || key == repeatedValue2)
                        ? (long) (probeValsPerKey + repeatedValueCountProbe)
                                * (buildValsPerKey + repeatedValueCountBuild)
                        : (long) probeValsPerKey * buildValsPerKey;
        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, expected, val);
    }
    // ----------------------------------------------------------------------------------------
    table.free();
}
Also used: MutableObjectIterator(org.apache.flink.util.MutableObjectIterator), UnionIterator(org.apache.flink.runtime.operators.testutils.UnionIterator), HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), MemoryManager(org.apache.flink.runtime.memory.MemoryManager), BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData), Map(java.util.Map), UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator), Test(org.junit.Test)
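
testJoin is a helper defined elsewhere in BinaryHashTableTest: for the current probe row it walks all matching build rows and accumulates the per-key pair count into the map. A sketch of that logic, assuming BinaryHashTable's getCurrentProbeRow() and getBuildSideIterator() accessors (the exact body is an assumption reconstructed from how the test uses the map):

private void testJoin(BinaryHashTable table, HashMap<Integer, Long> map) throws IOException {
    RowData probeRec = table.getCurrentProbeRow();
    int key = probeRec.getInt(0);
    long numBuildValues = 0;
    RowIterator<BinaryRowData> buildSide = table.getBuildSideIterator();
    while (buildSide.advanceNext()) {
        // every build-side match for this probe row is one pair in the per-key cross product
        numBuildValues++;
        Assert.assertEquals("Probe-side key was different than build-side key.",
                key, buildSide.getRow().getInt(0));
    }
    // accumulate pairs per key; the loop after table.close() verifies the totals
    Long contained = map.get(key);
    map.put(key, contained == null ? numBuildValues : contained + numBuildValues);
}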

Example 68 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

The class SumHashAggTestOperator, method processElement.

@Override
public void processElement(StreamRecord<RowData> element) throws Exception {
    RowData in1 = element.getValue();
    // project key from input
    currentKeyWriter.reset();
    if (in1.isNullAt(0)) {
        currentKeyWriter.setNullAt(0);
    } else {
        currentKeyWriter.writeInt(0, in1.getInt(0));
    }
    currentKeyWriter.complete();
    // look up output buffer using current group key
    BytesMap.LookupInfo<BinaryRowData, BinaryRowData> lookupInfo = aggregateMap.lookup(currentKey);
    BinaryRowData currentAggBuffer = lookupInfo.getValue();
    if (!lookupInfo.isFound()) {
        // append empty agg buffer into aggregate map for current group key
        try {
            currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
        } catch (EOFException exp) {
            // hash map out of memory, spill to external sorter
            if (sorter == null) {
                sorter = new BufferedKVExternalSorter(getIOManager(), new BinaryRowDataSerializer(keyTypes.length), new BinaryRowDataSerializer(aggBufferTypes.length), new IntNormalizedKeyComputer(), new IntRecordComparator(), getMemoryManager().getPageSize(), getConf());
            }
            // sort and spill
            sorter.sortAndSpill(aggregateMap.getRecordAreaMemorySegments(), aggregateMap.getNumElements(), new BytesHashMapSpillMemorySegmentPool(aggregateMap.getBucketAreaMemorySegments()));
            // reset the aggregate map, then retry the append
            aggregateMap.reset();
            lookupInfo = aggregateMap.lookup(currentKey);
            try {
                currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
            } catch (EOFException e) {
                throw new OutOfMemoryError("BytesHashMap Out of Memory.");
            }
        }
    }
    if (!in1.isNullAt(1)) {
        long sumInput = in1.getLong(1);
        if (currentAggBuffer.isNullAt(0)) {
            currentAggBuffer.setLong(0, sumInput);
        } else {
            currentAggBuffer.setLong(0, sumInput + currentAggBuffer.getLong(0));
        }
    }
}
Also used: IntRecordComparator(org.apache.flink.table.runtime.operators.sort.IntRecordComparator), GenericRowData(org.apache.flink.table.data.GenericRowData), RowData(org.apache.flink.table.data.RowData), BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData), JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData), IntNormalizedKeyComputer(org.apache.flink.table.runtime.operators.sort.IntNormalizedKeyComputer), BytesMap(org.apache.flink.table.runtime.util.collections.binary.BytesMap), EOFException(java.io.EOFException), BufferedKVExternalSorter(org.apache.flink.table.runtime.operators.sort.BufferedKVExternalSorter), BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)
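
processElement reads several operator fields that the snippet does not show. A sketch of how they would fit together for the single int group key and single nullable long sum used above, assuming BytesHashMap from org.apache.flink.table.runtime.util.collections.binary as the concrete map type (the names follow the method body; the initialization itself is an assumption, since the real setup lives in the operator's constructor or open()):

// sketch of the state processElement depends on; the real setup is assumed to live in open()
private final BinaryRowData currentKey = new BinaryRowData(1);      // one int group key
private final BinaryRowWriter currentKeyWriter = new BinaryRowWriter(currentKey);
private final BinaryRowData emptyAggBuffer = new BinaryRowData(1);  // one nullable long sum field
private BytesHashMap aggregateMap;                                  // bounded-memory map: group key -> agg buffer
private BufferedKVExternalSorter sorter;                            // created lazily on the first spill

// the empty agg buffer starts as SQL NULL, so the first input for a key initializes the sum
{
    BinaryRowWriter emptyWriter = new BinaryRowWriter(emptyAggBuffer);
    emptyWriter.setNullAt(0);
    emptyWriter.complete();
}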

Example 69 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

The class NestedRowDataTest, method getBinaryRowData.

private BinaryRowData getBinaryRowData() {
    BinaryRowData row = new BinaryRowData(1);
    BinaryRowWriter writer = new BinaryRowWriter(row);
    GenericTypeInfo<MyObj> info = new GenericTypeInfo<>(MyObj.class);
    TypeSerializer<MyObj> genericSerializer = info.createSerializer(new ExecutionConfig());
    GenericRowData gRow = new GenericRowData(5);
    gRow.setField(0, 1);
    gRow.setField(1, 5L);
    gRow.setField(2, StringData.fromString("12345678"));
    gRow.setField(3, null);
    gRow.setField(4, RawValueData.fromObject(new MyObj(15, 5)));
    RowDataSerializer serializer = new RowDataSerializer(
            new LogicalType[] {
                DataTypes.INT().getLogicalType(),
                DataTypes.BIGINT().getLogicalType(),
                DataTypes.STRING().getLogicalType(),
                DataTypes.STRING().getLogicalType(),
                DataTypes.RAW(info.getTypeClass(), info.createSerializer(new ExecutionConfig())).getLogicalType()
            },
            new TypeSerializer[] {
                IntSerializer.INSTANCE,
                LongSerializer.INSTANCE,
                StringDataSerializer.INSTANCE,
                StringDataSerializer.INSTANCE,
                new RawValueDataSerializer<>(genericSerializer)
            });
    writer.writeRow(0, gRow, serializer);
    writer.complete();
    return row;
}
Also used: BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData), BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter), ExecutionConfig(org.apache.flink.api.common.ExecutionConfig), MyObj(org.apache.flink.table.data.util.DataFormatTestUtil.MyObj), GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo), RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)
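
Reading the nested record back goes through getRow with the nested row's arity. A short usage sketch based on the fields written above (the assertions simply mirror those values):

BinaryRowData row = getBinaryRowData();
// arity 5 must match the GenericRowData written into field 0
RowData nestedRow = row.getRow(0, 5);
assertEquals(1, nestedRow.getInt(0));
assertEquals(5L, nestedRow.getLong(1));
assertEquals(StringData.fromString("12345678"), nestedRow.getString(2));
assertTrue(nestedRow.isNullAt(3));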

Example 70 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

The class NestedRowDataTest, method testNestInNestedRowData.

@Test
public void testNestInNestedRowData() {
    // layer1
    GenericRowData gRow = new GenericRowData(4);
    gRow.setField(0, 1);
    gRow.setField(1, 5L);
    gRow.setField(2, StringData.fromString("12345678"));
    gRow.setField(3, null);
    // layer2
    RowDataSerializer serializer = new RowDataSerializer(
            new LogicalType[] {
                DataTypes.INT().getLogicalType(),
                DataTypes.BIGINT().getLogicalType(),
                DataTypes.STRING().getLogicalType(),
                DataTypes.STRING().getLogicalType()
            },
            new TypeSerializer[] {
                IntSerializer.INSTANCE,
                LongSerializer.INSTANCE,
                StringDataSerializer.INSTANCE,
                StringDataSerializer.INSTANCE
            });
    BinaryRowData row = new BinaryRowData(2);
    BinaryRowWriter writer = new BinaryRowWriter(row);
    writer.writeString(0, StringData.fromString("hahahahafff"));
    writer.writeRow(1, gRow, serializer);
    writer.complete();
    // layer3
    BinaryRowData row2 = new BinaryRowData(1);
    BinaryRowWriter writer2 = new BinaryRowWriter(row2);
    // row is already in binary format, so its bytes can be copied without a serializer
    writer2.writeRow(0, row, null);
    writer2.complete();
    // verify
    {
        NestedRowData nestedRow = (NestedRowData) row2.getRow(0, 2);
        BinaryRowData binaryRow = new BinaryRowData(2);
        // point the fresh BinaryRowData at the nested row's memory region: a zero-copy view
        binaryRow.pointTo(nestedRow.getSegments(), nestedRow.getOffset(), nestedRow.getSizeInBytes());
        assertEquals(row, binaryRow);
    }
    assertEquals(StringData.fromString("hahahahafff"), row2.getRow(0, 2).getString(0));
    RowData nestedRow = row2.getRow(0, 2).getRow(1, 4);
    assertEquals(1, nestedRow.getInt(0));
    assertEquals(5L, nestedRow.getLong(1));
    assertEquals(StringData.fromString("12345678"), nestedRow.getString(2));
    assertTrue(nestedRow.isNullAt(3));
}
Also used: BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData), NestedRowData(org.apache.flink.table.data.binary.NestedRowData), BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter), RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer), Test(org.junit.Test)

Aggregations

BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 173 uses
Test (org.junit.Test): 81 uses
BinaryRowWriter (org.apache.flink.table.data.writer.BinaryRowWriter): 54 uses
RowData (org.apache.flink.table.data.RowData): 31 uses
ArrayList (java.util.ArrayList): 30 uses
MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 22 uses
UniformBinaryRowGenerator (org.apache.flink.table.runtime.util.UniformBinaryRowGenerator): 21 uses
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 16 uses
MemorySegment (org.apache.flink.core.memory.MemorySegment): 15 uses
MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 14 uses
GenericRowData (org.apache.flink.table.data.GenericRowData): 13 uses
Random (java.util.Random): 12 uses
BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer): 12 uses
HashMap (java.util.HashMap): 9 uses
RowDataSerializer (org.apache.flink.table.runtime.typeutils.RowDataSerializer): 9 uses
Map (java.util.Map): 7 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 7 uses
StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator): 7 uses
RandomAccessInputView (org.apache.flink.runtime.io.disk.RandomAccessInputView): 6 uses
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord): 6 uses