Search in sources :

Example 41 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method testInMemoryMutableHashTable.

/**
 * Joins a build side with 3 rows per key against a probe side with 10 rows per key and checks
 * that the join result has the size of the full per-key cross product.
 */
@Test
public void testInMemoryMutableHashTable() throws IOException {
    final int numKeys = 100000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;
    // build input: numKeys * buildValsPerKey = 300,000 rows, with 3 rows sharing the same key
    MutableObjectIterator<BinaryRowData> buildInput = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    // probe input: numKeys * probeValsPerKey = 1,000,000 rows, with 10 rows sharing a key
    MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
    // ----------------------------------------------------------------------------------------
    final BinaryHashTable table = newBinaryHashTable(this.buildSideSerializer, this.probeSideSerializer, new MyProjection(), new MyProjection(), memManager, 100 * PAGE_SIZE, ioManager);
    // every build row of a key pairs with every probe row of that key: 3,000,000 results (fits in an int)
    final int expectedNumResults = numKeys * buildValsPerKey * probeValsPerKey;
    int numRecordsInJoinResult = join(table, buildInput, probeInput);
    Assert.assertEquals("Wrong number of records in join result.", expectedNumResults, numRecordsInJoinResult);
    table.close();
    table.free();
}
Also used : BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 42 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method testSparseProbeSpilling.

/**
 * Joins a large build side against a probe side that holds only a handful of keys.
 *
 * <p>The build records are intended to spill, so that probe records are spilled as well; the
 * probe side is so small that some partitions remain empty.
 */
@Test
public void testSparseProbeSpilling() throws IOException, MemoryAllocationException {
    final int numBuildKeys = 1000000;
    final int numBuildVals = 1;
    final int numProbeKeys = 20;
    final int numProbeVals = 1;
    // only keys present on both sides can match, hence the min() over the two key counts
    final int expectedNumResults = Math.min(numProbeKeys, numBuildKeys) * numBuildVals * numProbeVals;

    MutableObjectIterator<BinaryRowData> buildInput = new UniformBinaryRowGenerator(numBuildKeys, numBuildVals, false);
    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(128 * PAGE_SIZE).build();
    final BinaryHashTable table = newBinaryHashTable(this.buildSideSerializer, this.probeSideSerializer, new MyProjection(), new MyProjection(), memManager, 100 * PAGE_SIZE, ioManager);

    MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numProbeKeys, numProbeVals, true);
    final int numRecordsInJoinResult = join(table, buildInput, probeInput);
    Assert.assertEquals("Wrong number of records in join result.", expectedNumResults, numRecordsInJoinResult);

    table.close();
    table.free();
}
Also used : BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 43 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method testSpillingHashJoinOneRecursionValidity.

/**
 * Drives the hash table by hand (build, probe, drain) and validates the per-key result counts.
 *
 * <p>Each match is handed to {@code testJoin} together with a validation map; afterwards the map
 * must contain every key exactly once, each with {@code probeValsPerKey * buildValsPerKey}
 * recorded matches.
 */
@Test
public void testSpillingHashJoinOneRecursionValidity() throws IOException {
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;
    // create a build input that gives 3 million pairs with 3 values sharing the same key
    MutableObjectIterator<BinaryRowData> buildInput = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    // create the map for validating the results; it is filled by testJoin (defined elsewhere)
    HashMap<Integer, Long> map = new HashMap<>(numKeys);
    // ----------------------------------------------------------------------------------------
    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
    final BinaryHashTable table = newBinaryHashTable(this.buildSideSerializer, this.probeSideSerializer, new MyProjection(), new MyProjection(), memManager, 100 * PAGE_SIZE, ioManager);
    // build phase: feed every build row into the table, then seal the build side
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();
    // probe phase: validate the matches of every probe row the table accepts right away
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }
    // drain the matches the table still reports once the probe input is exhausted
    // NOTE(review): presumably these come from spilled partitions - confirm against BinaryHashTable
    while (table.nextMatching()) {
        testJoin(table, map);
    }
    table.close();
    Assert.assertEquals("Wrong number of keys", numKeys, map.size());
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, probeValsPerKey * buildValsPerKey, val);
    }
    // ----------------------------------------------------------------------------------------
    table.free();
}
Also used : HashMap(java.util.HashMap) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) HashMap(java.util.HashMap) Map(java.util.Map) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 44 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method testRepeatBuildJoinWithSpill.

/**
 * Joins a build side whose keys repeat against a probe side with one row per key and checks that
 * the join produces fewer records than there are build rows.
 */
@Test
public void testRepeatBuildJoinWithSpill() throws Exception {
    final int numKeys = 30000;
    final int numRows = 300000;
    final int probeValsPerKey = 1;

    // Build side: numRows freshly allocated rows whose two int fields both hold
    // (sequence number % numKeys), so the keys cycle through [0, numKeys).
    MutableObjectIterator<BinaryRowData> buildInput = new MutableObjectIterator<BinaryRowData>() {

        int produced = 0;

        @Override
        public BinaryRowData next(BinaryRowData reuse) throws IOException {
            // the reuse instance is ignored; a new row is created for every call
            return next();
        }

        @Override
        public BinaryRowData next() throws IOException {
            if (++produced > numRows) {
                return null;
            }
            final int keyAndValue = produced % numKeys;
            final BinaryRowData fresh = new BinaryRowData(2);
            final BinaryRowWriter rowWriter = new BinaryRowWriter(fresh);
            rowWriter.writeInt(0, keyAndValue);
            rowWriter.writeInt(1, keyAndValue);
            rowWriter.complete();
            return fresh;
        }
    };
    MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);

    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(35 * PAGE_SIZE).build();
    final BinaryHashTable table = new BinaryHashTable(conf, new Object(), buildSideSerializer, probeSideSerializer, new MyProjection(), new MyProjection(), memManager, 35 * PAGE_SIZE, ioManager, 24, 200000, true, HashJoinType.INNER, null, false, new boolean[] { true }, true);

    final int numRecordsInJoinResult = join(table, buildInput, probeInput, true);
    Assert.assertTrue("Wrong number of records in join result.", numRecordsInJoinResult < numRows);

    table.close();
    table.free();
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 45 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method joinWithNextKey.

/**
 * Counts the rows the table's build-side iterator yields for the current probe row.
 *
 * <p>Rows are counted when a probe row is present, or, if {@code buildOuterJoin} is set, also
 * when there is no current probe row. In every other case the result is 0.
 *
 * @param table the hash table to read the build-side iterator and current probe row from
 * @param buildOuterJoin whether build rows without a probe row are counted as well
 * @return the number of build rows counted
 * @throws IOException declared for callers; the visible calls do not show where it originates
 */
private int joinWithNextKey(BinaryHashTable table, boolean buildOuterJoin) throws IOException {
    final RowIterator<BinaryRowData> matches = table.getBuildSideIterator();
    final RowData currentProbe = table.getCurrentProbeRow();
    // fetch the first build side value
    final BinaryRowData firstMatch = matches.advanceNext() ? matches.getRow() : null;
    if (firstMatch == null) {
        // nothing on the build side, so nothing to count
        return 0;
    }
    if (currentProbe == null && !buildOuterJoin) {
        // build rows without a probe row only count for build-outer joins
        return 0;
    }
    // the first row was already consumed above; count it plus whatever remains
    int matched = 1;
    while (matches.advanceNext()) {
        matched++;
    }
    return matched;
}
Also used : RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData)

Aggregations

BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)173 Test (org.junit.Test)81 BinaryRowWriter (org.apache.flink.table.data.writer.BinaryRowWriter)54 RowData (org.apache.flink.table.data.RowData)31 ArrayList (java.util.ArrayList)30 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)22 UniformBinaryRowGenerator (org.apache.flink.table.runtime.util.UniformBinaryRowGenerator)21 JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData)16 MemorySegment (org.apache.flink.core.memory.MemorySegment)15 MutableObjectIterator (org.apache.flink.util.MutableObjectIterator)14 GenericRowData (org.apache.flink.table.data.GenericRowData)13 Random (java.util.Random)12 BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)12 HashMap (java.util.HashMap)9 RowDataSerializer (org.apache.flink.table.runtime.typeutils.RowDataSerializer)9 Map (java.util.Map)7 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)7 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)7 RandomAccessInputView (org.apache.flink.runtime.io.disk.RandomAccessInputView)6 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)6