
Example 46 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method testBucketsNotFulfillSegment.

@Test
public void testBucketsNotFulfillSegment() throws Exception {
    final int numKeys = 10000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;
    // create a build input that gives 30000 pairs with 3 values sharing the same key
    MutableObjectIterator<BinaryRowData> buildInput = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    // create a probe input that gives 100000 pairs with 10 values sharing a key
    MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    // allocate the memory for the HashTable
    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(35 * PAGE_SIZE).build();
    // ----------------------------------------------------------------------------------------
    final BinaryHashTable table = new BinaryHashTable(
            conf, new Object(), this.buildSideSerializer, this.probeSideSerializer,
            new MyProjection(), new MyProjection(), memManager, 35 * PAGE_SIZE, ioManager,
            24, 200000, true, HashJoinType.INNER, null, false, new boolean[] { true }, false);
    // For FLINK-2545: the bucket data may not fill its whole buffer. For example, a buffer may
    // hold 256 buckets while the hash table only assigns 250 buckets to it. The unused buffer
    // bytes may contain arbitrary data, which can corrupt the hash table if it is not skipped.
    // To mock this, put an invalid bucket header (partition=1, inMemory=true, count=-1) at the
    // end of each buffer.
    int totalPages = table.getInternalPool().freePages();
    for (int i = 0; i < totalPages; i++) {
        MemorySegment segment = table.getInternalPool().nextSegment();
        int newBucketOffset = segment.size() - 128;
        // initialize the header fields
        segment.put(newBucketOffset, (byte) 0);
        segment.put(newBucketOffset + 1, (byte) 0);
        segment.putShort(newBucketOffset + 2, (short) -1);
        segment.putLong(newBucketOffset + 4, ~0x0L);
        table.returnPage(segment);
    }
    int numRecordsInJoinResult = join(table, buildInput, probeInput);
    Assert.assertEquals("Wrong number of records in join result.", numKeys * buildValsPerKey * probeValsPerKey, numRecordsInJoinResult);
    table.close();
    table.free();
}
Also used : BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) MemorySegment(org.apache.flink.core.memory.MemorySegment) Test(org.junit.Test)
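
The build and probe inputs above come from UniformBinaryRowGenerator, which emits fixed-arity (key, value) rows. For reference, here is a minimal sketch of assembling one such two-field row by hand with BinaryRowWriter (listed under Aggregations below); the (int, int) schema and the helper name newKeyValueRow are assumptions based on how these tests read keys via getInt(0), not part of the test class.

// Hypothetical helper: builds one (key, value) BinaryRowData, roughly what
// UniformBinaryRowGenerator produces for each pair. The (int, int) schema is assumed.
private static BinaryRowData newKeyValueRow(int key, int value) {
    BinaryRowData row = new BinaryRowData(2);          // arity = 2 fields
    BinaryRowWriter writer = new BinaryRowWriter(row); // allocates the backing segment
    writer.writeInt(0, key);                           // key field, read back via row.getInt(0)
    writer.writeInt(1, value);                         // value field
    writer.complete();                                 // finalize the fixed-length part and null bits
    return row;
}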

Example 47 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method testSpillingHashJoinOneRecursionPerformance.

@Test
public void testSpillingHashJoinOneRecursionPerformance() throws IOException {
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;
    // create a build input that gives 3 million pairs with 3 values sharing the same key
    MutableObjectIterator<BinaryRowData> buildInput = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    // allocate the memory for the HashTable
    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(200 * PAGE_SIZE).setPageSize(PAGE_SIZE).build();
    final BinaryHashTable table = newBinaryHashTable(this.buildSideSerializer, this.probeSideSerializer, new MyProjection(), new MyProjection(), memManager, 100 * PAGE_SIZE, ioManager);
    // ----------------------------------------------------------------------------------------
    int numRecordsInJoinResult = join(table, buildInput, probeInput);
    Assert.assertEquals("Wrong number of records in join result.", numKeys * buildValsPerKey * probeValsPerKey, numRecordsInJoinResult);
    table.close();
    // ----------------------------------------------------------------------------------------
    table.free();
}
Also used : BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)
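
The join(table, buildInput, probeInput) call is a test helper that does not appear in this listing. The sketch below is a hypothetical reconstruction of its shape, inferred from the build/probe loop of the LongHashTableTest example further down and the getBuildSideIterator() usage in Example 49; the method names on BinaryHashTable and the exact counting are assumptions, not the real helper.

// Hypothetical sketch of the join(...) helper: feed the build side, finish the build
// phase, then probe and count every matched build row, including spilled partitions.
private int join(
        BinaryHashTable table,
        MutableObjectIterator<BinaryRowData> buildInput,
        MutableObjectIterator<BinaryRowData> probeInput) throws IOException {
    int count = 0;
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            RowIterator<BinaryRowData> buildSide = table.getBuildSideIterator();
            while (buildSide.advanceNext()) {
                count++;
            }
        }
    }
    // drain matches from partitions that were spilled and are re-read after the first pass
    while (table.nextMatching()) {
        RowIterator<BinaryRowData> buildSide = table.getBuildSideIterator();
        while (buildSide.advanceNext()) {
            count++;
        }
    }
    return count;
}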

Example 48 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method testHashWithBuildSideOuterJoin2.

@Test
public void testHashWithBuildSideOuterJoin2() throws Exception {
    final int numKeys = 40000;
    final int buildValsPerKey = 2;
    final int probeValsPerKey = 1;
    // The keys of the probe and build sides overlap completely, so there are no unmatched build
    // elements left after the probe phase; make sure the build-side outer join works well in
    // this case.
    // create a build input that gives 80000 pairs with 2 values sharing the same key
    MutableObjectIterator<BinaryRowData> buildInput = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    // create a probe input that gives 40000 pairs with 1 value per key
    MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    // allocate the memory for the HashTable
    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(35 * PAGE_SIZE).build();
    final BinaryHashTable table = newBinaryHashTable(this.buildSideSerializer, this.probeSideSerializer, new MyProjection(), new MyProjection(), memManager, 33 * PAGE_SIZE, ioManager);
    // ----------------------------------------------------------------------------------------
    int numRecordsInJoinResult = join(table, buildInput, probeInput, true);
    Assert.assertEquals("Wrong number of records in join result.", numKeys * buildValsPerKey * probeValsPerKey, numRecordsInJoinResult);
    table.close();
    table.free();
}
Also used : BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)
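
Because every build key also occurs on the probe side in this test, the build-side outer join emits no null-padded rows, and the asserted count is just the per-key inner-join cross product. Spelled out with the constants above (illustrative only):

// Expected result size: one output row per (build row, matching probe row) pair.
int expected = numKeys * buildValsPerKey * probeValsPerKey; // 40000 * 2 * 1 = 80000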

Example 49 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class BinaryHashTableTest method testJoin.

private void testJoin(BinaryHashTable table, HashMap<Integer, Long> map) throws IOException {
    BinaryRowData record;
    int numBuildValues = 0;
    final RowData probeRec = table.getCurrentProbeRow();
    int key = probeRec.getInt(0);
    RowIterator<BinaryRowData> buildSide = table.getBuildSideIterator();
    if (buildSide.advanceNext()) {
        numBuildValues = 1;
        record = buildSide.getRow();
        assertEquals("Probe-side key was different than build-side key.", key, record.getInt(0));
    } else {
        fail("No build side values found for a probe key.");
    }
    while (buildSide.advanceNext()) {
        numBuildValues++;
        record = buildSide.getRow();
        assertEquals("Probe-side key was different than build-side key.", key, record.getInt(0));
    }
    Long contained = map.get(key);
    if (contained == null) {
        contained = (long) numBuildValues;
    } else {
        contained = contained + numBuildValues;
    }
    map.put(key, contained);
}
Also used : RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData)

Example 50 with BinaryRowData

use of org.apache.flink.table.data.binary.BinaryRowData in project flink by apache.

the class LongHashTableTest method testSpillingHashJoinWithMassiveCollisions.

@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
    // the following two values are known to have a hash-code collision on the initial level.
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCountBuild = 200000;
    final int repeatedValueCountProbe = 5;
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;
    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // 400k pairs with two colliding keys
    MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
    MutableObjectIterator<BinaryRowData> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);
    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, 5);
    MutableObjectIterator<BinaryRowData> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, 5);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);
    // create the map for validating the results
    HashMap<Integer, Long> map = new HashMap<>(numKeys);
    final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }
    while (table.nextMatching()) {
        testJoin(table, map);
    }
    table.close();
    Assert.assertEquals("Wrong number of keys", numKeys, map.size());
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, (key == repeatedValue1 || key == repeatedValue2) ? (probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) : probeValsPerKey * buildValsPerKey, val);
    }
    // ----------------------------------------------------------------------------------------
    table.free();
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) UnionIterator(org.apache.flink.runtime.operators.testutils.UnionIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) HashMap(java.util.HashMap) Map(java.util.Map) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)
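
The assertion in the validation loop distinguishes the two colliding keys, which see the full cross product of the combined build and probe multiplicities, from every other key, which sees the plain per-key product. Spelled out with the constants above (illustrative only):

// Per-key expectations checked in the loop above:
long expectedForRepeatedKey =
        (long) (probeValsPerKey + repeatedValueCountProbe)     // 10 + 5 = 15 probe rows
                * (buildValsPerKey + repeatedValueCountBuild); // 3 + 200000 = 200003 build rows
// 15 * 200003 = 3000045 joined rows for each of repeatedValue1 and repeatedValue2
long expectedForNormalKey = (long) probeValsPerKey * buildValsPerKey; // 10 * 3 = 30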

Aggregations

BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 173
Test (org.junit.Test): 81
BinaryRowWriter (org.apache.flink.table.data.writer.BinaryRowWriter): 54
RowData (org.apache.flink.table.data.RowData): 31
ArrayList (java.util.ArrayList): 30
MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 22
UniformBinaryRowGenerator (org.apache.flink.table.runtime.util.UniformBinaryRowGenerator): 21
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 16
MemorySegment (org.apache.flink.core.memory.MemorySegment): 15
MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 14
GenericRowData (org.apache.flink.table.data.GenericRowData): 13
Random (java.util.Random): 12
BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer): 12
HashMap (java.util.HashMap): 9
RowDataSerializer (org.apache.flink.table.runtime.typeutils.RowDataSerializer): 9
Map (java.util.Map): 7
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 7
StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator): 7
RandomAccessInputView (org.apache.flink.runtime.io.disk.RandomAccessInputView): 6
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord): 6