Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.
Class LongHashTableTest, method join.
/**
 * Loads the whole build input into the table, then streams the probe input through it and
 * adds up what {@code joinWithNextKey} returns for every successful probe. After the probe
 * input is exhausted, the table is drained through {@code nextMatching()} and those results
 * are added as well.
 *
 * @param table hash table that is built and probed
 * @param buildInput source of build-side rows
 * @param probeInput source of probe-side rows
 * @return the sum of all {@code joinWithNextKey} results
 * @throws IOException if reading an input or operating on the table fails
 */
private int join(
        MyHashTable table,
        MutableObjectIterator<BinaryRowData> buildInput,
        MutableObjectIterator<BinaryRowData> probeInput)
        throws IOException {
    // Build phase: the very same reuse instance is offered to the iterator on every call.
    final BinaryRowData buildReuse = buildSideSerializer.createInstance();
    for (BinaryRowData row = buildInput.next(buildReuse);
            row != null;
            row = buildInput.next(buildReuse)) {
        table.putBuildRow(row);
    }
    table.endBuild();

    int joined = 0;

    // Probe phase: each returned row is handed back as the reuse object for the next call.
    for (BinaryRowData probe = probeInput.next(probeSideSerializer.createInstance());
            probe != null;
            probe = probeInput.next(probe)) {
        if (table.tryProbe(probe)) {
            joined += joinWithNextKey(table);
        }
    }

    // Drain whatever the table still reports once the probe input is used up.
    while (table.nextMatching()) {
        joined += joinWithNextKey(table);
    }
    return joined;
}
Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.
Class LongHashTableTest, method testSpillingHashJoinOneRecursionValidity.
/**
 * Joins 1,000,000 keys with 3 build rows and 10 probe rows each and verifies, key by key,
 * that the number of matched build rows adds up to the full cross product (3 * 10).
 *
 * <p>The table is created with {@code 100 * PAGE_SIZE}; judging by the test name this is
 * meant to be small enough to force spilling (not verifiable from this method alone).
 */
@Test
public void testSpillingHashJoinOneRecursionValidity() throws IOException {
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;

    // build side: 3 million pairs, every key occurring 3 times
    final MutableObjectIterator<BinaryRowData> buildInput =
            new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    // probe side: 10 million pairs, every key occurring 10 times
    final MutableObjectIterator<BinaryRowData> probeInput =
            new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);

    // key -> number of build rows matched so far, filled by testJoin
    final HashMap<Integer, Long> matchesPerKey = new HashMap<>(numKeys);

    final MyHashTable table = new MyHashTable(100 * PAGE_SIZE);

    // build phase
    for (BinaryRowData row = buildInput.next(buildSideSerializer.createInstance());
            row != null;
            row = buildInput.next(row)) {
        table.putBuildRow(row);
    }
    table.endBuild();

    // probe phase
    for (BinaryRowData row = probeInput.next(probeSideSerializer.createInstance());
            row != null;
            row = probeInput.next(row)) {
        if (table.tryProbe(row)) {
            testJoin(table, matchesPerKey);
        }
    }
    // matches the table still reports after the probe input is exhausted
    while (table.nextMatching()) {
        testJoin(table, matchesPerKey);
    }
    table.close();

    Assert.assertEquals("Wrong number of keys", numKeys, matchesPerKey.size());

    // Kept as primitive longs so the (String, long, long) overload of assertEquals is used.
    final long expectedPerKey = probeValsPerKey * buildValsPerKey;
    for (Map.Entry<Integer, Long> perKey : matchesPerKey.entrySet()) {
        final int key = perKey.getKey();
        final long actual = perKey.getValue();
        Assert.assertEquals(
                "Wrong number of values in per-key cross product for key " + key,
                expectedPerKey,
                actual);
    }

    table.free();
}
Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.
Class LongHashTableTest, method testJoin.
/**
 * Checks the build-side matches of the table's current probe row and records how many there
 * were.
 *
 * <p>Every build row returned by the build-side iterator must carry the same int key (field
 * 0) as the probe row, and there must be at least one such row. The number of build rows is
 * added to the running total stored in {@code map} under that key.
 *
 * @param table hash table positioned on a probe row
 * @param map running per-key totals of matched build rows; updated in place
 * @throws IOException if the table fails while delivering rows
 */
private void testJoin(MyHashTable table, HashMap<Integer, Long> map) throws IOException {
    final RowData probeSide = table.getCurrentProbeRow();
    final int probeKey = probeSide.getInt(0);
    final RowIterator<BinaryRowData> matches = table.getBuildSideIterator();

    int matchCount = 0;
    while (matches.advanceNext()) {
        matchCount++;
        final BinaryRowData buildSide = matches.getRow();
        assertEquals(
                "Probe-side key was different than build-side key.",
                probeKey,
                buildSide.getInt(0));
    }
    if (matchCount == 0) {
        fail("No build side values found for a probe key.");
    }

    // Start a new total for an unseen key, otherwise add to the existing one.
    map.merge(probeKey, (long) matchCount, Long::sum);
}
Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.
Class LongHashTableTest, method testSparseProbeSpilling.
/**
 * Joins a large build side (1,000,000 keys, one row each) against a tiny probe side (20 keys,
 * one row each) and checks the number of records counted by {@code join}.
 *
 * <p>The expected count is the number of keys present on both sides multiplied by the rows
 * per key on each side.
 */
@Test
public void testSparseProbeSpilling() throws IOException, MemoryAllocationException {
    final int numBuildKeys = 1000000;
    final int numBuildVals = 1;
    final int numProbeKeys = 20;
    final int numProbeVals = 1;

    final MutableObjectIterator<BinaryRowData> buildInput =
            new UniformBinaryRowGenerator(numBuildKeys, numBuildVals, false);
    final MyHashTable table = new MyHashTable(100 * PAGE_SIZE);

    final int sharedKeys = Math.min(numProbeKeys, numBuildKeys);
    final int expectedNumResults = sharedKeys * numBuildVals * numProbeVals;

    final MutableObjectIterator<BinaryRowData> probeInput =
            new UniformBinaryRowGenerator(numProbeKeys, numProbeVals, true);
    final int numRecordsInJoinResult = join(table, buildInput, probeInput);

    Assert.assertEquals(
            "Wrong number of records in join result.",
            expectedNumResults,
            numRecordsInJoinResult);

    table.close();
    table.free();
}
Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.
Class LongHashTableTest, method testSpillingHashJoinWithTwoRecursions.
/**
 * Joins a uniform input (1,000,000 keys, 3 build rows and 10 probe rows each) that is
 * additionally skewed by two heavily repeated keys, and verifies the per-key number of
 * matched build rows.
 *
 * <p>For ordinary keys the expected total is {@code probeValsPerKey * buildValsPerKey}. For
 * the two repeated keys the extra rows are added on both sides before multiplying.
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion
    // level.
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCountBuild = 200000;
    final int repeatedValueCountProbe = 5;
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // 400k pairs with two colliding keys
    MutableObjectIterator<BinaryRowData> build1 =
            new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 =
            new BinaryHashTableTest.ConstantsKeyValuePairsIterator(
                    repeatedValue1, 17, repeatedValueCountBuild);
    MutableObjectIterator<BinaryRowData> build3 =
            new BinaryHashTableTest.ConstantsKeyValuePairsIterator(
                    repeatedValue2, 23, repeatedValueCountBuild);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key, plus
    // repeatedValueCountProbe extra pairs for each of the two colliding keys.
    // The count comes from the same constant the expected-value formula below uses, so the
    // generated input and the assertion cannot drift apart.
    MutableObjectIterator<BinaryRowData> probe1 =
            new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 =
            new BinaryHashTableTest.ConstantsKeyValuePairsIterator(
                    repeatedValue1, 17, repeatedValueCountProbe);
    MutableObjectIterator<BinaryRowData> probe3 =
            new BinaryHashTableTest.ConstantsKeyValuePairsIterator(
                    repeatedValue2, 23, repeatedValueCountProbe);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

    // create the map for validating the results
    HashMap<Integer, Long> map = new HashMap<>(numKeys);

    final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

    // build phase
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();

    // probe phase
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }
    // matches the table still reports after the probe input is exhausted
    while (table.nextMatching()) {
        testJoin(table, map);
    }
    table.close();

    Assert.assertEquals("Wrong number of keys", numKeys, map.size());
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        Assert.assertEquals(
                "Wrong number of values in per-key cross product for key " + key,
                (key == repeatedValue1 || key == repeatedValue2)
                        ? (probeValsPerKey + repeatedValueCountProbe)
                                * (buildValsPerKey + repeatedValueCountBuild)
                        : probeValsPerKey * buildValsPerKey,
                val);
    }

    // ----------------------------------------------------------------------------------------
    table.free();
}
Aggregations