Use of org.apache.flink.runtime.memory.MemoryManager in the Apache Flink project.
Class BinaryHashTableTest, method testSpillingHashJoinWithMassiveCollisions.
/**
 * Joins a large uniform build/probe input that additionally contains two heavily repeated keys
 * and verifies the size of the per-key cross product.
 *
 * <p>The build side is the union of a uniform generator ({@code numKeys} keys with
 * {@code buildValsPerKey} values each) and {@code repeatedValueCountBuild} extra rows for each of
 * the two repeated keys. The probe side is built the same way with {@code probeValsPerKey} and
 * {@code repeatedValueCountProbe}. The hash table is given 896 pages of memory.
 *
 * @throws IOException if reading an input or operating the hash table fails
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
    // the following two values are known to have a hash-code collision on the initial level.
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCountBuild = 200000;
    final int repeatedValueCountProbe = 5;

    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // 400k pairs with two colliding keys
    MutableObjectIterator<BinaryRowData> build1 =
            new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 =
            new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
    MutableObjectIterator<BinaryRowData> build3 =
            new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key, plus
    // repeatedValueCountProbe extra pairs for each of the two colliding keys.
    // The named constant is used (instead of a literal) so the inputs cannot drift from the
    // expected-value formula in the validation loop below.
    MutableObjectIterator<BinaryRowData> probe1 =
            new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 =
            new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
    MutableObjectIterator<BinaryRowData> probe3 =
            new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

    // create the map for validating the results
    HashMap<Integer, Long> map = new HashMap<>(numKeys);

    MemoryManager memManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();

    // ----------------------------------------------------------------------------------------

    final BinaryHashTable table =
            newBinaryHashTable(
                    this.buildSideSerializer,
                    this.probeSideSerializer,
                    new MyProjection(),
                    new MyProjection(),
                    memManager,
                    896 * PAGE_SIZE,
                    ioManager);

    // build phase: feed every build row into the table
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();

    // probe phase: record matches for every probe row the table reports a hit for
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }

    // keep collecting matches for as long as the table reports more of them
    while (table.nextMatching()) {
        testJoin(table, map);
    }

    table.close();

    Assert.assertEquals("Wrong number of keys", numKeys, map.size());

    // expected cross-product sizes: the two repeated keys see the extra rows on both sides
    final int expectedForRepeatedKey =
            (probeValsPerKey + repeatedValueCountProbe)
                    * (buildValsPerKey + repeatedValueCountBuild);
    final int expectedForRegularKey = probeValsPerKey * buildValsPerKey;

    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();

        Assert.assertEquals(
                "Wrong number of values in per-key cross product for key " + key,
                (key == repeatedValue1 || key == repeatedValue2)
                        ? expectedForRepeatedKey
                        : expectedForRegularKey,
                val);
    }

    // ----------------------------------------------------------------------------------------

    table.free();
}
Use of org.apache.flink.runtime.memory.MemoryManager in the Apache Flink project.
Class BinaryHashTableTest, method testRepeatBuildJoin.
/**
 * Joins a build side consisting of identical rows against a uniform probe side.
 *
 * <p>The build iterator hands out 500 freshly allocated two-field rows whose fields are both
 * the int {@code 1}; the probe side is a {@code UniformBinaryRowGenerator} over 500 keys with
 * one value per key. The test expects {@code join} to report exactly one record.
 */
@Test
public void testRepeatBuildJoin() throws Exception {
    final int keyCount = 500;
    final int probeValuesPerKey = 1;

    MemoryManager memManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(40 * PAGE_SIZE).build();

    // Build side: keyCount rows, all carrying the same (1, 1) content.
    MutableObjectIterator<BinaryRowData> repeatedBuildRows =
            new MutableObjectIterator<BinaryRowData>() {

                private int emitted;

                @Override
                public BinaryRowData next() throws IOException {
                    // the counter advances on every call, including calls after exhaustion
                    if (++emitted > keyCount) {
                        return null;
                    }
                    BinaryRowData freshRow = new BinaryRowData(2);
                    BinaryRowWriter rowWriter = new BinaryRowWriter(freshRow);
                    rowWriter.writeInt(0, 1);
                    rowWriter.writeInt(1, 1);
                    rowWriter.complete();
                    return freshRow;
                }

                @Override
                public BinaryRowData next(BinaryRowData reuse) throws IOException {
                    // the reuse object is ignored; a new row is produced each time
                    return next();
                }
            };

    MutableObjectIterator<BinaryRowData> uniformProbeRows =
            new UniformBinaryRowGenerator(keyCount, probeValuesPerKey, true);

    final BinaryHashTable table =
            new BinaryHashTable(
                    conf,
                    new Object(),
                    buildSideSerializer,
                    probeSideSerializer,
                    new MyProjection(),
                    new MyProjection(),
                    memManager,
                    40 * PAGE_SIZE,
                    ioManager,
                    24,
                    200000,
                    true,
                    HashJoinType.INNER,
                    null,
                    false,
                    new boolean[] {true},
                    true);

    int joinedRecordCount = join(table, repeatedBuildRows, uniformProbeRows, true);
    Assert.assertEquals("Wrong number of records in join result.", 1, joinedRecordCount);

    table.close();
    table.free();
}
Use of org.apache.flink.runtime.memory.MemoryManager in the Apache Flink project.
Class BinaryHashTableTest, method validateSpillingDuringInsertion.
/*
 * Regression test for a fixed bug: memory used to be lost when a partition was spilled
 * while a record was being inserted into that very partition.
 *
 * Builds from 500000 single-value keys with 85 pages of memory, probes with 10 single-value
 * keys, and checks the number of joined records.
 */
@Test
public void validateSpillingDuringInsertion() throws IOException, MemoryAllocationException {
    final int buildKeyCount = 500000;
    final int buildValuesPerKey = 1;
    final int probeKeyCount = 10;
    final int probeValuesPerKey = 1;

    MutableObjectIterator<BinaryRowData> buildRows =
            new UniformBinaryRowGenerator(buildKeyCount, buildValuesPerKey, false);

    MemoryManager memManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(85 * PAGE_SIZE).build();
    final BinaryHashTable table =
            newBinaryHashTable(
                    this.buildSideSerializer,
                    this.probeSideSerializer,
                    new MyProjection(),
                    new MyProjection(),
                    memManager,
                    85 * PAGE_SIZE,
                    ioManager);

    // only keys present on both sides can match, hence the minimum of the two key counts
    int sharedKeyCount = Math.min(probeKeyCount, buildKeyCount);
    int expectedJoinSize = sharedKeyCount * buildValuesPerKey * probeValuesPerKey;

    UniformBinaryRowGenerator probeRows =
            new UniformBinaryRowGenerator(probeKeyCount, probeValuesPerKey, true);
    int actualJoinSize = join(table, buildRows, probeRows);

    Assert.assertEquals(
            "Wrong number of records in join result.", expectedJoinSize, actualJoinSize);

    table.close();
    table.free();
}
Use of org.apache.flink.runtime.memory.MemoryManager in the Apache Flink project.
Class BinaryHashTableTest, method testHashWithBuildSideOuterJoin1.
/**
 * Runs a {@code HashJoinType.BUILD_OUTER} join where the build side has twice as many keys
 * as the probe side, using 35 pages of memory.
 *
 * <p>The expected result size equals the number of build keys times the values per key on
 * both sides.
 */
@Test
public void testHashWithBuildSideOuterJoin1() throws Exception {
    final int probeKeyCount = 20000;
    final int buildKeyCount = 2 * probeKeyCount;
    final int buildValuesPerKey = 1;
    final int probeValuesPerKey = 1;

    // build side: 40000 pairs, one value per key
    MutableObjectIterator<BinaryRowData> buildRows =
            new UniformBinaryRowGenerator(buildKeyCount, buildValuesPerKey, false);

    // probe side: 20000 pairs, one value per key
    MutableObjectIterator<BinaryRowData> probeRows =
            new UniformBinaryRowGenerator(probeKeyCount, probeValuesPerKey, true);

    MemoryManager memManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(35 * PAGE_SIZE).build();

    // set up the hash table on top of the memory manager created above
    final BinaryHashTable table =
            new BinaryHashTable(
                    conf,
                    new Object(),
                    this.buildSideSerializer,
                    this.probeSideSerializer,
                    new MyProjection(),
                    new MyProjection(),
                    memManager,
                    35 * PAGE_SIZE,
                    ioManager,
                    24,
                    200000,
                    true,
                    HashJoinType.BUILD_OUTER,
                    null,
                    true,
                    new boolean[] {true},
                    false);

    int actualJoinSize = join(table, buildRows, probeRows, true);
    int expectedJoinSize = buildKeyCount * buildValuesPerKey * probeValuesPerKey;

    Assert.assertEquals(
            "Wrong number of records in join result.", expectedJoinSize, actualJoinSize);

    table.close();
    table.free();
}
Use of org.apache.flink.runtime.memory.MemoryManager in the Apache Flink project.
Class BytesHashMapTestBase, method testSingleKeyMultipleOps.
/**
 * Exercises repeated lookups and a single append for one randomly generated key.
 *
 * <p>Three lookups on the empty map must all miss. In the following three rounds, the first
 * lookup still misses and the key is appended with {@code defaultValue}; the remaining
 * lookups must hit. Every round must end with a non-null value.
 */
@Test
public void testSingleKeyMultipleOps() throws Exception {
    final int segmentCount =
            needNumMemSegments(
                    NUM_ENTRIES,
                    rowLength(RowType.of(VALUE_TYPES)),
                    rowLength(RowType.of(KEY_TYPES)),
                    PAGE_SIZE);
    int memorySize = segmentCount * PAGE_SIZE;

    MemoryManager memoryManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(memorySize).build();
    AbstractBytesHashMap<K> table =
            createBytesHashMap(memoryManager, memorySize, KEY_TYPES, VALUE_TYPES);

    final K key = generateRandomKeys(1)[0];

    // nothing has been inserted yet, so every lookup has to miss
    for (int attempt = 1; attempt <= 3; attempt++) {
        Assert.assertFalse(table.lookup(key).isFound());
    }

    for (int round = 0; round < 3; round++) {
        BytesMap.LookupInfo<K, BinaryRowData> info = table.lookup(key);
        BinaryRowData value = info.getValue();
        final boolean firstRound = round == 0;
        if (!firstRound) {
            // the key was appended in the first round
            Assert.assertTrue(info.isFound());
        } else {
            Assert.assertFalse(info.isFound());
            value = table.append(info, defaultValue);
        }
        Assert.assertNotNull(value);
    }

    table.free();
}
Aggregations