use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class BinaryHashTableTest method testFailingHashJoinTooManyRecursions.
/*
 * This test is basically identical to the "testSpillingHashJoinWithMassiveCollisions" test, only that the number
 * of repeated values (causing bucket collisions) is large enough to make sure that their target partition no longer
 * fits into memory by itself and needs to be repartitioned in the recursion again.
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion level.
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCount = 3000000;
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;
    // create a build input that gives 3 million pairs with 3 values sharing the same key,
    // plus 6 million pairs with the two colliding keys (2 x 3 million repetitions)
    MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
    MutableObjectIterator<BinaryRowData> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);
    // create a probe input that gives 10 million pairs with 10 values sharing a key,
    // plus another 6 million pairs with the two colliding keys
    MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
    MutableObjectIterator<BinaryRowData> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

    // ----------------------------------------------------------------------------------------

    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
    final BinaryHashTable table = newBinaryHashTable(
            this.buildSideSerializer,
            this.probeSideSerializer,
            new MyProjection(),
            new MyProjection(),
            memManager,
            896 * PAGE_SIZE,
            ioManager);

    try {
        join(table, buildInput, probeInput);
        fail("Hash Join must have failed due to too many recursions.");
    } catch (Exception ex) {
        // expected
    }

    table.close();

    // ----------------------------------------------------------------------------------------

    table.free();
}
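
The join(table, buildInput, probeInput) helper called above is defined elsewhere in BinaryHashTableTest and is not part of this snippet. Below is a minimal sketch of what such a three-argument helper could look like, assuming the same putBuildRow/endBuild/tryProbe/nextMatching driver pattern that the next test inlines, and assuming matches are counted by draining the table's getBuildSideIterator(); treat the exact shape as an illustration, not the class's actual code.

// Hypothetical sketch of the join(...) helper assumed by the test above. It relies on the
// surrounding test class's buildSideSerializer / probeSideSerializer fields.
private int join(
        BinaryHashTable table,
        MutableObjectIterator<BinaryRowData> buildInput,
        MutableObjectIterator<BinaryRowData> probeInput) throws IOException {
    int count = 0;

    // build phase: insert all build rows, then seal the table
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();

    // probe phase: for every probe row that hits a bucket, count the matching build rows
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            RowIterator<BinaryRowData> buildIter = table.getBuildSideIterator();
            while (buildIter.advanceNext()) {
                count++;
            }
        }
    }

    // spilled partitions are re-probed until the table reports no further matches
    while (table.nextMatching()) {
        RowIterator<BinaryRowData> buildIter = table.getBuildSideIterator();
        while (buildIter.advanceNext()) {
            count++;
        }
    }
    return count;
}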
use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class BinaryHashTableTest method testSpillingHashJoinWithMassiveCollisions.
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
    // the following two values are known to have a hash-code collision on the initial level.
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCountBuild = 200000;
    final int repeatedValueCountProbe = 5;
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // 400k pairs with two colliding keys
    MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
    MutableObjectIterator<BinaryRowData> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
    MutableObjectIterator<BinaryRowData> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

    // create the map for validating the results
    HashMap<Integer, Long> map = new HashMap<>(numKeys);

    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();

    // ----------------------------------------------------------------------------------------

    final BinaryHashTable table = newBinaryHashTable(
            this.buildSideSerializer,
            this.probeSideSerializer,
            new MyProjection(),
            new MyProjection(),
            memManager,
            896 * PAGE_SIZE,
            ioManager);

    final BinaryRowData recordReuse = new BinaryRowData(2);

    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();

    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }

    while (table.nextMatching()) {
        testJoin(table, map);
    }

    table.close();

    Assert.assertEquals("Wrong number of keys", numKeys, map.size());
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        Assert.assertEquals(
                "Wrong number of values in per-key cross product for key " + key,
                (key == repeatedValue1 || key == repeatedValue2)
                        ? (probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild)
                        : probeValsPerKey * buildValsPerKey,
                val);
    }

    // ----------------------------------------------------------------------------------------

    table.free();
}
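
The testJoin(table, map) validation helper referenced above is also defined elsewhere in the test class. A minimal sketch, assuming it counts how many build-side rows the current probe row matched and accumulates that count per key in the map that the assertions inspect afterwards; the use of getBuildSideIterator()/RowIterator is the same assumption as in the join sketch above.

// Hypothetical sketch of the testJoin(...) helper assumed by the test above.
private void testJoin(BinaryHashTable table, HashMap<Integer, Long> map) throws IOException {
    int numBuildValues = 0;
    int key = 0;

    // walk the build-side rows matching the current probe row and remember their key
    final RowIterator<BinaryRowData> buildSide = table.getBuildSideIterator();
    while (buildSide.advanceNext()) {
        numBuildValues++;
        key = buildSide.getRow().getInt(0);
    }
    if (numBuildValues == 0) {
        return; // the probe row found no matching build rows
    }

    // accumulate the per-key cross-product count checked by the assertions
    Long contained = map.get(key);
    map.put(key, contained == null ? numBuildValues : contained + numBuildValues);
}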
use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class BinaryHashTableTest method testRepeatBuildJoin.
@Test
public void testRepeatBuildJoin() throws Exception {
    final int numKeys = 500;
    final int probeValsPerKey = 1;
    MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(40 * PAGE_SIZE).build();
    MutableObjectIterator<BinaryRowData> buildInput = new MutableObjectIterator<BinaryRowData>() {

        int cnt = 0;

        @Override
        public BinaryRowData next(BinaryRowData reuse) throws IOException {
            return next();
        }

        @Override
        public BinaryRowData next() throws IOException {
            cnt++;
            if (cnt > numKeys) {
                return null;
            }
            BinaryRowData row = new BinaryRowData(2);
            BinaryRowWriter writer = new BinaryRowWriter(row);
            writer.writeInt(0, 1);
            writer.writeInt(1, 1);
            writer.complete();
            return row;
        }
    };
    MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    final BinaryHashTable table = new BinaryHashTable(
            conf,
            new Object(),
            buildSideSerializer,
            probeSideSerializer,
            new MyProjection(),
            new MyProjection(),
            memManager,
            40 * PAGE_SIZE,
            ioManager,
            24,
            200000,
            true,
            HashJoinType.INNER,
            null,
            false,
            new boolean[] { true },
            true);
    int numRecordsInJoinResult = join(table, buildInput, probeInput, true);
    Assert.assertEquals("Wrong number of records in join result.", 1, numRecordsInJoinResult);
    table.close();
    table.free();
}
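
The final boolean passed to the BinaryHashTable constructor above appears to request de-duplication of identical build rows, which is why 500 copies of the same build row join with the single matching probe key into exactly one result record. The MyProjection class used as build- and probe-side key extractor in all of these tests is not included in the snippets; a plausible sketch, assuming it projects field 0 (the int key) of the two-field rows into a single-field key row, is shown below. The exact interface generics are an assumption.

// Hypothetical sketch of the MyProjection key extractor used by the tests above.
private static final class MyProjection implements Projection<BinaryRowData, BinaryRowData> {

    private final BinaryRowData innerRow = new BinaryRowData(1);
    private final BinaryRowWriter writer = new BinaryRowWriter(innerRow);

    @Override
    public BinaryRowData apply(BinaryRowData row) {
        // copy the join key (field 0) into a reusable single-field key row
        writer.reset();
        writer.writeInt(0, row.getInt(0));
        writer.complete();
        return innerRow;
    }
}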
use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class BinaryExternalSorterTest method testSpillingRandom.
@Test
public void testSpillingRandom() throws Exception {
    int size = 1_000_000;
    MockBinaryRowReader reader = new MockBinaryRowReader(size);

    LOG.debug("initializing sortmerger");

    long minMemorySize = memoryManager.computeNumberOfPages(0.1) * MemoryManager.DEFAULT_PAGE_SIZE;
    BinaryExternalSorter sorter = new BinaryExternalSorter(
            new Object(),
            this.memoryManager,
            minMemorySize,
            this.ioManager,
            (AbstractRowDataSerializer) serializer,
            serializer,
            IntNormalizedKeyComputer.INSTANCE,
            IntRecordComparator.INSTANCE,
            conf,
            0.7f);
    sorter.startThreads();

    List<BinaryRowData> data = new ArrayList<>();
    BinaryRowData row = serializer.createInstance();
    for (int i = 0; i < size; i++) {
        row = reader.next(row);
        data.add(row.copy());
    }

    Collections.shuffle(data);

    for (int i = 0; i < size; i++) {
        sorter.write(data.get(i));
    }

    MutableObjectIterator<BinaryRowData> iterator = sorter.getIterator();

    data.sort(Comparator.comparingInt(o -> o.getInt(0)));

    BinaryRowData next = serializer.createInstance();
    for (int i = 0; i < size; i++) {
        next = iterator.next(next);
        Assert.assertEquals(data.get(i).getInt(0), next.getInt(0));
        Assert.assertEquals(data.get(i).getString(1), next.getString(1));
    }

    sorter.close();
}
use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class BinaryMergeIteratorTest method testOneStream.
@Test
public void testOneStream() throws Exception {
    List<MutableObjectIterator<BinaryRowData>> iterators = new ArrayList<>();
    iterators.add(newIterator(new int[] { 1, 2, 4, 5, 10 }, new String[] { "1", "2", "4", "5", "10" }));

    final int[] expected = new int[] { 1, 2, 4, 5, 10 };

    MutableObjectIterator<BinaryRowData> iterator = new BinaryMergeIterator<>(
            iterators,
            Collections.singletonList(serializer.createInstance()),
            (o1, o2) -> this.comparator.compare(o1, o2));

    BinaryRowData row = serializer.createInstance();
    int pos = 0;
    while ((row = iterator.next(row)) != null) {
        Assert.assertEquals(expected[pos++], row.getInt(0));
    }
}
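
The newIterator(keys, values) helper, like the serializer and comparator fields, comes from the surrounding BinaryMergeIteratorTest and is not shown here. A minimal sketch of such a helper, assuming it wraps the two parallel arrays as an already-sorted stream of two-field BinaryRowData records; the list-backed implementation is an illustration only.

// Hypothetical sketch of the newIterator(...) helper assumed by the test above.
private MutableObjectIterator<BinaryRowData> newIterator(int[] keys, String[] values) {
    // materialize one (int, String) BinaryRowData per array position
    List<BinaryRowData> rows = new ArrayList<>();
    for (int i = 0; i < keys.length; i++) {
        BinaryRowData row = new BinaryRowData(2);
        BinaryRowWriter writer = new BinaryRowWriter(row);
        writer.writeInt(0, keys[i]);
        writer.writeString(1, StringData.fromString(values[i]));
        writer.complete();
        rows.add(row);
    }

    // expose the pre-built rows through the MutableObjectIterator contract
    Iterator<BinaryRowData> it = rows.iterator();
    return new MutableObjectIterator<BinaryRowData>() {
        @Override
        public BinaryRowData next(BinaryRowData reuse) {
            return next();
        }

        @Override
        public BinaryRowData next() {
            return it.hasNext() ? it.next() : null;
        }
    };
}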