use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class HashTableITCase method testSpillingHashJoinWithTwoRecursionsIntPair.
/**
 * Runs a spilling hash join over {@code IntPair} inputs in which two keys are heavily repeated.
 *
 * <p>This test is basically identical to the "testSpillingHashJoinWithMassiveCollisions" test, only that the
 * number of repeated values (causing bucket collisions) is large enough to make sure that their target partition
 * no longer fits into memory by itself and needs to be repartitioned in the recursion again.
 *
 * @throws IOException propagated from the hash table and iterator calls made by this test
 */
@Test
public void testSpillingHashJoinWithTwoRecursionsIntPair() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion level.
    // we use them to make sure one partition grows over-proportionally large
    final int REPEATED_VALUE_1 = 40559;
    final int REPEATED_VALUE_2 = 92882;
    final int REPEATED_VALUE_COUNT_BUILD = 200000;
    final int REPEATED_VALUE_COUNT_PROBE = 5;
    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
    MutableObjectIterator<IntPair> build1 = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
    MutableObjectIterator<IntPair> build2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT_BUILD);
    MutableObjectIterator<IntPair> build3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT_BUILD);
    List<MutableObjectIterator<IntPair>> builds = new ArrayList<MutableObjectIterator<IntPair>>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<IntPair> buildInput = new UnionIterator<IntPair>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key, plus
    // REPEATED_VALUE_COUNT_PROBE extra pairs for each of the two colliding keys. The constant is used
    // (instead of a literal) so the inputs cannot drift away from the expected counts asserted below.
    MutableObjectIterator<IntPair> probe1 = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
    MutableObjectIterator<IntPair> probe2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT_PROBE);
    MutableObjectIterator<IntPair> probe3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT_PROBE);
    List<MutableObjectIterator<IntPair>> probes = new ArrayList<MutableObjectIterator<IntPair>>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<IntPair> probeInput = new UnionIterator<IntPair>(probes);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    } catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    // create the map for validating the results: key -> number of joined (probe, build) pairs seen
    HashMap<Integer, Long> map = new HashMap<Integer, Long>(NUM_KEYS);

    // ----------------------------------------------------------------------------------------

    final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
            this.pairBuildSideAccesssor, this.pairProbeSideAccesssor,
            this.pairBuildSideComparator, this.pairProbeSideComparator,
            this.pairComparator, memSegments, ioManager);
    try {
        join.open(buildInput, probeInput);

        final IntPair recordReuse = new IntPair();
        while (join.nextRecord()) {
            final IntPair probeRec = join.getCurrentProbeRecord();
            final int key = probeRec.getKey();
            final MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();

            // count the build-side matches of this probe record and check that each one has the probe key
            int numBuildValues = 0;
            IntPair record;
            while ((record = buildSide.next(recordReuse)) != null) {
                numBuildValues++;
                Assert.assertEquals("Probe-side key was different than build-side key.", key, record.getKey());
            }
            if (numBuildValues == 0) {
                fail("No build side values found for a probe key.");
            }

            final Long contained = map.get(key);
            final long previous = (contained == null) ? 0L : contained.longValue();
            map.put(key, Long.valueOf(previous + numBuildValues));
        }
    } finally {
        // close the table and hand its memory back even if an assertion above failed,
        // so that a failing run does not leave segments allocated in the memory manager
        join.close();
        this.memManager.release(join.getFreedMemory());
    }

    // ----------------------------------------------------------------------------------------

    Assert.assertEquals("Wrong number of keys", NUM_KEYS, map.size());

    final long expectedRegular = PROBE_VALS_PER_KEY * BUILD_VALS_PER_KEY;
    final long expectedRepeated = (PROBE_VALS_PER_KEY + REPEATED_VALUE_COUNT_PROBE) * (BUILD_VALS_PER_KEY + REPEATED_VALUE_COUNT_BUILD);
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        final boolean repeatedKey = key == REPEATED_VALUE_1 || key == REPEATED_VALUE_2;
        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
                repeatedKey ? expectedRepeated : expectedRegular, val);
    }
}
use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class HashTableITCase method testSpillingHashJoinWithTwoRecursions.
/**
 * Runs a spilling hash join over {@code Record} inputs in which two keys are heavily repeated.
 *
 * <p>This test is basically identical to the "testSpillingHashJoinWithMassiveCollisions" test, only that the
 * number of repeated values (causing bucket collisions) is large enough to make sure that their target partition
 * no longer fits into memory by itself and needs to be repartitioned in the recursion again.
 *
 * @throws IOException propagated from the hash table and iterator calls made by this test
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion level.
    // we use them to make sure one partition grows over-proportionally large
    final int REPEATED_VALUE_1 = 40559;
    final int REPEATED_VALUE_2 = 92882;
    final int REPEATED_VALUE_COUNT_BUILD = 200000;
    final int REPEATED_VALUE_COUNT_PROBE = 5;
    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
    MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
    MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT_BUILD);
    MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT_BUILD);
    List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key, plus
    // REPEATED_VALUE_COUNT_PROBE extra pairs for each of the two colliding keys. The constant is used
    // (instead of a literal) so the inputs cannot drift away from the expected counts asserted below.
    MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
    MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT_PROBE);
    MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT_PROBE);
    List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    } catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    // create the map for validating the results: key -> number of joined (probe, build) pairs seen
    HashMap<Integer, Long> map = new HashMap<Integer, Long>(NUM_KEYS);

    // ----------------------------------------------------------------------------------------

    final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
            this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
            this.recordBuildSideComparator, this.recordProbeSideComparator,
            this.pactRecordComparator, memSegments, ioManager);
    try {
        join.open(buildInput, probeInput);

        final Record recordReuse = new Record();
        while (join.nextRecord()) {
            final Record probeRec = join.getCurrentProbeRecord();
            // field 0 is read as the join key on both sides
            final int key = probeRec.getField(0, IntValue.class).getValue();
            final MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();

            // count the build-side matches of this probe record and check that each one has the probe key
            int numBuildValues = 0;
            Record record;
            while ((record = buildSide.next(recordReuse)) != null) {
                numBuildValues++;
                Assert.assertEquals("Probe-side key was different than build-side key.", key, record.getField(0, IntValue.class).getValue());
            }
            if (numBuildValues == 0) {
                fail("No build side values found for a probe key.");
            }

            final Long contained = map.get(key);
            final long previous = (contained == null) ? 0L : contained.longValue();
            map.put(key, Long.valueOf(previous + numBuildValues));
        }
    } finally {
        // close the table and hand its memory back even if an assertion above failed,
        // so that a failing run does not leave segments allocated in the memory manager
        join.close();
        this.memManager.release(join.getFreedMemory());
    }

    // ----------------------------------------------------------------------------------------

    Assert.assertEquals("Wrong number of keys", NUM_KEYS, map.size());

    final long expectedRegular = PROBE_VALS_PER_KEY * BUILD_VALS_PER_KEY;
    final long expectedRepeated = (PROBE_VALS_PER_KEY + REPEATED_VALUE_COUNT_PROBE) * (BUILD_VALS_PER_KEY + REPEATED_VALUE_COUNT_BUILD);
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        final boolean repeatedKey = key == REPEATED_VALUE_1 || key == REPEATED_VALUE_2;
        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
                repeatedKey ? expectedRepeated : expectedRegular, val);
    }
}
use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class HashTableITCase method testSpillingHashJoinWithMassiveCollisions.
/**
 * Runs a spilling hash join over {@code Record} inputs in which two keys, known to collide on the
 * initial hash level, are repeated many times on the build side, and checks that every key produces
 * the expected number of joined pairs.
 *
 * @throws IOException propagated from the hash table and iterator calls made by this test
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
    // the following two values are known to have a hash-code collision on the initial level.
    // we use them to make sure one partition grows over-proportionally large
    final int REPEATED_VALUE_1 = 40559;
    final int REPEATED_VALUE_2 = 92882;
    final int REPEATED_VALUE_COUNT_BUILD = 200000;
    final int REPEATED_VALUE_COUNT_PROBE = 5;
    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
    MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
    MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT_BUILD);
    MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT_BUILD);
    List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key, plus
    // REPEATED_VALUE_COUNT_PROBE extra pairs for each of the two colliding keys. The constant is used
    // (instead of a literal) so the inputs cannot drift away from the expected counts asserted below.
    MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
    MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT_PROBE);
    MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT_PROBE);
    List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    } catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    // create the map for validating the results: key -> number of joined (probe, build) pairs seen
    HashMap<Integer, Long> map = new HashMap<Integer, Long>(NUM_KEYS);

    // ----------------------------------------------------------------------------------------

    final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
            this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
            this.recordBuildSideComparator, this.recordProbeSideComparator,
            this.pactRecordComparator, memSegments, ioManager);
    try {
        join.open(buildInput, probeInput);

        final Record recordReuse = new Record();
        while (join.nextRecord()) {
            final Record probeRec = join.getCurrentProbeRecord();
            // field 0 is read as the join key on both sides
            final int key = probeRec.getField(0, IntValue.class).getValue();
            final MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();

            // count the build-side matches of this probe record and check that each one has the probe key
            int numBuildValues = 0;
            Record record;
            while ((record = buildSide.next(recordReuse)) != null) {
                numBuildValues++;
                Assert.assertEquals("Probe-side key was different than build-side key.", key, record.getField(0, IntValue.class).getValue());
            }
            if (numBuildValues == 0) {
                fail("No build side values found for a probe key.");
            }

            final Long contained = map.get(key);
            final long previous = (contained == null) ? 0L : contained.longValue();
            map.put(key, Long.valueOf(previous + numBuildValues));
        }
    } finally {
        // close the table and hand its memory back even if an assertion above failed,
        // so that a failing run does not leave segments allocated in the memory manager
        join.close();
        this.memManager.release(join.getFreedMemory());
    }

    // ----------------------------------------------------------------------------------------

    Assert.assertEquals("Wrong number of keys", NUM_KEYS, map.size());

    final long expectedRegular = PROBE_VALS_PER_KEY * BUILD_VALS_PER_KEY;
    final long expectedRepeated = (PROBE_VALS_PER_KEY + REPEATED_VALUE_COUNT_PROBE) * (BUILD_VALS_PER_KEY + REPEATED_VALUE_COUNT_BUILD);
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        final boolean repeatedKey = key == REPEATED_VALUE_1 || key == REPEATED_VALUE_2;
        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
                repeatedKey ? expectedRepeated : expectedRegular, val);
    }
}
use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class CompactingHashTableTest method testHashTableGrowthWithInsert.
// ------------------------------------------------------------------------
// tests
// ------------------------------------------------------------------------
/**
 * Inserts one million (Long, String) tuples into a table that is set up to grow early, then checks
 * that every tuple can be found again through both the entry iterator and a prober.
 *
 * <p>This has to be duplicated in InPlaceMutableHashTableTest and CompactingHashTableTest
 * because of the different constructor calls.
 */
@Test
public void testHashTableGrowthWithInsert() {
    try {
        final int elementCount = 1000000;
        List<MemorySegment> segments = getMemory(10000, 32 * 1024);

        // The table is told that records are super large. That makes it initially reserve a lot of
        // memory for the partition buffers and start with a smaller hash table, so that a hash
        // table growth is triggered early.
        CompactingHashTable<Tuple2<Long, String>> hashTable =
                new CompactingHashTable<Tuple2<Long, String>>(
                        tuple2LongStringSerializer, tuple2LongStringComparator, segments, 10000);
        hashTable.open();

        long nextKey = 0;
        while (nextKey < elementCount) {
            hashTable.insert(new Tuple2<Long, String>(nextKey, String.valueOf(nextKey)));
            nextKey++;
        }

        // Pass 1: walk the entry iterator and mark each key that shows up; the number of set bits
        // must equal the number of inserted elements.
        BitSet seenKeys = new BitSet(elementCount);
        MutableObjectIterator<Tuple2<Long, String>> entries = hashTable.getEntryIterator();
        for (Tuple2<Long, String> entry = entries.next(); entry != null; entry = entries.next()) {
            assertNotNull(entry.f0);
            assertNotNull(entry.f1);
            assertEquals(entry.f0.longValue(), Long.parseLong(entry.f1));
            seenKeys.set(entry.f0.intValue());
        }
        assertEquals(elementCount, seenKeys.cardinality());

        // Pass 2: every inserted key must be found by the prober, and a key shifted past the
        // inserted range must not be found.
        CompactingHashTable<Tuple2<Long, String>>.HashTableProber<Long> prober =
                hashTable.getProber(probeComparator, pairComparator);
        for (long key = 0; key < elementCount; key++) {
            assertNotNull(prober.getMatchFor(key));
            assertNull(prober.getMatchFor(key + elementCount));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
use of org.apache.flink.util.MutableObjectIterator in project flink by apache.
the class ExternalSortLargeRecordsITCase method testSortWithLongAndShortRecordsMixed.
/**
 * Sorts {@code NUM_RECORDS} generated (Long, SomeMaybeLongValue) tuples with a
 * {@code UnilateralSortMerger} and verifies the order of the output, that each record's value
 * still matches its key, and that exactly {@code NUM_RECORDS} records come back.
 *
 * <p>Every {@code LARGE_REC_INTERVAL}-th record is created with its boolean constructor argument
 * set to {@code true} (presumably marking it as a large record; confirm against SomeMaybeLongValue).
 */
@Test
public void testSortWithLongAndShortRecordsMixed() {
    try {
        final int NUM_RECORDS = 1000000;
        final int LARGE_REC_INTERVAL = 100000;

        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeMaybeLongValue>(SomeMaybeLongValue.class) };
        final TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>>(types);
        final TypeSerializer<Tuple2<Long, SomeMaybeLongValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        // compare on field 0 with order flag false; the check below expects non-increasing keys
        final TypeComparator<Tuple2<Long, SomeMaybeLongValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());

        // deterministic source (fixed seed) that emits NUM_RECORDS tuples and then null
        MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> source = new MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>>() {
            private final Random rnd = new Random(145610843608763871L);
            private int num = -1;

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next(Tuple2<Long, SomeMaybeLongValue> reuse) {
                // the reuse object is ignored; a fresh tuple is created for every record
                return next();
            }

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next() {
                if (++num < NUM_RECORDS) {
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SomeMaybeLongValue>(val, new SomeMaybeLongValue((int) val, num % LARGE_REC_INTERVAL == 0));
                } else {
                    return null;
                }
            }
        };

        @SuppressWarnings("unchecked") Sorter<Tuple2<Long, SomeMaybeLongValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SomeMaybeLongValue>>(
                this.memoryManager, this.ioManager, source, this.parentTask,
                new RuntimeSerializerFactory<Tuple2<Long, SomeMaybeLongValue>>(serializer, (Class<Tuple2<Long, SomeMaybeLongValue>>) (Class<?>) Tuple2.class),
                comparator, 1.0, 1, 128, 0.7f, true /* use large record handler */, true);
        try {
            // check order
            MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> iterator = sorter.getIterator();
            Tuple2<Long, SomeMaybeLongValue> val = serializer.createInstance();
            long prevKey = Long.MAX_VALUE;
            for (int i = 0; i < NUM_RECORDS; i++) {
                val = iterator.next(val);
                assertTrue("Sort order violated", val.f0 <= prevKey);
                assertEquals("Serialization of test data type incorrect", val.f0.intValue(), val.f1.val());
                // remember this key so the next record is compared against its predecessor;
                // without this update the order assertion would always compare against Long.MAX_VALUE
                prevKey = val.f0;
            }
            // the sorter must not return more records than were fed in
            assertNull(iterator.next(val));
        } finally {
            // shut the sorter down even when one of the assertions above failed
            sorter.close();
        }
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Aggregations