Use of org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator in the Apache Flink project.
From the class NonReusingHashJoinIteratorITCase, method testBuildFirstWithMixedDataTypes.
/**
 * Joins a generated stream of {@link IntPair}s against a generated stream of tuples and
 * verifies that the hash join iterator reports every expected match.
 *
 * <p>The expected matches are computed up front with {@code joinIntPairs}. The matcher passed
 * to the iterator is constructed over that map; judging by its name it removes each match it
 * is called with (its implementation is not visible here). The test passes only if every
 * per-key collection in the map is empty once the join has run.
 *
 * <p>NOTE(review): despite "BuildFirst" in the method name, the iterator under test is the
 * build-second variant — confirm that this is intended.
 */
@Test
public void testBuildFirstWithMixedDataTypes() {
    try {
        MutableObjectIterator<IntPair> input1 = new UniformIntPairGenerator(500, 40, false);
        final TupleGenerator generator2 =
                new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
        final TestData.TupleGeneratorIterator input2 =
                new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);

        // collect expected data
        final Map<Integer, Collection<TupleIntPairMatch>> expectedMatchesMap =
                joinIntPairs(collectIntPairData(input1), collectTupleData(input2));

        final FlatJoinFunction<IntPair, Tuple2<Integer, String>, Tuple2<Integer, String>> matcher =
                new TupleIntPairMatchRemovingMatcher(expectedMatchesMap);
        final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

        // reset the generators: both inputs were consumed while collecting the expected data
        input1 = new UniformIntPairGenerator(500, 40, false);
        generator2.reset();
        input2.reset();

        // compare with iterator values
        NonReusingBuildSecondHashJoinIterator<IntPair, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
                new NonReusingBuildSecondHashJoinIterator<>(
                        input1,
                        input2,
                        this.pairSerializer,
                        this.pairComparator,
                        this.recordSerializer,
                        this.record2Comparator,
                        this.pairRecordPairComparator,
                        this.memoryManager,
                        this.ioManager,
                        this.parentTask,
                        1.0,
                        false,
                        false,
                        true);

        iterator.open();
        try {
            while (iterator.callWithNextKey(matcher, collector)) {
                // nothing to do here: the matcher is invoked inside callWithNextKey
            }
        } finally {
            // close even when the join loop fails, so the iterator is not left open
            iterator.close();
        }

        // assert that each expected match was seen
        for (Entry<Integer, Collection<TupleIntPairMatch>> entry : expectedMatchesMap.entrySet()) {
            if (!entry.getValue().isEmpty()) {
                Assert.fail("Collection for key " + entry.getKey() + " is not empty");
            }
        }
    } catch (Exception e) {
        // keep the original exception as the cause so the failure report carries its stack trace
        throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
    }
}
Use of org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator in the Apache Flink project.
From the class ReusingHashJoinIteratorITCase, method testBuildFirstWithMixedDataTypes.
/**
 * Joins a generated stream of {@link IntPair}s against a generated stream of tuples and
 * verifies that the object-reusing hash join iterator reports every expected match.
 *
 * <p>The expected matches are computed up front with {@code joinIntPairs}. The matcher passed
 * to the iterator is constructed over that map; judging by its name it removes each match it
 * is called with (its implementation is not visible here). The test passes only if every
 * per-key collection in the map is empty once the join has run.
 *
 * <p>NOTE(review): despite "BuildFirst" in the method name, the iterator under test is the
 * build-second variant — confirm that this is intended.
 */
@Test
public void testBuildFirstWithMixedDataTypes() {
    try {
        MutableObjectIterator<IntPair> input1 = new UniformIntPairGenerator(500, 40, false);
        final TestData.TupleGenerator generator2 =
                new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
        final TestData.TupleGeneratorIterator input2 =
                new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);

        // collect expected data
        final Map<Integer, Collection<TupleIntPairMatch>> expectedMatchesMap =
                joinIntPairs(collectIntPairData(input1), collectTupleData(input2));

        final FlatJoinFunction<IntPair, Tuple2<Integer, String>, Tuple2<Integer, String>> matcher =
                new TupleIntPairMatchRemovingMatcher(expectedMatchesMap);
        final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

        // reset the generators: both inputs were consumed while collecting the expected data
        input1 = new UniformIntPairGenerator(500, 40, false);
        generator2.reset();
        input2.reset();

        // compare with iterator values
        ReusingBuildSecondHashJoinIterator<IntPair, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
                new ReusingBuildSecondHashJoinIterator<>(
                        input1,
                        input2,
                        this.pairSerializer,
                        this.pairComparator,
                        this.recordSerializer,
                        this.record2Comparator,
                        this.pairRecordPairComparator,
                        this.memoryManager,
                        this.ioManager,
                        this.parentTask,
                        1.0,
                        false,
                        false,
                        true);

        iterator.open();
        try {
            while (iterator.callWithNextKey(matcher, collector)) {
                // nothing to do here: the matcher is invoked inside callWithNextKey
            }
        } finally {
            // close even when the join loop fails, so the iterator is not left open
            iterator.close();
        }

        // assert that each expected match was seen
        for (Entry<Integer, Collection<TupleIntPairMatch>> entry : expectedMatchesMap.entrySet()) {
            if (!entry.getValue().isEmpty()) {
                Assert.fail("Collection for key " + entry.getKey() + " is not empty");
            }
        }
    } catch (Exception e) {
        // keep the original exception as the cause so the failure report carries its stack trace
        throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
    }
}
Use of org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator in the Apache Flink project.
From the class FixedLengthRecordSorterTest, method testFlushPartialMemoryPage.
/**
 * Fills a {@link FixedLengthRecordSorter} with {@link IntPair}s, writes a sub-range of the
 * buffered records to a channel via {@code writeToOutput}, and reads the channel back to
 * check that exactly the requested records arrive in order.
 *
 * @throws Exception if memory allocation or any of the channel I/O operations fails
 */
@Test
public void testFlushPartialMemoryPage() throws Exception {
    // Insert IntPair which would fill 2 memory pages.
    final int NUM_RECORDS = 2 * MEMORY_PAGE_SIZE / 8;
    final List<MemorySegment> memory = this.memoryManager.allocatePages(new DummyInvokable(), 3);

    FixedLengthRecordSorter<IntPair> sorter = newSortBuffer(memory);
    UniformIntPairGenerator generator = new UniformIntPairGenerator(Integer.MAX_VALUE, 1, false);

    // write the records
    IntPair record = new IntPair();
    // num starts at -1 so that after the increment it is the zero-based index of the record
    // that is about to be offered to the sorter
    int num = -1;
    do {
        generator.next(record);
        num++;
        // write() is evaluated before the bound check, so the record with index NUM_RECORDS
        // is still offered; the loop ends after that or as soon as the sorter rejects a record
    } while (sorter.write(record) && num < NUM_RECORDS);

    FileIOChannel.ID channelID = this.ioManager.createChannelEnumerator().next();
    BlockChannelWriter<MemorySegment> blockChannelWriter =
            this.ioManager.createBlockChannelWriter(channelID);
    final List<MemorySegment> writeBuffer =
            this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelWriterOutputView outputView =
            new ChannelWriterOutputView(blockChannelWriter, writeBuffer, writeBuffer.get(0).size());

    // presumably (start, count): skip the first record and emit NUM_RECORDS - 1 records —
    // the read-back loop below expects keys 1 .. NUM_RECORDS - 1
    sorter.writeToOutput(outputView, 1, NUM_RECORDS - 1);
    this.memoryManager.release(outputView.close());

    BlockChannelReader<MemorySegment> blockChannelReader =
            this.ioManager.createBlockChannelReader(channelID);
    final List<MemorySegment> readBuffer =
            this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelReaderInputView readerInputView =
            new ChannelReaderInputView(blockChannelReader, readBuffer, false);
    final List<MemorySegment> dataBuffer =
            this.memoryManager.allocatePages(new DummyInvokable(), 3);
    // diamond instead of a raw type, so the assignment is checked by the compiler
    ChannelReaderInputViewIterator<IntPair> iterator =
            new ChannelReaderInputViewIterator<>(readerInputView, dataBuffer, this.serializer);

    // every record read back must carry a key equal to its position, starting at 1
    record = iterator.next(record);
    int i = 1;
    while (record != null) {
        Assert.assertEquals(i, record.getKey());
        record = iterator.next(record);
        i++;
    }
    // i ends one past the last key read, i.e. NUM_RECORDS when NUM_RECORDS - 1 records came back
    Assert.assertEquals(NUM_RECORDS, i);

    // NOTE(review): readBuffer is never released explicitly here — presumably the iterator
    // hands those segments over to dataBuffer when it is exhausted; confirm.
    this.memoryManager.release(dataBuffer);
    // release the memory occupied by the buffers
    sorter.dispose();
    this.memoryManager.release(memory);
}
Use of org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator in the Apache Flink project.
From the class HashTablePerformanceComparison, method testCompactingHashMapPerformance.
/**
 * Times the four phases of working with a {@link CompactingHashTable} — build, probe, update,
 * probe again — and prints the elapsed wall-clock time of each phase to standard out.
 *
 * <p>Besides timing, the test checks that every probed key is found, that the stored value
 * matches the generated one (first run) or the generated one plus one (after the update), and
 * that the table reports {@code NUM_MEM_PAGES} free segments after it has been closed.
 */
@Test
public void testCompactingHashMapPerformance() {
    try {
        final int NUM_MEM_PAGES = SIZE * NUM_PAIRS / PAGE_SIZE;

        // four generators with identical parameters, one per phase
        MutableObjectIterator<IntPair> buildInput = new UniformIntPairGenerator(NUM_PAIRS, 1, false);
        MutableObjectIterator<IntPair> probeTester = new UniformIntPairGenerator(NUM_PAIRS, 1, false);
        MutableObjectIterator<IntPair> updater = new UniformIntPairGenerator(NUM_PAIRS, 1, false);
        MutableObjectIterator<IntPair> updateTester = new UniformIntPairGenerator(NUM_PAIRS, 1, false);

        long start;
        long end;
        long first = System.currentTimeMillis();

        System.out.println("Creating and filling CompactingHashMap...");
        start = System.currentTimeMillis();
        AbstractMutableHashTable<IntPair> table =
                new CompactingHashTable<IntPair>(serializer, comparator, getMemory(NUM_MEM_PAGES, PAGE_SIZE));
        table.open();

        IntPair target = new IntPair();
        while (buildInput.next(target) != null) {
            table.insert(target);
        }
        end = System.currentTimeMillis();
        System.out.println("HashMap ready. Time: " + (end - start) + " ms");

        System.out.println("Starting first probing run...");
        start = System.currentTimeMillis();
        AbstractHashTableProber<IntPair, IntPair> prober = table.getProber(comparator, pairComparator);
        IntPair temp = new IntPair();
        while (probeTester.next(target) != null) {
            assertNotNull(prober.getMatchFor(target, temp));
            // expected value first: the generated pair is the reference, the table entry is
            // the value under test
            assertEquals(target.getValue(), temp.getValue());
        }
        end = System.currentTimeMillis();
        System.out.println("Probing done. Time: " + (end - start) + " ms");

        System.out.println("Starting update...");
        start = System.currentTimeMillis();
        while (updater.next(target) != null) {
            target.setValue(target.getValue() + 1);
            table.insertOrReplaceRecord(target);
        }
        end = System.currentTimeMillis();
        System.out.println("Update done. Time: " + (end - start) + " ms");

        System.out.println("Starting second probing run...");
        start = System.currentTimeMillis();
        while (updateTester.next(target) != null) {
            assertNotNull(prober.getMatchFor(target, temp));
            // the update phase stored value + 1 for every key
            assertEquals(target.getValue() + 1, temp.getValue());
        }
        end = System.currentTimeMillis();
        System.out.println("Probing done. Time: " + (end - start) + " ms");

        table.close();

        end = System.currentTimeMillis();
        System.out.println("Overall time: " + (end - first) + " ms");

        // checked after close(): the table must report all of its pages as free again
        assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size());
    } catch (Exception e) {
        // keep the original exception as the cause so the failure report carries its stack trace
        throw new AssertionError("Error: " + e.getMessage(), e);
    }
}
Use of org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator in the Apache Flink project.
From the class HashTableITCase, method testFailingHashJoinTooManyRecursionsIntPair.
/*
 * This test is basically identical to the "testSpillingHashJoinWithMassiveCollisions" test, only that the number
 * of repeated values (causing bucket collisions) is large enough to make sure that their target partition no longer
 * fits into memory by itself and needs to be repartitioned in the recursion again.
 *
 * The join is expected to abort with an exception while records are being pulled; completing the
 * join without one is reported as a test failure. The hash table is closed and its memory is
 * returned to the memory manager in either case.
 */
@Test
public void testFailingHashJoinTooManyRecursionsIntPair() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion
    // level.
    // we use them to make sure one partition grows over-proportionally large
    final int REPEATED_VALUE_1 = 40559;
    final int REPEATED_VALUE_2 = 92882;
    final int REPEATED_VALUE_COUNT = 3000000;

    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // two constant-pair inputs (sized by REPEATED_VALUE_COUNT each) for the two colliding keys
    MutableObjectIterator<IntPair> build1 = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
    MutableObjectIterator<IntPair> build2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
    MutableObjectIterator<IntPair> build3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
    List<MutableObjectIterator<IntPair>> builds = new ArrayList<MutableObjectIterator<IntPair>>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<IntPair> buildInput = new UnionIterator<IntPair>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key, plus the
    // same two constant-pair inputs for the colliding keys
    MutableObjectIterator<IntPair> probe1 = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
    MutableObjectIterator<IntPair> probe2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
    MutableObjectIterator<IntPair> probe3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
    List<MutableObjectIterator<IntPair>> probes = new ArrayList<MutableObjectIterator<IntPair>>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<IntPair> probeInput = new UnionIterator<IntPair>(probes);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    } catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    // ----------------------------------------------------------------------------------------

    final MutableHashTable<IntPair, IntPair> join =
            new MutableHashTable<IntPair, IntPair>(
                    this.pairBuildSideAccesssor,
                    this.pairProbeSideAccesssor,
                    this.pairBuildSideComparator,
                    this.pairProbeSideComparator,
                    this.pairComparator,
                    memSegments,
                    ioManager);
    join.open(buildInput, probeInput);

    final IntPair recordReuse = new IntPair();

    try {
        while (join.nextRecord()) {
            MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
            if (buildSide.next(recordReuse) == null) {
                fail("No build side values found for a probe key.");
            }
            while (buildSide.next(recordReuse) != null) {
                // drain the remaining build-side matches for this probe record
            }
        }
        // fail() throws an AssertionError, which the catch (Exception) below does not intercept
        fail("Hash Join must have failed due to too many recursions.");
    } catch (Exception expected) {
        // expected: the join is supposed to abort with an exception
    } finally {
        // clean up even when one of the fail() calls above fires, so the table is closed and
        // its memory goes back to the memory manager
        join.close();

        // ------------------------------------------------------------------------------------

        this.memManager.release(join.getFreedMemory());
    }
}
Aggregations