use of org.apache.flink.runtime.operators.testutils.UniformRecordGenerator in project flink by apache.
the class HashTableITCase method testSparseProbeSpillingWithOuterJoin.
/*
* Same test as {@link #testSparseProbeSpilling} but using a build-side outer join
* that requires spilled build-side records to be returned and counted.
*/
@Test
public void testSparseProbeSpillingWithOuterJoin() throws IOException, MemoryAllocationException {
final int NUM_BUILD_KEYS = 1000000;
final int NUM_BUILD_VALS = 1;
final int NUM_PROBE_KEYS = 20;
final int NUM_PROBE_VALS = 1;
MutableObjectIterator<Record> buildInput = new UniformRecordGenerator(NUM_BUILD_KEYS, NUM_BUILD_VALS, false);
// allocate the memory for the HashTable
List<MemorySegment> memSegments;
try {
memSegments = this.memManager.allocatePages(MEM_OWNER, 96);
} catch (MemoryAllocationException maex) {
fail("Memory for the Join could not be provided.");
return;
}
final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator, memSegments, ioManager);
join.open(buildInput, new UniformRecordGenerator(NUM_PROBE_KEYS, NUM_PROBE_VALS, true), true);
int expectedNumResults = (Math.max(NUM_PROBE_KEYS, NUM_BUILD_KEYS) * NUM_BUILD_VALS) * NUM_PROBE_VALS;
final Record recordReuse = new Record();
int numRecordsInJoinResult = 0;
while (join.nextRecord()) {
MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
while (buildSide.next(recordReuse) != null) {
numRecordsInJoinResult++;
}
}
Assert.assertEquals("Wrong number of records in join result.", expectedNumResults, numRecordsInJoinResult);
join.close();
this.memManager.release(join.getFreedMemory());
}
use of org.apache.flink.runtime.operators.testutils.UniformRecordGenerator in project flink by apache.
the class HashTableITCase method testInMemoryMutableHashTable.
@Test
public void testInMemoryMutableHashTable() throws IOException {
final int NUM_KEYS = 100000;
final int BUILD_VALS_PER_KEY = 3;
final int PROBE_VALS_PER_KEY = 10;
// create a build input that gives 3 million pairs with 3 values sharing the same key
MutableObjectIterator<Record> buildInput = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
// create a probe input that gives 10 million pairs with 10 values sharing a key
MutableObjectIterator<Record> probeInput = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
// allocate the memory for the HashTable
List<MemorySegment> memSegments;
try {
memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
} catch (MemoryAllocationException maex) {
fail("Memory for the Join could not be provided.");
return;
}
// ----------------------------------------------------------------------------------------
final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator, memSegments, ioManager);
join.open(buildInput, probeInput);
final Record recordReuse = new Record();
int numRecordsInJoinResult = 0;
while (join.nextRecord()) {
MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
while (buildSide.next(recordReuse) != null) {
numRecordsInJoinResult++;
}
}
Assert.assertEquals("Wrong number of records in join result.", NUM_KEYS * BUILD_VALS_PER_KEY * PROBE_VALS_PER_KEY, numRecordsInJoinResult);
join.close();
// ----------------------------------------------------------------------------------------
this.memManager.release(join.getFreedMemory());
}
use of org.apache.flink.runtime.operators.testutils.UniformRecordGenerator in project flink by apache.
the class HashTableITCase method testFailingHashJoinTooManyRecursions.
/*
* This test is basically identical to the "testSpillingHashJoinWithMassiveCollisions" test, only that the number
* of repeated values (causing bucket collisions) are large enough to make sure that their target partition no longer
* fits into memory by itself and needs to be repartitioned in the recursion again.
*/
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
// the following two values are known to have a hash-code collision on the first recursion
// level.
// we use them to make sure one partition grows over-proportionally large
final int REPEATED_VALUE_1 = 40559;
final int REPEATED_VALUE_2 = 92882;
final int REPEATED_VALUE_COUNT = 3000000;
final int NUM_KEYS = 1000000;
final int BUILD_VALS_PER_KEY = 3;
final int PROBE_VALS_PER_KEY = 10;
// create a build input that gives 3 million pairs with 3 values sharing the same key, plus
// 400k pairs with two colliding keys
MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
builds.add(build1);
builds.add(build2);
builds.add(build3);
MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);
// create a probe input that gives 10 million pairs with 10 values sharing a key
MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
probes.add(probe1);
probes.add(probe2);
probes.add(probe3);
MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);
// allocate the memory for the HashTable
List<MemorySegment> memSegments;
try {
memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
} catch (MemoryAllocationException maex) {
fail("Memory for the Join could not be provided.");
return;
}
// ----------------------------------------------------------------------------------------
final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator, memSegments, ioManager);
join.open(buildInput, probeInput);
final Record recordReuse = new Record();
try {
while (join.nextRecord()) {
MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
if (buildSide.next(recordReuse) == null) {
fail("No build side values found for a probe key.");
}
while (buildSide.next(recordReuse) != null) ;
}
fail("Hash Join must have failed due to too many recursions.");
} catch (Exception ex) {
// expected
}
join.close();
// ----------------------------------------------------------------------------------------
this.memManager.release(join.getFreedMemory());
}
use of org.apache.flink.runtime.operators.testutils.UniformRecordGenerator in project flink by apache.
the class ChainedAllReduceDriverTest method testMapTask.
@Test
public void testMapTask() throws Exception {
final int keyCnt = 100;
final int valCnt = 20;
final double memoryFraction = 1.0;
// environment
initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
mockEnv.getExecutionConfig().enableObjectReuse();
addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
addOutput(this.outList);
// chained reduce config
{
final TaskConfig reduceConfig = new TaskConfig(new Configuration());
// input
reduceConfig.addInputToGroup(0);
reduceConfig.setInputSerializer(serFact, 0);
// output
reduceConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
reduceConfig.setOutputSerializer(serFact);
// driver
reduceConfig.setDriverStrategy(DriverStrategy.ALL_REDUCE);
reduceConfig.setDriverComparator(compFact, 0);
reduceConfig.setDriverComparator(compFact, 1);
reduceConfig.setRelativeMemoryDriver(memoryFraction);
// udf
reduceConfig.setStubWrapper(new UserCodeClassWrapper<>(MockReduceStub.class));
getTaskConfig().addChainedTask(ChainedAllReduceDriver.class, reduceConfig, "reduce");
}
// chained map+reduce
{
registerTask(FlatMapDriver.class, MockMapStub.class);
BatchTask<FlatMapFunction<Record, Record>, Record> testTask = new BatchTask<>(mockEnv);
testTask.invoke();
}
int sumTotal = valCnt * keyCnt * (keyCnt - 1) / 2;
Assert.assertEquals(1, this.outList.size());
Assert.assertEquals(sumTotal, this.outList.get(0).getField(0, IntValue.class).getValue());
}
use of org.apache.flink.runtime.operators.testutils.UniformRecordGenerator in project flink by apache.
the class JoinTaskTest method testFailingMatchTask.
@Test
public void testFailingMatchTask() {
int keyCnt1 = 20;
int valCnt1 = 20;
int keyCnt2 = 20;
int valCnt2 = 20;
setOutput(new NirvanaOutputList());
addDriverComparator(this.comparator1);
addDriverComparator(this.comparator2);
getTaskConfig().setDriverPairComparator(RecordPairComparatorFactory.get());
getTaskConfig().setDriverStrategy(DriverStrategy.INNER_MERGE);
getTaskConfig().setRelativeMemoryDriver(bnljn_frac);
setNumFileHandlesForSort(4);
final JoinDriver<Record, Record, Record> testTask = new JoinDriver<>();
addInput(new UniformRecordGenerator(keyCnt1, valCnt1, true));
addInput(new UniformRecordGenerator(keyCnt2, valCnt2, true));
try {
testDriver(testTask, MockFailingMatchStub.class);
Assert.fail("Driver did not forward Exception.");
} catch (ExpectedTestException e) {
// good!
} catch (Exception e) {
e.printStackTrace();
Assert.fail("The test caused an exception.");
}
}
Aggregations