Use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.
The class RandomSortMergeInnerJoinTest, method testMergeWithHighNumberOfCommonKeys.
@Test
public void testMergeWithHighNumberOfCommonKeys() {
    // the sizes of the left and right inputs
    final int input1Size = 200;
    final int input2Size = 100;
    final int input1Duplicates = 10;
    final int input2Duplicates = 4000;
    final int duplicateKey = 13;
    try {
        final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, input1Size);
        final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, input2Size);
        final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(duplicateKey, "LEFT String for Duplicate Keys", input1Duplicates);
        final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(duplicateKey, "RIGHT String for Duplicate Keys", input2Duplicates);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        // collect expected data
        final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(collectData(input1), collectData(input2));
        // re-create the whole thing for actual processing
        // reset the generators and iterators
        generator1.reset();
        generator2.reset();
        const1Iter.reset();
        const2Iter.reset();
        gen1Iter.reset();
        gen2Iter.reset();
        inList1.clear();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        inList2.clear();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        StreamOperator operator = getOperator();
        match(expectedMatchesMap, transformToBinary(join(operator, input1, input2)));
        // assert that each expected match was seen
        for (Map.Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
            if (!entry.getValue().isEmpty()) {
                Assert.fail("Collection for key " + entry.getKey() + " is not empty");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("An exception occurred during the test: " + e.getMessage());
    }
}
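The heavy lifting here is done by MergeIterator, which combines several individually sorted MutableObjectIterators into a single sorted stream, so the generated records and the constant-duplicate-key records arrive interleaved in key order. A minimal standalone sketch of that behavior follows, assuming a hypothetical fromList helper (not part of Flink) and the same TupleComparator setup the outer-join test further below uses.

static <T> MutableObjectIterator<T> fromList(List<T> list) {
    // hypothetical adapter from a plain List to Flink's MutableObjectIterator
    final Iterator<T> it = list.iterator();
    return new MutableObjectIterator<T>() {

        @Override
        public T next(T reuse) {
            return next();
        }

        @Override
        public T next() {
            return it.hasNext() ? it.next() : null;
        }
    };
}

void mergeSketch() throws Exception {
    List<MutableObjectIterator<Tuple2<Integer, String>>> inputs = new ArrayList<>();
    inputs.add(fromList(Arrays.asList(Tuple2.of(1, "a"), Tuple2.of(3, "c"))));
    inputs.add(fromList(Arrays.asList(Tuple2.of(2, "b"), Tuple2.of(3, "d"))));
    TypeComparator<Tuple2<Integer, String>> comparator = new TupleComparator<>(new int[] { 0 }, new TypeComparator<?>[] { new IntComparator(true) }, new TypeSerializer<?>[] { IntSerializer.INSTANCE });
    MutableObjectIterator<Tuple2<Integer, String>> merged = new MergeIterator<>(inputs, comparator);
    Tuple2<Integer, String> record;
    while ((record = merged.next()) != null) {
        // records come out in key order (1, 2, 3, 3): records with equal keys from
        // different inputs become adjacent, the property the sort-merge join relies on
    }
}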
Use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.
The class Int2HashJoinOperatorTest, method joinAndAssert.
@SuppressWarnings("unchecked")
static void joinAndAssert(Object operator, MutableObjectIterator<BinaryRowData> input1, MutableObjectIterator<BinaryRowData> input2, int expectOutSize, int expectOutKeySize, int expectOutVal, boolean semiJoin) throws Exception {
    InternalTypeInfo<RowData> typeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType());
    InternalTypeInfo<RowData> rowDataTypeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType(), new IntType(), new IntType());
    TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 1, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, rowDataTypeInfo);
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.getExecutionConfig().enableObjectReuse();
    testHarness.setupOutputForSingletonOperatorChain();
    if (operator instanceof StreamOperator) {
        testHarness.getStreamConfig().setStreamOperator((StreamOperator<?>) operator);
    } else {
        testHarness.getStreamConfig().setStreamOperatorFactory((StreamOperatorFactory<?>) operator);
    }
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    Random random = new Random();
    do {
        BinaryRowData row1 = null;
        BinaryRowData row2 = null;
        if (random.nextInt(2) == 0) {
            row1 = input1.next();
            if (row1 == null) {
                row2 = input2.next();
            }
        } else {
            row2 = input2.next();
            if (row2 == null) {
                row1 = input1.next();
            }
        }
        if (row1 == null && row2 == null) {
            break;
        }
        if (row1 != null) {
            testHarness.processElement(new StreamRecord<>(row1), 0, 0);
        } else {
            testHarness.processElement(new StreamRecord<>(row2), 1, 0);
        }
    } while (true);
    testHarness.endInput(0, 0);
    testHarness.endInput(1, 0);
    testHarness.waitForInputProcessing();
    testHarness.waitForTaskCompletion();
    Queue<Object> actual = testHarness.getOutput();
    Assert.assertEquals("Output was not correct.", expectOutSize, actual.size());
    // Don't verify the output values when expectOutVal is -1
    if (expectOutVal != -1) {
        if (semiJoin) {
            HashMap<Integer, Long> map = new HashMap<>(expectOutKeySize);
            for (Object o : actual) {
                StreamRecord<RowData> record = (StreamRecord<RowData>) o;
                RowData row = record.getValue();
                int key = row.getInt(0);
                int val = row.getInt(1);
                Long contained = map.get(key);
                if (contained == null) {
                    contained = (long) val;
                } else {
                    contained = valueOf(contained + val);
                }
                map.put(key, contained);
            }
            Assert.assertEquals("Wrong number of keys", expectOutKeySize, map.size());
            for (Map.Entry<Integer, Long> entry : map.entrySet()) {
                long val = entry.getValue();
                int key = entry.getKey();
                Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, expectOutVal, val);
            }
        } else {
            // create the map for validating the results
            HashMap<Integer, Long> map = new HashMap<>(expectOutKeySize);
            for (Object o : actual) {
                StreamRecord<RowData> record = (StreamRecord<RowData>) o;
                RowData row = record.getValue();
                int key = row.isNullAt(0) ? row.getInt(2) : row.getInt(0);
                int val1 = 0;
                int val2 = 0;
                if (!row.isNullAt(1)) {
                    val1 = row.getInt(1);
                }
                if (!row.isNullAt(3)) {
                    val2 = row.getInt(3);
                }
                int val = val1 + val2;
                Long contained = map.get(key);
                if (contained == null) {
                    contained = (long) val;
                } else {
                    contained = valueOf(contained + val);
                }
                map.put(key, contained);
            }
            Assert.assertEquals("Wrong number of keys", expectOutKeySize, map.size());
            for (Map.Entry<Integer, Long> entry : map.entrySet()) {
                long val = entry.getValue();
                int key = entry.getKey();
                Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, expectOutVal, val);
            }
        }
    }
}
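Records are pushed into the harness by randomly alternating between the two inputs, so the operator sees arbitrary interleavings of build-side and probe-side rows. The BinaryRowData inputs themselves come from generator iterators elsewhere in the test; purely for illustration, a hand-built two-int row matching the IntType/IntType schema above might look like this (manual construction via BinaryRowWriter is an assumption for the sketch, not what the test does):

BinaryRowData row = new BinaryRowData(2);
BinaryRowWriter writer = new BinaryRowWriter(row);
writer.reset();
// field 0 is the join key, field 1 the value
writer.writeInt(0, 42);
writer.writeInt(1, 1);
writer.complete();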
Use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.
The class RandomSortMergeOuterJoinTest, method testOuterJoinWithHighNumberOfCommonKeys.
@SuppressWarnings("unchecked, rawtypes")
protected void testOuterJoinWithHighNumberOfCommonKeys(FlinkJoinType outerJoinType, int input1Size, int input1Duplicates, int input1ValueLength, float input1KeyDensity, int input2Size, int input2Duplicates, int input2ValueLength, float input2KeyDensity) {
    TypeComparator<Tuple2<Integer, String>> comparator1 = new TupleComparator<>(new int[] { 0 }, new TypeComparator<?>[] { new IntComparator(true) }, new TypeSerializer<?>[] { IntSerializer.INSTANCE });
    TypeComparator<Tuple2<Integer, String>> comparator2 = new TupleComparator<>(new int[] { 0 }, new TypeComparator<?>[] { new IntComparator(true) }, new TypeSerializer<?>[] { IntSerializer.INSTANCE });
    final int duplicateKey = 13;
    try {
        final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, input1KeyDensity, input1ValueLength, KeyMode.SORTED_SPARSE, ValueMode.RANDOM_LENGTH, null);
        final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, input2KeyDensity, input2ValueLength, KeyMode.SORTED_SPARSE, ValueMode.RANDOM_LENGTH, null);
        final TupleGeneratorIterator gen1Iter = new TupleGeneratorIterator(generator1, input1Size);
        final TupleGeneratorIterator gen2Iter = new TupleGeneratorIterator(generator2, input2Size);
        final TupleConstantValueIterator const1Iter = new TupleConstantValueIterator(duplicateKey, "LEFT String for Duplicate Keys", input1Duplicates);
        final TupleConstantValueIterator const2Iter = new TupleConstantValueIterator(duplicateKey, "RIGHT String for Duplicate Keys", input2Duplicates);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        // collect expected data
        final Map<Integer, Collection<Match>> expectedMatchesMap = joinValues(RandomSortMergeInnerJoinTest.collectData(input1), RandomSortMergeInnerJoinTest.collectData(input2), outerJoinType);
        // re-create the whole thing for actual processing
        // reset the generators and iterators
        generator1.reset();
        generator2.reset();
        const1Iter.reset();
        const2Iter.reset();
        gen1Iter.reset();
        gen2Iter.reset();
        inList1.clear();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        inList2.clear();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        StreamOperator operator = getOperator(outerJoinType);
        RandomSortMergeInnerJoinTest.match(expectedMatchesMap, RandomSortMergeInnerJoinTest.transformToBinary(myJoin(operator, input1, input2)));
        // assert that each expected match was seen
        for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
            if (!entry.getValue().isEmpty()) {
                Assert.fail("Collection for key " + entry.getKey() + " is not empty");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("An exception occurred during the test: " + e.getMessage());
    }
}
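A concrete test drives this parameterized helper by picking a FlinkJoinType and the size, duplicate-count, value-length, and key-density settings for both inputs. A hedged sketch of such a call site (the parameter values are illustrative, not taken from the actual suite):

@Test
public void testFullOuterJoinWithHighNumberOfCommonKeys() {
    // join type, then per input: size, duplicates, value length, key density
    testOuterJoinWithHighNumberOfCommonKeys(FlinkJoinType.FULL, 200, 10, 4096, 0.02f, 100, 4000, 2048, 0.02f);
}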
Use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.
The class StreamTaskTest, method testExecuteMailboxActionsAfterLeavingInputProcessorMailboxLoop.
@Test
public void testExecuteMailboxActionsAfterLeavingInputProcessorMailboxLoop() throws Exception {
    OneShotLatch latch = new OneShotLatch();
    try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().build()) {
        RunningTask<StreamTask<?, ?>> task = runTask(() -> new StreamTask<Object, StreamOperator<Object>>(mockEnvironment) {

            @Override
            protected void init() throws Exception {
            }

            @Override
            protected void processInput(MailboxDefaultAction.Controller controller) throws Exception {
                mailboxProcessor.getMailboxExecutor(0).execute(latch::trigger, "trigger");
                controller.suspendDefaultAction();
                mailboxProcessor.suspend();
            }
        });
        latch.await();
        task.waitForTaskCompletion(false);
    }
}
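The test hinges on a simple handshake: the default action enqueues a mail and immediately suspends itself, so latch::trigger can only run if the mailbox loop keeps processing mails after the default action has yielded, and latch.await() blocks until that happens. Stripped of the task machinery, the same OneShotLatch handshake looks like this (a standalone sketch, with a plain thread standing in for the mailbox thread):

OneShotLatch latch = new OneShotLatch();
// stands in for the mailbox thread executing the enqueued mail
new Thread(latch::trigger).start();
// blocks until trigger() has run, just like the test's latch.await()
latch.await();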
Use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.
The class StreamTaskMailboxTestHarnessBuilder, method initializeSourceInput.
private SourceInputConfig initializeSourceInput(int inputId, SourceInputConfigPlaceHolder sourceInput, StreamNode mainNode) {
    Map<Integer, StreamConfig> transitiveChainedTaskConfigs = streamConfig.getTransitiveChainedTaskConfigs(getClass().getClassLoader());
    Integer maxNodeId = transitiveChainedTaskConfigs.isEmpty() ? StreamConfigChainer.MAIN_NODE_ID : Collections.max(transitiveChainedTaskConfigs.keySet());
    List<StreamEdge> outEdgesInOrder = new LinkedList<>();
    StreamEdge sourceToMainEdge = new StreamEdge(new StreamNode(maxNodeId + inputId + 1337, null, null, (StreamOperator<?>) null, null, null), mainNode, 0, new ForwardPartitioner<>(), null);
    outEdgesInOrder.add(sourceToMainEdge);
    StreamConfig sourceConfig = new StreamConfig(new Configuration());
    sourceConfig.setTimeCharacteristic(streamConfig.getTimeCharacteristic());
    sourceConfig.setOutEdgesInOrder(outEdgesInOrder);
    sourceConfig.setChainedOutputs(outEdgesInOrder);
    sourceConfig.setTypeSerializerOut(sourceInput.getSourceSerializer());
    sourceConfig.setOperatorID(sourceInput.getOperatorId());
    sourceConfig.setStreamOperatorFactory(sourceInput.getSourceOperatorFactory());
    transitiveChainedTaskConfigs.put(sourceToMainEdge.getSourceId(), sourceConfig);
    streamConfig.setTransitiveChainedTaskConfigs(transitiveChainedTaskConfigs);
    return new SourceInputConfig(sourceToMainEdge);
}
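The synthetic source node receives the id maxNodeId + inputId + 1337, an offset chosen so it cannot collide with any node id already registered in the chained configs. Test authors never call this method directly; it runs while the harness is being built. A hedged usage sketch, modeled on how the builder appears in Flink's own task tests (treat the exact method names, the MockSource arguments, and the operator factory as assumptions):

// builder call that ends up routing through initializeSourceInput above
StreamTaskMailboxTestHarness<String> harness =
        new StreamTaskMailboxTestHarnessBuilder<>(MultipleInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO)
                // one regular network input plus one chained source input
                .addInput(BasicTypeInfo.STRING_TYPE_INFO)
                .addSourceInput(new SourceOperatorFactory<>(new MockSource(Boundedness.BOUNDED, 1), WatermarkStrategy.noWatermarks()))
                .setupOutputForSingletonOperatorChain(new MapToStringMultipleInputOperatorFactory(2))
                .build();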