Search in sources :

Example 51 with Collection

use of java.util.Collection in project flink by apache.

the class ReusingSortMergeInnerJoinIteratorITCase method matchValues.

// --------------------------------------------------------------------------------------------
//                                    Utilities
// --------------------------------------------------------------------------------------------
/**
 * Computes the expected inner-join result: for every key that occurs in both maps, the
 * cross product of the left values and the right values for that key.
 *
 * <p>Keys of {@code leftMap} for which {@code rightMap} has no (non-null) value are left
 * out of the result entirely.
 *
 * @param leftMap  values of the left input, grouped by key
 * @param rightMap values of the right input, grouped by key
 * @return a map from key to all (left value, right value) pairs for that key
 */
private Map<Integer, Collection<Match>> matchValues(Map<Integer, Collection<String>> leftMap, Map<Integer, Collection<String>> rightMap) {
    final Map<Integer, Collection<Match>> result = new HashMap<Integer, Collection<Match>>();
    for (Map.Entry<Integer, Collection<String>> leftEntry : leftMap.entrySet()) {
        final Integer joinKey = leftEntry.getKey();
        final Collection<String> lefts = leftEntry.getValue();
        final Collection<String> rights = rightMap.get(joinKey);
        // only keys present on both sides contribute to an inner join
        if (rights != null) {
            Collection<Match> pairs = result.get(joinKey);
            if (pairs == null) {
                pairs = new ArrayList<Match>();
                result.put(joinKey, pairs);
            }
            // cross product of both sides for this key
            for (String left : lefts) {
                for (String right : rights) {
                    pairs.add(new Match(left, right));
                }
            }
        }
    }
    return result;
}
Also used : HashMap(java.util.HashMap) Collection(java.util.Collection) Match(org.apache.flink.runtime.operators.testutils.Match)

Example 52 with Collection

use of java.util.Collection in project flink by apache.

the class ReusingSortMergeInnerJoinIteratorITCase method testMerge.

/**
 * Joins two generated, key-sorted inputs with the reusing merge inner-join iterator and
 * verifies that, afterwards, the collection of expected matches is empty for every key.
 * The expected matches are computed up front with {@code matchValues}; the join function
 * is a {@code MatchRemovingJoiner} built over that map (presumably it removes each match
 * it is called with -- confirm against its implementation).
 */
@Test
public void testMerge() {
    try {
        final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
        final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

        // first pass over the inputs: build the expected join result
        final Map<Integer, Collection<Match>> remainingMatches = matchValues(collectData(leftInput), collectData(rightInput));

        // rewind generators and inputs so the join sees the same data again
        leftGenerator.reset();
        rightGenerator.reset();
        leftInput.reset();
        rightInput.reset();

        final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joiner = new MatchRemovingJoiner(remainingMatches);
        final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<Tuple2<Integer, String>>();

        // second pass: run the actual merge join
        ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator = new ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>>(leftInput, rightInput, this.serializer1, this.comparator1, this.serializer2, this.comparator2, this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);
        joinIterator.open();
        boolean moreKeys = true;
        while (moreKeys) {
            moreKeys = joinIterator.callWithNextKey(joiner, sink);
        }
        joinIterator.close();

        // every expected match must have been consumed
        for (Entry<Integer, Collection<Match>> remaining : remainingMatches.entrySet()) {
            if (!remaining.getValue().isEmpty()) {
                Assert.fail("Collection for key " + remaining.getKey() + " is not empty");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("An exception occurred during the test: " + e.getMessage());
    }
}
Also used : TestData(org.apache.flink.runtime.operators.testutils.TestData) TupleGenerator(org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator) MatchRemovingJoiner(org.apache.flink.runtime.operators.testutils.MatchRemovingJoiner) DiscardingOutputCollector(org.apache.flink.runtime.operators.testutils.DiscardingOutputCollector) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) Test(org.junit.Test)

Example 53 with Collection

use of java.util.Collection in project flink by apache.

the class AbstractSortMergeOuterJoinIteratorITCase method testOuterJoinWithHighNumberOfCommonKeys.

/**
 * Runs an outer join of the given type over two inputs that each consist of a generated,
 * sparsely-keyed stream merged with a run of records sharing one constant key
 * ({@code DUPLICATE_KEY}), and verifies that afterwards no expected match is left over
 * for any key.
 *
 * <p>The expected result is computed first with {@code joinValues}; the inputs are then
 * reset and re-created before being fed to the iterator returned by
 * {@code createOuterJoinIterator}.
 *
 * <p>NOTE(review): this method replaces {@code this.memoryManager} and {@code this.ioManager}
 * with fresh instances; whether previously assigned instances need to be shut down first
 * is not visible from here.
 *
 * @param outerJoinType      the outer join variant, passed to {@code joinValues} and {@code createOuterJoinIterator}
 * @param input1Size         number of generated records for the first input
 * @param input1Duplicates   passed to the constant-value iterator of the first input (presumably the number of duplicate-key records)
 * @param input1ValueLength  value length argument for the first generator
 * @param input1KeyDensity   key density argument for the first generator
 * @param input2Size         number of generated records for the second input
 * @param input2Duplicates   passed to the constant-value iterator of the second input (presumably the number of duplicate-key records)
 * @param input2ValueLength  value length argument for the second generator
 * @param input2KeyDensity   key density argument for the second generator
 */
// Two separate warning names: a single "unchecked, rawtypes" string matches neither of them.
@SuppressWarnings({ "unchecked", "rawtypes" })
protected void testOuterJoinWithHighNumberOfCommonKeys(OuterJoinType outerJoinType, int input1Size, int input1Duplicates, int input1ValueLength, float input1KeyDensity, int input2Size, int input2Duplicates, int input2ValueLength, float input2KeyDensity) {
    TypeSerializer<Tuple2<Integer, String>> serializer1 = new TupleSerializer<>((Class<Tuple2<Integer, String>>) (Class<?>) Tuple2.class, new TypeSerializer<?>[] { IntSerializer.INSTANCE, StringSerializer.INSTANCE });
    TypeSerializer<Tuple2<Integer, String>> serializer2 = new TupleSerializer<>((Class<Tuple2<Integer, String>>) (Class<?>) Tuple2.class, new TypeSerializer<?>[] { IntSerializer.INSTANCE, StringSerializer.INSTANCE });
    // both sides are compared on tuple field 0 (the integer key)
    TypeComparator<Tuple2<Integer, String>> comparator1 = new TupleComparator<>(new int[] { 0 }, new TypeComparator<?>[] { new IntComparator(true) }, new TypeSerializer<?>[] { IntSerializer.INSTANCE });
    TypeComparator<Tuple2<Integer, String>> comparator2 = new TupleComparator<>(new int[] { 0 }, new TypeComparator<?>[] { new IntComparator(true) }, new TypeSerializer<?>[] { IntSerializer.INSTANCE });
    TypePairComparator<Tuple2<Integer, String>, Tuple2<Integer, String>> pairComparator = new GenericPairComparator<>(comparator1, comparator2);
    this.memoryManager = new MemoryManager(MEMORY_SIZE, 1);
    this.ioManager = new IOManagerAsync();
    final int DUPLICATE_KEY = 13;
    try {
        final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, input1KeyDensity, input1ValueLength, KeyMode.SORTED_SPARSE, ValueMode.RANDOM_LENGTH, null);
        final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, input2KeyDensity, input2ValueLength, KeyMode.SORTED_SPARSE, ValueMode.RANDOM_LENGTH, null);
        final TupleGeneratorIterator gen1Iter = new TupleGeneratorIterator(generator1, input1Size);
        final TupleGeneratorIterator gen2Iter = new TupleGeneratorIterator(generator2, input2Size);
        final TupleConstantValueIterator const1Iter = new TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", input1Duplicates);
        final TupleConstantValueIterator const2Iter = new TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", input2Duplicates);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        // collect expected data
        final Map<Integer, Collection<Match>> expectedMatchesMap = joinValues(collectData(input1), collectData(input2), outerJoinType);
        // re-create the whole thing for actual processing
        // reset the generators and iterators
        generator1.reset();
        generator2.reset();
        const1Iter.reset();
        const2Iter.reset();
        gen1Iter.reset();
        gen2Iter.reset();
        inList1.clear();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        inList2.clear();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinFunction = new MatchRemovingJoiner(expectedMatchesMap);
        final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();
        // we create this sort-merge iterator with little memory for the block-nested-loops fall-back to make sure it
        // needs to spill for the duplicate keys
        AbstractMergeOuterJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator = createOuterJoinIterator(outerJoinType, input1, input2, serializer1, comparator1, serializer2, comparator2, pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);
        iterator.open();
        // drain the join; all work happens inside callWithNextKey
        while (iterator.callWithNextKey(joinFunction, collector)) ;
        iterator.close();
        // assert that each expected match was seen
        for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
            if (!entry.getValue().isEmpty()) {
                Assert.fail("Collection for key " + entry.getKey() + " is not empty");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("An exception occurred during the test: " + e.getMessage());
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ResettableMutableObjectIterator(org.apache.flink.runtime.util.ResettableMutableObjectIterator) ArrayList(java.util.ArrayList) IntComparator(org.apache.flink.api.common.typeutils.base.IntComparator) TupleComparator(org.apache.flink.api.java.typeutils.runtime.TupleComparator) MatchRemovingJoiner(org.apache.flink.runtime.operators.testutils.MatchRemovingJoiner) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) GenericPairComparator(org.apache.flink.api.common.typeutils.GenericPairComparator) TupleConstantValueIterator(org.apache.flink.runtime.operators.testutils.TestData.TupleConstantValueIterator) TupleGenerator(org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TupleGeneratorIterator(org.apache.flink.runtime.operators.testutils.TestData.TupleGeneratorIterator) DiscardingOutputCollector(org.apache.flink.runtime.operators.testutils.DiscardingOutputCollector) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection)

Example 54 with Collection

use of java.util.Collection in project flink by apache.

the class NonReusingSortMergeCoGroupIteratorITCase method testMerge.

/**
 * Runs the non-reusing sort-merge co-group iterator over two generated, key-sorted inputs
 * and checks every produced co-group against the expected values.
 *
 * <p>For each key the iterator reports, all values from both sides must be contained in the
 * expected collections for that key, and those collections must be empty once the group has
 * been consumed. After the iterator is exhausted, no expected key may remain.
 */
@Test
public void testMerge() {
    try {
        generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        reader1 = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
        reader2 = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
        // collect expected data
        Map<Integer, Collection<String>> expectedValuesMap1 = collectData(generator1, INPUT_1_SIZE);
        Map<Integer, Collection<String>> expectedValuesMap2 = collectData(generator2, INPUT_2_SIZE);
        Map<Integer, List<Collection<String>>> expectedCoGroupsMap = coGroupValues(expectedValuesMap1, expectedValuesMap2);
        // reset the generators
        generator1.reset();
        generator2.reset();
        // compare with iterator values
        NonReusingSortMergeCoGroupIterator<Tuple2<Integer, String>, Tuple2<Integer, String>> iterator = new NonReusingSortMergeCoGroupIterator<>(this.reader1, this.reader2, this.serializer1, this.comparator1, this.serializer2, this.comparator2, this.pairComparator);
        iterator.open();
        int key = 0;
        while (iterator.next()) {
            Iterator<Tuple2<Integer, String>> iter1 = iterator.getValues1().iterator();
            Iterator<Tuple2<Integer, String>> iter2 = iterator.getValues2().iterator();
            String v1 = null;
            String v2 = null;
            // the group's key is taken from the first record of whichever side is non-empty
            if (iter1.hasNext()) {
                Tuple2<Integer, String> rec = iter1.next();
                key = rec.f0;
                v1 = rec.f1;
            } else if (iter2.hasNext()) {
                Tuple2<Integer, String> rec = iter2.next();
                key = rec.f0;
                v2 = rec.f1;
            } else {
                Assert.fail("No input on both sides.");
            }
            // assert that matches for this key exist
            Assert.assertTrue("No matches for key " + key, expectedCoGroupsMap.containsKey(key));
            Collection<String> expValues1 = expectedCoGroupsMap.get(key).get(0);
            Collection<String> expValues2 = expectedCoGroupsMap.get(key).get(1);
            // the record consumed above to learn the key must be expected too; check it the
            // same way as the remaining records of the group
            if (v1 != null) {
                Assert.assertTrue("Value not in expected set of first input", expValues1.remove(v1));
            } else {
                Assert.assertTrue("Value not in expected set of second input", expValues2.remove(v2));
            }
            while (iter1.hasNext()) {
                Tuple2<Integer, String> rec = iter1.next();
                Assert.assertTrue("Value not in expected set of first input", expValues1.remove(rec.f1));
            }
            Assert.assertTrue("Expected set of first input not empty", expValues1.isEmpty());
            while (iter2.hasNext()) {
                Tuple2<Integer, String> rec = iter2.next();
                Assert.assertTrue("Value not in expected set of second input", expValues2.remove(rec.f1));
            }
            Assert.assertTrue("Expected set of second input not empty", expValues2.isEmpty());
            // group fully verified; what is left in the map at the end was never produced
            expectedCoGroupsMap.remove(key);
        }
        iterator.close();
        Assert.assertTrue("Expected key set not empty", expectedCoGroupsMap.isEmpty());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("An exception occurred during the test: " + e.getMessage());
    }
}
Also used : TestData(org.apache.flink.runtime.operators.testutils.TestData) TupleGenerator(org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Example 55 with Collection

use of java.util.Collection in project flink by apache.

the class NonReusingSortMergeInnerJoinIteratorITCase method testMergeWithHighNumberOfCommonKeys.

/**
 * Inner-join test in which both inputs contain, in addition to generated sorted records,
 * a run of records that all carry the same key (13). The join iterator is created with
 * {@code PAGES_FOR_BNLJN} pages; the test passes when no expected match remains for any key.
 */
@Test
public void testMergeWithHighNumberOfCommonKeys() {
    // sizes of the generated part and of the duplicate-key part of each input
    final int leftSize = 200;
    final int rightSize = 100;
    final int leftDuplicates = 10;
    final int rightDuplicates = 4000;
    final int duplicateKey = 13;
    try {
        // left side: generated records plus constant-key records
        final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
        final TestData.TupleConstantValueIterator leftConstant = new TestData.TupleConstantValueIterator(duplicateKey, "LEFT String for Duplicate Keys", leftDuplicates);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> leftSources = new ArrayList<>();
        leftSources.add(leftGenerated);
        leftSources.add(leftConstant);

        // right side: same construction with its own seed and sizes
        final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);
        final TestData.TupleConstantValueIterator rightConstant = new TestData.TupleConstantValueIterator(duplicateKey, "RIGHT String for Duplicate Keys", rightDuplicates);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> rightSources = new ArrayList<>();
        rightSources.add(rightGenerated);
        rightSources.add(rightConstant);

        MutableObjectIterator<Tuple2<Integer, String>> leftInput = new MergeIterator<>(leftSources, comparator1.duplicate());
        MutableObjectIterator<Tuple2<Integer, String>> rightInput = new MergeIterator<>(rightSources, comparator2.duplicate());

        // first pass: compute the matches the join is expected to produce
        final Map<Integer, Collection<Match>> remainingMatches = matchValues(collectData(leftInput), collectData(rightInput));

        // rewind every source and rebuild the merged inputs for the real run
        leftGenerator.reset();
        leftConstant.reset();
        leftGenerated.reset();
        rightGenerator.reset();
        rightConstant.reset();
        rightGenerated.reset();

        leftSources.clear();
        leftSources.add(leftGenerated);
        leftSources.add(leftConstant);
        rightSources.clear();
        rightSources.add(rightGenerated);
        rightSources.add(rightConstant);

        leftInput = new MergeIterator<>(leftSources, comparator1.duplicate());
        rightInput = new MergeIterator<>(rightSources, comparator2.duplicate());

        final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joiner = new MatchRemovingJoiner(remainingMatches);
        final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

        // we create this sort-merge iterator with little memory for the block-nested-loops fall-back to make sure it
        // needs to spill for the duplicate keys
        NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator = new NonReusingMergeInnerJoinIterator<>(leftInput, rightInput, this.serializer1, this.comparator1, this.serializer2, this.comparator2, this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);
        joinIterator.open();
        boolean moreKeys = true;
        while (moreKeys) {
            moreKeys = joinIterator.callWithNextKey(joiner, sink);
        }
        joinIterator.close();

        // nothing may be left over: each expected match has to have been seen
        for (Entry<Integer, Collection<Match>> remaining : remainingMatches.entrySet()) {
            Assert.assertTrue("Collection for key " + remaining.getKey() + " is not empty", remaining.getValue().isEmpty());
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("An exception occurred during the test: " + e.getMessage());
    }
}
Also used : TestData(org.apache.flink.runtime.operators.testutils.TestData) MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ArrayList(java.util.ArrayList) MatchRemovingJoiner(org.apache.flink.runtime.operators.testutils.MatchRemovingJoiner) TupleGenerator(org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator) DiscardingOutputCollector(org.apache.flink.runtime.operators.testutils.DiscardingOutputCollector) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) Test(org.junit.Test)

Aggregations

Collection (java.util.Collection)2848 ArrayList (java.util.ArrayList)801 Map (java.util.Map)581 Test (org.junit.Test)537 HashMap (java.util.HashMap)479 List (java.util.List)387 Iterator (java.util.Iterator)325 HashSet (java.util.HashSet)279 IOException (java.io.IOException)258 Set (java.util.Set)250 File (java.io.File)114 Collectors (java.util.stream.Collectors)95 LinkedHashMap (java.util.LinkedHashMap)90 LinkedList (java.util.LinkedList)82 Test (org.testng.annotations.Test)78 NotNull (org.jetbrains.annotations.NotNull)75 Region (org.apache.geode.cache.Region)71 Collections (java.util.Collections)67 Field (java.lang.reflect.Field)65 Logger (org.slf4j.Logger)63