Search in sources :

Example 36 with MutableObjectIterator

use of org.apache.flink.util.MutableObjectIterator in project flink by apache.

the class BinaryHashTableTest method testFailingHashJoinTooManyRecursions.

/*
 * This test is basically identical to the "testSpillingHashJoinWithMassiveCollisions" test, only that the number
 * of repeated values (causing bucket collisions) is large enough to make sure that their target partition no
 * longer fits into memory by itself and needs to be repartitioned in the recursion again.
 *
 * The join is expected to throw. The table is closed and freed no matter how the join attempt ends, so a
 * failing assertion does not leave the table's resources behind.
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion
    // level.
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCount = 3000000;
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // 6 million pairs (2 x repeatedValueCount) with two colliding keys
    MutableObjectIterator<BinaryRowData> build1 =
            new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 =
            new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
    MutableObjectIterator<BinaryRowData> build3 =
            new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key, plus
    // 6 million pairs (2 x repeatedValueCount) with the two colliding keys
    MutableObjectIterator<BinaryRowData> probe1 =
            new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 =
            new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
    MutableObjectIterator<BinaryRowData> probe3 =
            new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

    // ----------------------------------------------------------------------------------------
    MemoryManager memManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
    final BinaryHashTable table =
            newBinaryHashTable(
                    this.buildSideSerializer,
                    this.probeSideSerializer,
                    new MyProjection(),
                    new MyProjection(),
                    memManager,
                    896 * PAGE_SIZE,
                    ioManager);
    try {
        join(table, buildInput, probeInput);
        // fail() raises an AssertionError, which the catch clause below does not intercept
        fail("Hash Join must have failed due to too many recursions.");
    } catch (Exception expected) {
        // expected: the join is supposed to abort with an exception
    } finally {
        // release the table even when the fail(...) above fires
        table.close();
        // ------------------------------------------------------------------------------------
        table.free();
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) UnionIterator(org.apache.flink.runtime.operators.testutils.UnionIterator) ArrayList(java.util.ArrayList) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) MemoryAllocationException(org.apache.flink.runtime.memory.MemoryAllocationException) IOException(java.io.IOException) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 37 with MutableObjectIterator

use of org.apache.flink.util.MutableObjectIterator in project flink by apache.

the class BinaryHashTableTest method testSpillingHashJoinWithMassiveCollisions.

/**
 * Joins a uniform build/probe input that is extended by two keys with many duplicate rows and
 * checks the size of the per-key cross product collected by {@code testJoin}.
 *
 * <p>For the two repeated keys the expected cross product is derived from
 * {@code repeatedValueCountBuild} and {@code repeatedValueCountProbe}, so the inputs below are
 * built from those same constants.
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
    // the following two values are known to have a hash-code collision on the initial level.
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCountBuild = 200000;
    final int repeatedValueCountProbe = 5;
    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus
    // 400k pairs with two colliding keys
    MutableObjectIterator<BinaryRowData> build1 =
            new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRowData> build2 =
            new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
    MutableObjectIterator<BinaryRowData> build3 =
            new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
    List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key, plus
    // repeatedValueCountProbe pairs for each of the two colliding keys
    MutableObjectIterator<BinaryRowData> probe1 =
            new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRowData> probe2 =
            new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
    MutableObjectIterator<BinaryRowData> probe3 =
            new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
    List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

    // create the map for validating the results
    HashMap<Integer, Long> map = new HashMap<>(numKeys);
    MemoryManager memManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();

    // ----------------------------------------------------------------------------------------
    final BinaryHashTable table =
            newBinaryHashTable(
                    this.buildSideSerializer,
                    this.probeSideSerializer,
                    new MyProjection(),
                    new MyProjection(),
                    memManager,
                    896 * PAGE_SIZE,
                    ioManager);

    // feed the complete build side into the table
    BinaryRowData buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();

    // probe row by row; matches are recorded whenever tryProbe reports true
    BinaryRowData probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }

    // record whatever the table still reports through nextMatching()
    while (table.nextMatching()) {
        testJoin(table, map);
    }
    table.close();

    Assert.assertEquals("Wrong number of keys", numKeys, map.size());
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        Assert.assertEquals(
                "Wrong number of values in per-key cross product for key " + key,
                (key == repeatedValue1 || key == repeatedValue2)
                        ? (probeValsPerKey + repeatedValueCountProbe)
                                * (buildValsPerKey + repeatedValueCountBuild)
                        : probeValsPerKey * buildValsPerKey,
                val);
    }

    // ----------------------------------------------------------------------------------------
    table.free();
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) UnionIterator(org.apache.flink.runtime.operators.testutils.UnionIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) HashMap(java.util.HashMap) Map(java.util.Map) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 38 with MutableObjectIterator

use of org.apache.flink.util.MutableObjectIterator in project flink by apache.

the class BinaryHashTableTest method testRepeatBuildJoin.

/**
 * Joins a build side consisting of {@code numKeys} copies of the same row (both int fields set
 * to 1) against a uniform probe side and expects exactly one record in the join result.
 */
@Test
public void testRepeatBuildJoin() throws Exception {
    final int numKeys = 500;
    final int probeValsPerKey = 1;

    MemoryManager memManager =
            MemoryManagerBuilder.newBuilder().setMemorySize(40 * PAGE_SIZE).build();

    // build side: hands out numKeys freshly allocated rows, then signals exhaustion with null
    MutableObjectIterator<BinaryRowData> buildInput =
            new MutableObjectIterator<BinaryRowData>() {

                int produced = 0;

                @Override
                public BinaryRowData next(BinaryRowData reuse) throws IOException {
                    // the reuse instance is ignored; a new row is created for every call
                    return next();
                }

                @Override
                public BinaryRowData next() throws IOException {
                    if (++produced > numKeys) {
                        return null;
                    }
                    BinaryRowData duplicate = new BinaryRowData(2);
                    BinaryRowWriter rowWriter = new BinaryRowWriter(duplicate);
                    rowWriter.writeInt(0, 1);
                    rowWriter.writeInt(1, 1);
                    rowWriter.complete();
                    return duplicate;
                }
            };

    MutableObjectIterator<BinaryRowData> probeInput =
            new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);

    final BinaryHashTable table =
            new BinaryHashTable(
                    conf,
                    new Object(),
                    buildSideSerializer,
                    probeSideSerializer,
                    new MyProjection(),
                    new MyProjection(),
                    memManager,
                    40 * PAGE_SIZE,
                    ioManager,
                    24,
                    200000,
                    true,
                    HashJoinType.INNER,
                    null,
                    false,
                    new boolean[] {true},
                    true);

    int joinedRecords = join(table, buildInput, probeInput, true);
    Assert.assertEquals("Wrong number of records in join result.", 1, joinedRecords);

    table.close();
    table.free();
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 39 with MutableObjectIterator

use of org.apache.flink.util.MutableObjectIterator in project flink by apache.

the class BinaryExternalSorterTest method testSpillingRandom.

/**
 * Writes {@code size} shuffled rows into a {@code BinaryExternalSorter} and verifies that the
 * sorter's iterator returns them ordered by the int in field 0, comparing field 0 and the string
 * in field 1 against a reference list sorted in memory.
 *
 * <p>The sorter is closed in a {@code finally} block so that a failing assertion or exception
 * does not skip {@code close()} after {@code startThreads()} has been called.
 */
@Test
public void testSpillingRandom() throws Exception {
    int size = 1000_000;
    MockBinaryRowReader reader = new MockBinaryRowReader(size);
    LOG.debug("initializing sortmerger");
    long minMemorySize =
            memoryManager.computeNumberOfPages(0.1) * MemoryManager.DEFAULT_PAGE_SIZE;
    BinaryExternalSorter sorter =
            new BinaryExternalSorter(
                    new Object(),
                    this.memoryManager,
                    minMemorySize,
                    this.ioManager,
                    (AbstractRowDataSerializer) serializer,
                    serializer,
                    IntNormalizedKeyComputer.INSTANCE,
                    IntRecordComparator.INSTANCE,
                    conf,
                    0.7f);
    sorter.startThreads();
    try {
        // materialize the input; the reader reuses 'row', hence the copy per element
        List<BinaryRowData> data = new ArrayList<>(size);
        BinaryRowData row = serializer.createInstance();
        for (int i = 0; i < size; i++) {
            row = reader.next(row);
            data.add(row.copy());
        }

        // feed the rows to the sorter in random order
        Collections.shuffle(data);
        for (int i = 0; i < size; i++) {
            sorter.write(data.get(i));
        }
        MutableObjectIterator<BinaryRowData> iterator = sorter.getIterator();

        // reference order: sort the same rows in memory by field 0
        data.sort(Comparator.comparingInt(o -> o.getInt(0)));

        BinaryRowData next = serializer.createInstance();
        for (int i = 0; i < size; i++) {
            next = iterator.next(next);
            // fail with a readable message instead of a NullPointerException below
            Assert.assertNotNull("Sorter returned no row at position " + i, next);
            Assert.assertEquals(data.get(i).getInt(0), next.getInt(0));
            Assert.assertEquals(data.get(i).getString(1), next.getString(1));
        }
    } finally {
        sorter.close();
    }
}
Also used : Arrays(java.util.Arrays) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RunWith(org.junit.runner.RunWith) LoggerFactory(org.slf4j.LoggerFactory) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer) ArrayList(java.util.ArrayList) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BinaryRowWriter(org.apache.flink.table.data.writer.BinaryRowWriter) After(org.junit.After) MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) Parameterized(org.junit.runners.Parameterized) MemoryManagerBuilder(org.apache.flink.runtime.memory.MemoryManagerBuilder) Before(org.junit.Before) RowData(org.apache.flink.table.data.RowData) Logger(org.slf4j.Logger) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer) StringData(org.apache.flink.table.data.StringData) List(java.util.List) Assert(org.junit.Assert) Comparator(java.util.Comparator) ExecutionConfigOptions(org.apache.flink.table.api.config.ExecutionConfigOptions) Collections(java.util.Collections) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 40 with MutableObjectIterator

use of org.apache.flink.util.MutableObjectIterator in project flink by apache.

the class BinaryMergeIteratorTest method testOneStream.

/**
 * Runs a {@code BinaryMergeIterator} over a single input stream and checks that it returns
 * exactly the int keys of that stream, in order.
 *
 * <p>The number of returned rows is verified as well; without that check the test would pass
 * even if the merge iterator returned too few rows (or none at all).
 */
@Test
public void testOneStream() throws Exception {
    List<MutableObjectIterator<BinaryRowData>> iterators = new ArrayList<>();
    iterators.add(
            newIterator(new int[] {1, 2, 4, 5, 10}, new String[] {"1", "2", "4", "5", "10"}));

    final int[] expected = new int[] {1, 2, 4, 5, 10};

    MutableObjectIterator<BinaryRowData> iterator =
            new BinaryMergeIterator<>(
                    iterators,
                    Collections.singletonList(serializer.createInstance()),
                    (o1, o2) -> this.comparator.compare(o1, o2));

    BinaryRowData row = serializer.createInstance();
    int pos = 0;
    while ((row = iterator.next(row)) != null) {
        // report surplus rows as an assertion failure rather than an index error
        Assert.assertTrue(
                "Merge iterator returned more rows than expected", pos < expected.length);
        Assert.assertEquals(expected[pos++], row.getInt(0));
    }
    Assert.assertEquals("Wrong number of rows returned", expected.length, pos);
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ArrayList(java.util.ArrayList) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) Test(org.junit.Test)

Aggregations

MutableObjectIterator (org.apache.flink.util.MutableObjectIterator)42 Test (org.junit.Test)32 ArrayList (java.util.ArrayList)26 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)19 UnionIterator (org.apache.flink.runtime.operators.testutils.UnionIterator)15 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)15 MemorySegment (org.apache.flink.core.memory.MemorySegment)12 Collection (java.util.Collection)10 HashMap (java.util.HashMap)9 Map (java.util.Map)9 IOException (java.io.IOException)7 MemoryAllocationException (org.apache.flink.runtime.memory.MemoryAllocationException)7 DiscardingOutputCollector (org.apache.flink.runtime.operators.testutils.DiscardingOutputCollector)7 TestData (org.apache.flink.runtime.operators.testutils.TestData)7 TupleGenerator (org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator)7 UniformBinaryRowGenerator (org.apache.flink.table.runtime.util.UniformBinaryRowGenerator)7 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)6 NullKeyFieldException (org.apache.flink.types.NullKeyFieldException)6 BitSet (java.util.BitSet)4 Random (java.util.Random)4