Search in sources:

Example 41 with MutableObjectIterator

Use of org.apache.flink.util.MutableObjectIterator in the Apache Flink project.

Source: class HashTableRecordWidthCombinations, method main.

/**
 * Stress-tests {@link MutableHashTable} across a sweep of record counts (3400-3549) and
 * record widths (270-319 bytes) with a maximally skewed build side (every key is 42L),
 * exercising early hash-table growth and spill recursion for many size combinations.
 *
 * <p>Exceeding the table's maximum recursion depth is an expected outcome for some
 * combinations and is tolerated; any other failure is reported via {@code fail}.
 */
public static void main(String[] args) throws Exception {
    @SuppressWarnings("unchecked")
    final TypeSerializer<Tuple2<Long, byte[]>> buildSerializer =
            new TupleSerializer<Tuple2<Long, byte[]>>(
                    (Class<Tuple2<Long, byte[]>>) (Class<?>) Tuple2.class,
                    new TypeSerializer<?>[] {
                        LongSerializer.INSTANCE, BytePrimitiveArraySerializer.INSTANCE
                    });
    final TypeSerializer<Long> probeSerializer = LongSerializer.INSTANCE;

    final TypeComparator<Tuple2<Long, byte[]>> buildComparator =
            new TupleComparator<Tuple2<Long, byte[]>>(
                    new int[] {0},
                    new TypeComparator<?>[] {new LongComparator(true)},
                    new TypeSerializer<?>[] {LongSerializer.INSTANCE});
    final TypeComparator<Long> probeComparator = new LongComparator(true);

    // Pairs a plain Long probe key against field 0 of the build-side tuple.
    final TypePairComparator<Long, Tuple2<Long, byte[]>> pairComparator =
            new TypePairComparator<Long, Tuple2<Long, byte[]>>() {

                private long ref;

                @Override
                public void setReference(Long reference) {
                    ref = reference;
                }

                @Override
                public boolean equalToReference(Tuple2<Long, byte[]> candidate) {
                    // noinspection UnnecessaryUnboxing
                    return candidate.f0.longValue() == ref;
                }

                @Override
                public int compareToReference(Tuple2<Long, byte[]> candidate) {
                    long x = ref;
                    long y = candidate.f0;
                    return (x < y) ? -1 : ((x == y) ? 0 : 1);
                }
            };

    try (final IOManager ioMan = new IOManagerAsync()) {
        final int pageSize = 32 * 1024;
        final int numSegments = 34;

        for (int num = 3400; num < 3550; num++) {
            final int numRecords = num;

            for (int recordLen = 270; recordLen < 320; recordLen++) {
                // Payload size is recordLen minus the assumed serialized overhead
                // (8-byte long key + 4-byte array length) — TODO confirm exact layout.
                final byte[] payload = new byte[recordLen - 8 - 4];

                System.out.println("testing " + numRecords + " / " + recordLen);

                List<MemorySegment> memory = getMemory(numSegments, pageSize);

                // we create a hash table that thinks the records are super large. that makes it
                // choose initially a lot of memory for the partition buffers, and start with a
                // smaller hash table. that way we trigger a hash table growth early.
                MutableHashTable<Tuple2<Long, byte[]>, Long> table =
                        new MutableHashTable<>(
                                buildSerializer,
                                probeSerializer,
                                buildComparator,
                                probeComparator,
                                pairComparator,
                                memory,
                                ioMan,
                                16,
                                false);

                // Build side: numRecords tuples, all with the identical key 42L.
                final MutableObjectIterator<Tuple2<Long, byte[]>> buildInput =
                        new MutableObjectIterator<Tuple2<Long, byte[]>>() {

                            private int count = 0;

                            @Override
                            public Tuple2<Long, byte[]> next(Tuple2<Long, byte[]> reuse) {
                                return next();
                            }

                            @Override
                            public Tuple2<Long, byte[]> next() {
                                if (count++ < numRecords) {
                                    return new Tuple2<>(42L, payload);
                                } else {
                                    return null;
                                }
                            }
                        };

                // Probe side: the longs 0..9999, each produced exactly once.
                final MutableObjectIterator<Long> probeInput =
                        new MutableObjectIterator<Long>() {

                            private final long numRecords = 10000;

                            private long value = 0;

                            @Override
                            public Long next(Long aLong) {
                                return next();
                            }

                            @Override
                            public Long next() {
                                if (value < numRecords) {
                                    return value++;
                                } else {
                                    return null;
                                }
                            }
                        };

                table.open(buildInput, probeInput);

                try {
                    while (table.nextRecord()) {
                        MutableObjectIterator<Tuple2<Long, byte[]>> matches =
                                table.getBuildSideIterator();
                        // drain all matches; we only care that iteration completes
                        while (matches.next() != null) {}
                    }
                } catch (RuntimeException e) {
                    // Exceeding the spill recursion limit is an accepted outcome for these
                    // adversarial sizes. Guard against a null message so we do not replace
                    // the original exception with a NullPointerException.
                    final String msg = e.getMessage();
                    if (msg == null || !msg.contains("exceeded maximum number of recursions")) {
                        throw e;
                    }
                } finally {
                    table.close();
                }

                // make sure no temp files are left
                checkNoTempFilesRemain(ioMan);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) TupleComparator(org.apache.flink.api.java.typeutils.runtime.TupleComparator) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TypePairComparator(org.apache.flink.api.common.typeutils.TypePairComparator) LongComparator(org.apache.flink.api.common.typeutils.base.LongComparator) MemorySegment(org.apache.flink.core.memory.MemorySegment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) MutableHashTable(org.apache.flink.runtime.operators.hash.MutableHashTable)

Example 42 with MutableObjectIterator

Use of org.apache.flink.util.MutableObjectIterator in the Apache Flink project.

Source: class SortMergeJoinOperator, method doSortMergeJoin.

/**
 * Runs the sort-merge join over the two sorted inputs, dispatching on the configured join
 * {@code type}. INNER/LEFT/RIGHT/FULL delegate to the shared join callbacks; SEMI emits a
 * probe row iff some buffered row satisfies the join condition, ANTI emits it iff none does.
 *
 * @throws Exception if the sorters, the spillable buffers, or the join condition fail
 */
private void doSortMergeJoin() throws Exception {
    // NOTE(review): raw MutableObjectIterator kept — the element types differ per side and
    // are fixed by the serializers; parameterizing here would require wider signature changes.
    MutableObjectIterator iterator1 = sorter1.getIterator();
    MutableObjectIterator iterator2 = sorter2.getIterator();
    if (type.equals(FlinkJoinType.INNER)) {
        if (!leftIsSmaller) {
            try (SortMergeInnerJoinIterator joinIterator = new SortMergeInnerJoinIterator(serializer1, serializer2, projection1, projection2, keyComparator, iterator1, iterator2, newBuffer(serializer2), filterNulls)) {
                innerJoin(joinIterator, false);
            }
        } else {
            // Swap the sides so the smaller input becomes the buffered side.
            try (SortMergeInnerJoinIterator joinIterator = new SortMergeInnerJoinIterator(serializer2, serializer1, projection2, projection1, keyComparator, iterator2, iterator1, newBuffer(serializer1), filterNulls)) {
                innerJoin(joinIterator, true);
            }
        }
    } else if (type.equals(FlinkJoinType.LEFT)) {
        try (SortMergeOneSideOuterJoinIterator joinIterator = new SortMergeOneSideOuterJoinIterator(serializer1, serializer2, projection1, projection2, keyComparator, iterator1, iterator2, newBuffer(serializer2), filterNulls)) {
            oneSideOuterJoin(joinIterator, false, rightNullRow);
        }
    } else if (type.equals(FlinkJoinType.RIGHT)) {
        try (SortMergeOneSideOuterJoinIterator joinIterator = new SortMergeOneSideOuterJoinIterator(serializer2, serializer1, projection2, projection1, keyComparator, iterator2, iterator1, newBuffer(serializer1), filterNulls)) {
            oneSideOuterJoin(joinIterator, true, leftNullRow);
        }
    } else if (type.equals(FlinkJoinType.FULL)) {
        try (SortMergeFullOuterJoinIterator fullOuterJoinIterator = new SortMergeFullOuterJoinIterator(serializer1, serializer2, projection1, projection2, keyComparator, iterator1, iterator2, newBuffer(serializer1), newBuffer(serializer2), filterNulls)) {
            fullOuterJoin(fullOuterJoinIterator);
        }
    } else if (type.equals(FlinkJoinType.SEMI)) {
        try (SortMergeInnerJoinIterator joinIterator = new SortMergeInnerJoinIterator(serializer1, serializer2, projection1, projection2, keyComparator, iterator1, iterator2, newBuffer(serializer2), filterNulls)) {
            while (joinIterator.nextInnerJoin()) {
                RowData probeRow = joinIterator.getProbeRow();
                // SEMI: keep the probe row only if at least one buffered row matches.
                if (anyMatch(probeRow, joinIterator.getMatchBuffer())) {
                    collector.collect(probeRow);
                }
            }
        }
    } else if (type.equals(FlinkJoinType.ANTI)) {
        try (SortMergeOneSideOuterJoinIterator joinIterator = new SortMergeOneSideOuterJoinIterator(serializer1, serializer2, projection1, projection2, keyComparator, iterator1, iterator2, newBuffer(serializer2), filterNulls)) {
            while (joinIterator.nextOuterJoin()) {
                RowData probeRow = joinIterator.getProbeRow();
                // ANTI: keep the probe row only if no buffered row matches
                // (a null match buffer means there were no key matches at all).
                if (!anyMatch(probeRow, joinIterator.getMatchBuffer())) {
                    collector.collect(probeRow);
                }
            }
        }
    } else {
        throw new RuntimeException("Not support type: " + type);
    }
}

/**
 * Returns whether any row in {@code matchBuffer} satisfies the join condition against
 * {@code probeRow}. A {@code null} buffer yields {@code false}.
 *
 * @param probeRow the current probe-side row
 * @param matchBuffer buffered build-side rows with a matching key, or {@code null}
 * @throws Exception if reading the buffer or evaluating the condition fails
 */
private boolean anyMatch(RowData probeRow, ResettableExternalBuffer matchBuffer) throws Exception {
    if (matchBuffer == null) {
        return false;
    }
    try (ResettableExternalBuffer.BufferIterator iter = matchBuffer.newIterator()) {
        while (iter.advanceNext()) {
            if (condFunc.apply(probeRow, iter.getRow())) {
                return true;
            }
        }
    }
    return false;
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) ResettableExternalBuffer(org.apache.flink.table.runtime.util.ResettableExternalBuffer) MutableObjectIterator(org.apache.flink.util.MutableObjectIterator)

Aggregations

MutableObjectIterator (org.apache.flink.util.MutableObjectIterator)42 Test (org.junit.Test)32 ArrayList (java.util.ArrayList)26 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)19 UnionIterator (org.apache.flink.runtime.operators.testutils.UnionIterator)15 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)15 MemorySegment (org.apache.flink.core.memory.MemorySegment)12 Collection (java.util.Collection)10 HashMap (java.util.HashMap)9 Map (java.util.Map)9 IOException (java.io.IOException)7 MemoryAllocationException (org.apache.flink.runtime.memory.MemoryAllocationException)7 DiscardingOutputCollector (org.apache.flink.runtime.operators.testutils.DiscardingOutputCollector)7 TestData (org.apache.flink.runtime.operators.testutils.TestData)7 TupleGenerator (org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator)7 UniformBinaryRowGenerator (org.apache.flink.table.runtime.util.UniformBinaryRowGenerator)7 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)6 NullKeyFieldException (org.apache.flink.types.NullKeyFieldException)6 BitSet (java.util.BitSet)4 Random (java.util.Random)4