Example 1 with BinaryRowChannelInputViewIterator

Use of org.apache.flink.table.runtime.io.BinaryRowChannelInputViewIterator in project flink by apache.

From the class BinaryHashTable, method buildTableFromSpilledPartition:

private void buildTableFromSpilledPartition(final BinaryHashPartition p) throws IOException {
    final int nextRecursionLevel = p.getRecursionLevel() + 1;
    if (nextRecursionLevel == 2) {
        LOG.info("Recursive hash join: partition number is " + p.getPartitionNumber());
    } else if (nextRecursionLevel > MAX_RECURSION_DEPTH) {
        throw new RuntimeException(
                "Hash join exceeded maximum number of recursions, without reducing partitions"
                        + " enough to be memory resident. Probably cause: Too many duplicate keys.");
    }
    if (p.getBuildSideBlockCount() > p.getProbeSideBlockCount()) {
        LOG.info(String.format("Hash join: Partition(%d) " + "build side block [%d] more than probe side block [%d]", p.getPartitionNumber(), p.getBuildSideBlockCount(), p.getProbeSideBlockCount()));
    }
    // we distinguish two cases here:
    // 1) The partition fits entirely into main memory. That is the case if we have enough
    //    buffers for all partition segments, plus enough buffers to hold the table structure.
    //    --> We read the partition in as it is and create a hashtable that references only
    //        that single partition.
    // 2) We can not guarantee that enough memory segments are available and read the partition
    //    in, distributing its data among newly created partitions.
    final int totalBuffersAvailable = this.internalPool.freePages() + this.buildSpillRetBufferNumbers;
    if (totalBuffersAvailable != this.totalNumBuffers) {
        throw new RuntimeException(
                String.format(
                        "Hash Join bug in memory management: Memory buffers leaked."
                                + " availableMemory(%s), buildSpillRetBufferNumbers(%s), reservedNumBuffers(%s)",
                        internalPool.freePages(),
                        buildSpillRetBufferNumbers,
                        totalNumBuffers));
    }
    long numBuckets = p.getBuildSideRecordCount() / BinaryHashBucketArea.NUM_ENTRIES_PER_BUCKET + 1;
    // we need to consider the worst case where everything hashes to one bucket which needs to
    // overflow by the same
    // number of total buckets again. Also, one buffer needs to remain for the probing
    int maxBucketAreaBuffers = Math.max((int) (2 * (numBuckets / (this.bucketsPerSegmentMask + 1))), 1);
    final long totalBuffersNeeded = maxBucketAreaBuffers + p.getBuildSideBlockCount() + 2;
    if (totalBuffersNeeded < totalBuffersAvailable) {
        LOG.info(String.format("Build in memory hash table from spilled partition [%d]", p.getPartitionNumber()));
        // first read the partition in
        final List<MemorySegment> partitionBuffers =
                readAllBuffers(p.getBuildSideChannel().getChannelID(), p.getBuildSideBlockCount());
        BinaryHashBucketArea area =
                new BinaryHashBucketArea(
                        this, (int) p.getBuildSideRecordCount(), maxBucketAreaBuffers, false);
        final BinaryHashPartition newPart =
                new BinaryHashPartition(
                        area,
                        this.binaryBuildSideSerializer,
                        this.binaryProbeSideSerializer,
                        0,
                        nextRecursionLevel,
                        partitionBuffers,
                        p.getBuildSideRecordCount(),
                        this.segmentSize,
                        p.getLastSegmentLimit());
        area.setPartition(newPart);
        this.partitionsBeingBuilt.add(newPart);
        // now, index the partition through a hash table
        final BinaryHashPartition.PartitionIterator pIter = newPart.newPartitionIterator();
        while (pIter.advanceNext()) {
            final int hashCode = hash(buildSideProjection.apply(pIter.getRow()).hashCode(), nextRecursionLevel);
            final int pointer = (int) pIter.getPointer();
            area.insertToBucket(hashCode, pointer, true);
        }
    } else {
        // go over the complete input and insert every element into the hash table
        // compute in how many splits, we'd need to partition the result
        final int splits = (int) (totalBuffersNeeded / totalBuffersAvailable) + 1;
        final int partitionFanOut = Math.min(Math.min(10 * splits, MAX_NUM_PARTITIONS), maxNumPartition());
        createPartitions(partitionFanOut, nextRecursionLevel);
        LOG.info(String.format("Build hybrid hash table from spilled partition [%d] with recursion level [%d]", p.getPartitionNumber(), nextRecursionLevel));
        ChannelReaderInputView inView = createInputView(p.getBuildSideChannel().getChannelID(), p.getBuildSideBlockCount(), p.getLastSegmentLimit());
        final BinaryRowChannelInputViewIterator inIter = new BinaryRowChannelInputViewIterator(inView, this.binaryBuildSideSerializer);
        BinaryRowData rec = this.binaryBuildSideSerializer.createInstance();
        while ((rec = inIter.next(rec)) != null) {
            final int hashCode = hash(this.buildSideProjection.apply(rec).hashCode(), nextRecursionLevel);
            insertIntoTable(rec, hashCode);
        }
        inView.getChannel().closeAndDelete();
        // finalize the partitions
        int buildWriteBuffers = 0;
        for (BinaryHashPartition part : this.partitionsBeingBuilt) {
            buildWriteBuffers += part.finalizeBuildPhase(this.ioManager, this.currentEnumerator);
        }
        buildSpillRetBufferNumbers += buildWriteBuffers;
    }
}
Also used: ChannelReaderInputView (org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView), BinaryRowChannelInputViewIterator (org.apache.flink.table.runtime.io.BinaryRowChannelInputViewIterator), BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData), MemorySegment (org.apache.flink.core.memory.MemorySegment)
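
Both examples rely on the same reuse-based read idiom: a BinaryRowChannelInputViewIterator is constructed over a ChannelReaderInputView together with the row serializer, and rows are pulled into a reused BinaryRowData instance until next() returns null. A minimal sketch of that idiom follows; the helper name drainSpilledChannel is hypothetical (not part of Flink), and it assumes the serializer is a BinaryRowDataSerializer, as binaryBuildSideSerializer is in BinaryHashTable.

// Illustrative helper (not Flink code): drains a spilled channel using the same
// reuse-based pattern as buildTableFromSpilledPartition above.
private long drainSpilledChannel(ChannelReaderInputView inView, BinaryRowDataSerializer serializer) throws IOException {
    final BinaryRowChannelInputViewIterator iter =
            new BinaryRowChannelInputViewIterator(inView, serializer);
    BinaryRowData reuse = serializer.createInstance();
    long count = 0;
    // next(reuse) deserializes into the reused instance and returns null once the view is exhausted
    while ((reuse = iter.next(reuse)) != null) {
        count++;
    }
    // close the reader and delete the spilled file once it is fully consumed
    inView.getChannel().closeAndDelete();
    return count;
}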

Example 2 with BinaryRowChannelInputViewIterator

Use of org.apache.flink.table.runtime.io.BinaryRowChannelInputViewIterator in project flink by apache.

From the class BinaryHashTable, method prepareNextPartition:

private boolean prepareNextPartition() throws IOException {
    // finalize and cleanup the partitions of the current table
    for (final BinaryHashPartition p : this.partitionsBeingBuilt) {
        p.finalizeProbePhase(this.internalPool, this.partitionsPending, type.needSetProbed());
    }
    this.partitionsBeingBuilt.clear();
    if (this.currentSpilledBuildSide != null) {
        this.currentSpilledBuildSide.getChannel().closeAndDelete();
        this.currentSpilledBuildSide = null;
    }
    if (this.currentSpilledProbeSide != null) {
        this.currentSpilledProbeSide.getChannel().closeAndDelete();
        this.currentSpilledProbeSide = null;
    }
    if (this.partitionsPending.isEmpty()) {
        // no more data
        return false;
    }
    // there are pending partitions
    final BinaryHashPartition p = this.partitionsPending.get(0);
    LOG.info(String.format("Begin to process spilled partition [%d]", p.getPartitionNumber()));
    if (p.probeSideRecordCounter == 0) {
        // unprobed spilled partitions are only re-processed for a build-side outer join;
        // there is no need to create a hash table since there are no probe-side records
        this.currentSpilledBuildSide = createInputView(p.getBuildSideChannel().getChannelID(), p.getBuildSideBlockCount(), p.getLastSegmentLimit());
        this.buildIterator =
                new WrappedRowIterator<>(
                        new BinaryRowChannelInputViewIterator(
                                currentSpilledBuildSide, this.binaryBuildSideSerializer),
                        binaryBuildSideSerializer.createInstance());
        this.partitionsPending.remove(0);
        return true;
    }
    this.probeMatchedPhase = true;
    this.buildIterVisited = false;
    // build the next table; memory must be allocated after this call
    buildTableFromSpilledPartition(p);
    // set the probe side
    ChannelWithMeta channelWithMeta =
            new ChannelWithMeta(
                    p.probeSideBuffer.getChannel().getChannelID(),
                    p.probeSideBuffer.getBlockCount(),
                    p.probeNumBytesInLastSeg);
    this.currentSpilledProbeSide =
            FileChannelUtil.createInputView(
                    ioManager,
                    channelWithMeta,
                    new ArrayList<>(),
                    compressionEnable,
                    compressionCodecFactory,
                    compressionBlockSize,
                    segmentSize);
    ChannelReaderInputViewIterator<BinaryRowData> probeReader =
            new ChannelReaderInputViewIterator(
                    this.currentSpilledProbeSide, new ArrayList<>(), this.binaryProbeSideSerializer);
    this.probeIterator.set(probeReader);
    this.probeIterator.setReuse(binaryProbeSideSerializer.createInstance());
    // unregister the pending partition
    this.partitionsPending.remove(0);
    this.currentRecursionDepth = p.getRecursionLevel() + 1;
    // recursively get the next
    return nextMatching();
}
Also used: BinaryRowChannelInputViewIterator (org.apache.flink.table.runtime.io.BinaryRowChannelInputViewIterator), ArrayList (java.util.ArrayList), BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData), ChannelReaderInputViewIterator (org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator), ChannelWithMeta (org.apache.flink.table.runtime.io.ChannelWithMeta)
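
The probe-side re-open in this example follows a reusable sequence: the spilled channel's metadata is captured in a ChannelWithMeta, re-opened through FileChannelUtil.createInputView with the table's compression settings, and wrapped in a ChannelReaderInputViewIterator for deserialization. Below is a rough sketch of that sequence only; the method name reopenSpilledProbeSide is hypothetical, and the fields it references (ioManager, compressionEnable, compressionCodecFactory, compressionBlockSize, segmentSize, binaryProbeSideSerializer) are assumed to be the same members of BinaryHashTable used above.

// Illustrative sketch (not Flink code): re-open a spilled probe side and iterate
// its rows, mirroring the calls made in prepareNextPartition above.
private void reopenSpilledProbeSide(BinaryHashPartition p) throws IOException {
    ChannelWithMeta meta =
            new ChannelWithMeta(
                    p.probeSideBuffer.getChannel().getChannelID(),
                    p.probeSideBuffer.getBlockCount(),
                    p.probeNumBytesInLastSeg);
    // re-open the spilled data; 'var' leaves the concrete view type to the compiler
    var view =
            FileChannelUtil.createInputView(
                    ioManager,
                    meta,
                    new ArrayList<>(),
                    compressionEnable,
                    compressionCodecFactory,
                    compressionBlockSize,
                    segmentSize);
    ChannelReaderInputViewIterator<BinaryRowData> reader =
            new ChannelReaderInputViewIterator(view, new ArrayList<>(), binaryProbeSideSerializer);
    BinaryRowData reuse = binaryProbeSideSerializer.createInstance();
    while ((reuse = reader.next(reuse)) != null) {
        // probe the in-memory hash table with this row
    }
    // close the reader and delete the spilled file once it is fully consumed
    view.getChannel().closeAndDelete();
}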

Aggregations

BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 2 usages
BinaryRowChannelInputViewIterator (org.apache.flink.table.runtime.io.BinaryRowChannelInputViewIterator): 2 usages
ArrayList (java.util.ArrayList): 1 usage
MemorySegment (org.apache.flink.core.memory.MemorySegment): 1 usage
ChannelReaderInputViewIterator (org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator): 1 usage
ChannelReaderInputView (org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView): 1 usage
ChannelWithMeta (org.apache.flink.table.runtime.io.ChannelWithMeta): 1 usage