Example 41 with MemorySegment

use of org.apache.flink.core.memory.MemorySegment in project flink by apache.

the class CompactingHashTable method insertBucketEntryFromSearch.

/**
 * IMPORTANT!!! We pass only the partition number, because we must make sure we get a fresh
 * partition reference. The partition reference used during search for the key may have become
 * invalid during the compaction.
 */
private void insertBucketEntryFromSearch(
        MemorySegment originalBucket,
        MemorySegment currentBucket,
        int originalBucketOffset,
        int currentBucketOffset,
        int countInCurrentBucket,
        long originalForwardPointer,
        int hashCode,
        long pointer,
        int partitionNumber) throws IOException {
    boolean checkForResize = false;
    if (countInCurrentBucket < NUM_ENTRIES_PER_BUCKET) {
        // we are good in our current bucket, put the values
        // hash code
        currentBucket.putInt(currentBucketOffset + BUCKET_HEADER_LENGTH + (countInCurrentBucket * HASH_CODE_LEN), hashCode);
        // pointer
        currentBucket.putLong(currentBucketOffset + BUCKET_POINTER_START_OFFSET + (countInCurrentBucket * POINTER_LEN), pointer);
        // update count
        currentBucket.putInt(currentBucketOffset + HEADER_COUNT_OFFSET, countInCurrentBucket + 1);
    } else {
        // we go to a new overflow bucket
        final InMemoryPartition<T> partition = this.partitions.get(partitionNumber);
        MemorySegment overflowSeg;
        final int overflowSegmentNum;
        final int overflowBucketOffset;
        // first, see if there is space for an overflow bucket remaining in the last overflow segment
        if (partition.nextOverflowBucket == 0) {
            // no space left in last bucket, or no bucket yet, so create an overflow segment
            overflowSeg = getNextBuffer();
            overflowBucketOffset = 0;
            overflowSegmentNum = partition.numOverflowSegments;
            // add the new overflow segment
            if (partition.overflowSegments.length <= partition.numOverflowSegments) {
                MemorySegment[] newSegsArray = new MemorySegment[partition.overflowSegments.length * 2];
                System.arraycopy(partition.overflowSegments, 0, newSegsArray, 0, partition.overflowSegments.length);
                partition.overflowSegments = newSegsArray;
            }
            partition.overflowSegments[partition.numOverflowSegments] = overflowSeg;
            partition.numOverflowSegments++;
            checkForResize = true;
        } else {
            // there is space in the last overflow segment
            overflowSegmentNum = partition.numOverflowSegments - 1;
            overflowSeg = partition.overflowSegments[overflowSegmentNum];
            overflowBucketOffset = partition.nextOverflowBucket << NUM_INTRA_BUCKET_BITS;
        }
        // next overflow bucket is one ahead. if the segment is full, the next will be at the beginning
        // of a new segment
        partition.nextOverflowBucket = (partition.nextOverflowBucket == this.bucketsPerSegmentMask ? 0 : partition.nextOverflowBucket + 1);
        // insert the new overflow bucket in the chain of buckets
        // 1) set the old forward pointer
        // 2) let the bucket in the main table point to this one
        overflowSeg.putLong(overflowBucketOffset + HEADER_FORWARD_OFFSET, originalForwardPointer);
        final long pointerToNewBucket = (((long) overflowSegmentNum) << 32) | ((long) overflowBucketOffset);
        originalBucket.putLong(originalBucketOffset + HEADER_FORWARD_OFFSET, pointerToNewBucket);
        // finally, insert the values into the overflow buckets
        // hash code
        overflowSeg.putInt(overflowBucketOffset + BUCKET_HEADER_LENGTH, hashCode);
        // pointer
        overflowSeg.putLong(overflowBucketOffset + BUCKET_POINTER_START_OFFSET, pointer);
        // set the count to one
        overflowSeg.putInt(overflowBucketOffset + HEADER_COUNT_OFFSET, 1);
        if (checkForResize && !this.isResizing) {
            // check if we should resize buckets
            if (this.buckets.length <= getOverflowSegmentCount()) {
                resizeHashTable();
            }
        }
    }
}
Also used : MemorySegment(org.apache.flink.core.memory.MemorySegment)
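
The forward pointer written above packs two values into a single long: the overflow segment's index in the upper 32 bits and the bucket's byte offset within that segment in the lower 32. Below is a minimal, self-contained sketch of that encoding in plain Java; the class and method names are ours, not Flink's.

public class ForwardPointerSketch {

    // pack a segment index and a byte offset into one long, as in
    // pointerToNewBucket above (offsets are non-negative, so the OR is safe)
    static long pack(int segmentNum, int bucketOffset) {
        return (((long) segmentNum) << 32) | ((long) bucketOffset);
    }

    // unsigned shift recovers the segment index from the upper half
    static int segmentNum(long pointer) {
        return (int) (pointer >>> 32);
    }

    // truncation recovers the byte offset from the lower half
    static int bucketOffset(long pointer) {
        return (int) pointer;
    }

    public static void main(String[] args) {
        long p = pack(3, 4096);
        System.out.println(segmentNum(p) + " / " + bucketOffset(p)); // 3 / 4096
    }
}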

Example 42 with MemorySegment

use of org.apache.flink.core.memory.MemorySegment in project flink by apache.

the class InPlaceMutableHashTable method insert.

/**
 * Inserts the given record into the hash table.
 * Note: this method doesn't care whether a record with the same key is already present.
 * @param record The record to insert.
 * @throws IOException (EOFException specifically, if memory runs out)
 */
@Override
public void insert(T record) throws IOException {
    if (closed) {
        return;
    }
    final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;
    // which segment contains the bucket
    final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits;
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
    // offset of the bucket in the segment
    final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits;
    final long firstPointer = bucketSegment.getLong(bucketOffset);
    try {
        final long newFirstPointer = recordArea.appendPointerAndRecord(firstPointer, record);
        bucketSegment.putLong(bucketOffset, newFirstPointer);
    } catch (EOFException ex) {
        compactOrThrow();
        insert(record);
        return;
    }
    numElements++;
    resizeTableIfNecessary();
}
Also used : EOFException(java.io.EOFException) MemorySegment(org.apache.flink.core.memory.MemorySegment)
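
The three address computations in insert() rely on the bucket count and buckets-per-segment count being powers of two, so masks and shifts replace modulo and division. A minimal sketch of the same arithmetic follows; the constants are illustrative assumptions, not Flink's defaults.

public class BucketAddressingSketch {

    static final int NUM_BUCKETS = 1 << 10;             // 1024 buckets (assumed)
    static final int NUM_BUCKETS_PER_SEGMENT_BITS = 6;  // 64 buckets per segment (assumed)
    static final int BUCKET_SIZE_BITS = 7;              // 128-byte buckets (assumed)

    static final int NUM_BUCKETS_MASK = NUM_BUCKETS - 1;
    static final int NUM_BUCKETS_PER_SEGMENT_MASK = (1 << NUM_BUCKETS_PER_SEGMENT_BITS) - 1;

    public static void main(String[] args) {
        int hashCode = 0xCAFEBABE;

        // hashCode & mask equals hashCode % NUM_BUCKETS for power-of-two bucket counts
        int bucket = hashCode & NUM_BUCKETS_MASK;

        // which segment holds the bucket, and the bucket's byte offset inside that segment
        int segmentIndex = bucket >>> NUM_BUCKETS_PER_SEGMENT_BITS;
        int bucketOffset = (bucket & NUM_BUCKETS_PER_SEGMENT_MASK) << BUCKET_SIZE_BITS;

        System.out.printf("bucket=%d segment=%d offset=%d%n", bucket, segmentIndex, bucketOffset);
    }
}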

Example 43 with MemorySegment

use of org.apache.flink.core.memory.MemorySegment in project flink by apache.

the class MutableHashTable method getMatchesFor.

public HashBucketIterator<BT, PT> getMatchesFor(PT record) throws IOException {
    final TypeComparator<PT> probeAccessors = this.probeSideComparator;
    final int hash = hash(probeAccessors.hash(record), this.currentRecursionDepth);
    final int posHashCode = hash % this.numBuckets;
    // get the bucket for the given hash code
    final int bucketArrayPos = posHashCode >> this.bucketsPerSegmentBits;
    final int bucketInSegmentOffset = (posHashCode & this.bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
    final MemorySegment bucket = this.buckets[bucketArrayPos];
    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
    final HashPartition<BT, PT> p = this.partitionsBeingBuilt.get(partitionNumber);
    // for an in-memory partition, set the return iterators; spilled partitions cannot serve point lookups
    if (p.isInMemory()) {
        this.recordComparator.setReference(record);
        this.bucketIterator.set(bucket, p.overflowSegments, p, hash, bucketInSegmentOffset);
        return this.bucketIterator;
    } else {
        throw new IllegalStateException("Method is not applicable to partially spilled hash tables.");
    }
}
Also used : MemorySegment(org.apache.flink.core.memory.MemorySegment)
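
getMatchesFor reads the bucket header with absolute get calls on a MemorySegment. The sketch below mimics that access pattern with java.nio.ByteBuffer as a stand-in (MemorySegment wraps heap or off-heap memory with the same style of absolute accessors); the header offsets and bucket size here are illustrative assumptions, not Flink's actual layout.

import java.nio.ByteBuffer;

public class BucketHeaderSketch {

    static final int HEADER_PARTITION_OFFSET = 0; // assumed layout
    static final int HEADER_COUNT_OFFSET = 2;     // assumed layout

    public static void main(String[] args) {
        // a 4 KB "segment" holding several fixed-size buckets
        ByteBuffer bucketSegment = ByteBuffer.allocate(4096);
        int bucketInSegmentOffset = 128; // second bucket, assuming 128-byte buckets

        // build side: record which partition owns the bucket and how many entries it holds
        bucketSegment.put(bucketInSegmentOffset + HEADER_PARTITION_OFFSET, (byte) 5);
        bucketSegment.putShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET, (short) 2);

        // probe side: read the header back, as bucket.get(...) does above
        byte partitionNumber = bucketSegment.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
        short count = bucketSegment.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
        System.out.println("partition=" + partitionNumber + " entries=" + count);
    }
}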

Example 44 with MemorySegment

use of org.apache.flink.core.memory.MemorySegment in project flink by apache.

the class MutableHashTable method processProbeIter.

protected boolean processProbeIter() throws IOException {
    final ProbeIterator<PT> probeIter = this.probeIterator;
    final TypeComparator<PT> probeAccessors = this.probeSideComparator;
    if (!this.probeMatchedPhase) {
        return false;
    }
    PT next;
    while ((next = probeIter.next()) != null) {
        final int hash = hash(probeAccessors.hash(next), this.currentRecursionDepth);
        final int posHashCode = hash % this.numBuckets;
        // get the bucket for the given hash code
        final int bucketArrayPos = posHashCode >> this.bucketsPerSegmentBits;
        final int bucketInSegmentOffset = (posHashCode & this.bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
        final MemorySegment bucket = this.buckets[bucketArrayPos];
        // get the basic characteristics of the bucket
        final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
        final HashPartition<BT, PT> p = this.partitionsBeingBuilt.get(partitionNumber);
        // for an in-memory partition, set the return iterators; else spill the probe record
        if (p.isInMemory()) {
            this.recordComparator.setReference(next);
            this.bucketIterator.set(bucket, p.overflowSegments, p, hash, bucketInSegmentOffset);
            return true;
        } else {
            byte status = bucket.get(bucketInSegmentOffset + HEADER_STATUS_OFFSET);
            if (status == BUCKET_STATUS_IN_FILTER) {
                this.bloomFilter.setBitsLocation(bucket, bucketInSegmentOffset + BUCKET_HEADER_LENGTH);
                // Use BloomFilter to filter out all the probe records which would not match any key in spilled build table buckets.
                if (this.bloomFilter.testHash(hash)) {
                    p.insertIntoProbeBuffer(next);
                }
            } else {
                p.insertIntoProbeBuffer(next);
            }
        }
    }
    return false;
}
Also used : MemorySegment(org.apache.flink.core.memory.MemorySegment)
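
In the spilled-partition branch, a Bloom filter decides whether a probe record is worth buffering at all: a negative answer proves the key cannot exist in the spilled build-side bucket, so the record is dropped. The sketch below shows only the gating idea; the two-bit-position scheme is our own illustration, not Flink's BloomFilter implementation.

import java.util.BitSet;

public class BloomGateSketch {

    static final int NUM_BITS = 1 << 16;
    static final BitSet bits = new BitSet(NUM_BITS);

    // derive two bit positions per hash; illustrative only, not Flink's scheme
    static void add(int hash) {
        bits.set((hash & 0x7fffffff) % NUM_BITS);
        bits.set((hash >>> 16) % NUM_BITS);
    }

    static boolean mightContain(int hash) {
        return bits.get((hash & 0x7fffffff) % NUM_BITS)
                && bits.get((hash >>> 16) % NUM_BITS);
    }

    public static void main(String[] args) {
        add(42); // build side registers the hash of a spilled record's key
        System.out.println(mightContain(42));  // true: the probe record must be buffered
        System.out.println(mightContain(999)); // false here: the probe record can be dropped
    }
}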

Example 45 with MemorySegment

use of org.apache.flink.core.memory.MemorySegment in project flink by apache.

the class MutableHashTable method insertBucketEntry.

final void insertBucketEntry(
        final HashPartition<BT, PT> p,
        final MemorySegment bucket,
        final int bucketInSegmentPos,
        final int hashCode,
        final long pointer,
        final boolean spillingAllowed) throws IOException {
    // find the position to put the hash code and pointer
    final int count = bucket.getShort(bucketInSegmentPos + HEADER_COUNT_OFFSET);
    if (count < NUM_ENTRIES_PER_BUCKET) {
        // we are good in our current bucket, put the values
        // hash code
        bucket.putInt(bucketInSegmentPos + BUCKET_HEADER_LENGTH + (count * HASH_CODE_LEN), hashCode);
        // pointer
        bucket.putLong(bucketInSegmentPos + BUCKET_POINTER_START_OFFSET + (count * POINTER_LEN), pointer);
        // update count
        bucket.putShort(bucketInSegmentPos + HEADER_COUNT_OFFSET, (short) (count + 1));
    } else {
        // we need to go to the overflow buckets
        final long originalForwardPointer = bucket.getLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET);
        final long forwardForNewBucket;
        if (originalForwardPointer != BUCKET_FORWARD_POINTER_NOT_SET) {
            // forward pointer set
            final int overflowSegNum = (int) (originalForwardPointer >>> 32);
            final int segOffset = (int) originalForwardPointer;
            final MemorySegment seg = p.overflowSegments[overflowSegNum];
            final short obCount = seg.getShort(segOffset + HEADER_COUNT_OFFSET);
            // check if there is space in this overflow bucket
            if (obCount < NUM_ENTRIES_PER_BUCKET) {
                // space in this bucket and we are done
                // hash code
                seg.putInt(segOffset + BUCKET_HEADER_LENGTH + (obCount * HASH_CODE_LEN), hashCode);
                // pointer
                seg.putLong(segOffset + BUCKET_POINTER_START_OFFSET + (obCount * POINTER_LEN), pointer);
                // update count
                seg.putShort(segOffset + HEADER_COUNT_OFFSET, (short) (obCount + 1));
                return;
            } else {
                // no space here, we need a new bucket. this current overflow bucket will be the
                // target of the new overflow bucket
                forwardForNewBucket = originalForwardPointer;
            }
        } else {
            // no overflow bucket yet, so we need a first one
            forwardForNewBucket = BUCKET_FORWARD_POINTER_NOT_SET;
        }
        // we need a new overflow bucket
        MemorySegment overflowSeg;
        final int overflowBucketNum;
        final int overflowBucketOffset;
        // first, see if there is space for an overflow bucket remaining in the last overflow segment
        if (p.nextOverflowBucket == 0) {
            // no space left in last bucket, or no bucket yet, so create an overflow segment
            overflowSeg = getNextBuffer();
            if (overflowSeg == null) {
                // no memory available to create overflow bucket. we need to spill a partition
                if (!spillingAllowed) {
                    throw new IOException("Hashtable memory ran out in a non-spillable situation. " + "This is probably related to wrong size calculations.");
                }
                final int spilledPart = spillPartition();
                if (spilledPart == p.getPartitionNumber()) {
                    // this bucket is no longer in-memory
                    return;
                }
                overflowSeg = getNextBuffer();
                if (overflowSeg == null) {
                    throw new RuntimeException("Bug in HybridHashJoin: No memory became available after spilling a partition.");
                }
            }
            overflowBucketOffset = 0;
            overflowBucketNum = p.numOverflowSegments;
            // add the new overflow segment
            if (p.overflowSegments.length <= p.numOverflowSegments) {
                MemorySegment[] newSegsArray = new MemorySegment[p.overflowSegments.length * 2];
                System.arraycopy(p.overflowSegments, 0, newSegsArray, 0, p.overflowSegments.length);
                p.overflowSegments = newSegsArray;
            }
            p.overflowSegments[p.numOverflowSegments] = overflowSeg;
            p.numOverflowSegments++;
        } else {
            // there is space in the last overflow segment
            overflowBucketNum = p.numOverflowSegments - 1;
            overflowSeg = p.overflowSegments[overflowBucketNum];
            overflowBucketOffset = p.nextOverflowBucket << NUM_INTRA_BUCKET_BITS;
        }
        // next overflow bucket is one ahead. if the segment is full, the next will be at the beginning
        // of a new segment
        p.nextOverflowBucket = (p.nextOverflowBucket == this.bucketsPerSegmentMask ? 0 : p.nextOverflowBucket + 1);
        // insert the new overflow bucket in the chain of buckets
        // 1) set the old forward pointer
        // 2) let the bucket in the main table point to this one
        overflowSeg.putLong(overflowBucketOffset + HEADER_FORWARD_OFFSET, forwardForNewBucket);
        final long pointerToNewBucket = (((long) overflowBucketNum) << 32) | ((long) overflowBucketOffset);
        bucket.putLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET, pointerToNewBucket);
        // finally, insert the values into the overflow buckets
        // hash code
        overflowSeg.putInt(overflowBucketOffset + BUCKET_HEADER_LENGTH, hashCode);
        // pointer
        overflowSeg.putLong(overflowBucketOffset + BUCKET_POINTER_START_OFFSET, pointer);
        // set the count to one
        overflowSeg.putShort(overflowBucketOffset + HEADER_COUNT_OFFSET, (short) 1);
        // initiate the probed bitset to 0.
        overflowSeg.putShort(overflowBucketOffset + HEADER_PROBED_FLAGS_OFFSET, (short) 0);
    }
}
Also used : IOException(java.io.IOException) MemorySegment(org.apache.flink.core.memory.MemorySegment)
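
The overflow-segment bookkeeping above follows one pattern in both hash tables: when nextOverflowBucket wraps to 0, the next insertion must allocate a fresh segment, and the segment array grows by doubling when full. A minimal stand-alone sketch of that pattern, with a hypothetical class and Object as a stand-in for MemorySegment:

import java.util.Arrays;

public class OverflowBookkeepingSketch {

    static final int BUCKETS_PER_SEGMENT_MASK = 31; // 32 buckets per segment (assumed)

    Object[] overflowSegments = new Object[2]; // Object stands in for MemorySegment
    int numOverflowSegments = 0;
    int nextOverflowBucket = 0;

    void addOverflowBucket() {
        if (nextOverflowBucket == 0) {
            // segment boundary: double the array if it is full, then append a new segment
            if (overflowSegments.length <= numOverflowSegments) {
                overflowSegments = Arrays.copyOf(overflowSegments, overflowSegments.length * 2);
            }
            overflowSegments[numOverflowSegments++] = new Object();
        }
        // advance, wrapping to 0 once the current segment is full
        nextOverflowBucket = (nextOverflowBucket == BUCKETS_PER_SEGMENT_MASK) ? 0 : nextOverflowBucket + 1;
    }

    public static void main(String[] args) {
        OverflowBookkeepingSketch s = new OverflowBookkeepingSketch();
        for (int i = 0; i < 100; i++) {
            s.addOverflowBucket();
        }
        System.out.println(s.numOverflowSegments); // 4 segments cover 100 buckets of 32
    }
}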

Aggregations

MemorySegment (org.apache.flink.core.memory.MemorySegment): 161
Test (org.junit.Test): 86
DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable): 38
ArrayList (java.util.ArrayList): 30
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 24
IntPair (org.apache.flink.runtime.operators.testutils.types.IntPair): 24
MemoryAllocationException (org.apache.flink.runtime.memory.MemoryAllocationException): 22
IOException (java.io.IOException): 19
TestData (org.apache.flink.runtime.operators.testutils.TestData): 18
FileIOChannel (org.apache.flink.runtime.io.disk.iomanager.FileIOChannel): 17
UniformIntPairGenerator (org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator): 16
IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager): 15
IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync): 15
EOFException (java.io.EOFException): 14
AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable): 14
Random (java.util.Random): 11
ChannelReaderInputView (org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView): 10
UniformRecordGenerator (org.apache.flink.runtime.operators.testutils.UniformRecordGenerator): 9
Record (org.apache.flink.types.Record): 9
MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 9