Use of org.apache.flink.core.memory.MemorySegment in project flink by apache.
The class CompactingHashTable, method insertBucketEntryFromSearch.
/**
 * IMPORTANT!!! We pass only the partition number, because we must make sure we get a fresh
 * partition reference. The partition reference used during search for the key may have become
 * invalid during the compaction.
 */
private void insertBucketEntryFromSearch(MemorySegment originalBucket, MemorySegment currentBucket, int originalBucketOffset, int currentBucketOffset, int countInCurrentBucket, long originalForwardPointer, int hashCode, long pointer, int partitionNumber) throws IOException {
    boolean checkForResize = false;
    if (countInCurrentBucket < NUM_ENTRIES_PER_BUCKET) {
        // we are good in our current bucket, put the values
        // hash code
        currentBucket.putInt(currentBucketOffset + BUCKET_HEADER_LENGTH + (countInCurrentBucket * HASH_CODE_LEN), hashCode);
        // pointer
        currentBucket.putLong(currentBucketOffset + BUCKET_POINTER_START_OFFSET + (countInCurrentBucket * POINTER_LEN), pointer);
        // update count
        currentBucket.putInt(currentBucketOffset + HEADER_COUNT_OFFSET, countInCurrentBucket + 1);
    } else {
        // we go to a new overflow bucket
        final InMemoryPartition<T> partition = this.partitions.get(partitionNumber);
        MemorySegment overflowSeg;
        final int overflowSegmentNum;
        final int overflowBucketOffset;
        // first, see if there is space for an overflow bucket remaining in the last overflow segment
        if (partition.nextOverflowBucket == 0) {
            // no space left in last bucket, or no bucket yet, so create an overflow segment
            overflowSeg = getNextBuffer();
            overflowBucketOffset = 0;
            overflowSegmentNum = partition.numOverflowSegments;
            // add the new overflow segment
            if (partition.overflowSegments.length <= partition.numOverflowSegments) {
                MemorySegment[] newSegsArray = new MemorySegment[partition.overflowSegments.length * 2];
                System.arraycopy(partition.overflowSegments, 0, newSegsArray, 0, partition.overflowSegments.length);
                partition.overflowSegments = newSegsArray;
            }
            partition.overflowSegments[partition.numOverflowSegments] = overflowSeg;
            partition.numOverflowSegments++;
            checkForResize = true;
        } else {
            // there is space in the last overflow segment
            overflowSegmentNum = partition.numOverflowSegments - 1;
            overflowSeg = partition.overflowSegments[overflowSegmentNum];
            overflowBucketOffset = partition.nextOverflowBucket << NUM_INTRA_BUCKET_BITS;
        }
        // next overflow bucket is one ahead. if the segment is full, the next will be at the beginning
        // of a new segment
        partition.nextOverflowBucket = (partition.nextOverflowBucket == this.bucketsPerSegmentMask ? 0 : partition.nextOverflowBucket + 1);
        // insert the new overflow bucket in the chain of buckets
        // 1) set the old forward pointer
        // 2) let the bucket in the main table point to this one
        overflowSeg.putLong(overflowBucketOffset + HEADER_FORWARD_OFFSET, originalForwardPointer);
        final long pointerToNewBucket = (((long) overflowSegmentNum) << 32) | ((long) overflowBucketOffset);
        originalBucket.putLong(originalBucketOffset + HEADER_FORWARD_OFFSET, pointerToNewBucket);
        // finally, insert the values into the overflow bucket
        // hash code
        overflowSeg.putInt(overflowBucketOffset + BUCKET_HEADER_LENGTH, hashCode);
        // pointer
        overflowSeg.putLong(overflowBucketOffset + BUCKET_POINTER_START_OFFSET, pointer);
        // set the count to one
        overflowSeg.putInt(overflowBucketOffset + HEADER_COUNT_OFFSET, 1);
        if (checkForResize && !this.isResizing) {
            // check if we should resize buckets
            if (this.buckets.length <= getOverflowSegmentCount()) {
                resizeHashTable();
            }
        }
    }
}
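The long forward pointer written above packs two values: the index of the overflow segment within the partition's overflowSegments array in the upper 32 bits, and the byte offset of the overflow bucket inside that segment in the lower 32 bits. A minimal sketch of that encoding; the helper and its method names (pack, segmentIndex, bucketOffset) are illustrative and not part of Flink's API.

// Illustrative helper showing the layout of the forward pointer used above:
// upper 32 bits = overflow segment index, lower 32 bits = bucket offset within that segment.
final class OverflowPointer {
    static long pack(int segmentIndex, int bucketOffset) {
        return (((long) segmentIndex) << 32) | ((long) bucketOffset);
    }
    static int segmentIndex(long pointer) {
        return (int) (pointer >>> 32); // mirrors (int) (forwardPointer >>> 32) when the chain is read back
    }
    static int bucketOffset(long pointer) {
        return (int) pointer; // lower 32 bits; bucket offsets are small and non-negative
    }
}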
Use of org.apache.flink.core.memory.MemorySegment in project flink by apache.
The class InPlaceMutableHashTable, method insert.
/**
 * Inserts the given record into the hash table.
 * Note: this method doesn't care about whether a record with the same key is already present.
 * @param record The record to insert.
 * @throws IOException (EOFException specifically, if memory ran out)
 */
@Override
public void insert(T record) throws IOException {
    if (closed) {
        return;
    }
    final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;
    // which segment contains the bucket
    final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits;
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
    // offset of the bucket in the segment
    final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits;
    final long firstPointer = bucketSegment.getLong(bucketOffset);
    try {
        final long newFirstPointer = recordArea.appendPointerAndRecord(firstPointer, record);
        bucketSegment.putLong(bucketOffset, newFirstPointer);
    } catch (EOFException ex) {
        compactOrThrow();
        insert(record);
        return;
    }
    numElements++;
    resizeTableIfNecessary();
}
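The three address computations in insert() are plain bit arithmetic over power-of-two sizes: mask the hash to get a bucket index, shift to find the owning segment, then mask-and-shift to find the bucket's byte offset within that segment. A self-contained sketch with assumed example sizes (64-byte buckets and 32 KiB segments are illustrative values, not the table's actual configuration):

// Standalone illustration of the addressing arithmetic; all sizes below are assumptions.
public class BucketAddressingSketch {
    public static void main(String[] args) {
        final int bucketSizeBits = 6;                                          // 64-byte buckets (assumed)
        final int segmentSizeBits = 15;                                        // 32 KiB segments (assumed)
        final int numBucketsPerSegmentBits = segmentSizeBits - bucketSizeBits; // 512 buckets per segment
        final int numBucketsPerSegmentMask = (1 << numBucketsPerSegmentBits) - 1;
        final int numBuckets = 1 << 20;                                        // total buckets, a power of two
        final int numBucketsMask = numBuckets - 1;

        final int hashCode = 0x9e3779b9;                                       // any hash value
        final int bucket = hashCode & numBucketsMask;                          // bucket index in [0, numBuckets)
        final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits;    // which MemorySegment holds it
        final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // byte offset in that segment

        System.out.println("bucket=" + bucket + " segment=" + bucketSegmentIndex + " offset=" + bucketOffset);
    }
}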
Use of org.apache.flink.core.memory.MemorySegment in project flink by apache.
The class MutableHashTable, method getMatchesFor.
public HashBucketIterator<BT, PT> getMatchesFor(PT record) throws IOException {
    final TypeComparator<PT> probeAccessors = this.probeSideComparator;
    final int hash = hash(probeAccessors.hash(record), this.currentRecursionDepth);
    final int posHashCode = hash % this.numBuckets;
    // get the bucket for the given hash code
    final int bucketArrayPos = posHashCode >> this.bucketsPerSegmentBits;
    final int bucketInSegmentOffset = (posHashCode & this.bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
    final MemorySegment bucket = this.buckets[bucketArrayPos];
    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
    final HashPartition<BT, PT> p = this.partitionsBeingBuilt.get(partitionNumber);
    // for an in-memory partition, set the return iterator; spilled partitions are not supported here
    if (p.isInMemory()) {
        this.recordComparator.setReference(record);
        this.bucketIterator.set(bucket, p.overflowSegments, p, hash, bucketInSegmentOffset);
        return this.bucketIterator;
    } else {
        throw new IllegalStateException("Method is not applicable to partially spilled hash tables.");
    }
}
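A hedged usage sketch for getMatchesFor: BuildType, ProbeType and process(...) are placeholders, and it assumes the returned HashBucketIterator's no-argument next() variant, which yields null once the bucket chain holds no further key-equal entry. The table must have stayed fully in memory, otherwise the method throws as shown above.

// Hedged usage sketch; placeholder types and helpers, not code from the Flink repository.
static void emitMatches(MutableHashTable<BuildType, ProbeType> table, ProbeType probeRecord) throws IOException {
    MutableHashTable.HashBucketIterator<BuildType, ProbeType> matches = table.getMatchesFor(probeRecord);
    BuildType match;
    while ((match = matches.next()) != null) {
        // every returned record hashed into the same bucket and compared equal on the join key
        process(match);
    }
}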
Use of org.apache.flink.core.memory.MemorySegment in project flink by apache.
The class MutableHashTable, method processProbeIter.
protected boolean processProbeIter() throws IOException {
    final ProbeIterator<PT> probeIter = this.probeIterator;
    final TypeComparator<PT> probeAccessors = this.probeSideComparator;
    if (!this.probeMatchedPhase) {
        return false;
    }
    PT next;
    while ((next = probeIter.next()) != null) {
        final int hash = hash(probeAccessors.hash(next), this.currentRecursionDepth);
        final int posHashCode = hash % this.numBuckets;
        // get the bucket for the given hash code
        final int bucketArrayPos = posHashCode >> this.bucketsPerSegmentBits;
        final int bucketInSegmentOffset = (posHashCode & this.bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
        final MemorySegment bucket = this.buckets[bucketArrayPos];
        // get the basic characteristics of the bucket
        final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
        final HashPartition<BT, PT> p = this.partitionsBeingBuilt.get(partitionNumber);
        // for an in-memory partition, set the return iterators; otherwise spill the probe record
        if (p.isInMemory()) {
            this.recordComparator.setReference(next);
            this.bucketIterator.set(bucket, p.overflowSegments, p, hash, bucketInSegmentOffset);
            return true;
        } else {
            byte status = bucket.get(bucketInSegmentOffset + HEADER_STATUS_OFFSET);
            if (status == BUCKET_STATUS_IN_FILTER) {
                this.bloomFilter.setBitsLocation(bucket, bucketInSegmentOffset + BUCKET_HEADER_LENGTH);
                // use the BloomFilter to drop probe records that cannot match any key in the spilled build-side buckets
                if (this.bloomFilter.testHash(hash)) {
                    p.insertIntoProbeBuffer(next);
                }
            } else {
                p.insertIntoProbeBuffer(next);
            }
        }
    }
    return false;
}
Use of org.apache.flink.core.memory.MemorySegment in project flink by apache.
The class MutableHashTable, method insertBucketEntry.
final void insertBucketEntry(final HashPartition<BT, PT> p, final MemorySegment bucket, final int bucketInSegmentPos, final int hashCode, final long pointer, final boolean spillingAllowed) throws IOException {
    // find the position to put the hash code and pointer
    final int count = bucket.getShort(bucketInSegmentPos + HEADER_COUNT_OFFSET);
    if (count < NUM_ENTRIES_PER_BUCKET) {
        // we are good in our current bucket, put the values
        // hash code
        bucket.putInt(bucketInSegmentPos + BUCKET_HEADER_LENGTH + (count * HASH_CODE_LEN), hashCode);
        // pointer
        bucket.putLong(bucketInSegmentPos + BUCKET_POINTER_START_OFFSET + (count * POINTER_LEN), pointer);
        // update count
        bucket.putShort(bucketInSegmentPos + HEADER_COUNT_OFFSET, (short) (count + 1));
    } else {
        // we need to go to the overflow buckets
        final long originalForwardPointer = bucket.getLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET);
        final long forwardForNewBucket;
        if (originalForwardPointer != BUCKET_FORWARD_POINTER_NOT_SET) {
            // forward pointer set
            final int overflowSegNum = (int) (originalForwardPointer >>> 32);
            final int segOffset = (int) originalForwardPointer;
            final MemorySegment seg = p.overflowSegments[overflowSegNum];
            final short obCount = seg.getShort(segOffset + HEADER_COUNT_OFFSET);
            // check if there is space in this overflow bucket
            if (obCount < NUM_ENTRIES_PER_BUCKET) {
                // space in this bucket and we are done
                // hash code
                seg.putInt(segOffset + BUCKET_HEADER_LENGTH + (obCount * HASH_CODE_LEN), hashCode);
                // pointer
                seg.putLong(segOffset + BUCKET_POINTER_START_OFFSET + (obCount * POINTER_LEN), pointer);
                // update count
                seg.putShort(segOffset + HEADER_COUNT_OFFSET, (short) (obCount + 1));
                return;
            } else {
                // no space here, we need a new bucket. this current overflow bucket will be the
                // target of the new overflow bucket
                forwardForNewBucket = originalForwardPointer;
            }
        } else {
            // no overflow bucket yet, so we need a first one
            forwardForNewBucket = BUCKET_FORWARD_POINTER_NOT_SET;
        }
        // we need a new overflow bucket
        MemorySegment overflowSeg;
        final int overflowBucketNum;
        final int overflowBucketOffset;
        // first, see if there is space for an overflow bucket remaining in the last overflow segment
        if (p.nextOverflowBucket == 0) {
            // no space left in last bucket, or no bucket yet, so create an overflow segment
            overflowSeg = getNextBuffer();
            if (overflowSeg == null) {
                // no memory available to create overflow bucket. we need to spill a partition
                if (!spillingAllowed) {
                    throw new IOException("Hashtable memory ran out in a non-spillable situation. " + "This is probably related to wrong size calculations.");
                }
                final int spilledPart = spillPartition();
                if (spilledPart == p.getPartitionNumber()) {
                    // this bucket is no longer in-memory
                    return;
                }
                overflowSeg = getNextBuffer();
                if (overflowSeg == null) {
                    throw new RuntimeException("Bug in HybridHashJoin: No memory became available after spilling a partition.");
                }
            }
            overflowBucketOffset = 0;
            overflowBucketNum = p.numOverflowSegments;
            // add the new overflow segment
            if (p.overflowSegments.length <= p.numOverflowSegments) {
                MemorySegment[] newSegsArray = new MemorySegment[p.overflowSegments.length * 2];
                System.arraycopy(p.overflowSegments, 0, newSegsArray, 0, p.overflowSegments.length);
                p.overflowSegments = newSegsArray;
            }
            p.overflowSegments[p.numOverflowSegments] = overflowSeg;
            p.numOverflowSegments++;
        } else {
            // there is space in the last overflow segment
            overflowBucketNum = p.numOverflowSegments - 1;
            overflowSeg = p.overflowSegments[overflowBucketNum];
            overflowBucketOffset = p.nextOverflowBucket << NUM_INTRA_BUCKET_BITS;
        }
        // next overflow bucket is one ahead. if the segment is full, the next will be at the beginning
        // of a new segment
        p.nextOverflowBucket = (p.nextOverflowBucket == this.bucketsPerSegmentMask ? 0 : p.nextOverflowBucket + 1);
        // insert the new overflow bucket in the chain of buckets
        // 1) set the old forward pointer
        // 2) let the bucket in the main table point to this one
        overflowSeg.putLong(overflowBucketOffset + HEADER_FORWARD_OFFSET, forwardForNewBucket);
        final long pointerToNewBucket = (((long) overflowBucketNum) << 32) | ((long) overflowBucketOffset);
        bucket.putLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET, pointerToNewBucket);
        // finally, insert the values into the overflow bucket
        // hash code
        overflowSeg.putInt(overflowBucketOffset + BUCKET_HEADER_LENGTH, hashCode);
        // pointer
        overflowSeg.putLong(overflowBucketOffset + BUCKET_POINTER_START_OFFSET, pointer);
        // set the count to one
        overflowSeg.putShort(overflowBucketOffset + HEADER_COUNT_OFFSET, (short) 1);
        // initialize the probed bitset to 0
        overflowSeg.putShort(overflowBucketOffset + HEADER_PROBED_FLAGS_OFFSET, (short) 0);
    }
}
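Reading an entry back has to follow the chain that insertBucketEntry builds: scan the count entries stored in the main-table bucket, then decode the forward pointer into (segment, offset) and repeat for each overflow bucket. The sketch below is illustrative lookup code under the same constants and field names as above, not a method from MutableHashTable; it only checks for a matching hash code, whereas the real probe also compares the actual records.

// Illustrative chain walk; mirrors the constants and layout used by insertBucketEntry above.
private boolean bucketChainContainsHash(MemorySegment bucket, int bucketInSegmentPos, int searchHash, HashPartition<BT, PT> p) {
    // entries stored directly in the main-table bucket
    final int count = bucket.getShort(bucketInSegmentPos + HEADER_COUNT_OFFSET);
    for (int i = 0; i < count; i++) {
        if (bucket.getInt(bucketInSegmentPos + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN) == searchHash) {
            return true;
        }
    }
    // follow the overflow chain written by insertBucketEntry
    long forward = bucket.getLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET);
    while (forward != BUCKET_FORWARD_POINTER_NOT_SET) {
        final MemorySegment seg = p.overflowSegments[(int) (forward >>> 32)]; // upper 32 bits = segment index
        final int offset = (int) forward;                                     // lower 32 bits = bucket offset
        final int obCount = seg.getShort(offset + HEADER_COUNT_OFFSET);
        for (int i = 0; i < obCount; i++) {
            if (seg.getInt(offset + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN) == searchHash) {
                return true;
            }
        }
        forward = seg.getLong(offset + HEADER_FORWARD_OFFSET);
    }
    return false;
}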