Use of org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator in project flink by apache.
The class MutableHashTable, method prepareNextPartition: finalizes the partitions of the current table, releases their memory, and then prepares the next pending spilled partition, either as an unmatched-build-side iterator or by rebuilding the table and opening the probe reader.
protected boolean prepareNextPartition() throws IOException {
    // finalize and cleanup the partitions of the current table
    int buffersAvailable = 0;
    for (int i = 0; i < this.partitionsBeingBuilt.size(); i++) {
        final HashPartition<BT, PT> p = this.partitionsBeingBuilt.get(i);
        p.setFurtherPatitioning(this.furtherPartitioning);
        buffersAvailable += p.finalizeProbePhase(this.availableMemory, this.partitionsPending, this.buildSideOuterJoin);
    }
    this.partitionsBeingBuilt.clear();
    this.writeBehindBuffersAvailable += buffersAvailable;
    releaseTable();
    if (this.currentSpilledBuildSide != null) {
        this.currentSpilledBuildSide.closeAndDelete();
        this.currentSpilledBuildSide = null;
    }
    if (this.currentSpilledProbeSide != null) {
        this.currentSpilledProbeSide.closeAndDelete();
        this.currentSpilledProbeSide = null;
    }
    if (this.partitionsPending.isEmpty()) {
        // no more data
        return false;
    }
    // there are pending partitions
    final HashPartition<BT, PT> p = this.partitionsPending.get(0);
    if (p.probeSideRecordCounter == 0) {
        // unprobed spilled partitions are only re-processed for a build-side outer join;
        // there is no need to create a hash table since there are no probe-side records
        List<MemorySegment> memory = new ArrayList<MemorySegment>();
        MemorySegment seg1 = getNextBuffer();
        if (seg1 != null) {
            memory.add(seg1);
            MemorySegment seg2 = getNextBuffer();
            if (seg2 != null) {
                memory.add(seg2);
            }
        } else {
            throw new IllegalStateException("Attempting to begin reading spilled partition without any memory available");
        }
        this.currentSpilledBuildSide = this.ioManager.createBlockChannelReader(p.getBuildSideChannel().getChannelID());
        final ChannelReaderInputView inView = new HeaderlessChannelReaderInputView(currentSpilledBuildSide, memory, p.getBuildSideBlockCount(), p.getLastSegmentLimit(), false);
        final ChannelReaderInputViewIterator<BT> inIter = new ChannelReaderInputViewIterator<BT>(inView, this.availableMemory, this.buildSideSerializer);
        this.unmatchedBuildIterator = inIter;
        this.partitionsPending.remove(0);
        return true;
    }
    this.probeMatchedPhase = true;
    this.unmatchedBuildVisited = false;
    // build the next table; memory must be allocated after this call
    buildTableFromSpilledPartition(p);
    // set the probe side - gather memory segments for reading
    LinkedBlockingQueue<MemorySegment> returnQueue = new LinkedBlockingQueue<MemorySegment>();
    this.currentSpilledProbeSide = this.ioManager.createBlockChannelReader(p.getProbeSideChannel().getChannelID(), returnQueue);
    List<MemorySegment> memory = new ArrayList<MemorySegment>();
    MemorySegment seg1 = getNextBuffer();
    if (seg1 != null) {
        memory.add(seg1);
        MemorySegment seg2 = getNextBuffer();
        if (seg2 != null) {
            memory.add(seg2);
        }
    } else {
        throw new IllegalStateException("Attempting to begin probing of partition without any memory available");
    }
    ChannelReaderInputViewIterator<PT> probeReader = new ChannelReaderInputViewIterator<PT>(this.currentSpilledProbeSide, returnQueue, memory, this.availableMemory, this.probeSideSerializer, p.getProbeSideBlockCount());
    this.probeIterator.set(probeReader);
    // unregister the pending partition
    this.partitionsPending.remove(0);
    this.currentRecursionDepth = p.getRecursionLevel() + 1;
    // recursively get the next
    return nextRecord();
}
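The unprobed-partition branch above only wires the iterator up as this.unmatchedBuildIterator; the records are consumed elsewhere. Distilled, the read-back pattern it relies on looks roughly like the sketch below. This is not code from the Flink sources: ioManager, spilledChannelID, readMemory, freeMemory, buildSideSerializer, blockCount and lastSegmentLimit are assumed stand-ins for the corresponding hash-table and partition fields.

    // Sketch only: re-read a spilled, headerless partition and iterate its records (assumed identifiers).
    BlockChannelReader<MemorySegment> reader = ioManager.createBlockChannelReader(spilledChannelID);
    ChannelReaderInputView view = new HeaderlessChannelReaderInputView(reader, readMemory, blockCount, lastSegmentLimit, false);
    ChannelReaderInputViewIterator<BT> records = new ChannelReaderInputViewIterator<BT>(view, freeMemory, buildSideSerializer);
    BT record = buildSideSerializer.createInstance();
    while ((record = records.next(record)) != null) {
        // each record is a spilled build-side row; next() returns null once the channel is exhausted,
        // after which the read segments are handed back to the freeMemory list
    }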
Use of org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator in project flink by apache.
The class FixedLengthRecordSorterTest, method testFlushPartialMemoryPage: flushes a partial range of sorted records (starting at index 1) to a block channel, then reads them back through a ChannelReaderInputViewIterator to verify the spilled contents.
@Test
public void testFlushPartialMemoryPage() throws Exception {
    // Insert IntPair which would fill 2 memory pages.
    final int NUM_RECORDS = 2 * MEMORY_PAGE_SIZE / 8;
    final List<MemorySegment> memory = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    FixedLengthRecordSorter<IntPair> sorter = newSortBuffer(memory);
    UniformIntPairGenerator generator = new UniformIntPairGenerator(Integer.MAX_VALUE, 1, false);
    // write the records
    IntPair record = new IntPair();
    int num = -1;
    do {
        generator.next(record);
        num++;
    } while (sorter.write(record) && num < NUM_RECORDS);
    FileIOChannel.ID channelID = this.ioManager.createChannelEnumerator().next();
    BlockChannelWriter<MemorySegment> blockChannelWriter = this.ioManager.createBlockChannelWriter(channelID);
    final List<MemorySegment> writeBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelWriterOutputView outputView = new ChannelWriterOutputView(blockChannelWriter, writeBuffer, writeBuffer.get(0).size());
    sorter.writeToOutput(outputView, 1, NUM_RECORDS - 1);
    this.memoryManager.release(outputView.close());
    BlockChannelReader<MemorySegment> blockChannelReader = this.ioManager.createBlockChannelReader(channelID);
    final List<MemorySegment> readBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelReaderInputView readerInputView = new ChannelReaderInputView(blockChannelReader, readBuffer, false);
    final List<MemorySegment> dataBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelReaderInputViewIterator<IntPair> iterator = new ChannelReaderInputViewIterator<IntPair>(readerInputView, dataBuffer, this.serializer);
    record = iterator.next(record);
    int i = 1;
    while (record != null) {
        Assert.assertEquals(i, record.getKey());
        record = iterator.next(record);
        i++;
    }
    Assert.assertEquals(NUM_RECORDS, i);
    this.memoryManager.release(dataBuffer);
    // release the memory occupied by the buffers
    sorter.dispose();
    this.memoryManager.release(memory);
}
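Stripped of the sorter, the write/read round trip exercised by this test reduces to the pattern below. This is a hedged sketch rather than a verbatim excerpt; ioManager, writeBuffers, readBuffers, freeSegments and serializer (a TypeSerializer<IntPair>) are assumed to be set up as in the test fixture above.

    // Sketch: spill records through a ChannelWriterOutputView, read them back through a
    // ChannelReaderInputViewIterator (assumed setup; see the test above for the real fixture).
    FileIOChannel.ID channel = ioManager.createChannelEnumerator().next();
    BlockChannelWriter<MemorySegment> writer = ioManager.createBlockChannelWriter(channel);
    ChannelWriterOutputView out = new ChannelWriterOutputView(writer, writeBuffers, writeBuffers.get(0).size());
    IntPair pair = new IntPair();
    serializer.serialize(pair, out);             // write any number of records this way
    List<MemorySegment> returned = out.close();  // close() hands the write buffers back for release

    BlockChannelReader<MemorySegment> reader = ioManager.createBlockChannelReader(channel);
    ChannelReaderInputView in = new ChannelReaderInputView(reader, readBuffers, false);
    ChannelReaderInputViewIterator<IntPair> it = new ChannelReaderInputViewIterator<IntPair>(in, freeSegments, serializer);
    IntPair reuse = new IntPair();
    while ((reuse = it.next(reuse)) != null) {
        // reuse holds the next record read back from disk; null marks the end of the spilled data
    }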
Use of org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator in project flink by apache.
The class MutableHashTable, method buildTableFromSpilledPartition: rebuilds an in-memory hash table from a spilled partition, either reading it back in bulk when it fits into memory or re-partitioning it at the next recursion level.
protected void buildTableFromSpilledPartition(final HashPartition<BT, PT> p) throws IOException {
    final int nextRecursionLevel = p.getRecursionLevel() + 1;
    if (nextRecursionLevel > MAX_RECURSION_DEPTH) {
        throw new RuntimeException("Hash join exceeded maximum number of recursions, without reducing " + "partitions enough to be memory resident. Probably cause: Too many duplicate keys.");
    }
    // we distinguish two cases here:
    // 1) The partition fits entirely into main memory. That is the case if we have enough buffers
    //    for all partition segments, plus enough buffers to hold the table structure.
    //    --> We read the partition in as it is and create a hashtable that references only
    //        that single partition.
    // 2) We can not guarantee that enough memory segments are available and read the partition
    //    in, distributing its data among newly created partitions.
    final int totalBuffersAvailable = this.availableMemory.size() + this.writeBehindBuffersAvailable;
    if (totalBuffersAvailable != this.totalNumBuffers - this.numWriteBehindBuffers) {
        throw new RuntimeException("Hash Join bug in memory management: Memory buffers leaked.");
    }
    long numBuckets = p.getBuildSideRecordCount() / NUM_ENTRIES_PER_BUCKET + 1;
    // we need to consider the worst case where everything hashes to one bucket which needs to overflow
    // by the same number of total buckets again. Also, one buffer needs to remain for the probing
    final long totalBuffersNeeded = 2 * (numBuckets / (this.bucketsPerSegmentMask + 1)) + p.getBuildSideBlockCount() + 2;
    if (totalBuffersNeeded < totalBuffersAvailable) {
        // we are guaranteed to stay in memory
        ensureNumBuffersReturned(p.getBuildSideBlockCount());
        // first read the partition in
        final BulkBlockChannelReader reader = this.ioManager.createBulkBlockChannelReader(p.getBuildSideChannel().getChannelID(), this.availableMemory, p.getBuildSideBlockCount());
        // call waits until all is read
        if (keepBuildSidePartitions && p.recursionLevel == 0) {
            // keep the partitions
            reader.close();
        } else {
            reader.closeAndDelete();
        }
        final List<MemorySegment> partitionBuffers = reader.getFullSegments();
        final HashPartition<BT, PT> newPart = new HashPartition<BT, PT>(this.buildSideSerializer, this.probeSideSerializer, 0, nextRecursionLevel, partitionBuffers, p.getBuildSideRecordCount(), this.segmentSize, p.getLastSegmentLimit());
        this.partitionsBeingBuilt.add(newPart);
        // erect the buckets
        initTable((int) numBuckets, (byte) 1);
        // now, index the partition through a hash table
        final HashPartition<BT, PT>.PartitionIterator pIter = newPart.getPartitionIterator(this.buildSideComparator);
        BT record = this.buildSideSerializer.createInstance();
        while ((record = pIter.next(record)) != null) {
            final int hashCode = hash(pIter.getCurrentHashCode(), nextRecursionLevel);
            final int posHashCode = hashCode % this.numBuckets;
            final long pointer = pIter.getPointer();
            // get the bucket for the given hash code
            final int bucketArrayPos = posHashCode >> this.bucketsPerSegmentBits;
            final int bucketInSegmentPos = (posHashCode & this.bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
            final MemorySegment bucket = this.buckets[bucketArrayPos];
            insertBucketEntry(newPart, bucket, bucketInSegmentPos, hashCode, pointer, false);
        }
    } else {
        // we need to partition and partially spill
        final int avgRecordLenPartition = (int) (((long) p.getBuildSideBlockCount()) * this.segmentSize / p.getBuildSideRecordCount());
        final int bucketCount = getInitialTableSize(totalBuffersAvailable, this.segmentSize, getPartitioningFanOutNoEstimates(totalBuffersAvailable), avgRecordLenPartition);
        // compute in how many splits we'd need to partition the result
        final int splits = (int) (totalBuffersNeeded / totalBuffersAvailable) + 1;
        final int partitionFanOut = Math.min(10 * splits /* being conservative */, MAX_NUM_PARTITIONS);
        createPartitions(partitionFanOut, nextRecursionLevel);
        // set up the table structure. the write behind buffers are taken away, as are one buffer per partition
        initTable(bucketCount, (byte) partitionFanOut);
        // go over the complete input and insert every element into the hash table
        // first set up the reader with some memory.
        final List<MemorySegment> segments = new ArrayList<MemorySegment>(2);
        segments.add(getNextBuffer());
        segments.add(getNextBuffer());
        final BlockChannelReader<MemorySegment> inReader = this.ioManager.createBlockChannelReader(p.getBuildSideChannel().getChannelID());
        final ChannelReaderInputView inView = new HeaderlessChannelReaderInputView(inReader, segments, p.getBuildSideBlockCount(), p.getLastSegmentLimit(), false);
        final ChannelReaderInputViewIterator<BT> inIter = new ChannelReaderInputViewIterator<BT>(inView, this.availableMemory, this.buildSideSerializer);
        final TypeComparator<BT> btComparator = this.buildSideComparator;
        BT rec = this.buildSideSerializer.createInstance();
        while ((rec = inIter.next(rec)) != null) {
            final int hashCode = hash(btComparator.hash(rec), nextRecursionLevel);
            insertIntoTable(rec, hashCode);
        }
        if (keepBuildSidePartitions && p.recursionLevel == 0) {
            // keep the partitions
            inReader.close();
        } else {
            inReader.closeAndDelete();
        }
        // finalize the partitions
        for (int i = 0; i < this.partitionsBeingBuilt.size(); i++) {
            HashPartition<BT, PT> part = this.partitionsBeingBuilt.get(i);
            part.finalizeBuildPhase(this.ioManager, this.currentEnumerator, this.writeBehindBuffers);
        }
    }
}
Use of org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator in project flink by apache.
The class FixedLengthRecordSorterTest, method testFlushFullMemoryPage: the same round trip as the previous test, but flushing the complete record range starting at index 0.
@Test
public void testFlushFullMemoryPage() throws Exception {
    // Insert IntPair which would fill 2 memory pages.
    final int NUM_RECORDS = 2 * MEMORY_PAGE_SIZE / 8;
    final List<MemorySegment> memory = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    FixedLengthRecordSorter<IntPair> sorter = newSortBuffer(memory);
    UniformIntPairGenerator generator = new UniformIntPairGenerator(Integer.MAX_VALUE, 1, false);
    // write the records
    IntPair record = new IntPair();
    int num = -1;
    do {
        generator.next(record);
        num++;
    } while (sorter.write(record) && num < NUM_RECORDS);
    FileIOChannel.ID channelID = this.ioManager.createChannelEnumerator().next();
    BlockChannelWriter<MemorySegment> blockChannelWriter = this.ioManager.createBlockChannelWriter(channelID);
    final List<MemorySegment> writeBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelWriterOutputView outputView = new ChannelWriterOutputView(blockChannelWriter, writeBuffer, writeBuffer.get(0).size());
    sorter.writeToOutput(outputView, 0, NUM_RECORDS);
    this.memoryManager.release(outputView.close());
    BlockChannelReader<MemorySegment> blockChannelReader = this.ioManager.createBlockChannelReader(channelID);
    final List<MemorySegment> readBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelReaderInputView readerInputView = new ChannelReaderInputView(blockChannelReader, readBuffer, false);
    final List<MemorySegment> dataBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelReaderInputViewIterator<IntPair> iterator = new ChannelReaderInputViewIterator<IntPair>(readerInputView, dataBuffer, this.serializer);
    record = iterator.next(record);
    int i = 0;
    while (record != null) {
        Assert.assertEquals(i, record.getKey());
        record = iterator.next(record);
        i++;
    }
    Assert.assertEquals(NUM_RECORDS, i);
    this.memoryManager.release(dataBuffer);
    // release the memory occupied by the buffers
    sorter.dispose();
    this.memoryManager.release(memory);
}
Use of org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator in project flink by apache.
The class BinaryHashTable, method prepareNextPartition: the BinaryRowData-based variant of the MutableHashTable logic shown above, using FileChannelUtil to create (optionally compressed) input views for the spilled probe side.
private boolean prepareNextPartition() throws IOException {
    // finalize and cleanup the partitions of the current table
    for (final BinaryHashPartition p : this.partitionsBeingBuilt) {
        p.finalizeProbePhase(this.internalPool, this.partitionsPending, type.needSetProbed());
    }
    this.partitionsBeingBuilt.clear();
    if (this.currentSpilledBuildSide != null) {
        this.currentSpilledBuildSide.getChannel().closeAndDelete();
        this.currentSpilledBuildSide = null;
    }
    if (this.currentSpilledProbeSide != null) {
        this.currentSpilledProbeSide.getChannel().closeAndDelete();
        this.currentSpilledProbeSide = null;
    }
    if (this.partitionsPending.isEmpty()) {
        // no more data
        return false;
    }
    // there are pending partitions
    final BinaryHashPartition p = this.partitionsPending.get(0);
    LOG.info(String.format("Begin to process spilled partition [%d]", p.getPartitionNumber()));
    if (p.probeSideRecordCounter == 0) {
        // unprobed spilled partitions are only re-processed for a build-side outer join;
        // there is no need to create a hash table since there are no probe-side records
        this.currentSpilledBuildSide = createInputView(p.getBuildSideChannel().getChannelID(), p.getBuildSideBlockCount(), p.getLastSegmentLimit());
        this.buildIterator = new WrappedRowIterator<>(new BinaryRowChannelInputViewIterator(currentSpilledBuildSide, this.binaryBuildSideSerializer), binaryBuildSideSerializer.createInstance());
        this.partitionsPending.remove(0);
        return true;
    }
    this.probeMatchedPhase = true;
    this.buildIterVisited = false;
    // build the next table; memory must be allocated after this call
    buildTableFromSpilledPartition(p);
    // set the probe side
    ChannelWithMeta channelWithMeta = new ChannelWithMeta(p.probeSideBuffer.getChannel().getChannelID(), p.probeSideBuffer.getBlockCount(), p.probeNumBytesInLastSeg);
    this.currentSpilledProbeSide = FileChannelUtil.createInputView(ioManager, channelWithMeta, new ArrayList<>(), compressionEnable, compressionCodecFactory, compressionBlockSize, segmentSize);
    ChannelReaderInputViewIterator<BinaryRowData> probeReader = new ChannelReaderInputViewIterator<>(this.currentSpilledProbeSide, new ArrayList<>(), this.binaryProbeSideSerializer);
    this.probeIterator.set(probeReader);
    this.probeIterator.setReuse(binaryProbeSideSerializer.createInstance());
    // unregister the pending partition
    this.partitionsPending.remove(0);
    this.currentRecursionDepth = p.getRecursionLevel() + 1;
    // recursively get the next
    return nextMatching();
}