
Example 1 with ChannelReaderInputView

Use of org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView in project flink by apache.

From the class ChannelViewsTest, method testWriteReadNotAll.

@Test
public void testWriteReadNotAll() throws Exception {
    final TestData.TupleGenerator generator = new TestData.TupleGenerator(SEED, KEY_MAX, VALUE_SHORT_LENGTH, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
    final FileIOChannel.ID channel = this.ioManager.createChannel();
    final TypeSerializer<Tuple2<Integer, String>> serializer = TestData.getIntStringTupleSerializer();
    // create the writer output view
    List<MemorySegment> memory = this.memoryManager.allocatePages(this.parentTask, NUM_MEMORY_SEGMENTS);
    final BlockChannelWriter<MemorySegment> writer = this.ioManager.createBlockChannelWriter(channel);
    final ChannelWriterOutputView outView = new ChannelWriterOutputView(writer, memory, MEMORY_PAGE_SIZE);
    // write a number of pairs
    final Tuple2<Integer, String> rec = new Tuple2<>();
    for (int i = 0; i < NUM_PAIRS_SHORT; i++) {
        generator.next(rec);
        serializer.serialize(rec, outView);
    }
    this.memoryManager.release(outView.close());
    // create the reader input view
    memory = this.memoryManager.allocatePages(this.parentTask, NUM_MEMORY_SEGMENTS);
    final BlockChannelReader<MemorySegment> reader = this.ioManager.createBlockChannelReader(channel);
    final ChannelReaderInputView inView = new ChannelReaderInputView(reader, memory, outView.getBlockCount(), true);
    generator.reset();
    // read and re-generate all records and compare them
    final Tuple2<Integer, String> readRec = new Tuple2<>();
    for (int i = 0; i < NUM_PAIRS_SHORT / 2; i++) {
        generator.next(rec);
        serializer.deserialize(readRec, inView);
        int k1 = rec.f0;
        String v1 = rec.f1;
        int k2 = readRec.f0;
        String v2 = readRec.f1;
        Assert.assertTrue("The re-generated and the read record do not match.", k1 == k2 && v1.equals(v2));
    }
    this.memoryManager.release(inView.close());
    reader.deleteChannel();
}
Also used : TestData(org.apache.flink.runtime.operators.testutils.TestData) FileIOChannel(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel) MemorySegment(org.apache.flink.core.memory.MemorySegment) ChannelWriterOutputView(org.apache.flink.runtime.io.disk.iomanager.ChannelWriterOutputView) ChannelReaderInputView(org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)
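
The test above boils down to a short round trip: serialize records into a ChannelWriterOutputView backed by a block writer, release the write buffers on close, then replay the same channel through a ChannelReaderInputView bounded by the writer view's block count. The following is a minimal sketch of that pattern, assuming the same ChannelViewsTest fixture as above (ioManager, memoryManager, parentTask, NUM_MEMORY_SEGMENTS, MEMORY_PAGE_SIZE); the helper name writeAndReadBack is made up for illustration.

private void writeAndReadBack(Tuple2<Integer, String> record, int copies) throws Exception {
    final TypeSerializer<Tuple2<Integer, String>> serializer = TestData.getIntStringTupleSerializer();
    final FileIOChannel.ID channel = this.ioManager.createChannel();
    // write side: a block writer wrapped in a ChannelWriterOutputView
    List<MemorySegment> memory = this.memoryManager.allocatePages(this.parentTask, NUM_MEMORY_SEGMENTS);
    final BlockChannelWriter<MemorySegment> writer = this.ioManager.createBlockChannelWriter(channel);
    final ChannelWriterOutputView outView = new ChannelWriterOutputView(writer, memory, MEMORY_PAGE_SIZE);
    for (int i = 0; i < copies; i++) {
        serializer.serialize(record, outView);
    }
    // close() hands the write buffers back so they can be released
    this.memoryManager.release(outView.close());
    // read side: the block count recorded by the writer view bounds the reader view
    memory = this.memoryManager.allocatePages(this.parentTask, NUM_MEMORY_SEGMENTS);
    final BlockChannelReader<MemorySegment> reader = this.ioManager.createBlockChannelReader(channel);
    final ChannelReaderInputView inView = new ChannelReaderInputView(reader, memory, outView.getBlockCount(), true);
    final Tuple2<Integer, String> reread = new Tuple2<>();
    for (int i = 0; i < copies; i++) {
        serializer.deserialize(reread, inView);
    }
    this.memoryManager.release(inView.close());
    reader.deleteChannel();
}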

Example 2 with ChannelReaderInputView

Use of org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView in project flink by apache.

From the class MutableHashTable, method prepareNextPartition.

protected boolean prepareNextPartition() throws IOException {
    // finalize and cleanup the partitions of the current table
    int buffersAvailable = 0;
    for (int i = 0; i < this.partitionsBeingBuilt.size(); i++) {
        final HashPartition<BT, PT> p = this.partitionsBeingBuilt.get(i);
        p.setFurtherPatitioning(this.furtherPartitioning);
        buffersAvailable += p.finalizeProbePhase(this.availableMemory, this.partitionsPending, this.buildSideOuterJoin);
    }
    this.partitionsBeingBuilt.clear();
    this.writeBehindBuffersAvailable += buffersAvailable;
    releaseTable();
    if (this.currentSpilledBuildSide != null) {
        this.currentSpilledBuildSide.closeAndDelete();
        this.currentSpilledBuildSide = null;
    }
    if (this.currentSpilledProbeSide != null) {
        this.currentSpilledProbeSide.closeAndDelete();
        this.currentSpilledProbeSide = null;
    }
    if (this.partitionsPending.isEmpty()) {
        // no more data
        return false;
    }
    // there are pending partitions
    final HashPartition<BT, PT> p = this.partitionsPending.get(0);
    if (p.probeSideRecordCounter == 0) {
        // unprobed spilled partitions are only re-processed for a build-side outer join;
        // there is no need to create a hash table since there are no probe-side records
        List<MemorySegment> memory = new ArrayList<MemorySegment>();
        MemorySegment seg1 = getNextBuffer();
        if (seg1 != null) {
            memory.add(seg1);
            MemorySegment seg2 = getNextBuffer();
            if (seg2 != null) {
                memory.add(seg2);
            }
        } else {
            throw new IllegalStateException("Attempting to begin reading spilled partition without any memory available");
        }
        this.currentSpilledBuildSide = this.ioManager.createBlockChannelReader(p.getBuildSideChannel().getChannelID());
        final ChannelReaderInputView inView = new HeaderlessChannelReaderInputView(currentSpilledBuildSide, memory, p.getBuildSideBlockCount(), p.getLastSegmentLimit(), false);
        final ChannelReaderInputViewIterator<BT> inIter = new ChannelReaderInputViewIterator<BT>(inView, this.availableMemory, this.buildSideSerializer);
        this.unmatchedBuildIterator = inIter;
        this.partitionsPending.remove(0);
        return true;
    }
    this.probeMatchedPhase = true;
    this.unmatchedBuildVisited = false;
    // build the next table; memory must be allocated after this call
    buildTableFromSpilledPartition(p);
    // set the probe side - gather memory segments for reading
    LinkedBlockingQueue<MemorySegment> returnQueue = new LinkedBlockingQueue<MemorySegment>();
    this.currentSpilledProbeSide = this.ioManager.createBlockChannelReader(p.getProbeSideChannel().getChannelID(), returnQueue);
    List<MemorySegment> memory = new ArrayList<MemorySegment>();
    MemorySegment seg1 = getNextBuffer();
    if (seg1 != null) {
        memory.add(seg1);
        MemorySegment seg2 = getNextBuffer();
        if (seg2 != null) {
            memory.add(seg2);
        }
    } else {
        throw new IllegalStateException("Attempting to begin probing of partition without any memory available");
    }
    ChannelReaderInputViewIterator<PT> probeReader = new ChannelReaderInputViewIterator<PT>(this.currentSpilledProbeSide, returnQueue, memory, this.availableMemory, this.probeSideSerializer, p.getProbeSideBlockCount());
    this.probeIterator.set(probeReader);
    // unregister the pending partition
    this.partitionsPending.remove(0);
    this.currentRecursionDepth = p.getRecursionLevel() + 1;
    // recursively get the next
    return nextRecord();
}
Also used : ArrayList(java.util.ArrayList) ChannelReaderInputViewIterator(org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) MemorySegment(org.apache.flink.core.memory.MemorySegment) HeaderlessChannelReaderInputView(org.apache.flink.runtime.io.disk.iomanager.HeaderlessChannelReaderInputView) ChannelReaderInputView(org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView)
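
Both the unprobed-partition branch above and buildTableFromSpilledPartition (Example 4) follow the same re-read idiom: hand two freshly acquired segments to a block reader over the spilled channel, wrap it in a HeaderlessChannelReaderInputView (spilled partitions carry no block headers, so the view needs the block count and the fill limit of the last segment), and drain it through a ChannelReaderInputViewIterator. A condensed sketch, assuming the MutableHashTable fields used above (ioManager, availableMemory, buildSideSerializer, getNextBuffer()) and p as the pending HashPartition; the null checks on getNextBuffer() are omitted for brevity.

List<MemorySegment> readMemory = new ArrayList<MemorySegment>();
readMemory.add(getNextBuffer());
readMemory.add(getNextBuffer());

BlockChannelReader<MemorySegment> reader =
        this.ioManager.createBlockChannelReader(p.getBuildSideChannel().getChannelID());

// headerless view: the block count and last-segment limit tell it where the spilled data ends
ChannelReaderInputView view = new HeaderlessChannelReaderInputView(
        reader, readMemory, p.getBuildSideBlockCount(), p.getLastSegmentLimit(), false);

ChannelReaderInputViewIterator<BT> records =
        new ChannelReaderInputViewIterator<BT>(view, this.availableMemory, this.buildSideSerializer);

BT reuse = this.buildSideSerializer.createInstance();
while ((reuse = records.next(reuse)) != null) {
    // process one re-read build-side record
}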

Example 3 with ChannelReaderInputView

Use of org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView in project flink by apache.

From the class FixedLengthRecordSorterTest, method testFlushPartialMemoryPage.

@Test
public void testFlushPartialMemoryPage() throws Exception {
    // Insert IntPair which would fill 2 memory pages.
    final int NUM_RECORDS = 2 * MEMORY_PAGE_SIZE / 8;
    final List<MemorySegment> memory = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    FixedLengthRecordSorter<IntPair> sorter = newSortBuffer(memory);
    UniformIntPairGenerator generator = new UniformIntPairGenerator(Integer.MAX_VALUE, 1, false);
    // write the records
    IntPair record = new IntPair();
    int num = -1;
    do {
        generator.next(record);
        num++;
    } while (sorter.write(record) && num < NUM_RECORDS);
    FileIOChannel.ID channelID = this.ioManager.createChannelEnumerator().next();
    BlockChannelWriter<MemorySegment> blockChannelWriter = this.ioManager.createBlockChannelWriter(channelID);
    final List<MemorySegment> writeBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelWriterOutputView outputView = new ChannelWriterOutputView(blockChannelWriter, writeBuffer, writeBuffer.get(0).size());
    sorter.writeToOutput(outputView, 1, NUM_RECORDS - 1);
    this.memoryManager.release(outputView.close());
    BlockChannelReader<MemorySegment> blockChannelReader = this.ioManager.createBlockChannelReader(channelID);
    final List<MemorySegment> readBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelReaderInputView readerInputView = new ChannelReaderInputView(blockChannelReader, readBuffer, false);
    final List<MemorySegment> dataBuffer = this.memoryManager.allocatePages(new DummyInvokable(), 3);
    ChannelReaderInputViewIterator<IntPair> iterator = new ChannelReaderInputViewIterator<IntPair>(readerInputView, dataBuffer, this.serializer);
    record = iterator.next(record);
    int i = 1;
    while (record != null) {
        Assert.assertEquals(i, record.getKey());
        record = iterator.next(record);
        i++;
    }
    Assert.assertEquals(NUM_RECORDS, i);
    this.memoryManager.release(dataBuffer);
    // release the memory occupied by the buffers
    sorter.dispose();
    this.memoryManager.release(memory);
}
Also used : FileIOChannel(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel) ChannelReaderInputViewIterator(org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator) IntPair(org.apache.flink.runtime.operators.testutils.types.IntPair) MemorySegment(org.apache.flink.core.memory.MemorySegment) ChannelWriterOutputView(org.apache.flink.runtime.io.disk.iomanager.ChannelWriterOutputView) ChannelReaderInputView(org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) UniformIntPairGenerator(org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator) Test(org.junit.Test)

Example 4 with ChannelReaderInputView

Use of org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView in project flink by apache.

From the class MutableHashTable, method buildTableFromSpilledPartition.

protected void buildTableFromSpilledPartition(final HashPartition<BT, PT> p) throws IOException {
    final int nextRecursionLevel = p.getRecursionLevel() + 1;
    if (nextRecursionLevel > MAX_RECURSION_DEPTH) {
        throw new RuntimeException("Hash join exceeded maximum number of recursions, without reducing " + "partitions enough to be memory resident. Probably cause: Too many duplicate keys.");
    }
    // we distinguish two cases here:
    // 1) The partition fits entirely into main memory. That is the case if we have enough buffers for
    //    all partition segments, plus enough buffers to hold the table structure.
    //    --> We read the partition in as it is and create a hashtable that references only
    //        that single partition.
    // 2) We can not guarantee that enough memory segments are available and read the partition
    //    in, distributing its data among newly created partitions.
    final int totalBuffersAvailable = this.availableMemory.size() + this.writeBehindBuffersAvailable;
    if (totalBuffersAvailable != this.totalNumBuffers - this.numWriteBehindBuffers) {
        throw new RuntimeException("Hash Join bug in memory management: Memory buffers leaked.");
    }
    long numBuckets = p.getBuildSideRecordCount() / NUM_ENTRIES_PER_BUCKET + 1;
    // we need to consider the worst case where everything hashes to one bucket which needs to overflow by the same
    // number of total buckets again. Also, one buffer needs to remain for the probing
    final long totalBuffersNeeded = 2 * (numBuckets / (this.bucketsPerSegmentMask + 1)) + p.getBuildSideBlockCount() + 2;
    if (totalBuffersNeeded < totalBuffersAvailable) {
        // we are guaranteed to stay in memory
        ensureNumBuffersReturned(p.getBuildSideBlockCount());
        // first read the partition in
        final BulkBlockChannelReader reader = this.ioManager.createBulkBlockChannelReader(p.getBuildSideChannel().getChannelID(), this.availableMemory, p.getBuildSideBlockCount());
        // call waits until all is read
        if (keepBuildSidePartitions && p.recursionLevel == 0) {
            // keep the partitions
            reader.close();
        } else {
            reader.closeAndDelete();
        }
        final List<MemorySegment> partitionBuffers = reader.getFullSegments();
        final HashPartition<BT, PT> newPart = new HashPartition<BT, PT>(this.buildSideSerializer, this.probeSideSerializer, 0, nextRecursionLevel, partitionBuffers, p.getBuildSideRecordCount(), this.segmentSize, p.getLastSegmentLimit());
        this.partitionsBeingBuilt.add(newPart);
        // erect the buckets
        initTable((int) numBuckets, (byte) 1);
        // now, index the partition through a hash table
        final HashPartition<BT, PT>.PartitionIterator pIter = newPart.getPartitionIterator(this.buildSideComparator);
        BT record = this.buildSideSerializer.createInstance();
        while ((record = pIter.next(record)) != null) {
            final int hashCode = hash(pIter.getCurrentHashCode(), nextRecursionLevel);
            final int posHashCode = hashCode % this.numBuckets;
            final long pointer = pIter.getPointer();
            // get the bucket for the given hash code
            final int bucketArrayPos = posHashCode >> this.bucketsPerSegmentBits;
            final int bucketInSegmentPos = (posHashCode & this.bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
            final MemorySegment bucket = this.buckets[bucketArrayPos];
            insertBucketEntry(newPart, bucket, bucketInSegmentPos, hashCode, pointer, false);
        }
    } else {
        // we need to partition and partially spill
        final int avgRecordLenPartition = (int) (((long) p.getBuildSideBlockCount()) * this.segmentSize / p.getBuildSideRecordCount());
        final int bucketCount = getInitialTableSize(totalBuffersAvailable, this.segmentSize, getPartitioningFanOutNoEstimates(totalBuffersAvailable), avgRecordLenPartition);
        // compute in how many splits, we'd need to partition the result 
        final int splits = (int) (totalBuffersNeeded / totalBuffersAvailable) + 1;
        final int partitionFanOut = Math.min(10 * splits /* being conservative */, MAX_NUM_PARTITIONS);
        createPartitions(partitionFanOut, nextRecursionLevel);
        // set up the table structure. the write behind buffers are taken away, as are one buffer per partition
        initTable(bucketCount, (byte) partitionFanOut);
        // go over the complete input and insert every element into the hash table
        // first set up the reader with some memory.
        final List<MemorySegment> segments = new ArrayList<MemorySegment>(2);
        segments.add(getNextBuffer());
        segments.add(getNextBuffer());
        final BlockChannelReader<MemorySegment> inReader = this.ioManager.createBlockChannelReader(p.getBuildSideChannel().getChannelID());
        final ChannelReaderInputView inView = new HeaderlessChannelReaderInputView(inReader, segments, p.getBuildSideBlockCount(), p.getLastSegmentLimit(), false);
        final ChannelReaderInputViewIterator<BT> inIter = new ChannelReaderInputViewIterator<BT>(inView, this.availableMemory, this.buildSideSerializer);
        final TypeComparator<BT> btComparator = this.buildSideComparator;
        BT rec = this.buildSideSerializer.createInstance();
        while ((rec = inIter.next(rec)) != null) {
            final int hashCode = hash(btComparator.hash(rec), nextRecursionLevel);
            insertIntoTable(rec, hashCode);
        }
        if (keepBuildSidePartitions && p.recursionLevel == 0) {
            // keep the partitions
            inReader.close();
        } else {
            inReader.closeAndDelete();
        }
        // finalize the partitions
        for (int i = 0; i < this.partitionsBeingBuilt.size(); i++) {
            HashPartition<BT, PT> part = this.partitionsBeingBuilt.get(i);
            part.finalizeBuildPhase(this.ioManager, this.currentEnumerator, this.writeBehindBuffers);
        }
    }
}
Also used : BulkBlockChannelReader(org.apache.flink.runtime.io.disk.iomanager.BulkBlockChannelReader) ArrayList(java.util.ArrayList) ChannelReaderInputViewIterator(org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator) MemorySegment(org.apache.flink.core.memory.MemorySegment) HeaderlessChannelReaderInputView(org.apache.flink.runtime.io.disk.iomanager.HeaderlessChannelReaderInputView) ChannelReaderInputView(org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView)

Example 5 with ChannelReaderInputView

Use of org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView in project flink by apache.

From the class ChannelViewsTest, method testReadTooMany.

@Test
public void testReadTooMany() throws Exception {
    final TestData.TupleGenerator generator = new TestData.TupleGenerator(SEED, KEY_MAX, VALUE_SHORT_LENGTH, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
    final FileIOChannel.ID channel = this.ioManager.createChannel();
    final TypeSerializer<Tuple2<Integer, String>> serializer = TestData.getIntStringTupleSerializer();
    // create the writer output view
    List<MemorySegment> memory = this.memoryManager.allocatePages(this.parentTask, NUM_MEMORY_SEGMENTS);
    final BlockChannelWriter<MemorySegment> writer = this.ioManager.createBlockChannelWriter(channel);
    final ChannelWriterOutputView outView = new ChannelWriterOutputView(writer, memory, MEMORY_PAGE_SIZE);
    // write a number of pairs
    final Tuple2<Integer, String> rec = new Tuple2<>();
    for (int i = 0; i < NUM_PAIRS_SHORT; i++) {
        generator.next(rec);
        serializer.serialize(rec, outView);
    }
    this.memoryManager.release(outView.close());
    // create the reader input view
    memory = this.memoryManager.allocatePages(this.parentTask, NUM_MEMORY_SEGMENTS);
    final BlockChannelReader<MemorySegment> reader = this.ioManager.createBlockChannelReader(channel);
    final ChannelReaderInputView inView = new ChannelReaderInputView(reader, memory, outView.getBlockCount(), true);
    generator.reset();
    // read and re-generate all records and compare them
    try {
        final Tuple2<Integer, String> readRec = new Tuple2<>();
        for (int i = 0; i < NUM_PAIRS_SHORT + 1; i++) {
            generator.next(rec);
            serializer.deserialize(readRec, inView);
            final int k1 = rec.f0;
            final String v1 = rec.f1;
            final int k2 = readRec.f0;
            final String v2 = readRec.f1;
            Assert.assertTrue("The re-generated and the read record do not match.", k1 == k2 && v1.equals(v2));
        }
        Assert.fail("Expected an EOFException which did not occur.");
    } catch (EOFException eofex) {
        // expected
    } catch (Throwable t) {
        // unexpected
        Assert.fail("Unexpected Exception: " + t.getMessage());
    }
    this.memoryManager.release(inView.close());
    reader.deleteChannel();
}
Also used : TestData(org.apache.flink.runtime.operators.testutils.TestData) FileIOChannel(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel) MemorySegment(org.apache.flink.core.memory.MemorySegment) ChannelWriterOutputView(org.apache.flink.runtime.io.disk.iomanager.ChannelWriterOutputView) ChannelReaderInputView(org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView) Tuple2(org.apache.flink.api.java.tuple.Tuple2) EOFException(java.io.EOFException) Test(org.junit.Test)
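
The test above pins down the end-of-data contract: once every block written by the ChannelWriterOutputView has been consumed, a further deserialize call on the ChannelReaderInputView fails with an EOFException. Below is a small sketch of how a caller that does not know the record count in advance can lean on that, reusing the serializer, inView and reader from the fixture above; Example 3's ChannelReaderInputViewIterator wraps the same condition and simply returns null from next() instead.

try {
    final Tuple2<Integer, String> readRec = new Tuple2<>();
    while (true) {
        serializer.deserialize(readRec, inView);
        // ... consume readRec ...
    }
} catch (EOFException eof) {
    // all spilled records have been read; fall through to clean up
}
this.memoryManager.release(inView.close());
reader.deleteChannel();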

Aggregations

MemorySegment (org.apache.flink.core.memory.MemorySegment) 10
ChannelReaderInputView (org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView) 10
ChannelWriterOutputView (org.apache.flink.runtime.io.disk.iomanager.ChannelWriterOutputView) 8
FileIOChannel (org.apache.flink.runtime.io.disk.iomanager.FileIOChannel) 8
Test (org.junit.Test) 8
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 6
TestData (org.apache.flink.runtime.operators.testutils.TestData) 6
ChannelReaderInputViewIterator (org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator) 4
ArrayList (java.util.ArrayList) 2
HeaderlessChannelReaderInputView (org.apache.flink.runtime.io.disk.iomanager.HeaderlessChannelReaderInputView) 2
DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable) 2
UniformIntPairGenerator (org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator) 2
IntPair (org.apache.flink.runtime.operators.testutils.types.IntPair) 2
EOFException (java.io.EOFException) 1
LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue) 1
BulkBlockChannelReader (org.apache.flink.runtime.io.disk.iomanager.BulkBlockChannelReader) 1