Search in sources:

Example 1 with SeekableFileChannelInputView

Use of org.apache.flink.runtime.io.disk.SeekableFileChannelInputView in the Apache Flink project.

Class LargeRecordHandler, method finishWriteAndSortKeys.

/**
 * Closes the spill files for records and keys, sorts the spilled keys, and returns an iterator
 * over the spilled records.
 *
 * <p>Segments are removed from the tail of {@code memory} and handed to the records reader and
 * the keys reader; whatever remains in the list is passed to the key sorter.
 *
 * @param memory the memory segments available for reading and sorting; this list is modified
 * @return a {@code FetchingIterator} built from the sorted key iterator and a seekable reader
 *     on the records channel
 * @throws IOException if the underlying file I/O fails, or if the thread is interrupted while
 *     waiting for the key sorter (the interrupt status is restored before throwing)
 * @throws IllegalStateException if no records have been spilled, or if the sorter attempts to
 *     allocate memory itself instead of using the provided segments
 */
public MutableObjectIterator<T> finishWriteAndSortKeys(List<MemorySegment> memory) throws IOException {
    if (recordsOutFile == null || keysOutFile == null) {
        throw new IllegalStateException("The LargeRecordHandler has not spilled any records");
    }
    // close the writers and remember how many bytes each wrote into its last block;
    // the readers below need that to know where the data in the final block ends
    recordsOutFile.close();
    keysOutFile.close();
    final int lastBlockBytesKeys = keysOutFile.getBytesInLatestSegment();
    final int lastBlockBytesRecords = recordsOutFile.getBytesInLatestSegment();
    recordsOutFile = null;
    keysOutFile = null;
    // split the reader budget: 1/50th of the memory, clamped between
    // 3 * MIN_SEGMENTS_FOR_KEY_SPILLING and 2 * MAX_SEGMENTS_FOR_KEY_SPILLING
    final int pagesForReaders = Math.max(3 * MIN_SEGMENTS_FOR_KEY_SPILLING, Math.min(2 * MAX_SEGMENTS_FOR_KEY_SPILLING, memory.size() / 50));
    final int pagesForKeyReader = Math.min(pagesForReaders - MIN_SEGMENTS_FOR_KEY_SPILLING, MAX_SEGMENTS_FOR_KEY_SPILLING);
    final int pagesForRecordReader = pagesForReaders - pagesForKeyReader;
    // NOTE(review): there is no check that 'memory' holds at least pagesForReaders segments;
    // a smaller list would fail inside memory.remove(...) below - confirm callers guarantee this.
    // grab memory for the record reader and the key reader
    ArrayList<MemorySegment> memForRecordReader = new ArrayList<MemorySegment>();
    ArrayList<MemorySegment> memForKeysReader = new ArrayList<MemorySegment>();
    for (int i = 0; i < pagesForRecordReader; i++) {
        memForRecordReader.add(memory.remove(memory.size() - 1));
    }
    for (int i = 0; i < pagesForKeyReader; i++) {
        memForKeysReader.add(memory.remove(memory.size() - 1));
    }
    keysReader = new FileChannelInputView(ioManager.createBlockChannelReader(keysChannel), memManager, memForKeysReader, lastBlockBytesKeys);
    InputViewIterator<Tuple> keyIterator = new InputViewIterator<Tuple>(keysReader, keySerializer);
    try {
        // the remaining segments in 'memory' are handed to the sorter
        keySorter = ExternalSorter.newBuilder(memManager, memoryOwner, keySerializer, keyComparator, executionConfig).maxNumFileHandles(maxFilehandles).sortBuffers(1).enableSpilling(ioManager, 1.0f).memory(memory).objectReuse(this.executionConfig.isObjectReuseEnabled()).largeRecords(false).build(keyIterator);
    } catch (MemoryAllocationException e) {
        throw new IllegalStateException("We should not try allocating memory. Instead the sorter should use the provided memory.", e);
    }
    // wait for the sorter to sort the keys
    MutableObjectIterator<Tuple> result;
    try {
        result = keySorter.getIterator();
    } catch (InterruptedException e) {
        // restore the interrupt status so code further up the stack can still observe
        // the interruption after it has been translated into an IOException
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
    recordsReader = new SeekableFileChannelInputView(ioManager, recordsChannel, memManager, memForRecordReader, lastBlockBytesRecords);
    return new FetchingIterator<T>(serializer, result, recordsReader, keySerializer, numKeyFields);
}
Also used : SeekableFileChannelInputView(org.apache.flink.runtime.io.disk.SeekableFileChannelInputView) MemoryAllocationException(org.apache.flink.runtime.memory.MemoryAllocationException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MemorySegment(org.apache.flink.core.memory.MemorySegment) InputViewIterator(org.apache.flink.runtime.io.disk.InputViewIterator) SeekableFileChannelInputView(org.apache.flink.runtime.io.disk.SeekableFileChannelInputView) FileChannelInputView(org.apache.flink.runtime.io.disk.FileChannelInputView) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 2 with SeekableFileChannelInputView

Use of org.apache.flink.runtime.io.disk.SeekableFileChannelInputView in the Apache Flink project.

Class LargeRecordHandlerITCase, method fileTest.

/**
 * Writes {@code NUM_RECORDS} random tuples through a {@code FileChannelOutputView}, recording
 * the write offset before each record, then seeks to every recorded offset with a
 * {@code SeekableFileChannelInputView} and checks that the deserialized tuple's fields agree
 * with each other.
 */
@Test
public void fileTest() {
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 4;
    final int NUM_RECORDS = 10;
    try (final IOManager ioMan = new IOManagerAsync()) {
        final MemoryManager memMan = MemoryManagerBuilder.newBuilder().setMemorySize(NUM_PAGES * PAGE_SIZE).setPageSize(PAGE_SIZE).build();
        final AbstractInvokable owner = new DummyInvokable();
        final List<MemorySegment> memory = memMan.allocatePages(owner, NUM_PAGES);
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeVeryLongValue>(SomeVeryLongValue.class), BasicTypeInfo.BYTE_TYPE_INFO };
        final TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>> typeInfo = new TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>>(types);
        final TypeSerializer<Tuple3<Long, SomeVeryLongValue, Byte>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        final FileIOChannel.ID channel = ioMan.createChannel();
        FileChannelOutputView out = new FileChannelOutputView(ioMan.createBlockChannelWriter(channel), memMan, memory, PAGE_SIZE);
        // add the test data, remembering where each record starts
        Random rnd = new Random();
        List<Long> offsets = new ArrayList<Long>();
        for (int i = 0; i < NUM_RECORDS; i++) {
            offsets.add(out.getWriteOffset());
            long val = rnd.nextLong();
            // all three fields are derived from the same random value so they can be
            // cross-checked after reading the record back
            Tuple3<Long, SomeVeryLongValue, Byte> next = new Tuple3<Long, SomeVeryLongValue, Byte>(val, new SomeVeryLongValue((int) val), (byte) val);
            serializer.serialize(next, out);
        }
        out.close();
        // offsets must be strictly increasing
        for (int i = 1; i < offsets.size(); i++) {
            assertTrue(offsets.get(i) > offsets.get(i - 1));
        }
        // refill the segment list before handing it to the reader
        memMan.allocatePages(owner, memory, NUM_PAGES);
        SeekableFileChannelInputView in = new SeekableFileChannelInputView(ioMan, channel, memMan, memory, out.getBytesInLatestSegment());
        try {
            for (int i = 0; i < NUM_RECORDS; i++) {
                in.seek(offsets.get(i));
                Tuple3<Long, SomeVeryLongValue, Byte> next = serializer.deserialize(in);
                // key and value must be equal
                assertTrue(next.f0.intValue() == next.f1.val());
                assertTrue(next.f0.byteValue() == next.f2);
            }
        } finally {
            // a failed assertion throws AssertionError, which the catch below does not
            // handle; clean up here so the reader is closed and deleted on that path too
            in.closeAndDelete();
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ArrayList(java.util.ArrayList) FileIOChannel(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) Random(java.util.Random) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) SeekableFileChannelInputView(org.apache.flink.runtime.io.disk.SeekableFileChannelInputView) FileChannelOutputView(org.apache.flink.runtime.io.disk.FileChannelOutputView) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) MemorySegment(org.apache.flink.core.memory.MemorySegment) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Aggregations

IOException (java.io.IOException): 2; ArrayList (java.util.ArrayList): 2; MemorySegment (org.apache.flink.core.memory.MemorySegment): 2; SeekableFileChannelInputView (org.apache.flink.runtime.io.disk.SeekableFileChannelInputView): 2; Random (java.util.Random): 1; ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 1; TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 1; Tuple (org.apache.flink.api.java.tuple.Tuple): 1; Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 1; TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo): 1; ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo): 1; FileChannelInputView (org.apache.flink.runtime.io.disk.FileChannelInputView): 1; FileChannelOutputView (org.apache.flink.runtime.io.disk.FileChannelOutputView): 1; InputViewIterator (org.apache.flink.runtime.io.disk.InputViewIterator): 1; FileIOChannel (org.apache.flink.runtime.io.disk.iomanager.FileIOChannel): 1; IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager): 1; IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync): 1; AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable): 1; MemoryAllocationException (org.apache.flink.runtime.memory.MemoryAllocationException): 1; MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 1