Usage of org.apache.flink.runtime.io.disk.SeekableFileChannelInputView in the Apache Flink project.
From the class LargeRecordHandler, the method finishWriteAndSortKeys:
/**
 * Closes the spill files for records and keys, sorts the spilled keys, and returns an
 * iterator that fetches the full records in sorted key order.
 *
 * <p>The given memory is split between the record reader, the key reader, and the key
 * sorter; pages for the readers are taken from the tail of {@code memory} and the
 * remainder is handed to the sorter.
 *
 * @param memory the memory segments available for the readers and the key sorter;
 *               this list is mutated (reader pages are removed from it)
 * @return an iterator over the records in sorted key order
 * @throws IOException if closing the spill files or sorting the keys fails, or if the
 *                     thread is interrupted while waiting for the sorted keys
 * @throws IllegalStateException if no records have been spilled yet
 */
public MutableObjectIterator<T> finishWriteAndSortKeys(List<MemorySegment> memory) throws IOException {
    if (recordsOutFile == null || keysOutFile == null) {
        throw new IllegalStateException("The LargeRecordHandler has not spilled any records");
    }

    // close the writers and capture the byte count of each file's last block, which the
    // readers need in order to know where the valid data in the final segment ends
    final int lastBlockBytesKeys;
    final int lastBlockBytesRecords;

    recordsOutFile.close();
    keysOutFile.close();
    lastBlockBytesKeys = keysOutFile.getBytesInLatestSegment();
    lastBlockBytesRecords = recordsOutFile.getBytesInLatestSegment();
    recordsOutFile = null;
    keysOutFile = null;

    // budget pages for the two readers: roughly 2% of the memory, clamped between the
    // configured minimum and maximum spilling segment counts
    final int pagesForReaders = Math.max(
            3 * MIN_SEGMENTS_FOR_KEY_SPILLING,
            Math.min(2 * MAX_SEGMENTS_FOR_KEY_SPILLING, memory.size() / 50));
    final int pagesForKeyReader = Math.min(
            pagesForReaders - MIN_SEGMENTS_FOR_KEY_SPILLING, MAX_SEGMENTS_FOR_KEY_SPILLING);
    final int pagesForRecordReader = pagesForReaders - pagesForKeyReader;

    // grab memory for the record reader and the key reader from the tail of the list;
    // whatever remains in 'memory' goes to the key sorter below
    ArrayList<MemorySegment> memForRecordReader = new ArrayList<MemorySegment>();
    ArrayList<MemorySegment> memForKeysReader = new ArrayList<MemorySegment>();
    for (int i = 0; i < pagesForRecordReader; i++) {
        memForRecordReader.add(memory.remove(memory.size() - 1));
    }
    for (int i = 0; i < pagesForKeyReader; i++) {
        memForKeysReader.add(memory.remove(memory.size() - 1));
    }

    keysReader = new FileChannelInputView(
            ioManager.createBlockChannelReader(keysChannel),
            memManager, memForKeysReader, lastBlockBytesKeys);
    InputViewIterator<Tuple> keyIterator = new InputViewIterator<Tuple>(keysReader, keySerializer);

    try {
        keySorter = ExternalSorter.newBuilder(memManager, memoryOwner, keySerializer, keyComparator, executionConfig)
                .maxNumFileHandles(maxFilehandles)
                .sortBuffers(1)
                .enableSpilling(ioManager, 1.0f)
                .memory(memory)
                .objectReuse(this.executionConfig.isObjectReuseEnabled())
                .largeRecords(false)
                .build(keyIterator);
    } catch (MemoryAllocationException e) {
        // the sorter was handed all remaining memory explicitly, so it must not allocate
        throw new IllegalStateException(
                "We should not try allocating memory. Instead the sorter should use the provided memory.", e);
    }

    // wait for the sorter to sort the keys
    MutableObjectIterator<Tuple> result;
    try {
        result = keySorter.getIterator();
    } catch (InterruptedException e) {
        // restore the interrupt flag so callers further up the stack can still observe
        // the interruption, then surface it through the method's declared IOException
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }

    recordsReader = new SeekableFileChannelInputView(
            ioManager, recordsChannel, memManager, memForRecordReader, lastBlockBytesRecords);
    return new FetchingIterator<T>(serializer, result, recordsReader, keySerializer, numKeyFields);
}
Usage of org.apache.flink.runtime.io.disk.SeekableFileChannelInputView in the Apache Flink project.
From the class LargeRecordHandlerITCase, the method fileTest:
/**
 * Writes a sequence of records through a {@link FileChannelOutputView}, recording the
 * write offset of each record, then reads them back in random-access fashion through a
 * {@link SeekableFileChannelInputView} and verifies the deserialized contents.
 */
@Test
public void fileTest() {
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 4;
    final int NUM_RECORDS = 10;

    FileIOChannel.ID channel = null;
    try (final IOManager ioMan = new IOManagerAsync()) {
        final MemoryManager memMan = MemoryManagerBuilder.newBuilder()
                .setMemorySize(NUM_PAGES * PAGE_SIZE)
                .setPageSize(PAGE_SIZE)
                .build();
        final AbstractInvokable owner = new DummyInvokable();
        final List<MemorySegment> memory = memMan.allocatePages(owner, NUM_PAGES);

        final TypeInformation<?>[] types = new TypeInformation<?>[] {
                BasicTypeInfo.LONG_TYPE_INFO,
                new ValueTypeInfo<SomeVeryLongValue>(SomeVeryLongValue.class),
                BasicTypeInfo.BYTE_TYPE_INFO };
        final TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>> typeInfo =
                new TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>>(types);
        final TypeSerializer<Tuple3<Long, SomeVeryLongValue, Byte>> serializer =
                typeInfo.createSerializer(new ExecutionConfig());

        channel = ioMan.createChannel();
        FileChannelOutputView out = new FileChannelOutputView(
                ioMan.createBlockChannelWriter(channel), memMan, memory, PAGE_SIZE);

        // add the test data: record the offset of each record before writing it, and
        // derive the value and byte fields from the same random long so they can be
        // cross-checked after deserialization
        Random rnd = new Random();
        List<Long> offsets = new ArrayList<Long>();
        for (int i = 0; i < NUM_RECORDS; i++) {
            offsets.add(out.getWriteOffset());
            long val = rnd.nextLong();
            Tuple3<Long, SomeVeryLongValue, Byte> next = new Tuple3<Long, SomeVeryLongValue, Byte>(
                    val, new SomeVeryLongValue((int) val), (byte) val);
            serializer.serialize(next, out);
        }
        out.close();

        // offsets must be strictly increasing — each record occupies at least one byte
        for (int i = 1; i < offsets.size(); i++) {
            assertTrue(offsets.get(i) > offsets.get(i - 1));
        }

        // re-acquire the pages released by the writer, then read the records back via
        // seek() in write order and verify the derived fields agree with the key
        memMan.allocatePages(owner, memory, NUM_PAGES);
        SeekableFileChannelInputView in = new SeekableFileChannelInputView(
                ioMan, channel, memMan, memory, out.getBytesInLatestSegment());
        for (int i = 0; i < NUM_RECORDS; i++) {
            in.seek(offsets.get(i));
            Tuple3<Long, SomeVeryLongValue, Byte> next = serializer.deserialize(in);

            // key and value must be equal
            assertTrue(next.f0.intValue() == next.f1.val());
            assertTrue(next.f0.byteValue() == next.f2);
        }
        in.closeAndDelete();
    } catch (Exception e) {
        // preserve the full stack trace as the failure cause instead of only the
        // (possibly null) message that fail(e.getMessage()) would report
        throw new AssertionError("fileTest failed: " + e, e);
    }
}
Aggregations