Use of org.apache.flink.runtime.io.disk.FileChannelOutputView in project flink by apache.
Class LargeRecordHandler, method addRecord.
// --------------------------------------------------------------------------------------------

@SuppressWarnings("unchecked")
public long addRecord(T record) throws IOException {
    if (recordsOutFile == null) {
        if (closed) {
            throw new IllegalStateException("The large record handler has been closed.");
        }
        if (recordsReader != null) {
            throw new IllegalStateException("The handler has already switched to sorting.");
        }
        LOG.debug("Initializing the large record spilling...");

        // initialize the utilities: build a (key..., offset) tuple type from the
        // flat key comparators of the first record
        {
            final TypeComparator<?>[] keyComps = comparator.getFlatComparators();
            numKeyFields = keyComps.length;
            Object[] keyHolder = new Object[numKeyFields];
            comparator.extractKeys(record, keyHolder, 0);
            TypeSerializer<?>[] keySers = new TypeSerializer<?>[numKeyFields];
            TypeSerializer<?>[] tupleSers = new TypeSerializer<?>[numKeyFields + 1];
            int[] keyPos = new int[numKeyFields];
            for (int i = 0; i < numKeyFields; i++) {
                keyPos[i] = i;
                keySers[i] = createSerializer(keyHolder[i], i);
                tupleSers[i] = keySers[i];
            }
            // add the long serializer for the offset
            tupleSers[numKeyFields] = LongSerializer.INSTANCE;
            keySerializer = new TupleSerializer<Tuple>(
                    (Class<Tuple>) Tuple.getTupleClass(numKeyFields + 1), tupleSers);
            keyComparator = new TupleComparator<Tuple>(keyPos, keyComps, keySers);
            keySerializerFactory = new RuntimeSerializerFactory<Tuple>(
                    keySerializer, keySerializer.getTupleClass());
            keyTuple = keySerializer.createInstance();
        }

        // initialize the spilling: split the memory between the small key file
        // and the large record file
        final int totalNumSegments = memory.size();
        final int segmentsForKeys = (totalNumSegments >= 2 * MAX_SEGMENTS_FOR_KEY_SPILLING)
                ? MAX_SEGMENTS_FOR_KEY_SPILLING
                : Math.max(MIN_SEGMENTS_FOR_KEY_SPILLING, totalNumSegments - MAX_SEGMENTS_FOR_KEY_SPILLING);
        List<MemorySegment> recordsMemory = new ArrayList<MemorySegment>();
        List<MemorySegment> keysMemory = new ArrayList<MemorySegment>();
        for (int i = 0; i < segmentsForKeys; i++) {
            keysMemory.add(memory.get(i));
        }
        for (int i = segmentsForKeys; i < totalNumSegments; i++) {
            recordsMemory.add(memory.get(i));
        }
        recordsChannel = ioManager.createChannel();
        keysChannel = ioManager.createChannel();
        recordsOutFile = new FileChannelOutputView(
                ioManager.createBlockChannelWriter(recordsChannel), memManager, recordsMemory, memManager.getPageSize());
        keysOutFile = new FileChannelOutputView(
                ioManager.createBlockChannelWriter(keysChannel), memManager, keysMemory, memManager.getPageSize());
    }

    final long offset = recordsOutFile.getWriteOffset();
    if (offset < 0) {
        throw new RuntimeException("wrong offset");
    }

    // write the (key..., offset) tuple to the key file and the full record to the record file
    Object[] keyHolder = new Object[numKeyFields];
    comparator.extractKeys(record, keyHolder, 0);
    for (int i = 0; i < numKeyFields; i++) {
        keyTuple.setField(keyHolder[i], i);
    }
    keyTuple.setField(offset, numKeyFields);
    keySerializer.serialize(keyTuple, keysOutFile);
    serializer.serialize(record, recordsOutFile);
    recordCounter++;
    return offset;
}
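The pattern addRecord implements is easier to see without Flink's memory-managed views: each record is appended to a data file, while a small (key, offset) entry goes to a separate index file, so only the index needs to be sorted in memory. Below is a minimal standalone sketch of that pattern in plain java.io; the class, file names, and use of String keys are purely illustrative, not Flink API.

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class KeyOffsetSpillSketch {

    public static void main(String[] args) throws IOException {
        String[] records = {"pear", "apple", "orange"};

        // Spill phase: append each record to the data file and a (key, offset)
        // pair to the index file, mirroring recordsOutFile / keysOutFile above.
        try (DataOutputStream data = new DataOutputStream(new FileOutputStream("records.bin"));
             DataOutputStream index = new DataOutputStream(new FileOutputStream("keys.bin"))) {
            for (String record : records) {
                index.writeUTF(record);         // the extracted key (here the record itself)
                index.writeLong(data.size());   // current write position, like getWriteOffset()
                data.writeUTF(record);          // the full record body
            }
        }

        // Sort phase: only the small (key, offset) entries are sorted; the large
        // records stay on disk and are fetched by offset afterwards.
        List<Map.Entry<String, Long>> keys = new ArrayList<>();
        try (DataInputStream index = new DataInputStream(new FileInputStream("keys.bin"))) {
            while (true) {
                keys.add(new SimpleEntry<>(index.readUTF(), index.readLong()));
            }
        } catch (EOFException endOfIndex) {
            // reached the end of the index file
        }
        keys.sort(Map.Entry.comparingByKey());

        try (RandomAccessFile data = new RandomAccessFile("records.bin", "r")) {
            for (Map.Entry<String, Long> entry : keys) {
                data.seek(entry.getValue());
                System.out.println(data.readUTF()); // prints: apple, orange, pear
            }
        }
    }
}

Sorting the small index and fetching records by offset is exactly what the keysOutFile and recordsOutFile pair in addRecord prepares for; the sorting and merge-back are handled elsewhere in LargeRecordHandler.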
Use of org.apache.flink.runtime.io.disk.FileChannelOutputView in project flink by apache.
Class LargeRecordHandlerITCase, method fileTest.
@Test
public void fileTest() {
    final IOManager ioMan = new IOManagerAsync();
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 4;
    final int NUM_RECORDS = 10;
    FileIOChannel.ID channel = null;
    try {
        final MemoryManager memMan = new MemoryManager(NUM_PAGES * PAGE_SIZE, 1, PAGE_SIZE, MemoryType.HEAP, true);
        final AbstractInvokable owner = new DummyInvokable();
        final List<MemorySegment> memory = memMan.allocatePages(owner, NUM_PAGES);
        final TypeInformation<?>[] types = new TypeInformation<?>[] {
                BasicTypeInfo.LONG_TYPE_INFO,
                new ValueTypeInfo<SomeVeryLongValue>(SomeVeryLongValue.class),
                BasicTypeInfo.BYTE_TYPE_INFO };
        final TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>> typeInfo =
                new TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>>(types);
        final TypeSerializer<Tuple3<Long, SomeVeryLongValue, Byte>> serializer =
                typeInfo.createSerializer(new ExecutionConfig());

        channel = ioMan.createChannel();
        FileChannelOutputView out = new FileChannelOutputView(
                ioMan.createBlockChannelWriter(channel), memMan, memory, PAGE_SIZE);

        // add the test data, remembering the write offset of each record
        Random rnd = new Random();
        List<Long> offsets = new ArrayList<Long>();
        for (int i = 0; i < NUM_RECORDS; i++) {
            offsets.add(out.getWriteOffset());
            long val = rnd.nextLong();
            Tuple3<Long, SomeVeryLongValue, Byte> next =
                    new Tuple3<Long, SomeVeryLongValue, Byte>(val, new SomeVeryLongValue((int) val), (byte) val);
            serializer.serialize(next, out);
        }
        out.close();

        // the recorded offsets must be strictly increasing
        for (int i = 1; i < offsets.size(); i++) {
            assertTrue(offsets.get(i) > offsets.get(i - 1));
        }

        // close() returned the segments to the memory manager, so re-allocate them for reading
        memMan.allocatePages(owner, memory, NUM_PAGES);
        SeekableFileChannelInputView in = new SeekableFileChannelInputView(
                ioMan, channel, memMan, memory, out.getBytesInLatestSegment());
        for (int i = 0; i < NUM_RECORDS; i++) {
            in.seek(offsets.get(i));
            Tuple3<Long, SomeVeryLongValue, Byte> next = serializer.deserialize(in);
            // the key, the value, and the byte must all encode the same number
            assertTrue(next.f0.intValue() == next.f1.val());
            assertTrue(next.f0.byteValue() == next.f2);
        }
        in.closeAndDelete();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (channel != null) {
            try {
                ioMan.deleteChannel(channel);
            } catch (IOException ignored) {
            }
        }
        ioMan.shutdown();
    }
}
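Stripped of the Flink-specific plumbing, the round trip the test exercises is: record the write offset before each record, check that the offsets strictly increase, then seek back to any offset and deserialize. The following is a minimal sketch of that round trip using RandomAccessFile in place of FileChannelOutputView and SeekableFileChannelInputView; the shuffled read order is a variation on the test, and all names are illustrative.

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

public class SeekRoundTripSketch {

    public static void main(String[] args) throws IOException {
        final int numRecords = 10;
        Random rnd = new Random();
        List<Long> offsets = new ArrayList<>();
        List<Long> values = new ArrayList<>();

        // write phase: remember the offset at which each value starts
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream("seek.bin"))) {
            for (int i = 0; i < numRecords; i++) {
                offsets.add((long) out.size()); // analogous to out.getWriteOffset()
                long val = rnd.nextLong();
                values.add(val);
                out.writeLong(val);
            }
        }

        // offsets must be strictly increasing, as the test asserts
        for (int i = 1; i < offsets.size(); i++) {
            if (offsets.get(i) <= offsets.get(i - 1)) {
                throw new AssertionError("offsets not increasing");
            }
        }

        // read phase: visit the records in shuffled order via seek()
        List<Integer> order = new ArrayList<>();
        for (int i = 0; i < numRecords; i++) {
            order.add(i);
        }
        Collections.shuffle(order, rnd);

        try (RandomAccessFile in = new RandomAccessFile("seek.bin", "r")) {
            for (int i : order) {
                in.seek(offsets.get(i));
                if (in.readLong() != values.get(i)) {
                    throw new AssertionError("value mismatch at record " + i);
                }
            }
        }
        System.out.println("round trip OK");
    }
}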