Search in sources :

Example 1 with FileChannelOutputView

Use of org.apache.flink.runtime.io.disk.FileChannelOutputView in the Apache Flink project.

From the class LargeRecordHandler, method addRecord.

// --------------------------------------------------------------------------------------------
/**
 * Spills a single record. The record's key fields, followed by the record's write offset, are
 * written as one tuple to the keys file; the record itself is serialized to the records file.
 *
 * <p>On the first call (while {@code recordsOutFile} is still {@code null}) the key
 * serializer/comparator utilities are built from the keys of the given record, the segments in
 * {@code memory} are divided between the keys file and the records file, and both output views
 * are opened.
 *
 * @param record the record to spill
 * @return the write offset of the records file, read before the record is serialized
 * @throws IOException declared for the channel, writer and serialization calls made here
 * @throws IllegalStateException if, during the first-call set-up, the handler is closed or
 *         {@code recordsReader} has already been set
 * @throws RuntimeException if the records file reports a negative write offset
 */
@SuppressWarnings("unchecked")
public long addRecord(T record) throws IOException {
    if (recordsOutFile == null) {
        // first record: the spilling infrastructure has not been set up yet
        if (closed) {
            throw new IllegalStateException("The large record handler has been closed.");
        }
        if (recordsReader != null) {
            throw new IllegalStateException("The handler has already switched to sorting.");
        }
        LOG.debug("Initializing the large record spilling...");

        // ---- key utilities: derived from the flat comparators and this record's keys ----
        final TypeComparator<?>[] flatComparators = comparator.getFlatComparators();
        numKeyFields = flatComparators.length;

        final Object[] sampleKeys = new Object[numKeyFields];
        comparator.extractKeys(record, sampleKeys, 0);

        final int[] keyPositions = new int[numKeyFields];
        final TypeSerializer<?>[] keyFieldSerializers = new TypeSerializer<?>[numKeyFields];
        // one extra slot at the end for the record offset
        final TypeSerializer<?>[] tupleFieldSerializers = new TypeSerializer<?>[numKeyFields + 1];

        for (int field = 0; field < numKeyFields; field++) {
            final TypeSerializer<?> fieldSerializer = createSerializer(sampleKeys[field], field);
            keyPositions[field] = field;
            keyFieldSerializers[field] = fieldSerializer;
            tupleFieldSerializers[field] = fieldSerializer;
        }
        // the trailing tuple field carries the offset (a long)
        tupleFieldSerializers[numKeyFields] = LongSerializer.INSTANCE;

        final Class<Tuple> keyTupleClass = (Class<Tuple>) Tuple.getTupleClass(numKeyFields + 1);
        keySerializer = new TupleSerializer<Tuple>(keyTupleClass, tupleFieldSerializers);
        keyComparator = new TupleComparator<Tuple>(keyPositions, flatComparators, keyFieldSerializers);
        keySerializerFactory = new RuntimeSerializerFactory<Tuple>(keySerializer, keySerializer.getTupleClass());
        keyTuple = keySerializer.createInstance();

        // ---- spilling: split the available segments between keys and records ----
        final int totalNumSegments = memory.size();
        final int segmentsForKeys;
        if (totalNumSegments >= 2 * MAX_SEGMENTS_FOR_KEY_SPILLING) {
            segmentsForKeys = MAX_SEGMENTS_FOR_KEY_SPILLING;
        } else {
            segmentsForKeys = Math.max(MIN_SEGMENTS_FOR_KEY_SPILLING, totalNumSegments - MAX_SEGMENTS_FOR_KEY_SPILLING);
        }

        // the leading segments back the keys file ...
        final List<MemorySegment> keysMemory = new ArrayList<MemorySegment>();
        for (int seg = 0; seg < segmentsForKeys; seg++) {
            keysMemory.add(memory.get(seg));
        }
        // ... and all remaining segments back the records file
        final List<MemorySegment> recordsMemory = new ArrayList<MemorySegment>();
        for (int seg = segmentsForKeys; seg < totalNumSegments; seg++) {
            recordsMemory.add(memory.get(seg));
        }

        recordsChannel = ioManager.createChannel();
        keysChannel = ioManager.createChannel();

        final int pageSize = memManager.getPageSize();
        recordsOutFile = new FileChannelOutputView(
                ioManager.createBlockChannelWriter(recordsChannel), memManager, recordsMemory, pageSize);
        keysOutFile = new FileChannelOutputView(
                ioManager.createBlockChannelWriter(keysChannel), memManager, keysMemory, memManager.getPageSize());
    }

    // the offset must be taken before the record is written: it is stored next to the keys
    final long recordOffset = recordsOutFile.getWriteOffset();
    if (recordOffset < 0) {
        throw new RuntimeException("wrong offset");
    }

    // fill the reusable key tuple: key fields first, offset in the last position
    final Object[] extractedKeys = new Object[numKeyFields];
    comparator.extractKeys(record, extractedKeys, 0);
    for (int field = 0; field < numKeyFields; field++) {
        keyTuple.setField(extractedKeys[field], field);
    }
    keyTuple.setField(recordOffset, numKeyFields);

    keySerializer.serialize(keyTuple, keysOutFile);
    serializer.serialize(record, recordsOutFile);

    recordCounter++;
    return recordOffset;
}
Also used : FileChannelOutputView(org.apache.flink.runtime.io.disk.FileChannelOutputView) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) ArrayList(java.util.ArrayList) TupleComparator(org.apache.flink.api.java.typeutils.runtime.TupleComparator) MemorySegment(org.apache.flink.core.memory.MemorySegment) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 2 with FileChannelOutputView

Use of org.apache.flink.runtime.io.disk.FileChannelOutputView in the Apache Flink project.

From the class LargeRecordHandlerITCase, method fileTest.

/**
 * Writes a handful of random tuples through a {@link FileChannelOutputView}, remembering the
 * write offset at which each one starts, then seeks to every remembered offset with a
 * {@link SeekableFileChannelInputView} and checks that the fields of the tuple read back are
 * consistent with each other.
 */
@Test
public void fileTest() {
    final int pageSize = 4 * 1024;
    final int numPages = 4;
    final int numRecords = 10;

    final IOManager ioMan = new IOManagerAsync();
    FileIOChannel.ID channel = null;

    try {
        final MemoryManager memMan = new MemoryManager(numPages * pageSize, 1, pageSize, MemoryType.HEAP, true);
        final AbstractInvokable owner = new DummyInvokable();
        final List<MemorySegment> memory = memMan.allocatePages(owner, numPages);

        // serializer for (Long, SomeVeryLongValue, Byte) tuples
        final TypeInformation<?>[] fieldTypes = new TypeInformation<?>[] {
            BasicTypeInfo.LONG_TYPE_INFO,
            new ValueTypeInfo<SomeVeryLongValue>(SomeVeryLongValue.class),
            BasicTypeInfo.BYTE_TYPE_INFO
        };
        final TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>> tupleType =
                new TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>>(fieldTypes);
        final TypeSerializer<Tuple3<Long, SomeVeryLongValue, Byte>> serializer =
                tupleType.createSerializer(new ExecutionConfig());

        channel = ioMan.createChannel();
        FileChannelOutputView out = new FileChannelOutputView(ioMan.createBlockChannelWriter(channel), memMan, memory, pageSize);

        // ---- write phase: record the start offset of every tuple ----
        final Random rnd = new Random();
        final List<Long> offsets = new ArrayList<Long>();
        while (offsets.size() < numRecords) {
            offsets.add(out.getWriteOffset());

            // all three fields are derived from the same random long
            final long seed = rnd.nextLong();
            final Tuple3<Long, SomeVeryLongValue, Byte> written =
                    new Tuple3<Long, SomeVeryLongValue, Byte>(seed, new SomeVeryLongValue((int) seed), (byte) seed);
            serializer.serialize(written, out);
        }
        out.close();

        // offsets have to be strictly increasing
        for (int k = 1; k < offsets.size(); k++) {
            assertTrue(offsets.get(k) > offsets.get(k - 1));
        }

        // ---- read phase ----
        // NOTE(review): presumably out.close() handed the pages back, hence the re-allocation
        // into the same list - confirm against FileChannelOutputView.
        memMan.allocatePages(owner, memory, numPages);
        SeekableFileChannelInputView in = new SeekableFileChannelInputView(ioMan, channel, memMan, memory, out.getBytesInLatestSegment());

        for (long position : offsets) {
            in.seek(position);
            final Tuple3<Long, SomeVeryLongValue, Byte> read = serializer.deserialize(in);

            // the value fields must match the key they were derived from
            assertTrue(read.f0.intValue() == read.f1.val());
            assertTrue(read.f0.byteValue() == read.f2);
        }
        in.closeAndDelete();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        // best-effort removal of the spill file, then stop the I/O manager
        if (channel != null) {
            try {
                ioMan.deleteChannel(channel);
            } catch (IOException ignored) {
            }
        }
        ioMan.shutdown();
    }
}
Also used : ArrayList(java.util.ArrayList) FileIOChannel(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) Random(java.util.Random) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) SeekableFileChannelInputView(org.apache.flink.runtime.io.disk.SeekableFileChannelInputView) FileChannelOutputView(org.apache.flink.runtime.io.disk.FileChannelOutputView) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) IOException(java.io.IOException) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) MemorySegment(org.apache.flink.core.memory.MemorySegment) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)2 MemorySegment (org.apache.flink.core.memory.MemorySegment)2 FileChannelOutputView (org.apache.flink.runtime.io.disk.FileChannelOutputView)2 IOException (java.io.IOException)1 Random (java.util.Random)1 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)1 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)1 Tuple (org.apache.flink.api.java.tuple.Tuple)1 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)1 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)1 ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo)1 RuntimeSerializerFactory (org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory)1 TupleComparator (org.apache.flink.api.java.typeutils.runtime.TupleComparator)1 TupleSerializer (org.apache.flink.api.java.typeutils.runtime.TupleSerializer)1 SeekableFileChannelInputView (org.apache.flink.runtime.io.disk.SeekableFileChannelInputView)1 FileIOChannel (org.apache.flink.runtime.io.disk.iomanager.FileIOChannel)1 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)1 IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)1 AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable)1 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)1