Search in sources :

Example 1 with InputViewIterator

use of org.apache.flink.runtime.io.disk.InputViewIterator in project flink by apache.

the class LargeRecordHandler method finishWriteAndSortKeys.

/**
 * Closes the spill files for records and keys, sorts the spilled keys with an external sorter,
 * and returns an iterator built on the sorted keys and a seekable reader over the records file.
 *
 * <p>The given memory list is modified: segments for the record reader and the key reader are
 * removed from the end of the list, and the list (with whatever remains) is handed to the key
 * sorter.
 *
 * @param memory the memory segments available for the readers and the key sorter; segments are
 *     removed from the end of this list
 * @return a {@code FetchingIterator} constructed from the sorter's result iterator and the
 *     records reader
 * @throws IOException if the thread is interrupted while waiting for the key sorter (the
 *     interruption is wrapped and the thread's interrupt flag is restored); the I/O calls made
 *     here may presumably raise it as well
 * @throws IllegalStateException if either spill file is {@code null} (nothing was spilled), or
 *     if building the sorter raises a {@code MemoryAllocationException}
 */
public MutableObjectIterator<T> finishWriteAndSortKeys(List<MemorySegment> memory) throws IOException {
    if (recordsOutFile == null || keysOutFile == null) {
        throw new IllegalStateException("The LargeRecordHandler has not spilled any records");
    }
    // close the writers and remember how many bytes each one reports for its latest segment;
    // the readers created below are given these values
    final int lastBlockBytesKeys;
    final int lastBlockBytesRecords;
    recordsOutFile.close();
    keysOutFile.close();
    lastBlockBytesKeys = keysOutFile.getBytesInLatestSegment();
    lastBlockBytesRecords = recordsOutFile.getBytesInLatestSegment();
    recordsOutFile = null;
    keysOutFile = null;
    // split a bounded share of the memory (1/50th, clamped by the spilling constants) between
    // the key reader and the record reader
    final int pagesForReaders = Math.max(3 * MIN_SEGMENTS_FOR_KEY_SPILLING, Math.min(2 * MAX_SEGMENTS_FOR_KEY_SPILLING, memory.size() / 50));
    final int pagesForKeyReader = Math.min(pagesForReaders - MIN_SEGMENTS_FOR_KEY_SPILLING, MAX_SEGMENTS_FOR_KEY_SPILLING);
    final int pagesForRecordReader = pagesForReaders - pagesForKeyReader;
    // grab memory for the record reader and the key reader from the end of the list
    ArrayList<MemorySegment> memForRecordReader = new ArrayList<MemorySegment>();
    ArrayList<MemorySegment> memForKeysReader = new ArrayList<MemorySegment>();
    for (int i = 0; i < pagesForRecordReader; i++) {
        memForRecordReader.add(memory.remove(memory.size() - 1));
    }
    for (int i = 0; i < pagesForKeyReader; i++) {
        memForKeysReader.add(memory.remove(memory.size() - 1));
    }
    keysReader = new FileChannelInputView(ioManager.createBlockChannelReader(keysChannel), memManager, memForKeysReader, lastBlockBytesKeys);
    InputViewIterator<Tuple> keyIterator = new InputViewIterator<Tuple>(keysReader, keySerializer);
    try {
        // the sorter is given the remaining segments explicitly via memory(...)
        keySorter = ExternalSorter.newBuilder(memManager, memoryOwner, keySerializer, keyComparator, executionConfig).maxNumFileHandles(maxFilehandles).sortBuffers(1).enableSpilling(ioManager, 1.0f).memory(memory).objectReuse(this.executionConfig.isObjectReuseEnabled()).largeRecords(false).build(keyIterator);
    } catch (MemoryAllocationException e) {
        throw new IllegalStateException("We should not try allocating memory. Instead the sorter should use the provided memory.", e);
    }
    // wait for the sorter to sort the keys
    MutableObjectIterator<Tuple> result;
    try {
        result = keySorter.getIterator();
    } catch (InterruptedException e) {
        // restore the interrupt flag so callers further up the stack can still observe the
        // interruption after it has been translated into an IOException
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
    recordsReader = new SeekableFileChannelInputView(ioManager, recordsChannel, memManager, memForRecordReader, lastBlockBytesRecords);
    return new FetchingIterator<T>(serializer, result, recordsReader, keySerializer, numKeyFields);
}
Also used : MemoryAllocationException(org.apache.flink.runtime.memory.MemoryAllocationException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MemorySegment(org.apache.flink.core.memory.MemorySegment) InputViewIterator(org.apache.flink.runtime.io.disk.InputViewIterator) SeekableFileChannelInputView(org.apache.flink.runtime.io.disk.SeekableFileChannelInputView) FileChannelInputView(org.apache.flink.runtime.io.disk.FileChannelInputView) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 2 with InputViewIterator

use of org.apache.flink.runtime.io.disk.InputViewIterator in project flink by apache.

the class GenericWriteAheadSink method notifyOfCompletedCheckpoint.

/**
 * Handles the notification that a checkpoint completed by trying to commit every pending
 * checkpoint whose id is not greater than the completed one.
 *
 * <p>While holding the lock on {@code pendingCheckpoints}, each such entry is handled as
 * follows: if the committer reports it as already committed, its state is discarded and the
 * entry is removed; otherwise its state is read back and passed to {@code sendValues}, and only
 * if that reports success is the checkpoint committed, its state discarded and the entry
 * removed. Any exception is logged and stops the iteration.
 *
 * @param checkpointId id of the checkpoint that completed
 * @throws Exception declared by the signature; may be raised by the super call
 */
@Override
public void notifyOfCompletedCheckpoint(long checkpointId) throws Exception {
    super.notifyOfCompletedCheckpoint(checkpointId);
    synchronized (pendingCheckpoints) {
        for (Iterator<PendingCheckpoint> it = pendingCheckpoints.iterator(); it.hasNext(); ) {
            final PendingCheckpoint pending = it.next();
            final long pendingId = pending.checkpointId;
            if (pendingId > checkpointId) {
                // newer than the completed checkpoint: leave it pending
                continue;
            }
            final int subtask = pending.subtaskId;
            final long ts = pending.timestamp;
            final StreamStateHandle handle = pending.stateHandle;
            try {
                if (committer.isCheckpointCommitted(subtask, pendingId)) {
                    // already committed earlier, only the stored state is left to clean up
                    handle.discardState();
                    it.remove();
                    continue;
                }
                try (FSDataInputStream in = handle.openInputStream()) {
                    final DataInputViewStreamWrapper view = new DataInputViewStreamWrapper(in);
                    final boolean sent = sendValues(new ReusingMutableToRegularIteratorWrapper<>(new InputViewIterator<>(view, serializer), serializer), ts);
                    if (sent) {
                        // values went out: commit, drop the stored state and forget the entry;
                        // on failure the entry stays pending and is retried on the next checkpoint
                        committer.commitCheckpoint(subtask, pendingId);
                        handle.discardState();
                        it.remove();
                    }
                }
            } catch (Exception e) {
                // stop here so that a later checkpoint is never committed before this one
                LOG.error("Could not commit checkpoint.", e);
                break;
            }
        }
    }
}
Also used : StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) IOException(java.io.IOException) InputViewIterator(org.apache.flink.runtime.io.disk.InputViewIterator)

Example 3 with InputViewIterator

use of org.apache.flink.runtime.io.disk.InputViewIterator in project flink by apache.

the class GenericWriteAheadSink method notifyCheckpointComplete.

/**
 * Reacts to a completed checkpoint by attempting to commit all pending checkpoints with an id
 * less than or equal to {@code checkpointId}.
 *
 * <p>The work is done under the lock on {@code pendingCheckpoints}. A pending checkpoint that
 * the committer already knows as committed only has its state discarded and is removed. For
 * the others the stored state is opened and passed to {@code sendValues} together with the
 * pending checkpoint's id and timestamp; when that returns {@code true} the checkpoint is
 * committed, its state discarded and the entry removed. If anything throws, the error is
 * logged and processing of further entries stops.
 *
 * @param checkpointId id of the checkpoint that completed
 * @throws Exception declared by the signature; may be raised by the super call
 */
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
    super.notifyCheckpointComplete(checkpointId);
    synchronized (pendingCheckpoints) {
        final Iterator<PendingCheckpoint> entries = pendingCheckpoints.iterator();
        while (entries.hasNext()) {
            final PendingCheckpoint entry = entries.next();
            final long entryId = entry.checkpointId;
            if (entryId > checkpointId) {
                // not covered by this notification
                continue;
            }
            final int subtaskIndex = entry.subtaskId;
            final long entryTimestamp = entry.timestamp;
            final StreamStateHandle state = entry.stateHandle;
            try {
                final boolean alreadyCommitted = committer.isCheckpointCommitted(subtaskIndex, entryId);
                if (alreadyCommitted) {
                    state.discardState();
                    entries.remove();
                } else {
                    try (FSDataInputStream in = state.openInputStream()) {
                        final DataInputViewStreamWrapper view = new DataInputViewStreamWrapper(in);
                        if (sendValues(new ReusingMutableToRegularIteratorWrapper<>(new InputViewIterator<>(view, serializer), serializer), entryId, entryTimestamp)) {
                            // successfully sent: commit, discard the stored state and drop the
                            // entry; an unsuccessful send leaves it in place for the next
                            // checkpoint
                            committer.commitCheckpoint(subtaskIndex, entryId);
                            state.discardState();
                            entries.remove();
                        }
                    }
                }
            } catch (Exception e) {
                // break out so that no later checkpoint gets committed ahead of this one
                LOG.error("Could not commit checkpoint.", e);
                break;
            }
        }
    }
}
Also used : StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) IOException(java.io.IOException) InputViewIterator(org.apache.flink.runtime.io.disk.InputViewIterator)

Aggregations

IOException (java.io.IOException)3 InputViewIterator (org.apache.flink.runtime.io.disk.InputViewIterator)3 FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream)2 DataInputViewStreamWrapper (org.apache.flink.core.memory.DataInputViewStreamWrapper)2 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)2 ArrayList (java.util.ArrayList)1 Tuple (org.apache.flink.api.java.tuple.Tuple)1 MemorySegment (org.apache.flink.core.memory.MemorySegment)1 FileChannelInputView (org.apache.flink.runtime.io.disk.FileChannelInputView)1 SeekableFileChannelInputView (org.apache.flink.runtime.io.disk.SeekableFileChannelInputView)1 MemoryAllocationException (org.apache.flink.runtime.memory.MemoryAllocationException)1