Use of org.apache.flink.runtime.io.disk.InputViewIterator in project flink by apache.
From the class LargeRecordHandler, method finishWriteAndSortKeys:
public MutableObjectIterator<T> finishWriteAndSortKeys(List<MemorySegment> memory) throws IOException {
    if (recordsOutFile == null || keysOutFile == null) {
        throw new IllegalStateException("The LargeRecordHandler has not spilled any records");
    }

    // close the writers and remember how many bytes sit in their last (partially filled) blocks
    final int lastBlockBytesKeys;
    final int lastBlockBytesRecords;

    recordsOutFile.close();
    keysOutFile.close();
    lastBlockBytesKeys = keysOutFile.getBytesInLatestSegment();
    lastBlockBytesRecords = recordsOutFile.getBytesInLatestSegment();
    recordsOutFile = null;
    keysOutFile = null;

    final int pagesForReaders =
            Math.max(
                    3 * MIN_SEGMENTS_FOR_KEY_SPILLING,
                    Math.min(2 * MAX_SEGMENTS_FOR_KEY_SPILLING, memory.size() / 50));
    final int pagesForKeyReader =
            Math.min(pagesForReaders - MIN_SEGMENTS_FOR_KEY_SPILLING, MAX_SEGMENTS_FOR_KEY_SPILLING);
    final int pagesForRecordReader = pagesForReaders - pagesForKeyReader;

    // grab memory for the record reader and the keys reader
    ArrayList<MemorySegment> memForRecordReader = new ArrayList<MemorySegment>();
    ArrayList<MemorySegment> memForKeysReader = new ArrayList<MemorySegment>();

    for (int i = 0; i < pagesForRecordReader; i++) {
        memForRecordReader.add(memory.remove(memory.size() - 1));
    }
    for (int i = 0; i < pagesForKeyReader; i++) {
        memForKeysReader.add(memory.remove(memory.size() - 1));
    }

    keysReader =
            new FileChannelInputView(
                    ioManager.createBlockChannelReader(keysChannel),
                    memManager,
                    memForKeysReader,
                    lastBlockBytesKeys);
    InputViewIterator<Tuple> keyIterator = new InputViewIterator<Tuple>(keysReader, keySerializer);

    try {
        keySorter =
                ExternalSorter.newBuilder(memManager, memoryOwner, keySerializer, keyComparator, executionConfig)
                        .maxNumFileHandles(maxFilehandles)
                        .sortBuffers(1)
                        .enableSpilling(ioManager, 1.0f)
                        .memory(memory)
                        .objectReuse(this.executionConfig.isObjectReuseEnabled())
                        .largeRecords(false)
                        .build(keyIterator);
    } catch (MemoryAllocationException e) {
        throw new IllegalStateException(
                "We should not try allocating memory. Instead the sorter should use the provided memory.", e);
    }

    // wait for the sorter to sort the keys
    MutableObjectIterator<Tuple> result;
    try {
        result = keySorter.getIterator();
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    recordsReader =
            new SeekableFileChannelInputView(
                    ioManager, recordsChannel, memManager, memForRecordReader, lastBlockBytesRecords);

    return new FetchingIterator<T>(serializer, result, recordsReader, keySerializer, numKeyFields);
}
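The core move in this snippet is the InputViewIterator feeding the key sorter: it deserializes one record per call from a DataInputView and signals end-of-input by returning null. A minimal, self-contained sketch of that contract, using in-memory serialization views instead of the spilled key file above (the class name and the choice of LongSerializer are illustrative assumptions, not part of the Flink code):

import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.core.memory.DataInputDeserializer;
import org.apache.flink.core.memory.DataOutputSerializer;
import org.apache.flink.runtime.io.disk.InputViewIterator;
import org.apache.flink.util.MutableObjectIterator;

public class InputViewIteratorSketch {
    public static void main(String[] args) throws Exception {
        // serialize a few records into an in-memory output view
        DataOutputSerializer out = new DataOutputSerializer(64);
        for (long value = 0; value < 5; value++) {
            LongSerializer.INSTANCE.serialize(value, out);
        }

        // wrap the bytes in a DataInputView and replay them as typed records;
        // next() returns null once the view is exhausted
        DataInputDeserializer in = new DataInputDeserializer(out.getCopyOfBuffer());
        MutableObjectIterator<Long> it = new InputViewIterator<>(in, LongSerializer.INSTANCE);

        Long record;
        while ((record = it.next()) != null) {
            System.out.println(record);
        }
    }
}

In finishWriteAndSortKeys the same iterator is handed to ExternalSorter.build(keyIterator), so the sorter drains the spilled key file exactly this way.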
Use of org.apache.flink.runtime.io.disk.InputViewIterator in project flink by apache.
From the class GenericWriteAheadSink, method notifyOfCompletedCheckpoint:
@Override
public void notifyOfCompletedCheckpoint(long checkpointId) throws Exception {
    super.notifyOfCompletedCheckpoint(checkpointId);

    synchronized (pendingCheckpoints) {
        Iterator<PendingCheckpoint> pendingCheckpointIt = pendingCheckpoints.iterator();
        while (pendingCheckpointIt.hasNext()) {
            PendingCheckpoint pendingCheckpoint = pendingCheckpointIt.next();

            long pastCheckpointId = pendingCheckpoint.checkpointId;
            int subtaskId = pendingCheckpoint.subtaskId;
            long timestamp = pendingCheckpoint.timestamp;
            StreamStateHandle streamHandle = pendingCheckpoint.stateHandle;

            if (pastCheckpointId <= checkpointId) {
                try {
                    if (!committer.isCheckpointCommitted(subtaskId, pastCheckpointId)) {
                        try (FSDataInputStream in = streamHandle.openInputStream()) {
                            boolean success =
                                    sendValues(
                                            new ReusingMutableToRegularIteratorWrapper<>(
                                                    new InputViewIterator<>(
                                                            new DataInputViewStreamWrapper(in),
                                                            serializer),
                                                    serializer),
                                            timestamp);
                            if (success) {
                                // in case the checkpoint was successfully committed,
                                // discard its state from the backend and mark it for removal;
                                // in case it failed, we retry on the next checkpoint
                                committer.commitCheckpoint(subtaskId, pastCheckpointId);
                                streamHandle.discardState();
                                pendingCheckpointIt.remove();
                            }
                        }
                    } else {
                        streamHandle.discardState();
                        pendingCheckpointIt.remove();
                    }
                } catch (Exception e) {
                    // we have to break here to prevent a new (later) checkpoint
                    // from being committed before this one
                    LOG.error("Could not commit checkpoint.", e);
                    break;
                }
            }
        }
    }
}
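Here InputViewIterator is composed with two adapters: DataInputViewStreamWrapper turns the state handle's InputStream into a DataInputView, and ReusingMutableToRegularIteratorWrapper turns the resulting MutableObjectIterator into a plain Iterable, which is the shape sendValues consumes. A minimal sketch of the same chain, assuming a ByteArrayInputStream stands in for the checkpoint's FSDataInputStream (the class name and the serialized content are illustrative):

import java.io.ByteArrayInputStream;
import org.apache.flink.api.common.typeutils.base.StringSerializer;
import org.apache.flink.core.memory.DataInputViewStreamWrapper;
import org.apache.flink.core.memory.DataOutputSerializer;
import org.apache.flink.runtime.io.disk.InputViewIterator;
import org.apache.flink.runtime.util.ReusingMutableToRegularIteratorWrapper;

public class WriteAheadReplaySketch {
    public static void main(String[] args) throws Exception {
        // stand-in for the bytes behind the checkpoint's StreamStateHandle
        StringSerializer serializer = StringSerializer.INSTANCE;
        DataOutputSerializer out = new DataOutputSerializer(64);
        serializer.serialize("a", out);
        serializer.serialize("b", out);
        byte[] checkpointBytes = out.getCopyOfBuffer();

        try (ByteArrayInputStream in = new ByteArrayInputStream(checkpointBytes)) {
            // stream -> DataInputView -> MutableObjectIterator -> regular Iterable
            Iterable<String> values =
                    new ReusingMutableToRegularIteratorWrapper<>(
                            new InputViewIterator<>(new DataInputViewStreamWrapper(in), serializer),
                            serializer);
            for (String value : values) {
                System.out.println(value); // here the sink would re-send the value
            }
        }
    }
}

Because the wrapper reuses record instances internally, the values it yields are only valid within each loop iteration, which is fine for a sink that forwards them immediately.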
Use of org.apache.flink.runtime.io.disk.InputViewIterator in project flink by apache.
From the class GenericWriteAheadSink, method notifyCheckpointComplete:
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
    super.notifyCheckpointComplete(checkpointId);

    synchronized (pendingCheckpoints) {
        Iterator<PendingCheckpoint> pendingCheckpointIt = pendingCheckpoints.iterator();
        while (pendingCheckpointIt.hasNext()) {
            PendingCheckpoint pendingCheckpoint = pendingCheckpointIt.next();

            long pastCheckpointId = pendingCheckpoint.checkpointId;
            int subtaskId = pendingCheckpoint.subtaskId;
            long timestamp = pendingCheckpoint.timestamp;
            StreamStateHandle streamHandle = pendingCheckpoint.stateHandle;

            if (pastCheckpointId <= checkpointId) {
                try {
                    if (!committer.isCheckpointCommitted(subtaskId, pastCheckpointId)) {
                        try (FSDataInputStream in = streamHandle.openInputStream()) {
                            boolean success =
                                    sendValues(
                                            new ReusingMutableToRegularIteratorWrapper<>(
                                                    new InputViewIterator<>(
                                                            new DataInputViewStreamWrapper(in),
                                                            serializer),
                                                    serializer),
                                            pastCheckpointId,
                                            timestamp);
                            if (success) {
                                // in case the checkpoint was successfully committed,
                                // discard its state from the backend and mark it for removal;
                                // in case it failed, we retry on the next checkpoint
                                committer.commitCheckpoint(subtaskId, pastCheckpointId);
                                streamHandle.discardState();
                                pendingCheckpointIt.remove();
                            }
                        }
                    } else {
                        streamHandle.discardState();
                        pendingCheckpointIt.remove();
                    }
                } catch (Exception e) {
                    // we have to break here to prevent a new (later) checkpoint
                    // from being committed before this one
                    LOG.error("Could not commit checkpoint.", e);
                    break;
                }
            }
        }
    }
}
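Apart from the renamed hook, the only difference from the notifyOfCompletedCheckpoint variant above is the sendValues signature: here the sink also receives pastCheckpointId alongside the timestamp, giving the concrete sink implementation access to the id of the checkpoint whose values are being replayed.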