Search in sources :

Example 1 with RocksIteratorWrapper

Use of org.apache.flink.contrib.streaming.state.RocksIteratorWrapper in the Apache Flink project.

From the class RocksDBIncrementalRestoreOperation, method restoreWithRescaling.

/**
 * Restores from multiple incremental state handles when rescaling is involved. One handle may be
 * chosen to initialize the base DB; each remaining handle is opened as a temporary RocksDB
 * instance, the entries that fall into this backend's key-group range are copied into the base
 * DB, and the temporary instance is discarded afterwards.
 */
private void restoreWithRescaling(Collection<KeyedStateHandle> restoreStateHandles) throws Exception {
    // Choose the handle (if any) that initializes the base DB instance
    KeyedStateHandle initialHandle = RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(restoreStateHandles, keyGroupRange);
    if (initialHandle == null) {
        // No handle was chosen: open the base DB directly
        this.rocksHandle.openDB();
    } else {
        // The chosen handle initializes the base DB, so it is dropped from the handles copied below
        restoreStateHandles.remove(initialHandle);
        initDBWithRescaling(initialHandle);
    }

    // Serialized key-group prefixes bounding the range to copy: [start, end + 1)
    byte[] rangeStartPrefix = new byte[keyGroupPrefixBytes];
    CompositeKeySerializationUtils.serializeKeyGroup(keyGroupRange.getStartKeyGroup(), rangeStartPrefix);
    byte[] rangeStopPrefix = new byte[keyGroupPrefixBytes];
    CompositeKeySerializationUtils.serializeKeyGroup(keyGroupRange.getEndKeyGroup() + 1, rangeStopPrefix);

    // Transfer the key-groups of every remaining handle from a temporary instance into the base DB
    for (KeyedStateHandle handle : restoreStateHandles) {
        if (!(handle instanceof IncrementalRemoteKeyedStateHandle)) {
            throw unexpectedStateHandleException(IncrementalRemoteKeyedStateHandle.class, handle.getClass());
        }
        logger.info("Starting to restore from state handle: {} with rescaling.", handle);
        Path tmpInstancePath = instanceBasePath.getAbsoluteFile().toPath().resolve(UUID.randomUUID().toString());
        try (RestoredDBInstance tmpDb = restoreDBInstanceFromStateHandle((IncrementalRemoteKeyedStateHandle) handle, tmpInstancePath);
            RocksDBWriteBatchWrapper batch = new RocksDBWriteBatchWrapper(this.rocksHandle.getDb(), writeBatchSize)) {
            List<ColumnFamilyHandle> sourceHandles = tmpDb.columnFamilyHandles;
            // Walk the column families by descriptor index; the same index selects the source
            // handle and the state meta info used to look up the target column family
            for (int cf = 0; cf < tmpDb.columnFamilyDescriptors.size(); ++cf) {
                ColumnFamilyHandle sourceHandle = sourceHandles.get(cf);
                ColumnFamilyHandle targetHandle = this.rocksHandle.getOrRegisterStateColumnFamilyHandle(null, tmpDb.stateMetaInfoSnapshots.get(cf)).columnFamilyHandle;
                // try-with-resources closes the iterator once this column family is copied
                try (RocksIteratorWrapper source = RocksDBOperationUtils.getRocksIterator(tmpDb.db, sourceHandle, tmpDb.readOptions)) {
                    // Start at the first key of the range and stop at the first key that is
                    // no longer before the stop prefix (or when the iterator is exhausted)
                    for (source.seek(rangeStartPrefix);
                            source.isValid() && RocksDBIncrementalCheckpointUtils.beforeThePrefixBytes(source.key(), rangeStopPrefix);
                            source.next()) {
                        batch.put(targetHandle, source.key(), source.value());
                    }
                }
            }
            logger.info("Finished restoring from state handle: {} with rescaling.", handle);
        } finally {
            // The temporary instance directory is removed whether or not the copy succeeded
            cleanUpPathQuietly(tmpInstancePath);
        }
    }
}
Also used : Path(java.nio.file.Path) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) RocksDBWriteBatchWrapper(org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) IncrementalKeyedStateHandle(org.apache.flink.runtime.state.IncrementalKeyedStateHandle) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) IncrementalLocalKeyedStateHandle(org.apache.flink.runtime.state.IncrementalLocalKeyedStateHandle) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle) RocksIteratorWrapper(org.apache.flink.contrib.streaming.state.RocksIteratorWrapper)

Example 2 with RocksIteratorWrapper

Use of org.apache.flink.contrib.streaming.state.RocksIteratorWrapper in the Apache Flink project.

From the class RocksDBFullSnapshotResources, method createKVStateIterators.

/**
 * Creates one {@link RocksIteratorWrapper} per entry of {@code metaData}, pairs it with a
 * consecutive kv-state id starting at 0, and registers it with the given registry.
 *
 * @param closeableRegistry registry that every created iterator is registered with
 * @param readOptions read options passed on to each created iterator
 * @return the created iterators, each paired with its kv-state id, in {@code metaData} order
 * @throws IOException declared for the iterator creation and registration calls
 */
private List<Tuple2<RocksIteratorWrapper, Integer>> createKVStateIterators(CloseableRegistry closeableRegistry, ReadOptions readOptions) throws IOException {
    final List<Tuple2<RocksIteratorWrapper, Integer>> iteratorsWithIds = new ArrayList<>(metaData.size());
    for (MetaData entry : metaData) {
        // Exactly one element is added per iteration, so the current size is the next id
        final int kvStateId = iteratorsWithIds.size();
        RocksIteratorWrapper iterator = createRocksIteratorWrapper(db, entry.rocksDbKvStateInfo.columnFamilyHandle, entry.stateSnapshotTransformer, readOptions);
        iteratorsWithIds.add(Tuple2.of(iterator, kvStateId));
        closeableRegistry.registerCloseable(iterator);
    }
    return iteratorsWithIds;
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) ArrayList(java.util.ArrayList) RocksIteratorWrapper(org.apache.flink.contrib.streaming.state.RocksIteratorWrapper)

Example 3 with RocksIteratorWrapper

Use of org.apache.flink.contrib.streaming.state.RocksIteratorWrapper in the Apache Flink project.

From the class RocksStatesPerKeyGroupMergeIterator, method buildIteratorHeap.

/**
 * Builds the priority queue over all state iterators that currently point at a valid entry.
 *
 * <p>Each RocksDB iterator is positioned at its first entry. Valid ones are wrapped in a
 * {@link RocksSingleStateIterator}, offered to the queue, and the wrapper takes the raw
 * iterator's place in {@code closeableRegistry}. Iterators without any entry are unregistered
 * and closed. Heap-backed iterators are offered and registered when valid, and closed otherwise.
 *
 * @param kvStateIterators RocksDB iterators paired with their kv-state ids
 * @param heapPriorityQueueIterators iterators over heap-backed priority-queue state
 * @return the queue of valid iterators; empty when no iterator has an entry
 * @throws IOException declared for the registry registration calls
 */
private PriorityQueue<SingleStateIterator> buildIteratorHeap(List<Tuple2<RocksIteratorWrapper, Integer>> kvStateIterators, List<SingleStateIterator> heapPriorityQueueIterators) throws IOException {
    // The comparator is looked up by the number of key-group prefix bytes (index is count - 1)
    Comparator<SingleStateIterator> iteratorComparator = COMPARATORS.get(keyGroupPrefixByteCount - 1);
    // PriorityQueue rejects an initial capacity below 1 with IllegalArgumentException, so clamp
    // the capacity to return an empty queue when both input lists are empty
    int initialCapacity = Math.max(1, kvStateIterators.size() + heapPriorityQueueIterators.size());
    PriorityQueue<SingleStateIterator> iteratorPriorityQueue = new PriorityQueue<>(initialCapacity, iteratorComparator);
    for (Tuple2<RocksIteratorWrapper, Integer> rocksIteratorWithKVStateId : kvStateIterators) {
        final RocksIteratorWrapper rocksIterator = rocksIteratorWithKVStateId.f0;
        rocksIterator.seekToFirst();
        if (rocksIterator.isValid()) {
            RocksSingleStateIterator wrappingIterator = new RocksSingleStateIterator(rocksIterator, rocksIteratorWithKVStateId.f1);
            iteratorPriorityQueue.offer(wrappingIterator);
            // Register the wrapper before unregistering the raw iterator
            closeableRegistry.registerCloseable(wrappingIterator);
            closeableRegistry.unregisterCloseable(rocksIterator);
        } else {
            // Nothing to iterate: close the iterator, but only when unregistering succeeded
            if (closeableRegistry.unregisterCloseable(rocksIterator)) {
                IOUtils.closeQuietly(rocksIterator);
            }
        }
    }
    for (SingleStateIterator heapQueueIterator : heapPriorityQueueIterators) {
        if (heapQueueIterator.isValid()) {
            iteratorPriorityQueue.offer(heapQueueIterator);
            closeableRegistry.registerCloseable(heapQueueIterator);
        } else {
            // Heap iterators without entries are not registered here; close them right away
            IOUtils.closeQuietly(heapQueueIterator);
        }
    }
    return iteratorPriorityQueue;
}
Also used : PriorityQueue(java.util.PriorityQueue) RocksIteratorWrapper(org.apache.flink.contrib.streaming.state.RocksIteratorWrapper)

Aggregations

RocksIteratorWrapper (org.apache.flink.contrib.streaming.state.RocksIteratorWrapper)3 Path (java.nio.file.Path)1 ArrayList (java.util.ArrayList)1 PriorityQueue (java.util.PriorityQueue)1 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)1 RocksDBWriteBatchWrapper (org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper)1 IncrementalKeyedStateHandle (org.apache.flink.runtime.state.IncrementalKeyedStateHandle)1 IncrementalLocalKeyedStateHandle (org.apache.flink.runtime.state.IncrementalLocalKeyedStateHandle)1 IncrementalRemoteKeyedStateHandle (org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle)1 KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)1 ColumnFamilyDescriptor (org.rocksdb.ColumnFamilyDescriptor)1 ColumnFamilyHandle (org.rocksdb.ColumnFamilyHandle)1