Search in sources :

Example 1 with RocksDBWriteBatchWrapper

use of org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper in project flink by apache.

the class RocksDBIncrementalRestoreOperation method restoreWithRescaling.

/**
 * Restores this backend from several incremental state handles when the key-group assignment
 * has changed (rescaling).
 *
 * <p>One handle may be selected to initialise the base DB directly; if none is selected, an empty
 * DB is opened instead. Every other handle is restored into a temporary DB instance located in a
 * randomly named directory below {@code instanceBasePath}. Its entries are then copied into the
 * base DB, starting at the serialized prefix of the first key-group of {@code keyGroupRange} and
 * stopping at the first key that is no longer before the serialized prefix of
 * {@code endKeyGroup + 1}. The temporary directory is cleaned up afterwards, whether or not the
 * copy succeeded.
 *
 * <p>Note: the handle chosen as the initial one is removed from {@code restoreStateHandles}, so
 * the passed collection is modified.
 *
 * @param restoreStateHandles the state handles to restore from; all handles that are not used
 *     as the initial handle must be {@link IncrementalRemoteKeyedStateHandle}s
 * @throws Exception if restoring a temporary instance or writing into the base DB fails, or if
 *     a handle of an unexpected type is encountered
 */
private void restoreWithRescaling(Collection<KeyedStateHandle> restoreStateHandles) throws Exception {
    // Pick the handle (if any) that should seed the base DB.
    KeyedStateHandle seedHandle =
            RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(
                    restoreStateHandles, keyGroupRange);

    if (seedHandle == null) {
        // Nothing suitable to start from: begin with an empty base DB.
        this.rocksHandle.openDB();
    } else {
        // The seed handle is consumed here and must not be processed again in the loop below.
        restoreStateHandles.remove(seedHandle);
        initDBWithRescaling(seedHandle);
    }

    // Serialized key-group prefixes delimiting the range that is copied into the base DB.
    byte[] rangeStartPrefix = new byte[keyGroupPrefixBytes];
    CompositeKeySerializationUtils.serializeKeyGroup(
            keyGroupRange.getStartKeyGroup(), rangeStartPrefix);

    byte[] rangeStopPrefix = new byte[keyGroupPrefixBytes];
    CompositeKeySerializationUtils.serializeKeyGroup(
            keyGroupRange.getEndKeyGroup() + 1, rangeStopPrefix);

    for (KeyedStateHandle rawStateHandle : restoreStateHandles) {

        boolean isRemoteIncremental = rawStateHandle instanceof IncrementalRemoteKeyedStateHandle;
        if (!isRemoteIncremental) {
            throw unexpectedStateHandleException(
                    IncrementalRemoteKeyedStateHandle.class, rawStateHandle.getClass());
        }

        logger.info("Starting to restore from state handle: {} with rescaling.", rawStateHandle);

        // Each temporary instance lives in its own uniquely named sub-directory.
        String temporaryDirectoryName = UUID.randomUUID().toString();
        Path temporaryRestoreInstancePath =
                instanceBasePath.getAbsoluteFile().toPath().resolve(temporaryDirectoryName);

        // Resource order matters: the write batch is declared last and is therefore closed
        // first, before the temporary DB instance it copies from.
        try (RestoredDBInstance tmpRestoreDBInfo =
                        restoreDBInstanceFromStateHandle(
                                (IncrementalRemoteKeyedStateHandle) rawStateHandle,
                                temporaryRestoreInstancePath);
                RocksDBWriteBatchWrapper writeBatchWrapper =
                        new RocksDBWriteBatchWrapper(this.rocksHandle.getDb(), writeBatchSize)) {

            // Walk the descriptors by index: position i pairs a temporary column family handle
            // with the state meta info snapshot used to find (or register) its target handle.
            for (int cf = 0; cf < tmpRestoreDBInfo.columnFamilyDescriptors.size(); ++cf) {
                ColumnFamilyHandle sourceHandle = tmpRestoreDBInfo.columnFamilyHandles.get(cf);
                ColumnFamilyHandle targetHandle =
                        this.rocksHandle.getOrRegisterStateColumnFamilyHandle(
                                        null, tmpRestoreDBInfo.stateMetaInfoSnapshots.get(cf))
                                .columnFamilyHandle;

                // The iterator is closed by try-with-resources once this column family is done.
                try (RocksIteratorWrapper iterator =
                        RocksDBOperationUtils.getRocksIterator(
                                tmpRestoreDBInfo.db, sourceHandle, tmpRestoreDBInfo.readOptions)) {

                    iterator.seek(rangeStartPrefix);

                    // Copy entries until the iterator is exhausted or the first key that is not
                    // before the stop prefix is reached; nothing after that key is copied.
                    while (iterator.isValid()
                            && RocksDBIncrementalCheckpointUtils.beforeThePrefixBytes(
                                    iterator.key(), rangeStopPrefix)) {
                        writeBatchWrapper.put(targetHandle, iterator.key(), iterator.value());
                        iterator.next();
                    }
                }
            }

            logger.info("Finished restoring from state handle: {} with rescaling.", rawStateHandle);
        } finally {
            cleanUpPathQuietly(temporaryRestoreInstancePath);
        }
    }
}
Also used : Path(java.nio.file.Path) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) RocksDBWriteBatchWrapper(org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper) IncrementalKeyedStateHandle(org.apache.flink.runtime.state.IncrementalKeyedStateHandle) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) IncrementalLocalKeyedStateHandle(org.apache.flink.runtime.state.IncrementalLocalKeyedStateHandle) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle) RocksIteratorWrapper(org.apache.flink.contrib.streaming.state.RocksIteratorWrapper)

Example 2 with RocksDBWriteBatchWrapper

use of org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper in project flink by apache.

the class RocksDBHeapTimersFullRestoreOperation method restoreKVStateData.

/**
 * Restores the data of every key-group delivered by {@code keyGroups}.
 *
 * <p>Each entry is routed by its KV-state id: if a restored priority queue exists for that id,
 * the entry is handed to {@code restoreQueueElement}; otherwise, if a column family is
 * registered for the id, the entry is written to that column family through a write batch.
 *
 * @param keyGroups iterator over the key-groups to restore
 * @param columnFamilies column family handles keyed by KV-state id
 * @param restoredPQStates restored heap priority queue states keyed by KV-state id
 * @throws IllegalStateException if an entry's state id maps to neither a priority queue nor a
 *     column family
 */
private void restoreKVStateData(ThrowingIterator<KeyGroup> keyGroups, Map<Integer, ColumnFamilyHandle> columnFamilies, Map<Integer, HeapPriorityQueueSnapshotRestoreWrapper<?>> restoredPQStates) throws IOException, RocksDBException, StateMigrationException {
    // The write batch is closed automatically once all key-groups have been processed.
    try (RocksDBWriteBatchWrapper writeBatchWrapper =
            new RocksDBWriteBatchWrapper(this.rocksHandle.getDb(), writeBatchSize)) {

        // Targets for the state id currently being processed; they are only looked up again
        // when the state id changes between consecutive entries.
        ColumnFamilyHandle currentColumnFamily = null;
        HeapPriorityQueueSnapshotRestoreWrapper<HeapPriorityQueueElement> currentQueue = null;

        while (keyGroups.hasNext()) {
            KeyGroup currentGroup = keyGroups.next();

            try (ThrowingIterator<KeyGroupEntry> entries = currentGroup.getKeyGroupEntries()) {
                // Reset per key-group so that the first entry normally triggers a lookup.
                int lastSeenStateId = -1;

                while (entries.hasNext()) {
                    KeyGroupEntry entry = entries.next();
                    int kvStateId = entry.getKvStateId();

                    if (lastSeenStateId != kvStateId) {
                        lastSeenStateId = kvStateId;
                        currentColumnFamily = columnFamilies.get(kvStateId);
                        currentQueue = getRestoredPQ(restoredPQStates, kvStateId);
                    }

                    // A restored priority queue takes precedence over a column family.
                    if (currentQueue != null) {
                        restoreQueueElement(currentQueue, entry);
                        continue;
                    }

                    if (currentColumnFamily == null) {
                        throw new IllegalStateException("Unknown state id: " + kvStateId);
                    }

                    writeBatchWrapper.put(currentColumnFamily, entry.getKey(), entry.getValue());
                }
            }
        }
    }
}
Also used : KeyGroupEntry(org.apache.flink.runtime.state.restore.KeyGroupEntry) KeyGroup(org.apache.flink.runtime.state.restore.KeyGroup) RocksDBWriteBatchWrapper(org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper) HeapPriorityQueueElement(org.apache.flink.runtime.state.heap.HeapPriorityQueueElement) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle)

Example 3 with RocksDBWriteBatchWrapper

use of org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper in project flink by apache.

the class RocksDBFullRestoreOperation method restoreKVStateData.

/**
 * Restore the KV-state / ColumnFamily data for all key-groups referenced by the current state
 * handle.
 *
 * <p>Every entry is written, through a write batch, to the column family registered in
 * {@code columnFamilies} under the entry's KV-state id.
 *
 * @param keyGroups iterator over the key-groups to restore
 * @param columnFamilies column family handles keyed by KV-state id
 * @throws IllegalStateException if an entry refers to a state id for which no column family
 *     handle is available
 */
private void restoreKVStateData(ThrowingIterator<KeyGroup> keyGroups, Map<Integer, ColumnFamilyHandle> columnFamilies) throws IOException, RocksDBException, StateMigrationException {
    // for all key-groups in the current state handle...
    try (RocksDBWriteBatchWrapper writeBatchWrapper = new RocksDBWriteBatchWrapper(this.rocksHandle.getDb(), writeBatchSize)) {
        ColumnFamilyHandle handle = null;
        while (keyGroups.hasNext()) {
            KeyGroup keyGroup = keyGroups.next();
            try (ThrowingIterator<KeyGroupEntry> groupEntries = keyGroup.getKeyGroupEntries()) {
                // Reset per key-group so the first entry normally triggers a handle lookup.
                int oldKvStateId = -1;
                while (groupEntries.hasNext()) {
                    KeyGroupEntry groupEntry = groupEntries.next();
                    int kvStateId = groupEntry.getKvStateId();
                    // Only look the handle up again when the state id changes between entries.
                    if (kvStateId != oldKvStateId) {
                        oldKvStateId = kvStateId;
                        handle = columnFamilies.get(kvStateId);
                    }
                    // Fail with a descriptive error instead of passing a null handle on to
                    // the write batch when the state id has no registered column family.
                    if (handle == null) {
                        throw new IllegalStateException("Unknown state id: " + kvStateId);
                    }
                    writeBatchWrapper.put(handle, groupEntry.getKey(), groupEntry.getValue());
                }
            }
        }
    }
}
Also used : KeyGroupEntry(org.apache.flink.runtime.state.restore.KeyGroupEntry) KeyGroup(org.apache.flink.runtime.state.restore.KeyGroup) RocksDBWriteBatchWrapper(org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle)

Aggregations

RocksDBWriteBatchWrapper (org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper): 3, ColumnFamilyHandle (org.rocksdb.ColumnFamilyHandle): 3, KeyGroup (org.apache.flink.runtime.state.restore.KeyGroup): 2, KeyGroupEntry (org.apache.flink.runtime.state.restore.KeyGroupEntry): 2, Path (java.nio.file.Path): 1, RocksIteratorWrapper (org.apache.flink.contrib.streaming.state.RocksIteratorWrapper): 1, IncrementalKeyedStateHandle (org.apache.flink.runtime.state.IncrementalKeyedStateHandle): 1, IncrementalLocalKeyedStateHandle (org.apache.flink.runtime.state.IncrementalLocalKeyedStateHandle): 1, IncrementalRemoteKeyedStateHandle (org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle): 1, KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle): 1, HeapPriorityQueueElement (org.apache.flink.runtime.state.heap.HeapPriorityQueueElement): 1, ColumnFamilyDescriptor (org.rocksdb.ColumnFamilyDescriptor): 1