use of org.rocksdb.ColumnFamilyDescriptor in project flink by apache.
the class RocksDBKeyedStateBackend method getColumnFamily.
// ------------------------------------------------------------------------
// State factories
// ------------------------------------------------------------------------
/**
 * Creates a column family handle for use with a k/v state. When restoring from a snapshot
 * we don't restore the individual k/v states, just the global RocksDB database and the
 * list of column families. When a k/v state is first requested, we check here whether we
 * already have a column family for that state and return it, or create a new one if it
 * doesn't exist.
 *
 * <p>This also checks whether the {@link StateDescriptor} for a state matches the one
 * that we checkpointed, i.e. is already in the map of column families.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
protected <N, S> ColumnFamilyHandle getColumnFamily(StateDescriptor<?, S> descriptor, TypeSerializer<N> namespaceSerializer) throws IOException {
    Tuple2<ColumnFamilyHandle, RegisteredBackendStateMetaInfo<?, ?>> stateInfo = kvStateInformation.get(descriptor.getName());
    RegisteredBackendStateMetaInfo<N, S> newMetaInfo = new RegisteredBackendStateMetaInfo<>(descriptor.getType(), descriptor.getName(), namespaceSerializer, descriptor.getSerializer());
    if (stateInfo != null) {
        if (newMetaInfo.isCompatibleWith(stateInfo.f1)) {
            stateInfo.f1 = newMetaInfo;
            return stateInfo.f0;
        } else {
            throw new IOException("Trying to access state using wrong meta info, was " + stateInfo.f1 + ", trying to access with " + newMetaInfo);
        }
    }
    ColumnFamilyDescriptor columnDescriptor = new ColumnFamilyDescriptor(descriptor.getName().getBytes(ConfigConstants.DEFAULT_CHARSET), columnOptions);
    try {
        ColumnFamilyHandle columnFamily = db.createColumnFamily(columnDescriptor);
        Tuple2<ColumnFamilyHandle, RegisteredBackendStateMetaInfo<N, S>> tuple = new Tuple2<>(columnFamily, newMetaInfo);
        // raw access side-steps the generics on kvStateInformation, whose value type
        // uses wildcards while this tuple is parameterized with <N, S>
        Map rawAccess = kvStateInformation;
        rawAccess.put(descriptor.getName(), tuple);
        return columnFamily;
    } catch (RocksDBException e) {
        throw new IOException("Error creating ColumnFamilyHandle.", e);
    }
}
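For context, a minimal sketch of how a state-factory method might invoke getColumnFamily. The state name and the choice of VoidNamespaceSerializer are illustrative assumptions, not taken from the snippet above; ValueStateDescriptor, LongSerializer, and VoidNamespaceSerializer are standard Flink classes.

// Hypothetical caller: obtain (or lazily create) the column family backing a value state.
ValueStateDescriptor<Long> stateDesc =
        new ValueStateDescriptor<>("my-counter", LongSerializer.INSTANCE); // name is illustrative

// Returns the checkpointed column family if one exists and the meta info is
// compatible; otherwise creates a new column family named after the descriptor.
ColumnFamilyHandle columnFamily =
        getColumnFamily(stateDesc, VoidNamespaceSerializer.INSTANCE);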
use of org.rocksdb.ColumnFamilyDescriptor in project flink by apache.
the class RocksDBMergeIteratorTest method testMergeIterator.
public void testMergeIterator(int maxParallelism) throws Exception {
    Random random = new Random(1234);
    File tmpDir = CommonTestUtils.createTempDirectory();
    RocksDB rocksDB = RocksDB.open(tmpDir.getAbsolutePath());
    try {
        List<Tuple2<RocksIterator, Integer>> rocksIteratorsWithKVStateId = new ArrayList<>();
        List<Tuple2<ColumnFamilyHandle, Integer>> columnFamilyHandlesWithKeyCount = new ArrayList<>();
        int totalKeysExpected = 0;
        for (int c = 0; c < NUM_KEY_VAL_STATES; ++c) {
            ColumnFamilyHandle handle = rocksDB.createColumnFamily(new ColumnFamilyDescriptor(("column-" + c).getBytes(ConfigConstants.DEFAULT_CHARSET)));
            ByteArrayOutputStreamWithPos bos = new ByteArrayOutputStreamWithPos();
            DataOutputStream dos = new DataOutputStream(bos);
            int numKeys = random.nextInt(MAX_NUM_KEYS + 1);
            for (int i = 0; i < numKeys; ++i) {
                // composite key: key-group prefix (1 or 2 bytes, depending on
                // maxParallelism) followed by the 4-byte key itself
                if (maxParallelism <= Byte.MAX_VALUE) {
                    dos.writeByte(i);
                } else {
                    dos.writeShort(i);
                }
                dos.writeInt(i);
                byte[] key = bos.toByteArray();
                byte[] val = new byte[] { 42 };
                rocksDB.put(handle, key, val);
                bos.reset();
            }
            columnFamilyHandlesWithKeyCount.add(new Tuple2<>(handle, numKeys));
            totalKeysExpected += numKeys;
        }
        int id = 0;
        for (Tuple2<ColumnFamilyHandle, Integer> columnFamilyHandle : columnFamilyHandlesWithKeyCount) {
            rocksIteratorsWithKVStateId.add(new Tuple2<>(rocksDB.newIterator(columnFamilyHandle.f0), id));
            ++id;
        }
        RocksDBKeyedStateBackend.RocksDBMergeIterator mergeIterator = new RocksDBKeyedStateBackend.RocksDBMergeIterator(rocksIteratorsWithKVStateId, maxParallelism <= Byte.MAX_VALUE ? 1 : 2);
        int prevKVState = -1;
        int prevKey = -1;
        int prevKeyGroup = -1;
        int totalKeysActual = 0;
        while (mergeIterator.isValid()) {
            ByteBuffer bb = ByteBuffer.wrap(mergeIterator.key());
            int keyGroup = maxParallelism > Byte.MAX_VALUE ? bb.getShort() : bb.get();
            int key = bb.getInt();
            // the merged stream must be ordered by key group first, then by key
            Assert.assertTrue(keyGroup >= prevKeyGroup);
            Assert.assertTrue(key >= prevKey);
            Assert.assertEquals(prevKeyGroup != keyGroup, mergeIterator.isNewKeyGroup());
            Assert.assertEquals(prevKVState != mergeIterator.kvStateId(), mergeIterator.isNewKeyValueState());
            prevKeyGroup = keyGroup;
            prevKey = key;
            prevKVState = mergeIterator.kvStateId();
            mergeIterator.next();
            ++totalKeysActual;
        }
        Assert.assertEquals(totalKeysExpected, totalKeysActual);
        for (Tuple2<ColumnFamilyHandle, Integer> handleWithCount : columnFamilyHandlesWithKeyCount) {
            rocksDB.dropColumnFamily(handleWithCount.f0);
        }
    } finally {
        rocksDB.close();
    }
}
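In the surrounding test class this helper is presumably driven once per key-group prefix width; a sketch of such a driver (the @Test method names are assumptions, only the parameter values follow from the branching above):

@Test
public void testMergeIteratorByte() throws Exception {
    // 1-byte key-group prefix: maxParallelism fits in a byte
    testMergeIterator(Byte.MAX_VALUE);
}

@Test
public void testMergeIteratorShort() throws Exception {
    // 2-byte key-group prefix: maxParallelism exceeds Byte.MAX_VALUE
    testMergeIterator(Short.MAX_VALUE);
}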
use of org.rocksdb.ColumnFamilyDescriptor in project bookkeeper by apache.
the class RocksdbKVStore method openRocksdb.
protected static Pair<RocksDB, List<ColumnFamilyHandle>> openRocksdb(File dir, DBOptions options, ColumnFamilyOptions cfOpts) throws StateStoreException {
    ColumnFamilyDescriptor metaDesc = new ColumnFamilyDescriptor(METADATA_CF, cfOpts);
    ColumnFamilyDescriptor dataDesc = new ColumnFamilyDescriptor(DATA_CF, cfOpts);
    try {
        // make sure the db directory and its parents are created
        Files.createDirectories(dir.toPath());
        File dbDir = new File(dir, "current");
        if (!dbDir.exists()) {
            // empty state: point "current" at a fresh checkpoint directory
            String uuid = UUID.randomUUID().toString();
            Path checkpointPath = Paths.get(dir.getAbsolutePath(), "checkpoints", uuid);
            Files.createDirectories(checkpointPath);
            Files.createSymbolicLink(Paths.get(dbDir.getAbsolutePath()), checkpointPath);
        }
        List<ColumnFamilyHandle> cfHandles = Lists.newArrayListWithExpectedSize(2);
        RocksDB db = RocksDB.open(options, dbDir.getAbsolutePath(), Lists.newArrayList(metaDesc, dataDesc), cfHandles);
        return Pair.of(db, cfHandles);
    } catch (IOException ioe) {
        log.error("Failed to create directory {} for opening rocksdb", dir.toPath(), ioe);
        throw new StateStoreException(ioe);
    } catch (RocksDBException dbe) {
        log.error("Failed to open rocksdb at dir {}", dir.getAbsolutePath(), dbe);
        throw new StateStoreException(dbe);
    }
}
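A sketch of a typical caller; the path is a placeholder, and the handle indices simply mirror the order in which the two descriptors are passed to RocksDB.open above (metadata first, data second):

DBOptions options = new DBOptions()
        .setCreateIfMissing(true)
        .setCreateMissingColumnFamilies(true);
ColumnFamilyOptions cfOpts = new ColumnFamilyOptions();

Pair<RocksDB, List<ColumnFamilyHandle>> result =
        openRocksdb(new File("/tmp/kv-store"), options, cfOpts); // path is a placeholder
RocksDB db = result.getLeft();
ColumnFamilyHandle metaCf = result.getRight().get(0); // METADATA_CF, first descriptor
ColumnFamilyHandle dataCf = result.getRight().get(1); // DATA_CF, second descriptor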
use of org.rocksdb.ColumnFamilyDescriptor in project flink by apache.
the class RocksDBIncrementalRestoreOperation method restoreWithRescaling.
/**
 * Recovery from multiple incremental states with rescaling. For rescaling, this method
 * creates a temporary RocksDB instance per key-group shard. All contents from the temporary
 * instance are copied into the real restore instance and then the temporary instance is
 * discarded.
 */
private void restoreWithRescaling(Collection<KeyedStateHandle> restoreStateHandles) throws Exception {
    // Prepare for restore with rescaling
    KeyedStateHandle initialHandle = RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(restoreStateHandles, keyGroupRange);
    // Init base DB instance
    if (initialHandle != null) {
        restoreStateHandles.remove(initialHandle);
        initDBWithRescaling(initialHandle);
    } else {
        this.rocksHandle.openDB();
    }
    // Transfer remaining key-groups from temporary instance into base DB
    byte[] startKeyGroupPrefixBytes = new byte[keyGroupPrefixBytes];
    CompositeKeySerializationUtils.serializeKeyGroup(keyGroupRange.getStartKeyGroup(), startKeyGroupPrefixBytes);
    byte[] stopKeyGroupPrefixBytes = new byte[keyGroupPrefixBytes];
    CompositeKeySerializationUtils.serializeKeyGroup(keyGroupRange.getEndKeyGroup() + 1, stopKeyGroupPrefixBytes);
    for (KeyedStateHandle rawStateHandle : restoreStateHandles) {
        if (!(rawStateHandle instanceof IncrementalRemoteKeyedStateHandle)) {
            throw unexpectedStateHandleException(IncrementalRemoteKeyedStateHandle.class, rawStateHandle.getClass());
        }
        logger.info("Starting to restore from state handle: {} with rescaling.", rawStateHandle);
        Path temporaryRestoreInstancePath = instanceBasePath.getAbsoluteFile().toPath().resolve(UUID.randomUUID().toString());
        try (RestoredDBInstance tmpRestoreDBInfo = restoreDBInstanceFromStateHandle((IncrementalRemoteKeyedStateHandle) rawStateHandle, temporaryRestoreInstancePath);
                RocksDBWriteBatchWrapper writeBatchWrapper = new RocksDBWriteBatchWrapper(this.rocksHandle.getDb(), writeBatchSize)) {
            List<ColumnFamilyDescriptor> tmpColumnFamilyDescriptors = tmpRestoreDBInfo.columnFamilyDescriptors;
            List<ColumnFamilyHandle> tmpColumnFamilyHandles = tmpRestoreDBInfo.columnFamilyHandles;
            // iterating only the requested descriptors automatically skips the default column
            // family handle
            for (int i = 0; i < tmpColumnFamilyDescriptors.size(); ++i) {
                ColumnFamilyHandle tmpColumnFamilyHandle = tmpColumnFamilyHandles.get(i);
                ColumnFamilyHandle targetColumnFamilyHandle = this.rocksHandle.getOrRegisterStateColumnFamilyHandle(null, tmpRestoreDBInfo.stateMetaInfoSnapshots.get(i)).columnFamilyHandle;
                try (RocksIteratorWrapper iterator = RocksDBOperationUtils.getRocksIterator(tmpRestoreDBInfo.db, tmpColumnFamilyHandle, tmpRestoreDBInfo.readOptions)) {
                    iterator.seek(startKeyGroupPrefixBytes);
                    while (iterator.isValid()) {
                        if (RocksDBIncrementalCheckpointUtils.beforeThePrefixBytes(iterator.key(), stopKeyGroupPrefixBytes)) {
                            writeBatchWrapper.put(targetColumnFamilyHandle, iterator.key(), iterator.value());
                        } else {
                            // the iterator is ordered, so all remaining keys are outside
                            // the target range and we can just break here
                            break;
                        }
                        iterator.next();
                    }
                }
                // releases native iterator resources
            }
            logger.info("Finished restoring from state handle: {} with rescaling.", rawStateHandle);
        } finally {
            cleanUpPathQuietly(temporaryRestoreInstancePath);
        }
    }
}
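The heart of the copy loop above is a prefix-bounded scan: seek to the first key of the target key-group range and stop at the first key at or beyond the exclusive upper bound. A self-contained sketch of that pattern against the plain RocksDB API, with java.util.Arrays.compareUnsigned (Java 9+) standing in for RocksDBIncrementalCheckpointUtils.beforeThePrefixBytes:

import java.util.Arrays;
import org.rocksdb.*;

/** Copies all entries with keys in [start, stop) from a source column family into a target one. */
static void copyKeyRange(
        RocksDB sourceDb, ColumnFamilyHandle sourceCf,
        RocksDB targetDb, ColumnFamilyHandle targetCf,
        byte[] start, byte[] stop) throws RocksDBException {
    try (ReadOptions readOptions = new ReadOptions();
            RocksIterator iterator = sourceDb.newIterator(sourceCf, readOptions);
            WriteOptions writeOptions = new WriteOptions();
            WriteBatch batch = new WriteBatch()) {
        for (iterator.seek(start); iterator.isValid(); iterator.next()) {
            // RocksDB iterates in ascending key order, so the first key at or
            // beyond the exclusive bound ends the range.
            if (Arrays.compareUnsigned(iterator.key(), stop) >= 0) {
                break;
            }
            batch.put(targetCf, iterator.key(), iterator.value());
        }
        targetDb.write(writeOptions, batch);
    }
}

Unlike Flink's RocksDBWriteBatchWrapper, this sketch accumulates the whole range in one WriteBatch; the wrapper additionally flushes whenever the batch exceeds a configured size.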
use of org.rocksdb.ColumnFamilyDescriptor in project flink by apache.
the class RocksDBIncrementalRestoreOperation method createColumnFamilyDescriptors.
/**
 * This method recreates and registers all {@link ColumnFamilyDescriptor} from Flink's state
 * metadata snapshots.
 */
private List<ColumnFamilyDescriptor> createColumnFamilyDescriptors(List<StateMetaInfoSnapshot> stateMetaInfoSnapshots, boolean registerTtlCompactFilter) {
    List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>(stateMetaInfoSnapshots.size());
    for (StateMetaInfoSnapshot stateMetaInfoSnapshot : stateMetaInfoSnapshots) {
        RegisteredStateMetaInfoBase metaInfoBase = RegisteredStateMetaInfoBase.fromMetaInfoSnapshot(stateMetaInfoSnapshot);
        ColumnFamilyDescriptor columnFamilyDescriptor = RocksDBOperationUtils.createColumnFamilyDescriptor(
                metaInfoBase,
                this.rocksHandle.getColumnFamilyOptionsFactory(),
                registerTtlCompactFilter ? this.rocksHandle.getTtlCompactFiltersManager() : null,
                this.rocksHandle.getWriteBufferManagerCapacity());
        columnFamilyDescriptors.add(columnFamilyDescriptor);
    }
    return columnFamilyDescriptors;
}
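Descriptors produced this way are typically handed to RocksDB.open, which fills a parallel list of handles in the same order, so index i of one list corresponds to index i of the other. A minimal sketch, with the options and path as placeholders:

List<ColumnFamilyDescriptor> descriptors = new ArrayList<>();
// RocksDB requires the default column family to be listed when opening with descriptors.
descriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, new ColumnFamilyOptions()));
descriptors.addAll(createColumnFamilyDescriptors(stateMetaInfoSnapshots, true));

List<ColumnFamilyHandle> handles = new ArrayList<>(descriptors.size());
RocksDB db = RocksDB.open(new DBOptions(), "/path/to/restored/db", descriptors, handles); // path is a placeholder
// handles.get(i) now corresponds to descriptors.get(i).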