
Example 1 with ColumnFamilyDescriptor

Use of org.rocksdb.ColumnFamilyDescriptor in project flink by apache.

The class RocksDBKeyedStateBackend, method getColumnFamily.

// ------------------------------------------------------------------------
//  State factories
// ------------------------------------------------------------------------
/**
 * Creates a column family handle for use with a k/v state. When restoring from a snapshot
 * we don't restore the individual k/v states, just the global RocksDB database and the
 * list of column families. When a k/v state is first requested, we check whether we
 * already have a column family for it and return that, or create a new one if it doesn't exist.
 *
 * <p>This also checks whether the {@link StateDescriptor} for a state matches the one
 * that we checkpointed, i.e. is already in the map of column families.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
protected <N, S> ColumnFamilyHandle getColumnFamily(StateDescriptor<?, S> descriptor, TypeSerializer<N> namespaceSerializer) throws IOException {
    Tuple2<ColumnFamilyHandle, RegisteredBackendStateMetaInfo<?, ?>> stateInfo = kvStateInformation.get(descriptor.getName());
    RegisteredBackendStateMetaInfo<N, S> newMetaInfo = new RegisteredBackendStateMetaInfo<>(descriptor.getType(), descriptor.getName(), namespaceSerializer, descriptor.getSerializer());
    if (stateInfo != null) {
        if (newMetaInfo.isCompatibleWith(stateInfo.f1)) {
            stateInfo.f1 = newMetaInfo;
            return stateInfo.f0;
        } else {
            throw new IOException("Trying to access state using wrong meta info, was " + stateInfo.f1 + " trying access with " + newMetaInfo);
        }
    }
    ColumnFamilyDescriptor columnDescriptor = new ColumnFamilyDescriptor(descriptor.getName().getBytes(ConfigConstants.DEFAULT_CHARSET), columnOptions);
    try {
        ColumnFamilyHandle columnFamily = db.createColumnFamily(columnDescriptor);
        Tuple2<ColumnFamilyHandle, RegisteredBackendStateMetaInfo<N, S>> tuple = new Tuple2<>(columnFamily, newMetaInfo);
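        // use a raw Map to sidestep the wildcard generics on kvStateInformation for this put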
        Map rawAccess = kvStateInformation;
        rawAccess.put(descriptor.getName(), tuple);
        return columnFamily;
    } catch (RocksDBException e) {
        throw new IOException("Error creating ColumnFamilyHandle.", e);
    }
}
Also used: RocksDBException(org.rocksdb.RocksDBException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RegisteredBackendStateMetaInfo(org.apache.flink.runtime.state.RegisteredBackendStateMetaInfo) IOException(java.io.IOException) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor) Map(java.util.Map) HashMap(java.util.HashMap) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle)
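
The lazy create-or-reuse pattern above is Flink-internal, but the same idea works against a plain RocksDB instance. Here is a minimal sketch, assuming the caller supplies an open RocksDB and shared ColumnFamilyOptions; the class and method names are illustrative, not Flink API:

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public final class ColumnFamilyCache {

    private final RocksDB db;
    private final ColumnFamilyOptions columnOptions;
    private final Map<String, ColumnFamilyHandle> handles = new HashMap<>();

    public ColumnFamilyCache(RocksDB db, ColumnFamilyOptions columnOptions) {
        this.db = db;
        this.columnOptions = columnOptions;
    }

    // Returns the cached handle for the given name, creating the column family
    // on first access, analogous to getColumnFamily above.
    public synchronized ColumnFamilyHandle getOrCreate(String name) throws RocksDBException {
        ColumnFamilyHandle existing = handles.get(name);
        if (existing != null) {
            return existing;
        }
        ColumnFamilyHandle created = db.createColumnFamily(
                new ColumnFamilyDescriptor(name.getBytes(StandardCharsets.UTF_8), columnOptions));
        handles.put(name, created);
        return created;
    }
}

Unlike the Flink version, this sketch caches only the handle and skips the metadata-compatibility check that guards against mismatched state descriptors.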

Example 2 with ColumnFamilyDescriptor

Use of org.rocksdb.ColumnFamilyDescriptor in project flink by apache.

The class RocksDBMergeIteratorTest, method testMergeIterator.

public void testMergeIterator(int maxParallelism) throws Exception {
    Random random = new Random(1234);
    File tmpDir = CommonTestUtils.createTempDirectory();
    RocksDB rocksDB = RocksDB.open(tmpDir.getAbsolutePath());
    try {
        List<Tuple2<RocksIterator, Integer>> rocksIteratorsWithKVStateId = new ArrayList<>();
        List<Tuple2<ColumnFamilyHandle, Integer>> columnFamilyHandlesWithKeyCount = new ArrayList<>();
        int totalKeysExpected = 0;
        for (int c = 0; c < NUM_KEY_VAL_STATES; ++c) {
            ColumnFamilyHandle handle = rocksDB.createColumnFamily(new ColumnFamilyDescriptor(("column-" + c).getBytes(ConfigConstants.DEFAULT_CHARSET)));
            ByteArrayOutputStreamWithPos bos = new ByteArrayOutputStreamWithPos();
            DataOutputStream dos = new DataOutputStream(bos);
            int numKeys = random.nextInt(MAX_NUM_KEYS + 1);
            for (int i = 0; i < numKeys; ++i) {
                if (maxParallelism <= Byte.MAX_VALUE) {
                    dos.writeByte(i);
                } else {
                    dos.writeShort(i);
                }
                dos.writeInt(i);
                byte[] key = bos.toByteArray();
                byte[] val = new byte[] { 42 };
                rocksDB.put(handle, key, val);
                bos.reset();
            }
            columnFamilyHandlesWithKeyCount.add(new Tuple2<>(handle, numKeys));
            totalKeysExpected += numKeys;
        }
        int id = 0;
        for (Tuple2<ColumnFamilyHandle, Integer> columnFamilyHandle : columnFamilyHandlesWithKeyCount) {
            rocksIteratorsWithKVStateId.add(new Tuple2<>(rocksDB.newIterator(columnFamilyHandle.f0), id));
            ++id;
        }
        RocksDBKeyedStateBackend.RocksDBMergeIterator mergeIterator = new RocksDBKeyedStateBackend.RocksDBMergeIterator(rocksIteratorsWithKVStateId, maxParallelism <= Byte.MAX_VALUE ? 1 : 2);
        int prevKVState = -1;
        int prevKey = -1;
        int prevKeyGroup = -1;
        int totalKeysActual = 0;
        while (mergeIterator.isValid()) {
            ByteBuffer bb = ByteBuffer.wrap(mergeIterator.key());
            int keyGroup = maxParallelism > Byte.MAX_VALUE ? bb.getShort() : bb.get();
            int key = bb.getInt();
            Assert.assertTrue(keyGroup >= prevKeyGroup);
            Assert.assertTrue(key >= prevKey);
            Assert.assertEquals(prevKeyGroup != keyGroup, mergeIterator.isNewKeyGroup());
            Assert.assertEquals(prevKVState != mergeIterator.kvStateId(), mergeIterator.isNewKeyValueState());
            prevKeyGroup = keyGroup;
            prevKVState = mergeIterator.kvStateId();
            //System.out.println(keyGroup + " " + key + " " + mergeIterator.kvStateId());
            mergeIterator.next();
            ++totalKeysActual;
        }
        Assert.assertEquals(totalKeysExpected, totalKeysActual);
        for (Tuple2<ColumnFamilyHandle, Integer> handleWithCount : columnFamilyHandlesWithKeyCount) {
            rocksDB.dropColumnFamily(handleWithCount.f0);
        }
    } finally {
        rocksDB.close();
    }
}
Also used: RocksDB(org.rocksdb.RocksDB) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor) ByteBuffer(java.nio.ByteBuffer) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle) Random(java.util.Random) Tuple2(org.apache.flink.api.java.tuple.Tuple2) File(java.io.File) ByteArrayOutputStreamWithPos(org.apache.flink.core.memory.ByteArrayOutputStreamWithPos)
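
The test depends on a specific byte layout: each RocksDB key is a key-group prefix (one byte when maxParallelism fits in a byte, two bytes otherwise) followed by the 4-byte serialized key, which is why the read side uses bb.get() or bb.getShort() and then bb.getInt(). A minimal sketch of that layout, with a hypothetical helper name:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public final class KeyGroupKeyLayout {

    // Builds the composite key the test writes: a 1-byte key-group prefix when
    // maxParallelism fits in a byte, otherwise a 2-byte prefix, then the 4-byte key.
    public static byte[] compositeKey(int keyGroup, int key, int maxParallelism) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        if (maxParallelism <= Byte.MAX_VALUE) {
            dos.writeByte(keyGroup);
        } else {
            dos.writeShort(keyGroup);
        }
        dos.writeInt(key);
        return bos.toByteArray();
    }
}

Because the key-group prefix is the most significant part of the key, RocksDB's lexicographic ordering sorts entries by key group first, which is what the merge iterator's isNewKeyGroup() assertion exercises.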

Example 3 with ColumnFamilyDescriptor

Use of org.rocksdb.ColumnFamilyDescriptor in project bookkeeper by apache.

The class RocksdbKVStore, method openRocksdb.

protected static Pair<RocksDB, List<ColumnFamilyHandle>> openRocksdb(File dir, DBOptions options, ColumnFamilyOptions cfOpts) throws StateStoreException {
    ColumnFamilyDescriptor metaDesc = new ColumnFamilyDescriptor(METADATA_CF, cfOpts);
    ColumnFamilyDescriptor dataDesc = new ColumnFamilyDescriptor(DATA_CF, cfOpts);
    try {
        // make sure the db directory's parent dir is created
        Files.createDirectories(dir.toPath());
        File dbDir = new File(dir, "current");
        if (!dbDir.exists()) {
            // empty state
            String uuid = UUID.randomUUID().toString();
            Path checkpointPath = Paths.get(dir.getAbsolutePath(), "checkpoints", uuid);
            Files.createDirectories(checkpointPath);
            Files.createSymbolicLink(Paths.get(dbDir.getAbsolutePath()), checkpointPath);
        }
        List<ColumnFamilyHandle> cfHandles = Lists.newArrayListWithExpectedSize(2);
        RocksDB db = RocksDB.open(options, dbDir.getAbsolutePath(), Lists.newArrayList(metaDesc, dataDesc), cfHandles);
        return Pair.of(db, cfHandles);
    } catch (IOException ioe) {
        log.error("Failed to create parent directory {} for opening rocksdb", dir.getParentFile().toPath(), ioe);
        throw new StateStoreException(ioe);
    } catch (RocksDBException dbe) {
        log.error("Failed to open rocksdb at dir {}", dir.getAbsolutePath(), dbe);
        throw new StateStoreException(dbe);
    }
}
Also used: Path(java.nio.file.Path) StateStoreException(org.apache.bookkeeper.statelib.api.exceptions.StateStoreException) InvalidStateStoreException(org.apache.bookkeeper.statelib.api.exceptions.InvalidStateStoreException) RocksDBException(org.rocksdb.RocksDBException) RocksDB(org.rocksdb.RocksDB) IOException(java.io.IOException) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor) File(java.io.File) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle)
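
For comparison, here is a self-contained sketch of opening a RocksDB with explicit column families using only the public RocksDB Java API; the path and family names are placeholders. RocksDB requires the default column family to appear in the descriptor list, and handles should be closed before the database:

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.DBOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public final class OpenWithColumnFamilies {

    public static void main(String[] args) throws RocksDBException {
        RocksDB.loadLibrary();
        try (ColumnFamilyOptions cfOpts = new ColumnFamilyOptions();
             DBOptions dbOpts = new DBOptions()
                     .setCreateIfMissing(true)
                     .setCreateMissingColumnFamilies(true)) {
            // The default column family must always appear in the descriptor list.
            List<ColumnFamilyDescriptor> descriptors = Arrays.asList(
                    new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts),
                    new ColumnFamilyDescriptor("meta".getBytes(StandardCharsets.UTF_8), cfOpts),
                    new ColumnFamilyDescriptor("data".getBytes(StandardCharsets.UTF_8), cfOpts));
            List<ColumnFamilyHandle> handles = new ArrayList<>(descriptors.size());
            // open() fills 'handles' in the same order as 'descriptors'
            RocksDB db = RocksDB.open(dbOpts, "/tmp/cf-demo", descriptors, handles);
            try {
                db.put(handles.get(2), "k".getBytes(StandardCharsets.UTF_8),
                        "v".getBytes(StandardCharsets.UTF_8));
            } finally {
                // close column family handles before closing the database
                for (ColumnFamilyHandle handle : handles) {
                    handle.close();
                }
                db.close();
            }
        }
    }
}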

Example 4 with ColumnFamilyDescriptor

Use of org.rocksdb.ColumnFamilyDescriptor in project flink by apache.

The class RocksDBIncrementalRestoreOperation, method restoreWithRescaling.

/**
 * Recovery from multiple incremental states with rescaling. For rescaling, this method creates a
 * temporary RocksDB instance for each key-group shard. All contents from the temporary instance
 * are copied into the real restore instance and then the temporary instance is discarded.
 */
private void restoreWithRescaling(Collection<KeyedStateHandle> restoreStateHandles) throws Exception {
    // Prepare for restore with rescaling
    KeyedStateHandle initialHandle = RocksDBIncrementalCheckpointUtils.chooseTheBestStateHandleForInitial(restoreStateHandles, keyGroupRange);
    // Init base DB instance
    if (initialHandle != null) {
        restoreStateHandles.remove(initialHandle);
        initDBWithRescaling(initialHandle);
    } else {
        this.rocksHandle.openDB();
    }
    // Transfer remaining key-groups from temporary instance into base DB
    byte[] startKeyGroupPrefixBytes = new byte[keyGroupPrefixBytes];
    CompositeKeySerializationUtils.serializeKeyGroup(keyGroupRange.getStartKeyGroup(), startKeyGroupPrefixBytes);
    byte[] stopKeyGroupPrefixBytes = new byte[keyGroupPrefixBytes];
    CompositeKeySerializationUtils.serializeKeyGroup(keyGroupRange.getEndKeyGroup() + 1, stopKeyGroupPrefixBytes);
    for (KeyedStateHandle rawStateHandle : restoreStateHandles) {
        if (!(rawStateHandle instanceof IncrementalRemoteKeyedStateHandle)) {
            throw unexpectedStateHandleException(IncrementalRemoteKeyedStateHandle.class, rawStateHandle.getClass());
        }
        logger.info("Starting to restore from state handle: {} with rescaling.", rawStateHandle);
        Path temporaryRestoreInstancePath = instanceBasePath.getAbsoluteFile().toPath().resolve(UUID.randomUUID().toString());
        try (RestoredDBInstance tmpRestoreDBInfo = restoreDBInstanceFromStateHandle((IncrementalRemoteKeyedStateHandle) rawStateHandle, temporaryRestoreInstancePath);
            RocksDBWriteBatchWrapper writeBatchWrapper = new RocksDBWriteBatchWrapper(this.rocksHandle.getDb(), writeBatchSize)) {
            List<ColumnFamilyDescriptor> tmpColumnFamilyDescriptors = tmpRestoreDBInfo.columnFamilyDescriptors;
            List<ColumnFamilyHandle> tmpColumnFamilyHandles = tmpRestoreDBInfo.columnFamilyHandles;
            // iterating only the requested descriptors automatically skips the default column family handle
            for (int i = 0; i < tmpColumnFamilyDescriptors.size(); ++i) {
                ColumnFamilyHandle tmpColumnFamilyHandle = tmpColumnFamilyHandles.get(i);
                ColumnFamilyHandle targetColumnFamilyHandle = this.rocksHandle.getOrRegisterStateColumnFamilyHandle(null, tmpRestoreDBInfo.stateMetaInfoSnapshots.get(i)).columnFamilyHandle;
                try (RocksIteratorWrapper iterator = RocksDBOperationUtils.getRocksIterator(tmpRestoreDBInfo.db, tmpColumnFamilyHandle, tmpRestoreDBInfo.readOptions)) {
                    iterator.seek(startKeyGroupPrefixBytes);
                    while (iterator.isValid()) {
                        if (RocksDBIncrementalCheckpointUtils.beforeThePrefixBytes(iterator.key(), stopKeyGroupPrefixBytes)) {
                            writeBatchWrapper.put(targetColumnFamilyHandle, iterator.key(), iterator.value());
                        } else {
                            // the iterator visits records in sorted key order, so once
                            // past the stop key-group prefix we can just break here
                            break;
                        }
                        iterator.next();
                    }
                } // releases native iterator resources
            }
            logger.info("Finished restoring from state handle: {} with rescaling.", rawStateHandle);
        } finally {
            cleanUpPathQuietly(temporaryRestoreInstancePath);
        }
    }
}
Also used: Path(java.nio.file.Path) IncrementalRemoteKeyedStateHandle(org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle) RocksDBWriteBatchWrapper(org.apache.flink.contrib.streaming.state.RocksDBWriteBatchWrapper) IncrementalKeyedStateHandle(org.apache.flink.runtime.state.IncrementalKeyedStateHandle) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) IncrementalLocalKeyedStateHandle(org.apache.flink.runtime.state.IncrementalLocalKeyedStateHandle) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle) RocksIteratorWrapper(org.apache.flink.contrib.streaming.state.RocksIteratorWrapper)
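
The copy loop above depends on Flink-internal wrappers, but the underlying pattern (seek to a start prefix, copy until the stop prefix, batch the writes) can be expressed with the plain RocksDB Java API. A minimal sketch under that assumption: beforePrefix here is my own unsigned-lexicographic comparison standing in for what RocksDBIncrementalCheckpointUtils.beforeThePrefixBytes presumably does, and all names are illustrative. Unlike Flink's capacity-limited RocksDBWriteBatchWrapper, this accumulates the whole range in one batch, so it only suits modest ranges:

import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.RocksIterator;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;

public final class RangeCopy {

    // Unsigned lexicographic check: is 'key' strictly before 'stopPrefix'?
    private static boolean beforePrefix(byte[] key, byte[] stopPrefix) {
        int limit = Math.min(key.length, stopPrefix.length);
        for (int i = 0; i < limit; i++) {
            int cmp = (key[i] & 0xFF) - (stopPrefix[i] & 0xFF);
            if (cmp != 0) {
                return cmp < 0;
            }
        }
        return key.length < stopPrefix.length; // a shorter key sorts first
    }

    // Copies all entries in [startPrefix, stopPrefix) from a source column family
    // into a target column family via a single write batch.
    public static void copyRange(RocksDB sourceDb, ColumnFamilyHandle sourceCf,
                                 RocksDB targetDb, ColumnFamilyHandle targetCf,
                                 byte[] startPrefix, byte[] stopPrefix) throws RocksDBException {
        try (RocksIterator iterator = sourceDb.newIterator(sourceCf);
             WriteBatch batch = new WriteBatch();
             WriteOptions writeOptions = new WriteOptions()) {
            iterator.seek(startPrefix);
            while (iterator.isValid() && beforePrefix(iterator.key(), stopPrefix)) {
                batch.put(targetCf, iterator.key(), iterator.value());
                iterator.next();
            }
            targetDb.write(writeOptions, batch);
        }
    }
}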

Example 5 with ColumnFamilyDescriptor

Use of org.rocksdb.ColumnFamilyDescriptor in project flink by apache.

The class RocksDBIncrementalRestoreOperation, method createColumnFamilyDescriptors.

/**
 * This method recreates and registers all {@link ColumnFamilyDescriptor}s from Flink's state
 * metadata snapshot.
 */
private List<ColumnFamilyDescriptor> createColumnFamilyDescriptors(List<StateMetaInfoSnapshot> stateMetaInfoSnapshots, boolean registerTtlCompactFilter) {
    List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>(stateMetaInfoSnapshots.size());
    for (StateMetaInfoSnapshot stateMetaInfoSnapshot : stateMetaInfoSnapshots) {
        RegisteredStateMetaInfoBase metaInfoBase = RegisteredStateMetaInfoBase.fromMetaInfoSnapshot(stateMetaInfoSnapshot);
        ColumnFamilyDescriptor columnFamilyDescriptor = RocksDBOperationUtils.createColumnFamilyDescriptor(metaInfoBase, this.rocksHandle.getColumnFamilyOptionsFactory(), registerTtlCompactFilter ? this.rocksHandle.getTtlCompactFiltersManager() : null, this.rocksHandle.getWriteBufferManagerCapacity());
        columnFamilyDescriptors.add(columnFamilyDescriptor);
    }
    return columnFamilyDescriptors;
}
Also used: RegisteredStateMetaInfoBase(org.apache.flink.runtime.state.RegisteredStateMetaInfoBase) ArrayList(java.util.ArrayList) StateMetaInfoSnapshot(org.apache.flink.runtime.state.metainfo.StateMetaInfoSnapshot) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor)
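
Outside Flink, this recreate-descriptors step reduces to mapping state names to descriptors with per-family options. A minimal sketch, assuming a caller-provided options factory; nothing here is Flink API and the class name is hypothetical:

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyOptions;

public final class DescriptorFactory {

    // Builds one descriptor per state name, asking the factory for per-family options,
    // mirroring the createColumnFamilyDescriptors loop above.
    public static List<ColumnFamilyDescriptor> fromStateNames(
            List<String> stateNames, Function<String, ColumnFamilyOptions> optionsFactory) {
        List<ColumnFamilyDescriptor> descriptors = new ArrayList<>(stateNames.size());
        for (String name : stateNames) {
            descriptors.add(new ColumnFamilyDescriptor(
                    name.getBytes(StandardCharsets.UTF_8), optionsFactory.apply(name)));
        }
        return descriptors;
    }
}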

Aggregations

ColumnFamilyDescriptor (org.rocksdb.ColumnFamilyDescriptor): 29 usages
ColumnFamilyHandle (org.rocksdb.ColumnFamilyHandle): 25 usages
ArrayList (java.util.ArrayList): 16 usages
RocksDB (org.rocksdb.RocksDB): 13 usages
RocksDBException (org.rocksdb.RocksDBException): 11 usages
DBOptions (org.rocksdb.DBOptions): 10 usages
ColumnFamilyOptions (org.rocksdb.ColumnFamilyOptions): 9 usages
File (java.io.File): 7 usages
WriteOptions (org.rocksdb.WriteOptions): 5 usages
IOException (java.io.IOException): 3 usages
Map (java.util.Map): 3 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 3 usages
Test (org.junit.Test): 3 usages
ReadOptions (org.rocksdb.ReadOptions): 3 usages
DataOutputStream (java.io.DataOutputStream): 2 usages
ByteBuffer (java.nio.ByteBuffer): 2 usages
Path (java.nio.file.Path): 2 usages
Random (java.util.Random): 2 usages
ByteArrayOutputStreamWithPos (org.apache.flink.core.memory.ByteArrayOutputStreamWithPos): 2 usages
RegisteredStateMetaInfoBase (org.apache.flink.runtime.state.RegisteredStateMetaInfoBase): 2 usages