Use of voldemort.cluster.Cluster in project voldemort by voldemort.
The class StorageService, method registerEngine.
/**
 * Register the given engine with the storage repository
 *
 * @param engine The storage engine to register
 * @param isReadOnly Boolean indicating if this store is read-only
 * @param storeType The type of the store
 * @param storeDef Store definition for the store to be registered
 */
public void registerEngine(StorageEngine<ByteArray, byte[], byte[]> engine, boolean isReadOnly, String storeType, StoreDefinition storeDef) {
    Cluster cluster = this.metadata.getCluster();
    storeRepository.addStorageEngine(engine);
    /* Now add any store wrappers that are enabled */
    Store<ByteArray, byte[], byte[]> store = engine;
    boolean isMetadata = store.getName().compareTo(MetadataStore.METADATA_STORE_NAME) == 0;
    boolean isSlop = storeType.compareTo("slop") == 0;
    boolean isView = storeType.compareTo(ViewStorageConfiguration.TYPE_NAME) == 0;
    if (voldemortConfig.isVerboseLoggingEnabled())
        store = new LoggingStore<ByteArray, byte[], byte[]>(store, cluster.getName(), SystemTime.INSTANCE);
    if (!isSlop) {
        if (!isReadOnly && !isMetadata && !isView) {
            // Wrap store to enforce retention policy
            if (voldemortConfig.isEnforceRetentionPolicyOnRead() && storeDef != null) {
                RetentionEnforcingStore retentionEnforcingStore = new RetentionEnforcingStore(store, storeDef, voldemortConfig.isDeleteExpiredValuesOnRead(), SystemTime.INSTANCE);
                metadata.addMetadataStoreListener(store.getName(), retentionEnforcingStore);
                store = retentionEnforcingStore;
            }
            if (voldemortConfig.isEnableRebalanceService()) {
                ProxyPutStats proxyPutStats = new ProxyPutStats(aggregatedProxyPutStats);
                if (voldemortConfig.isJmxEnabled()) {
                    JmxUtils.registerMbean(proxyPutStats, JmxUtils.createObjectName("voldemort.store.rebalancing", engine.getName() + "-proxy-puts"));
                }
                store = new RedirectingStore(store, metadata, storeRepository, failureDetector, storeFactory, proxyPutWorkerPool, proxyPutStats);
                if (voldemortConfig.isJmxEnabled()) {
                    MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
                    ObjectName name = null;
                    if (this.voldemortConfig.isEnableJmxClusterName())
                        name = JmxUtils.createObjectName(cluster.getName() + "." + JmxUtils.getPackageName(RedirectingStore.class), store.getName());
                    else
                        name = JmxUtils.createObjectName(JmxUtils.getPackageName(RedirectingStore.class), store.getName());
                    synchronized (mbeanServer) {
                        if (mbeanServer.isRegistered(name))
                            JmxUtils.unregisterMbean(mbeanServer, name);
                        JmxUtils.registerMbean(mbeanServer, JmxUtils.createModelMBean(store), name);
                    }
                }
            }
        }
        if (voldemortConfig.isMetadataCheckingEnabled() && !isMetadata) {
            store = new InvalidMetadataCheckingStore(metadata.getNodeId(), store, metadata);
        }
    }
    if (voldemortConfig.isStatTrackingEnabled()) {
        StatTrackingStore statStore = new StatTrackingStore(store, this.storeStats);
        store = statStore;
        if (voldemortConfig.isJmxEnabled()) {
            MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
            ObjectName name = null;
            if (this.voldemortConfig.isEnableJmxClusterName())
                name = JmxUtils.createObjectName(metadata.getCluster().getName() + "." + JmxUtils.getPackageName(store.getClass()), store.getName());
            else
                name = JmxUtils.createObjectName(JmxUtils.getPackageName(store.getClass()), store.getName());
            synchronized (mbeanServer) {
                if (mbeanServer.isRegistered(name))
                    JmxUtils.unregisterMbean(mbeanServer, name);
                JmxUtils.registerMbean(mbeanServer, JmxUtils.createModelMBean(new StoreStatsJmx(statStore.getStats())), name);
            }
        }
        // Wrap the store with quota limiting (skipping the metadata store)
        if (voldemortConfig.isEnableQuotaLimiting() && !isMetadata) {
            StoreStats currentStoreStats = statStore.getStats();
            FileBackedCachingStorageEngine quotaStore = (FileBackedCachingStorageEngine) storeRepository.getStorageEngine(SystemStoreConstants.SystemStoreName.voldsys$_store_quotas.toString());
            QuotaLimitStats quotaStats = new QuotaLimitStats(this.aggregatedQuotaStats);
            QuotaLimitingStore rateLimitingStore = new QuotaLimitingStore(store, currentStoreStats, quotaStats, quotaStore, metadata);
            if (voldemortConfig.isJmxEnabled()) {
                JmxUtils.registerMbean(quotaStats, JmxUtils.createObjectName("voldemort.store.quota", store.getName() + "-quota-limit-stats"));
            }
            store = rateLimitingStore;
        }
    }
    storeRepository.addLocalStore(store);
}
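The method above composes the request path by repeatedly re-assigning store to a new wrapper around the previous one (logging, retention enforcement, redirection, stat tracking, quota limiting), and only the final composite is registered as the local store. A minimal sketch of that decorator idiom, using a made-up SimpleStore interface rather than Voldemort's real Store<ByteArray, byte[], byte[]>:

import java.util.ArrayList;
import java.util.List;

// Hypothetical, stripped-down store interface used only for this illustration.
interface SimpleStore {
    String get(String key);
    String getName();
}

// A wrapper that adds behaviour (here: trivial call logging) without changing the delegate,
// mirroring how LoggingStore, RedirectingStore, StatTrackingStore, etc. wrap a store.
class LoggingWrapper implements SimpleStore {
    private final SimpleStore inner;
    private final List<String> log = new ArrayList<>();

    LoggingWrapper(SimpleStore inner) { this.inner = inner; }

    @Override
    public String get(String key) {
        log.add("get(" + key + ") on " + inner.getName());
        return inner.get(key);
    }

    @Override
    public String getName() { return inner.getName(); }
}

public class WrapperChainDemo {
    public static void main(String[] args) {
        SimpleStore engine = new SimpleStore() {   // stands in for the raw StorageEngine
            public String get(String key) { return "value-for-" + key; }
            public String getName() { return "test-store"; }
        };
        // Each enabled feature re-assigns the local variable, exactly like
        // "store = new XyzStore(store, ...)" in registerEngine above.
        SimpleStore store = engine;
        store = new LoggingWrapper(store);
        System.out.println(store.get("k1"));
    }
}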
Use of voldemort.cluster.Cluster in project voldemort by voldemort.
The class BlockingSlopPusherJob, method run.
/**
* Loop over entries in the slop table and attempt to push them to the
* deserving server
*/
public void run() {
    // Don't try to run the slop pusher job when rebalancing
    if (metadataStore.getServerStateUnlocked().equals(MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER)) {
        logger.error("Cannot run slop pusher job since Voldemort server is rebalancing");
        return;
    }
    logger.info("Started blocking slop pusher job at " + new Date());
    Cluster cluster = metadataStore.getCluster();
    failureDetector.getConfig().setCluster(cluster);
    Set<String> storeNames = StoreDefinitionUtils.getStoreNamesSet(metadataStore.getStoreDefList());
    ClosableIterator<Pair<ByteArray, Versioned<Slop>>> iterator = null;
    Map<Integer, Long> attemptedByNode = Maps.newHashMapWithExpectedSize(cluster.getNumberOfNodes());
    Map<Integer, Long> succeededByNode = Maps.newHashMapWithExpectedSize(cluster.getNumberOfNodes());
    long slopsPushed = 0L;
    long attemptedPushes = 0L;
    for (Node node : cluster.getNodes()) {
        attemptedByNode.put(node.getId(), 0L);
        succeededByNode.put(node.getId(), 0L);
    }
    acquireRepairPermit();
    try {
        SlopStorageEngine slopStorageEngine = storeRepo.getSlopStore();
        StorageEngine<ByteArray, Slop, byte[]> slopStore = slopStorageEngine.asSlopStore();
        EventThrottler throttler = new EventThrottler(maxWriteBytesPerSec);
        iterator = slopStore.entries();
        while (iterator.hasNext()) {
            if (Thread.interrupted())
                throw new InterruptedException("Slop pusher job cancelled");
            try {
                Pair<ByteArray, Versioned<Slop>> keyAndVal;
                try {
                    keyAndVal = iterator.next();
                } catch (Exception e) {
                    logger.error("Exception in iterator, escaping the loop ", e);
                    break;
                }
                Versioned<Slop> versioned = keyAndVal.getSecond();
                Slop slop = versioned.getValue();
                int nodeId = slop.getNodeId();
                // Check for dead slops
                if (isSlopDead(cluster, storeNames, versioned.getValue())) {
                    handleDeadSlop(slopStorageEngine, keyAndVal);
                    // Dead slop has been handled; move on to the next slop.
                    continue;
                }
                Node node = cluster.getNodeById(nodeId);
                attemptedPushes++;
                if (attemptedPushes % 10000 == 0) {
                    logger.info("Attempted pushing " + attemptedPushes + " slops");
                }
                Long attempted = attemptedByNode.get(nodeId);
                attemptedByNode.put(nodeId, attempted + 1L);
                if (failureDetector.isAvailable(node)) {
                    Store<ByteArray, byte[], byte[]> store = storeRepo.getNodeStore(slop.getStoreName(), node.getId());
                    long startNs = System.nanoTime();
                    int nBytes = 0;
                    try {
                        nBytes = slop.getKey().length();
                        if (slop.getOperation() == Operation.PUT) {
                            store.put(slop.getKey(), new Versioned<byte[]>(slop.getValue(), versioned.getVersion()), slop.getTransforms());
                            nBytes += slop.getValue().length + ((VectorClock) versioned.getVersion()).sizeInBytes() + 1;
                        } else if (slop.getOperation() == Operation.DELETE) {
                            nBytes += ((VectorClock) versioned.getVersion()).sizeInBytes() + 1;
                            store.delete(slop.getKey(), versioned.getVersion());
                        } else {
                            logger.error("Unknown slop operation: " + slop.getOperation());
                            continue;
                        }
                        failureDetector.recordSuccess(node, deltaMs(startNs));
                        slopStore.delete(slop.makeKey(), versioned.getVersion());
                        slopsPushed++;
                        // Increment succeeded
                        Long succeeded = succeededByNode.get(nodeId);
                        succeededByNode.put(nodeId, succeeded + 1L);
                        // Throttle the bytes...
                        throttler.maybeThrottle(nBytes);
                    } catch (ObsoleteVersionException e) {
                        // Okay, it is old; just delete it
                        slopStore.delete(slop.makeKey(), versioned.getVersion());
                        slopsPushed++;
                        // Increment succeeded
                        Long succeeded = succeededByNode.get(nodeId);
                        succeededByNode.put(nodeId, succeeded + 1L);
                        // Throttle the bytes...
                        throttler.maybeThrottle(nBytes);
                    } catch (UnreachableStoreException e) {
                        failureDetector.recordException(node, deltaMs(startNs), e);
                    }
                }
            } catch (Exception e) {
                logger.error(e, e);
            }
        }
        // Only if we reached here do we update stats
        logger.log(attemptedPushes > 0 ? Level.INFO : Level.DEBUG, "Attempted " + attemptedPushes + " hinted handoff pushes of which " + slopsPushed + " succeeded.");
        Map<Integer, Long> outstanding = Maps.newHashMapWithExpectedSize(cluster.getNumberOfNodes());
        for (int nodeId : succeededByNode.keySet()) {
            outstanding.put(nodeId, attemptedByNode.get(nodeId) - succeededByNode.get(nodeId));
        }
        slopStorageEngine.resetStats(outstanding);
    } catch (Exception e) {
        logger.error(e, e);
    } finally {
        try {
            if (iterator != null)
                iterator.close();
        } catch (Exception e) {
            logger.error("Failed to close iterator.", e);
        }
        this.repairPermits.release(this.getClass().getCanonicalName());
    }
}
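The loop above keeps two per-node counters and reports their difference as outstanding slops when the run finishes. Here is that bookkeeping in isolation, as a standalone sketch with made-up node ids and counts (this is not Voldemort code, just the arithmetic the job performs):

import java.util.HashMap;
import java.util.Map;

// Illustrative only: reproduces the per-destination-node accounting from run().
public class SlopBookkeepingDemo {
    public static void main(String[] args) {
        Map<Integer, Long> attemptedByNode = new HashMap<>();
        Map<Integer, Long> succeededByNode = new HashMap<>();
        for (int nodeId : new int[] { 0, 1, 2 }) {   // cluster.getNodes() in the real job
            attemptedByNode.put(nodeId, 0L);
            succeededByNode.put(nodeId, 0L);
        }

        // Pretend we attempted 5 pushes to node 1 and 3 of them succeeded.
        for (int i = 0; i < 5; i++) {
            attemptedByNode.put(1, attemptedByNode.get(1) + 1L);
            if (i < 3) {
                succeededByNode.put(1, succeededByNode.get(1) + 1L);
            }
        }

        // Outstanding slops per node = attempted - succeeded; this is the map that
        // slopStorageEngine.resetStats(outstanding) receives at the end of the run.
        Map<Integer, Long> outstanding = new HashMap<>();
        for (int nodeId : succeededByNode.keySet()) {
            outstanding.put(nodeId, attemptedByNode.get(nodeId) - succeededByNode.get(nodeId));
        }
        System.out.println(outstanding);   // e.g. {0=0, 1=2, 2=0}
    }
}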
Use of voldemort.cluster.Cluster in project voldemort by voldemort.
The class AdminServiceRequestHandler, method handleRebalanceStateChange.
public VAdminProto.RebalanceStateChangeResponse handleRebalanceStateChange(VAdminProto.RebalanceStateChangeRequest request) {
    VAdminProto.RebalanceStateChangeResponse.Builder response = VAdminProto.RebalanceStateChangeResponse.newBuilder();
    synchronized (rebalancer) {
        try {
            // Retrieve all values first
            List<RebalanceTaskInfo> rebalanceTaskInfo = Lists.newArrayList();
            for (RebalanceTaskInfoMap map : request.getRebalanceTaskListList()) {
                rebalanceTaskInfo.add(ProtoUtils.decodeRebalanceTaskInfoMap(map));
            }
            Cluster cluster = new ClusterMapper().readCluster(new StringReader(request.getClusterString()));
            List<StoreDefinition> storeDefs = new StoreDefinitionsMapper().readStoreList(new StringReader(request.getStoresString()));
            boolean swapRO = request.getSwapRo();
            boolean changeClusterMetadata = request.getChangeClusterMetadata();
            boolean changeRebalanceState = request.getChangeRebalanceState();
            boolean rollback = request.getRollback();
            rebalancer.rebalanceStateChange(cluster, storeDefs, rebalanceTaskInfo, swapRO, changeClusterMetadata, changeRebalanceState, rollback);
        } catch (VoldemortException e) {
            response.setError(ProtoUtils.encodeError(errorCodeMapper, e));
            logger.error("handleRebalanceStateChange failed for request(" + request.toString() + ")", e);
        }
    }
    return response.build();
}
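The handler deserializes the cluster and store definitions that arrive as XML strings inside the protobuf request using ClusterMapper and StoreDefinitionsMapper, as seen above. A small helper sketch of just that parsing step; the class name, the XML inputs and the package paths in the imports are assumptions, while the readCluster/readStoreList calls match the handler code:

import java.io.StringReader;
import java.util.List;

import voldemort.cluster.Cluster;
import voldemort.store.StoreDefinition;
import voldemort.xml.ClusterMapper;
import voldemort.xml.StoreDefinitionsMapper;

// clusterXml and storesXml are assumed to be valid cluster.xml / stores.xml documents
// supplied by the caller (e.g. shipped by the admin client in the request).
public final class MetadataXmlParser {

    private MetadataXmlParser() {}

    public static Cluster parseCluster(String clusterXml) {
        return new ClusterMapper().readCluster(new StringReader(clusterXml));
    }

    public static List<StoreDefinition> parseStores(String storesXml) {
        return new StoreDefinitionsMapper().readStoreList(new StringReader(storesXml));
    }
}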
Use of voldemort.cluster.Cluster in project voldemort by voldemort.
The class Rebalancer, method rebalanceStateChange.
/**
 * Supports four different stages <br>
 * For normal operation:
 *
 * <pre>
 * | swapRO | changeClusterMetadata | changeRebalanceState | Order                         |
 * | f      | t                     | t                    | rebalance -> cluster          |
 * | f      | f                     | t                    | rebalance                     |
 * | t      | t                     | f                    | cluster -> swap               |
 * | t      | t                     | t                    | rebalance -> cluster -> swap  |
 * </pre>
 *
 * In general we need to do [ cluster change -> swap -> rebalance state
 * change ].
 *
 * NOTE: The update of the cluster metadata and the rebalancer state is not
 * "atomic". Ergo, there could theoretically be a race where a client picks
 * up the new cluster metadata and sends a request based on it, but the proxy
 * bridges have not been set up yet, so we either miss a proxy put or return
 * null for get/getall.
 *
 * TODO: refactor. The rollback logic here is too convoluted. Specifically,
 * the independent updates to each key could be split up into their own
 * methods.
 *
 * @param cluster Cluster metadata to change
 * @param storeDefs List of store definitions to change
 * @param rebalanceTaskInfo List of rebalance partitions info
 * @param swapRO Boolean to indicate swapping of RO store
 * @param changeClusterAndStoresMetadata Boolean to indicate a change of
 *        cluster and stores metadata
 * @param changeRebalanceState Boolean to indicate a change in rebalance
 *        state
 * @param rollback Boolean to indicate whether we are rolling back or not
 */
public void rebalanceStateChange(Cluster cluster, List<StoreDefinition> storeDefs, List<RebalanceTaskInfo> rebalanceTaskInfo, boolean swapRO, boolean changeClusterAndStoresMetadata, boolean changeRebalanceState, boolean rollback) {
    Cluster currentCluster = metadataStore.getCluster();
    List<StoreDefinition> currentStoreDefs = metadataStore.getStoreDefList();
    logger.info("Server doing rebalance state change with options [ cluster metadata change - " + changeClusterAndStoresMetadata + " ], [ changing rebalancing state - " + changeRebalanceState + " ], [ changing swapping RO - " + swapRO + " ], [ rollback - " + rollback + " ]");
    // Variables to track what has completed
    List<RebalanceTaskInfo> completedRebalanceTaskInfo = Lists.newArrayList();
    List<String> swappedStoreNames = Lists.newArrayList();
    boolean completedClusterAndStoresChange = false;
    boolean completedRebalanceSourceClusterChange = false;
    Cluster previousRebalancingSourceCluster = null;
    List<StoreDefinition> previousRebalancingSourceStores = null;
    try {
        // CHANGE REBALANCING STATE
        if (changeRebalanceState) {
            try {
                previousRebalancingSourceCluster = metadataStore.getRebalancingSourceCluster();
                previousRebalancingSourceStores = metadataStore.getRebalancingSourceStores();
                if (!rollback) {
                    // Save the current cluster and stores defs for the redirecting store
                    changeClusterAndStores(MetadataStore.REBALANCING_SOURCE_CLUSTER_XML, currentCluster, MetadataStore.REBALANCING_SOURCE_STORES_XML, currentStoreDefs);
                    completedRebalanceSourceClusterChange = true;
                    for (RebalanceTaskInfo info : rebalanceTaskInfo) {
                        metadataStore.addRebalancingState(info);
                        completedRebalanceTaskInfo.add(info);
                    }
                } else {
                    // Reset the rebalancing source cluster and stores back to null
                    changeClusterAndStores(MetadataStore.REBALANCING_SOURCE_CLUSTER_XML, null, MetadataStore.REBALANCING_SOURCE_STORES_XML, null);
                    completedRebalanceSourceClusterChange = true;
                    for (RebalanceTaskInfo info : rebalanceTaskInfo) {
                        metadataStore.deleteRebalancingState(info);
                        completedRebalanceTaskInfo.add(info);
                    }
                }
            } catch (Exception e) {
                throw new VoldemortException(e);
            }
        }
        // CHANGE CLUSTER METADATA AND STORE METADATA
        if (changeClusterAndStoresMetadata) {
            logger.info("Switching cluster metadata from " + currentCluster + " to " + cluster);
            logger.info("Switching stores metadata from " + currentStoreDefs + " to " + storeDefs);
            changeClusterAndStores(MetadataStore.CLUSTER_KEY, cluster, MetadataStore.STORES_KEY, storeDefs);
            completedClusterAndStoresChange = true;
        }
        // SWAP RO DATA FOR ALL STORES
        if (swapRO) {
            swapROStores(swappedStoreNames, false);
        }
    } catch (VoldemortException e) {
        logger.error("Got exception while changing state, now rolling back changes", e);
        // ROLLBACK CLUSTER AND STORES CHANGE
        if (completedClusterAndStoresChange) {
            try {
                logger.info("Rolling back cluster.xml to " + currentCluster);
                logger.info("Rolling back stores.xml to " + currentStoreDefs);
                changeClusterAndStores(MetadataStore.CLUSTER_KEY, currentCluster, MetadataStore.STORES_KEY, currentStoreDefs);
            } catch (Exception exception) {
                logger.error("Error while rolling back cluster metadata to " + currentCluster + " Stores metadata to " + currentStoreDefs, exception);
            }
        }
        // SWAP RO DATA BACK FOR ALL COMPLETED STORES
        if (swappedStoreNames.size() > 0) {
            try {
                swapROStores(swappedStoreNames, true);
            } catch (Exception exception) {
                logger.error("Error while swapping back to old state ", exception);
            }
        }
        // CHANGE BACK ALL REBALANCING STATES FOR COMPLETED ONES
        if (completedRebalanceTaskInfo.size() > 0) {
            if (!rollback) {
                for (RebalanceTaskInfo info : completedRebalanceTaskInfo) {
                    try {
                        metadataStore.deleteRebalancingState(info);
                    } catch (Exception exception) {
                        logger.error("Error while deleting back rebalance info during error rollback " + info, exception);
                    }
                }
            } else {
                for (RebalanceTaskInfo info : completedRebalanceTaskInfo) {
                    try {
                        metadataStore.addRebalancingState(info);
                    } catch (Exception exception) {
                        logger.error("Error while adding back rebalance info during error rollback " + info, exception);
                    }
                }
            }
        }
        // REVERT REBALANCING_SOURCE_CLUSTER_XML AND REBALANCING_SOURCE_STORES_XML
        if (completedRebalanceSourceClusterChange) {
            logger.info("Reverting the REBALANCING_SOURCE_CLUSTER_XML back to " + previousRebalancingSourceCluster);
            logger.info("Reverting the REBALANCING_SOURCE_STORES_XML back to " + previousRebalancingSourceStores);
            changeClusterAndStores(MetadataStore.REBALANCING_SOURCE_CLUSTER_XML, previousRebalancingSourceCluster, MetadataStore.REBALANCING_SOURCE_STORES_XML, previousRebalancingSourceStores);
        }
        throw e;
    }
}
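To connect the javadoc table to the method signature, here is a hypothetical driver showing how two of the flag combinations translate into calls. The helper class, its method names and the import package paths are assumptions; only the rebalanceStateChange signature is taken from the code above:

import java.util.List;

import voldemort.client.rebalance.RebalanceTaskInfo;
import voldemort.cluster.Cluster;
import voldemort.server.rebalance.Rebalancer;
import voldemort.store.StoreDefinition;

// The Rebalancer, cluster, store definitions and task list are assumed to come
// from the server context.
public final class RebalanceStateChangeExamples {

    private RebalanceStateChangeExamples() {}

    // Row "f | f | t" of the table: only the rebalance state is changed.
    static void changeRebalanceStateOnly(Rebalancer rebalancer, Cluster cluster,
                                         List<StoreDefinition> storeDefs,
                                         List<RebalanceTaskInfo> taskInfos) {
        rebalancer.rebalanceStateChange(cluster, storeDefs, taskInfos,
                                        false,  // swapRO
                                        false,  // changeClusterAndStoresMetadata
                                        true,   // changeRebalanceState
                                        false); // rollback
    }

    // Row "t | t | t": rebalance state, cluster/stores metadata and RO swap together.
    static void changeEverything(Rebalancer rebalancer, Cluster cluster,
                                 List<StoreDefinition> storeDefs,
                                 List<RebalanceTaskInfo> taskInfos) {
        rebalancer.rebalanceStateChange(cluster, storeDefs, taskInfos,
                                        true,   // swapRO
                                        true,   // changeClusterAndStoresMetadata
                                        true,   // changeRebalanceState
                                        false); // rollback
    }
}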
Use of voldemort.cluster.Cluster in project voldemort by voldemort.
The class AbstractNonZonedRebalanceTest, method testRebalanceCleanPrimary.
@Test(timeout = 600000)
public void testRebalanceCleanPrimary() throws Exception {
    logger.info("Starting testRebalanceCleanPrimary");
    try {
        Cluster currentCluster = ServerTestUtils.getLocalCluster(3, new int[][] { { 0 }, { 1, 3 }, { 2 } });
        Cluster finalCluster = UpdateClusterUtils.createUpdatedCluster(currentCluster, 2, Lists.newArrayList(3));
        // Start servers 0, 1, 2
        Map<String, String> configProps = new HashMap<String, String>();
        configProps.put("enable.repair", "true");
        List<Integer> serverList = Arrays.asList(0, 1, 2);
        currentCluster = startServers(currentCluster, rwStoreDefFileWithReplication, serverList, configProps);
        String bootstrapUrl = getBootstrapUrl(currentCluster, 0);
        final ClusterTestUtils.RebalanceKit rebalanceKit = ClusterTestUtils.getRebalanceKit(bootstrapUrl, finalCluster);
        try {
            AdminClient adminClient = rebalanceKit.controller.getAdminClient();
            populateData(currentCluster, rwStoreDefWithReplication, adminClient, false);
            // Figure out the positive keys to check
            List<ByteArray> positiveTestKeyList = sampleKeysFromPartition(adminClient, 1, rwStoreDefWithReplication.getName(), Arrays.asList(1), 20);
            rebalanceAndCheck(rebalanceKit.plan, rebalanceKit.controller, Arrays.asList(0, 1, 2));
            checkConsistentMetadata(finalCluster, serverList);
            // Do the cleanup operation
            for (int i = 0; i < 3; i++) {
                adminClient.storeMntOps.repairJob(i);
            }
            // Wait for the repairs to complete
            for (int i = 0; i < 3; i++) {
                ServerTestUtils.waitForAsyncOperationOnServer(serverMap.get(i), "Repair", 5000);
            }
            // Do the positive tests
            checkForKeyExistence(adminClient, 1, rwStoreDefWithReplication.getName(), positiveTestKeyList);
            logger.info("[Primary] Successful clean after Rebalancing");
        } finally {
            // Stop servers
            stopServer(serverList);
        }
    } catch (AssertionError ae) {
        logger.error("Assertion broken in testRebalanceCleanPrimary ", ae);
        throw ae;
    }
}