use of voldemort.routing.StoreRoutingPlan in project voldemort by voldemort.
the class ClusterForkLiftToolTest method testForkLiftOverWrite.
@Test
public void testForkLiftOverWrite() throws Exception {
    StoreRoutingPlan srcStoreInstance = new StoreRoutingPlan(srcCluster, globallyResolvingStoreDef);
    // populate data on the source cluster
    for (Map.Entry<String, String> entry : kvPairs.entrySet()) {
        srcGloballyResolvingStoreClient.put(entry.getKey(), entry.getValue());
    }
    // generate a conflict on the primary and a secondary replica
    List<Integer> nodeList = srcStoreInstance.getReplicationNodeList(srcStoreInstance.getMasterPartitionId(conflictKey.getBytes("UTF-8")));
    VectorClock losingClock = new VectorClock(Lists.newArrayList(new ClockEntry((short) 0, 5)), System.currentTimeMillis());
    VectorClock winningClock = new VectorClock(Lists.newArrayList(new ClockEntry((short) 1, 5)), losingClock.getTimestamp() + 1);
    srcAdminClient.storeOps.putNodeKeyValue(GLOBALLY_RESOLVING_STORE_NAME, new NodeValue<ByteArray, byte[]>(nodeList.get(0), new ByteArray(conflictKey.getBytes("UTF-8")), new Versioned<byte[]>("losing value".getBytes("UTF-8"), losingClock)));
    srcAdminClient.storeOps.putNodeKeyValue(GLOBALLY_RESOLVING_STORE_NAME, new NodeValue<ByteArray, byte[]>(nodeList.get(1), new ByteArray(conflictKey.getBytes("UTF-8")), new Versioned<byte[]>("winning value".getBytes("UTF-8"), winningClock)));
    // *** do a write to the destination cluster ***
    // This is the main point of the test: after the fork lift this value
    // should be overwritten. This is the only difference from the
    // non-overwrite test.
    dstGloballyResolvingStoreClient.put(firstKey, "before forklift");
    // Sleep briefly so that the clock generated during the fork lift (based on
    // milliseconds) is strictly greater and the value is overwritten.
    Thread.sleep(2);
    // perform the forklifting
    ClusterForkLiftTool forkLiftTool = new ClusterForkLiftTool(srcBootStrapUrl,
                                                               dstBootStrapUrl,
                                                               true, // overwrite
                                                               false, // ignore mismatch
                                                               10000,
                                                               1,
                                                               1000,
                                                               Lists.newArrayList(GLOBALLY_RESOLVING_STORE_NAME),
                                                               null,
                                                               ClusterForkLiftTool.ForkLiftTaskMode.global_resolution);
    forkLiftTool.run();
    // do a write to the destination cluster
    dstGloballyResolvingStoreClient.put(lastKey, "after forklift");
    // verify data on the destination is as expected
    for (Map.Entry<String, String> entry : kvPairs.entrySet()) {
        String dstClusterValue = dstGloballyResolvingStoreClient.get(entry.getKey()).getValue();
        if (entry.getKey().equals(lastKey)) {
            assertEquals("can't update value after forklift", dstClusterValue, "after forklift");
        } else if (entry.getKey().equals(conflictKey)) {
            assertEquals("Conflict resolution incorrect", dstClusterValue, "winning value");
        } else {
            if (!dstClusterValue.equals(entry.getValue())) {
                assertEquals("fork lift data missing", dstClusterValue, entry.getValue());
            }
        }
    }
}
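Note on the conflict above: the two vector clocks carry entries for different node ids, so neither version dominates the other, and the fork lift's global resolution falls back to the clock timestamps, which is why "winning value" (the later timestamp) ends up on the destination. A minimal standalone sketch of that relationship, reusing the same VectorClock/ClockEntry construction as the test; the class name is illustrative and the compare(Version) call returning Occurred is assumed from Voldemort's versioning API:

import com.google.common.collect.Lists;
import voldemort.versioning.ClockEntry;
import voldemort.versioning.Occurred;
import voldemort.versioning.VectorClock;

public class VectorClockConflictSketch {
    public static void main(String[] args) {
        // One entry per clock, on different node ids, exactly as in the test.
        VectorClock losing = new VectorClock(Lists.newArrayList(new ClockEntry((short) 0, 5)), System.currentTimeMillis());
        VectorClock winning = new VectorClock(Lists.newArrayList(new ClockEntry((short) 1, 5)), losing.getTimestamp() + 1);
        // Neither clock happened-before the other, so the versions are concurrent.
        Occurred relation = winning.compare(losing);
        System.out.println("relation = " + relation);
        // Global resolution then prefers the version with the larger timestamp.
        System.out.println("winning is newer: " + (winning.getTimestamp() > losing.getTimestamp()));
    }
}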
use of voldemort.routing.StoreRoutingPlan in project voldemort by voldemort.
the class ClusterForkLiftToolTest method testGloballyResolvingForkLift.
@Test
public void testGloballyResolvingForkLift() throws Exception {
    StoreRoutingPlan srcStoreInstance = new StoreRoutingPlan(srcCluster, globallyResolvingStoreDef);
    // populate data on the source cluster
    for (Map.Entry<String, String> entry : kvPairs.entrySet()) {
        srcGloballyResolvingStoreClient.put(entry.getKey(), entry.getValue());
    }
    // generate a conflict on the primary and a secondary replica
    List<Integer> nodeList = srcStoreInstance.getReplicationNodeList(srcStoreInstance.getMasterPartitionId(conflictKey.getBytes("UTF-8")));
    VectorClock losingClock = new VectorClock(Lists.newArrayList(new ClockEntry((short) 0, 5)), System.currentTimeMillis());
    VectorClock winningClock = new VectorClock(Lists.newArrayList(new ClockEntry((short) 1, 5)), losingClock.getTimestamp() + 1);
    srcAdminClient.storeOps.putNodeKeyValue(GLOBALLY_RESOLVING_STORE_NAME, new NodeValue<ByteArray, byte[]>(nodeList.get(0), new ByteArray(conflictKey.getBytes("UTF-8")), new Versioned<byte[]>("losing value".getBytes("UTF-8"), losingClock)));
    srcAdminClient.storeOps.putNodeKeyValue(GLOBALLY_RESOLVING_STORE_NAME, new NodeValue<ByteArray, byte[]>(nodeList.get(1), new ByteArray(conflictKey.getBytes("UTF-8")), new Versioned<byte[]>("winning value".getBytes("UTF-8"), winningClock)));
    // do a write to the destination cluster
    dstGloballyResolvingStoreClient.put(firstKey, "before forklift");
    // perform the forklifting
    ClusterForkLiftTool forkLiftTool = new ClusterForkLiftTool(srcBootStrapUrl,
                                                               dstBootStrapUrl,
                                                               false, // overwrite
                                                               false, // ignoreSchemaMismatch
                                                               10000,
                                                               1,
                                                               1000,
                                                               Lists.newArrayList(GLOBALLY_RESOLVING_STORE_NAME),
                                                               null,
                                                               ClusterForkLiftTool.ForkLiftTaskMode.global_resolution);
    forkLiftTool.run();
    // do a write to the destination cluster
    dstGloballyResolvingStoreClient.put(lastKey, "after forklift");
    // verify data on the destination is as expected
    for (Map.Entry<String, String> entry : kvPairs.entrySet()) {
        if (entry.getKey().equals(firstKey)) {
            assertEquals("Online write overwritten", dstGloballyResolvingStoreClient.get(firstKey).getValue(), "before forklift");
        } else if (entry.getKey().equals(lastKey)) {
            assertEquals("can't update value after forklift", dstGloballyResolvingStoreClient.get(lastKey).getValue(), "after forklift");
        } else if (entry.getKey().equals(conflictKey)) {
            assertEquals("Conflict resolution incorrect", dstGloballyResolvingStoreClient.get(conflictKey).getValue(), "winning value");
        } else {
            assertEquals("fork lift data missing", dstGloballyResolvingStoreClient.get(entry.getKey()).getValue(), entry.getValue());
        }
    }
}
use of voldemort.routing.StoreRoutingPlan in project voldemort by voldemort.
the class RepairJob method operate.
@Override
public void operate() throws Exception {
    for (StoreDefinition storeDef : metadataStore.getStoreDefList()) {
        if (isWritableStore(storeDef)) {
            // Generate the routing strategy for this storage engine
            StoreRoutingPlan routingPlan = new StoreRoutingPlan(metadataStore.getCluster(), storeDef);
            logger.info("Repairing store " + storeDef.getName());
            StorageEngine<ByteArray, byte[], byte[]> engine = storeRepo.getStorageEngine(storeDef.getName());
            iterator = engine.keys();
            long itemsScanned = 0;
            long numDeletedKeys = 0;
            while (iterator.hasNext()) {
                ByteArray key = iterator.next();
                if (!routingPlan.checkKeyBelongsToNode(key.get(), metadataStore.getNodeId())) {
                    /**
                     * Blow away the entire key with all its versions.
                     */
                    engine.delete(key, null);
                    numDeletedKeys = this.numKeysUpdatedThisRun.incrementAndGet();
                }
                itemsScanned = this.numKeysScannedThisRun.incrementAndGet();
                throttler.maybeThrottle(1);
                if (itemsScanned % STAT_RECORDS_INTERVAL == 0) {
                    logger.info("#Scanned:" + itemsScanned + " #Deleted:" + numDeletedKeys);
                }
            }
            closeIterator(iterator);
            logger.info("Completed store " + storeDef.getName() + " #Scanned:" + itemsScanned + " #Deleted:" + numDeletedKeys);
        }
    }
}
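The repair decision in operate() reduces to a single StoreRoutingPlan call per key: a key is deleted only when checkKeyBelongsToNode says this node is not a replica for it. A minimal sketch of that check in isolation; the helper class, method name, and parameters are illustrative, and the cluster, store definition, and key bytes would come from your own metadata:

import voldemort.cluster.Cluster;
import voldemort.routing.StoreRoutingPlan;
import voldemort.store.StoreDefinition;

public class OrphanedKeyCheckSketch {
    /**
     * Returns true if this node is not a replica for the key's partition,
     * i.e. the key is orphaned on this node and is a candidate for deletion.
     */
    public static boolean isOrphaned(Cluster cluster, StoreDefinition storeDef, byte[] keyBytes, int nodeId) {
        StoreRoutingPlan routingPlan = new StoreRoutingPlan(cluster, storeDef);
        return !routingPlan.checkKeyBelongsToNode(keyBytes, nodeId);
    }
}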
use of voldemort.routing.StoreRoutingPlan in project voldemort by voldemort.
the class AdminRebalanceTest method testRebalanceNodeRO.
@Test(timeout = 60000)
public void testRebalanceNodeRO() throws IOException {
    try {
        startFourNodeRO();
        int numChunks = 5;
        for (StoreDefinition storeDef : Lists.newArrayList(storeDef1, storeDef2)) {
            buildROStore(storeDef, numChunks);
        }
        // Set into rebalancing state
        for (RebalanceTaskInfo partitionPlan : plans) {
            getServer(partitionPlan.getStealerId()).getMetadataStore().put(MetadataStore.SERVER_STATE_KEY, MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER);
            getServer(partitionPlan.getStealerId()).getMetadataStore().put(MetadataStore.REBALANCING_STEAL_INFO, new RebalancerState(Lists.newArrayList(RebalanceTaskInfo.create(partitionPlan.toJsonString()))));
            getServer(partitionPlan.getStealerId()).getMetadataStore().put(MetadataStore.REBALANCING_SOURCE_CLUSTER_XML, partitionPlan.getInitialCluster());
        }
        // Actually run it
        try {
            for (RebalanceTaskInfo currentPlan : plans) {
                int asyncId = adminClient.rebalanceOps.rebalanceNode(currentPlan);
                assertNotSame("Got a valid rebalanceAsyncId", -1, asyncId);
                getAdminClient().rpcOps.waitForCompletion(currentPlan.getStealerId(), asyncId, 300, TimeUnit.SECONDS);
                // Test that the plan has been removed from the list
                assertFalse(getServer(currentPlan.getStealerId()).getMetadataStore().getRebalancerState().getAll().contains(currentPlan));
            }
        } catch (Exception e) {
            e.printStackTrace();
            fail("Should not throw any exceptions");
        }
        // Check that the files have been copied
        for (StoreDefinition storeDef : Lists.newArrayList(storeDef1, storeDef2)) {
            String storeName = storeDef.getName();
            for (RebalanceTaskInfo currentPlan : plans) {
                MetadataStore metadataStore = getServer(currentPlan.getStealerId()).getMetadataStore();
                int nodeId = metadataStore.getNodeId();
                int zoneId = metadataStore.getCluster().getNodeById(nodeId).getZoneId();
                StoreRoutingPlan storeRoutingPlan = new StoreRoutingPlan(metadataStore.getCluster(), storeDef);
                File currentDir = new File(((ReadOnlyStorageEngine) getStore(currentPlan.getStealerId(), storeName)).getCurrentDirPath());
                if (currentPlan.getPartitionStores().contains(storeDef.getName())) {
                    for (Integer partitionId : currentPlan.getStoreToPartitionIds().get(storeName)) {
                        int zoneNary = -1;
                        // If the node hosts no replica of this partition in this
                        // zone, the call throws and we skip the partition.
                        try {
                            zoneNary = storeRoutingPlan.getZoneNaryForNodesPartition(zoneId, nodeId, partitionId);
                        } catch (VoldemortException ve) {
                            continue;
                        }
                        if (zoneNary < storeDef.getReplicationFactor()) {
                            for (int chunkId = 0; chunkId < numChunks; chunkId++) {
                                assertTrue(new File(currentDir, partitionId + "_" + zoneNary + "_" + chunkId + ".data").exists());
                                assertTrue(new File(currentDir, partitionId + "_" + zoneNary + "_" + chunkId + ".index").exists());
                            }
                        }
                    }
                }
            }
        }
        // All servers should be back to normal state
        for (VoldemortServer server : servers) {
            assertEquals(server.getMetadataStore().getRebalancerState(), new RebalancerState(new ArrayList<RebalanceTaskInfo>()));
            assertEquals(server.getMetadataStore().getServerStateUnlocked(), MetadataStore.VoldemortState.NORMAL_SERVER);
        }
        // Test the "cluster + swap" changes
        // Test 1) Fail the swap by adding a dummy store
        servers[2].getMetadataStore().put(MetadataStore.STORES_KEY, Lists.newArrayList(storeDef1, storeDef2, new StoreDefinitionBuilder().setName("test3").setType(ReadOnlyStorageConfiguration.TYPE_NAME).setKeySerializer(new SerializerDefinition("string")).setValueSerializer(new SerializerDefinition("string")).setRoutingPolicy(RoutingTier.CLIENT).setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY).setReplicationFactor(2).setPreferredReads(1).setRequiredReads(1).setPreferredWrites(1).setRequiredWrites(1).build()));
        try {
            adminClient.rebalanceOps.rebalanceStateChange(currentCluster, finalCluster, servers[2].getMetadataStore().getStoreDefList(), servers[2].getMetadataStore().getStoreDefList(), plans, true, true, false, true, true);
            fail("Should have thrown an exception since one node doesn't have the store");
        } catch (VoldemortException e) {
        }
        servers[2].getMetadataStore().put(MetadataStore.STORES_KEY, Lists.newArrayList(storeDef1, storeDef2));
        // Test that all servers are still using the old cluster, i.e. the swap
        // did not go through
        checkRO(currentCluster);
        // Test 2) All-passes scenario
        adminClient.rebalanceOps.rebalanceStateChange(currentCluster, finalCluster, servers[2].getMetadataStore().getStoreDefList(), servers[2].getMetadataStore().getStoreDefList(), plans, true, true, false, true, true);
        checkRO(finalCluster);
        // Test 3) Run a rebalance again now that the stores have been swapped
        // and the files are mmap-ed. Should fail...
        for (RebalanceTaskInfo partitionPlan : plans) {
            getServer(partitionPlan.getStealerId()).getMetadataStore().put(MetadataStore.SERVER_STATE_KEY, MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER);
            getServer(partitionPlan.getStealerId()).getMetadataStore().put(MetadataStore.REBALANCING_STEAL_INFO, new RebalancerState(Lists.newArrayList(RebalanceTaskInfo.create(partitionPlan.toJsonString()))));
        }
        // Actually run it
        try {
            int asyncId = adminClient.rebalanceOps.rebalanceNode(plans.get(0));
            getAdminClient().rpcOps.waitForCompletion(plans.get(0).getStealerId(), asyncId, 300, TimeUnit.SECONDS);
            fail("Should throw an exception");
        } catch (Exception e) {
        }
    } finally {
        shutDown();
    }
}
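The assertions in the file-copy check above encode the read-only chunk naming scheme <partitionId>_<zoneNary>_<chunkId>.data with a matching .index file, where the zone n-ary comes from getZoneNaryForNodesPartition and an exception from that call means the node holds no replica of the partition in that zone. A small sketch of the same check for a single partition, using only the StoreRoutingPlan calls that appear in the test; the helper name and its parameters are illustrative:

import java.io.File;
import voldemort.VoldemortException;
import voldemort.routing.StoreRoutingPlan;

public class ROChunkFileCheckSketch {
    /** Returns false if any expected chunk file is missing for the given partition. */
    public static boolean chunkFilesPresent(StoreRoutingPlan routingPlan, File currentDir,
                                            int zoneId, int nodeId, int partitionId, int numChunks) {
        int zoneNary;
        try {
            zoneNary = routingPlan.getZoneNaryForNodesPartition(zoneId, nodeId, partitionId);
        } catch (VoldemortException ve) {
            // The node holds no replica of this partition in this zone; nothing to check.
            return true;
        }
        for (int chunkId = 0; chunkId < numChunks; chunkId++) {
            if (!new File(currentDir, partitionId + "_" + zoneNary + "_" + chunkId + ".data").exists()
                || !new File(currentDir, partitionId + "_" + zoneNary + "_" + chunkId + ".index").exists()) {
                return false;
            }
        }
        return true;
    }
}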
use of voldemort.routing.StoreRoutingPlan in project voldemort by voldemort.
the class RebalanceBatchPlan method constructBatchPlan.
/**
* Determine the batch plan and return it. The batch plan has the following
* properties:
*
* 1) A stealer node does not steal any partition-stores it already hosts.
*
* 2) Use current policy to decide which node to steal from: see getDonorId
* method.
*
* Currently, this batch plan avoids all unnecessary cross zone moves,
* distributes cross zone moves into new zones evenly across existing zones,
* and copies replicaFactor partition-stores into any new zone.
*
* @return the batch plan
*/
private List<RebalanceTaskInfo> constructBatchPlan() {
    // Construct all store routing plans once.
    HashMap<String, StoreRoutingPlan> currentStoreRoutingPlans = new HashMap<String, StoreRoutingPlan>();
    for (StoreDefinition storeDef : currentStoreDefs) {
        currentStoreRoutingPlans.put(storeDef.getName(), new StoreRoutingPlan(currentCluster, storeDef));
    }
    HashMap<String, StoreRoutingPlan> finalStoreRoutingPlans = new HashMap<String, StoreRoutingPlan>();
    for (StoreDefinition storeDef : finalStoreDefs) {
        finalStoreRoutingPlans.put(storeDef.getName(), new StoreRoutingPlan(finalCluster, storeDef));
    }
    RebalanceTaskInfoBuilder rpiBuilder = new RebalanceTaskInfoBuilder();
    // For every node in the final cluster ...
    for (Node stealerNode : finalCluster.getNodes()) {
        int stealerZoneId = stealerNode.getZoneId();
        int stealerNodeId = stealerNode.getId();
        // Consider all store definitions ...
        for (StoreDefinition storeDef : finalStoreDefs) {
            StoreRoutingPlan currentSRP = currentStoreRoutingPlans.get(storeDef.getName());
            StoreRoutingPlan finalSRP = finalStoreRoutingPlans.get(storeDef.getName());
            for (int stealerPartitionId : finalSRP.getZoneNAryPartitionIds(stealerNodeId)) {
                // Optimization for RW stores: do not steal a partition-store you already host!
                if (!storeDef.getType().equalsIgnoreCase(ReadOnlyStorageConfiguration.TYPE_NAME)) {
                    if (currentSRP.getReplicationNodeList(stealerPartitionId).contains(stealerNodeId)) {
                        continue;
                    }
                }
                // Determine which node to steal from: getDonorId finds the node that
                // hosts the appropriate zone n-ary replica of the partition.
                int donorNodeId = getDonorId(currentSRP, finalSRP, stealerZoneId, stealerNodeId, stealerPartitionId);
                // For RO stores the donor and the stealer can be the same node,
                // hence this check is needed.
                if (donorNodeId != stealerNodeId) {
                    rpiBuilder.addPartitionStoreMove(stealerNodeId, donorNodeId, storeDef.getName(), stealerPartitionId);
                }
            }
        }
    }
    return rpiBuilder.buildRebalanceTaskInfos();
}
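constructBatchPlan leans on two complementary StoreRoutingPlan views: getZoneNAryPartitionIds, which lists the partition ids a node is responsible for, and getReplicationNodeList, which lists the nodes replicating a given partition. A minimal sketch contrasting the two, using only calls that appear on this page; the wrapper class and method are illustrative, and the cluster and store definition are placeholders you would supply:

import java.util.List;
import voldemort.cluster.Cluster;
import voldemort.routing.StoreRoutingPlan;
import voldemort.store.StoreDefinition;

public class RoutingPlanViewsSketch {
    public static void printViews(Cluster cluster, StoreDefinition storeDef, int nodeId) {
        StoreRoutingPlan plan = new StoreRoutingPlan(cluster, storeDef);
        // Partition ids for which this node holds some zone n-ary replica.
        for (int partitionId : plan.getZoneNAryPartitionIds(nodeId)) {
            // The list of node ids that replicate this partition.
            List<Integer> replicaNodeIds = plan.getReplicationNodeList(partitionId);
            System.out.println("partition " + partitionId + " hosted by node " + nodeId + " is replicated on nodes " + replicaNodeIds);
        }
    }
}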