Use of voldemort.versioning.ClockEntry in project voldemort by voldemort.
The class ClusterForkLiftToolTest, method testPrimaryResolvingForkLift.
@Test
public void testPrimaryResolvingForkLift() throws Exception {
    StoreRoutingPlan srcStoreInstance = new StoreRoutingPlan(srcCluster, primaryResolvingStoreDef);
    // populate data on the source cluster
    for (Map.Entry<String, String> entry : kvPairs.entrySet()) {
        srcPrimaryResolvingStoreClient.put(entry.getKey(), entry.getValue());
    }
    // generate a conflict on the master partition
    int masterNode = srcStoreInstance.getNodeIdForPartitionId(srcStoreInstance.getMasterPartitionId(conflictKey.getBytes("UTF-8")));
    VectorClock losingClock = new VectorClock(Lists.newArrayList(new ClockEntry((short) 0, 5)), System.currentTimeMillis());
    VectorClock winningClock = new VectorClock(Lists.newArrayList(new ClockEntry((short) 1, 5)), losingClock.getTimestamp() + 1);
    srcAdminClient.storeOps.putNodeKeyValue(PRIMARY_RESOLVING_STORE_NAME, new NodeValue<ByteArray, byte[]>(masterNode, new ByteArray(conflictKey.getBytes("UTF-8")), new Versioned<byte[]>("losing value".getBytes("UTF-8"), losingClock)));
    srcAdminClient.storeOps.putNodeKeyValue(PRIMARY_RESOLVING_STORE_NAME, new NodeValue<ByteArray, byte[]>(masterNode, new ByteArray(conflictKey.getBytes("UTF-8")), new Versioned<byte[]>("winning value".getBytes("UTF-8"), winningClock)));
    // do a write to the destination cluster
    dstPrimaryResolvingStoreClient.put(firstKey, "before forklift");
    // perform the forklifting
    ClusterForkLiftTool forkLiftTool = new ClusterForkLiftTool(srcBootStrapUrl, dstBootStrapUrl, false, // ignoreSchemaMismatch
            false, 10000, 1, 1000, Lists.newArrayList(PRIMARY_RESOLVING_STORE_NAME), null, ClusterForkLiftTool.ForkLiftTaskMode.primary_resolution);
    forkLiftTool.run();
    // do a write to the destination cluster
    dstPrimaryResolvingStoreClient.put(lastKey, "after forklift");
    // verify data on the destination is as expected
    for (Map.Entry<String, String> entry : kvPairs.entrySet()) {
        if (entry.getKey().equals(firstKey)) {
            assertEquals("Online write overwritten", "before forklift", dstPrimaryResolvingStoreClient.get(firstKey).getValue());
        } else if (entry.getKey().equals(lastKey)) {
            assertEquals("Can't update value after forklift", "after forklift", dstPrimaryResolvingStoreClient.get(lastKey).getValue());
        } else if (entry.getKey().equals(conflictKey)) {
            assertEquals("Conflict resolution incorrect", "winning value", dstPrimaryResolvingStoreClient.get(conflictKey).getValue());
        } else {
            assertEquals("Forklift data missing", entry.getValue(), dstPrimaryResolvingStoreClient.get(entry.getKey()).getValue());
        }
    }
}
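The two clocks above, [0:5] on node 0 and [1:5] on node 1, are deliberately concurrent: neither contains the other, so only the wall-clock timestamps (losingClock's time plus one) let primary_resolution pick a winner. A minimal sketch of that comparison, assuming Voldemort's Occurred enum and Version.compare behave as their names suggest:

import java.util.Arrays;

import voldemort.versioning.ClockEntry;
import voldemort.versioning.VectorClock;

public class ConcurrentClockSketch {

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        // one write seen on node 0, one write seen on node 1
        VectorClock clockA = new VectorClock(Arrays.asList(new ClockEntry((short) 0, 5)), now);
        VectorClock clockB = new VectorClock(Arrays.asList(new ClockEntry((short) 1, 5)), now + 1);
        // neither clock dominates the other
        System.out.println(clockA.compare(clockB)); // expected: CONCURRENTLY
        // so resolution has to fall back to the clocks' timestamps,
        // where clockB (the "winning value") is newer
        System.out.println(clockB.getTimestamp() > clockA.getTimestamp()); // true
    }
}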
Use of voldemort.versioning.ClockEntry in project voldemort by voldemort.
The class ClusterForkLiftToolTest, method testNoresolutionForkLift.
@Test
public void testNoresolutionForkLift() throws Exception {
    int versions = 0;
    StoreRoutingPlan srcStoreInstance = new StoreRoutingPlan(srcCluster, nonResolvingStoreDef);
    // generate a conflict on the master partition
    int masterNode = srcStoreInstance.getNodeIdForPartitionId(srcStoreInstance.getMasterPartitionId(conflictKey.getBytes("UTF-8")));
    VectorClock losingClock = new VectorClock(Lists.newArrayList(new ClockEntry((short) 0, 5)), System.currentTimeMillis());
    VectorClock winningClock = new VectorClock(Lists.newArrayList(new ClockEntry((short) 1, 5)), losingClock.getTimestamp() + 1);
    srcAdminClient.storeOps.putNodeKeyValue(MULTIPLE_VERSIONS_STORE_NAME, new NodeValue<ByteArray, byte[]>(masterNode, new ByteArray(conflictKey.getBytes("UTF-8")), new Versioned<byte[]>("losing value".getBytes("UTF-8"), losingClock)));
    srcAdminClient.storeOps.putNodeKeyValue(MULTIPLE_VERSIONS_STORE_NAME, new NodeValue<ByteArray, byte[]>(masterNode, new ByteArray(conflictKey.getBytes("UTF-8")), new Versioned<byte[]>("winning value".getBytes("UTF-8"), winningClock)));
    // perform the forklifting
    ClusterForkLiftTool forkLiftTool = new ClusterForkLiftTool(srcBootStrapUrl, dstBootStrapUrl, false, // ignoreSchemaMismatch
            false, 10000, 1, 1000, Lists.newArrayList(MULTIPLE_VERSIONS_STORE_NAME), null, ClusterForkLiftTool.ForkLiftTaskMode.no_resolution);
    forkLiftTool.run();
    AdminClient dstAdminClient = new AdminClient(dstBootStrapUrl);
    for (Node node : dstAdminClient.getAdminClientCluster().getNodes()) {
        Iterator<Pair<ByteArray, Versioned<byte[]>>> entryItr = dstAdminClient.bulkFetchOps.fetchEntries(node.getId(), MULTIPLE_VERSIONS_STORE_NAME, node.getPartitionIds(), null, true);
        // count every (key, version) pair stored on the destination
        while (entryItr.hasNext()) {
            entryItr.next();
            versions++;
        }
    }
    assertEquals("Both conflicting versions present", 2, versions);
}
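Because no_resolution forklifts every version as-is, both concurrent writes survive on the destination, which is exactly what the count of two asserts. To see which node each surviving version came from, you could walk the clock entries of every fetched value; a rough sketch built only from the calls used above (the dumpVersions helper itself is hypothetical):

import java.util.Iterator;

import voldemort.client.protocol.admin.AdminClient;
import voldemort.cluster.Node;
import voldemort.utils.ByteArray;
import voldemort.utils.Pair;
import voldemort.versioning.ClockEntry;
import voldemort.versioning.VectorClock;
import voldemort.versioning.Versioned;

public class VersionInspector {

    // print every version of every key on one node, along with the clock
    // entries that produced it; storeName is whichever store was forklifted
    static void dumpVersions(AdminClient adminClient, Node node, String storeName) {
        Iterator<Pair<ByteArray, Versioned<byte[]>>> entryItr = adminClient.bulkFetchOps.fetchEntries(node.getId(), storeName, node.getPartitionIds(), null, true);
        while (entryItr.hasNext()) {
            Pair<ByteArray, Versioned<byte[]>> record = entryItr.next();
            VectorClock clock = (VectorClock) record.getSecond().getVersion();
            for (ClockEntry entry : clock.getEntries()) {
                System.out.println("key=" + record.getFirst() + " node=" + entry.getNodeId() + " version=" + entry.getVersion());
            }
        }
    }
}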
Use of voldemort.versioning.ClockEntry in project voldemort by voldemort.
The class VersionedPutPruneJob, method pruneNonReplicaEntries.
/**
 * Removes all non-replica clock entries from the provided list of
 * versioned values.
 *
 * @param vals list of versioned values to prune non-replica clock entries from
 * @param keyReplicas list of current replicas for the given key
 * @param didPrune flag set to true if anything was actually pruned
 * @return the pruned list
 */
public static List<Versioned<byte[]>> pruneNonReplicaEntries(List<Versioned<byte[]>> vals, List<Integer> keyReplicas, MutableBoolean didPrune) {
    List<Versioned<byte[]>> prunedVals = new ArrayList<Versioned<byte[]>>(vals.size());
    for (Versioned<byte[]> val : vals) {
        VectorClock clock = (VectorClock) val.getVersion();
        List<ClockEntry> clockEntries = new ArrayList<ClockEntry>();
        for (ClockEntry clockEntry : clock.getEntries()) {
            if (keyReplicas.contains((int) clockEntry.getNodeId())) {
                clockEntries.add(clockEntry);
            } else {
                didPrune.setValue(true);
            }
        }
        prunedVals.add(new Versioned<byte[]>(val.getValue(), new VectorClock(clockEntries, clock.getTimestamp())));
    }
    return prunedVals;
}
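Callers pass a MutableBoolean so they can tell afterwards whether a repair write is needed. A small usage sketch, assuming the org.apache.commons.lang.mutable.MutableBoolean the signature implies, a hypothetical replica list, and visibility of VersionedPutPruneJob:

import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.mutable.MutableBoolean;

import voldemort.versioning.Versioned;

public class PruneSketch {

    static List<Versioned<byte[]>> pruneExample(List<Versioned<byte[]>> fetchedValues) {
        // suppose nodes 0 and 2 are the current replicas for this key
        List<Integer> keyReplicas = Arrays.asList(0, 2);
        MutableBoolean didPrune = new MutableBoolean(false);
        List<Versioned<byte[]>> pruned = VersionedPutPruneJob.pruneNonReplicaEntries(fetchedValues, keyReplicas, didPrune);
        if (didPrune.booleanValue()) {
            // at least one clock entry referenced a node that no longer
            // replicates this key; the caller would write the pruned
            // versions back to repair the stored clocks
        }
        return pruned;
    }
}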
Use of voldemort.versioning.ClockEntry in project voldemort by voldemort.
The class AbstractZonedRebalanceTest, method testProxyPutDuringRebalancing.
@Test(timeout = 600000)
public void testProxyPutDuringRebalancing() throws Exception {
    logger.info("Starting testProxyPutDuringRebalancing");
    try {
        Cluster currentCluster = ServerTestUtils.getLocalZonedCluster(6, 2, new int[] { 0, 0, 0, 1, 1, 1 }, new int[][] { { 0 }, { 1, 6 }, { 2 }, { 3 }, { 4, 7 }, { 5 } });
        Cluster finalCluster = UpdateClusterUtils.createUpdatedCluster(currentCluster, 2, Lists.newArrayList(7));
        finalCluster = UpdateClusterUtils.createUpdatedCluster(finalCluster, 5, Lists.newArrayList(6));
        /**
         * Original partition map
         *
         * [s0 : p0] [s1 : p1, p6] [s2 : p2]
         *
         * [s3 : p3] [s4 : p4, p7] [s5 : p5]
         *
         * Final server partition ownership
         *
         * [s0 : p0] [s1 : p1] [s2 : p2, p7]
         *
         * [s3 : p3] [s4 : p4] [s5 : p5, p6]
         *
         * Note that rwStoreDefFileWithReplication is a "2/1/1" store def.
         *
         * Original server n-ary partition ownership
         *
         * [s0 : p0, p3-7] [s1 : p0-p7] [s2 : p1-2]
         *
         * [s3 : p0-3, p6-7] [s4 : p0-p7] [s5 : p4-5]
         *
         * Final server n-ary partition ownership
         *
         * [s0 : p0, p2-7] [s1 : p0-1] [s2 : p1-p7]
         *
         * [s3 : p0-3, p5-7] [s4 : p0-4, p7] [s5 : p4-6]
         */
        List<Integer> serverList = Arrays.asList(0, 1, 2, 3, 4, 5);
        Map<String, String> configProps = new HashMap<String, String>();
        configProps.put("admin.max.threads", "5");
        final Cluster updatedCurrentCluster = startServers(currentCluster, rwStoreDefFileWithReplication, serverList, configProps);
        ExecutorService executors = Executors.newFixedThreadPool(2);
        final AtomicBoolean rebalancingComplete = new AtomicBoolean(false);
        final List<Exception> exceptions = Collections.synchronizedList(new ArrayList<Exception>());
        // It is imperative that we test in a single shot, since multiple
        // batches would mean the proxy bridges being torn down and
        // re-established multiple times, and we could not test against the
        // source cluster topology then. getRebalanceKit uses an infinite
        // batch size, so this should be fine.
        String bootstrapUrl = getBootstrapUrl(updatedCurrentCluster, 0);
        int maxParallel = 2;
        final ClusterTestUtils.RebalanceKit rebalanceKit = ClusterTestUtils.getRebalanceKit(bootstrapUrl, maxParallel, finalCluster);
        populateData(currentCluster, rwStoreDefWithReplication);
        final AdminClient adminClient = rebalanceKit.controller.getAdminClient();
        // The plan would cause these partitions to move:
        // Partition : Donor -> Stealer
        //
        // p2 (Z-SEC) : s1 -> s0
        // p3-6 (Z-PRI) : s1 -> s2
        // p7 (Z-PRI) : s0 -> s2
        //
        // p5 (Z-SEC) : s4 -> s3
        // p6 (Z-PRI) : s4 -> s5
        //
        // Therefore, rebalancing will run on servers 0, 2, 3 and 5.
        final List<ByteArray> movingKeysList = sampleKeysFromPartition(adminClient, 1, rwStoreDefWithReplication.getName(), Arrays.asList(6), 20);
        assertTrue("Empty list of moving keys...", movingKeysList.size() > 0);
        final AtomicBoolean rebalancingStarted = new AtomicBoolean(false);
        final AtomicBoolean proxyWritesDone = new AtomicBoolean(false);
        final HashMap<String, String> baselineTuples = new HashMap<String, String>(testEntries);
        final HashMap<String, VectorClock> baselineVersions = new HashMap<String, VectorClock>();
        for (String key : baselineTuples.keySet()) {
            baselineVersions.put(key, new VectorClock());
        }
        final CountDownLatch latch = new CountDownLatch(2);
        // start the proxy-write operation
        executors.execute(new Runnable() {

            @Override
            public void run() {
                SocketStoreClientFactory factory = null;
                try {
                    // wait for the rebalancing to begin
                    List<VoldemortServer> serverList = Lists.newArrayList(serverMap.get(0), serverMap.get(2), serverMap.get(3), serverMap.get(5));
                    while (!rebalancingComplete.get()) {
                        Iterator<VoldemortServer> serverIterator = serverList.iterator();
                        while (serverIterator.hasNext()) {
                            VoldemortServer server = serverIterator.next();
                            if (ByteUtils.getString(server.getMetadataStore().get(MetadataStore.SERVER_STATE_KEY, null).get(0).getValue(), "UTF-8").compareTo(VoldemortState.REBALANCING_MASTER_SERVER.toString()) == 0) {
                                logger.info("Server " + server.getIdentityNode().getId() + " transitioned into REBALANCING MODE");
                                serverIterator.remove();
                            }
                        }
                        if (serverList.size() == 0) {
                            rebalancingStarted.set(true);
                            break;
                        }
                    }
                    if (rebalancingStarted.get()) {
                        factory = new SocketStoreClientFactory(new ClientConfig().setBootstrapUrls(getBootstrapUrl(updatedCurrentCluster, 0)).setEnableLazy(false).setSocketTimeout(120, TimeUnit.SECONDS).setClientZoneId(1));
                        final StoreClient<String, String> storeClientRW = new DefaultStoreClient<String, String>(testStoreNameRW, null, factory, 3);
                        // these proxy writes go in with a zero vector clock
                        for (ByteArray movingKey : movingKeysList) {
                            try {
                                String keyStr = ByteUtils.getString(movingKey.get(), "UTF-8");
                                String valStr = "proxy_write";
                                storeClientRW.put(keyStr, valStr);
                                baselineTuples.put(keyStr, valStr);
                                // all these keys will have a [5:1] vector
                                // clock, as node 5 is the new pseudo-master
                                baselineVersions.get(keyStr).incrementVersion(5, System.currentTimeMillis());
                                proxyWritesDone.set(true);
                                if (rebalancingComplete.get()) {
                                    break;
                                }
                            } catch (InvalidMetadataException e) {
                                // let this go
                                logger.error("Encountered an invalid metadata exception.. ", e);
                            }
                        }
                    }
                } catch (Exception e) {
                    logger.error("Exception in proxy write thread..", e);
                    exceptions.add(e);
                } finally {
                    if (factory != null)
                        factory.close();
                    latch.countDown();
                }
            }
        });
        executors.execute(new Runnable() {

            @Override
            public void run() {
                try {
                    rebalanceKit.rebalance();
                } catch (Exception e) {
                    logger.error("Error in rebalancing... ", e);
                    exceptions.add(e);
                } finally {
                    rebalancingComplete.set(true);
                    latch.countDown();
                }
            }
        });
        latch.await();
        executors.shutdown();
        executors.awaitTermination(300, TimeUnit.SECONDS);
        assertEquals("Client did not see all servers transition into rebalancing state", true, rebalancingStarted.get());
        assertEquals("Not enough time to begin proxy writing", true, proxyWritesDone.get());
        checkEntriesPostRebalance(updatedCurrentCluster, finalCluster, Lists.newArrayList(rwStoreDefWithReplication), Arrays.asList(0, 1, 2, 3, 4, 5), baselineTuples, baselineVersions);
        checkConsistentMetadata(finalCluster, serverList);
        // check that no exceptions were thrown
        if (exceptions.size() > 0) {
            for (Exception e : exceptions) {
                e.printStackTrace();
            }
            fail("Should not see any exceptions.");
        }
        // check that the proxy writes were made to the original donor, node 1;
        // using the current time as each entry's version makes this clock
        // dominate whatever cluster.xml clock the servers currently hold
        List<ClockEntry> clockEntries = new ArrayList<ClockEntry>(serverList.size());
        for (Integer nodeid : serverList) {
            clockEntries.add(new ClockEntry(nodeid.shortValue(), System.currentTimeMillis()));
        }
        VectorClock clusterXmlClock = new VectorClock(clockEntries, System.currentTimeMillis());
        for (Integer nodeid : serverList) {
            adminClient.metadataMgmtOps.updateRemoteCluster(nodeid, currentCluster, clusterXmlClock);
        }
        adminClient.setAdminClientCluster(currentCluster);
        checkForTupleEquivalence(adminClient, 1, testStoreNameRW, movingKeysList, baselineTuples, baselineVersions);
        // stop servers
        try {
            stopServer(serverList);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } catch (AssertionError ae) {
        logger.error("Assertion broken in testProxyPutDuringRebalancing ", ae);
        throw ae;
    }
}
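The expected version for each proxied key is produced by incrementing a fresh clock once for the pseudo-master, giving the [5:1] clock mentioned in the comments above. A minimal sketch of that bookkeeping, using only the incrementVersion and getEntries calls that already appear in these tests:

import voldemort.versioning.ClockEntry;
import voldemort.versioning.VectorClock;

public class ExpectedClockSketch {

    public static void main(String[] args) {
        VectorClock expected = new VectorClock();
        // one write proxied through pseudo-master node 5
        expected.incrementVersion(5, System.currentTimeMillis());
        // the clock now holds a single [5:1] entry
        for (ClockEntry entry : expected.getEntries()) {
            System.out.println("[" + entry.getNodeId() + ":" + entry.getVersion() + "]");
        }
    }
}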
Use of voldemort.versioning.ClockEntry in project voldemort by voldemort.
The class AbstractNonZonedRebalanceTest, method testProxyPutDuringRebalancing.
@Test(timeout = 600000)
public void testProxyPutDuringRebalancing() throws Exception {
    logger.info("Starting testProxyPutDuringRebalancing");
    try {
        Cluster currentCluster = ServerTestUtils.getLocalCluster(3, new int[][] { { 0 }, { 1, 3 }, { 2 } });
        Cluster finalCluster = UpdateClusterUtils.createUpdatedCluster(currentCluster, 2, Lists.newArrayList(3));
        // start servers 0, 1, 2 only
        final List<Integer> serverList = Arrays.asList(0, 1, 2);
        Map<String, String> configProps = new HashMap<String, String>();
        configProps.put("admin.max.threads", "5");
        final Cluster updatedCurrentCluster = startServers(currentCluster, rwStoreDefFileWithReplication, serverList, configProps);
        ExecutorService executors = Executors.newFixedThreadPool(2);
        final AtomicBoolean rebalancingComplete = new AtomicBoolean(false);
        final List<Exception> exceptions = Collections.synchronizedList(new ArrayList<Exception>());
        // It is imperative that we test in a single shot, since multiple
        // batches would mean the proxy bridges being torn down and
        // re-established multiple times, and we could not test against the
        // source cluster topology then.
        String bootstrapUrl = getBootstrapUrl(currentCluster, 0);
        int maxParallel = 2;
        final ClusterTestUtils.RebalanceKit rebalanceKit = ClusterTestUtils.getRebalanceKit(bootstrapUrl, maxParallel, finalCluster);
        populateData(updatedCurrentCluster, rwStoreDefWithReplication, rebalanceKit.controller.getAdminClient(), false);
        final AdminClient adminClient = rebalanceKit.controller.getAdminClient();
        // The plan would cause these partitions to move:
        // Partition : Donor -> Stealer
        // p2 (SEC) : s1 -> s0
        // p3 (PRI) : s1 -> s2
        final List<ByteArray> movingKeysList = sampleKeysFromPartition(adminClient, 1, rwStoreDefWithReplication.getName(), Arrays.asList(2, 3), 20);
        assertTrue("Empty list of moving keys...", movingKeysList.size() > 0);
        final AtomicBoolean rebalancingStarted = new AtomicBoolean(false);
        final AtomicBoolean proxyWritesDone = new AtomicBoolean(false);
        final HashMap<String, String> baselineTuples = new HashMap<String, String>(testEntries);
        final HashMap<String, VectorClock> baselineVersions = new HashMap<String, VectorClock>();
        for (String key : baselineTuples.keySet()) {
            baselineVersions.put(key, new VectorClock());
        }
        final CountDownLatch latch = new CountDownLatch(2);
        // start the proxy-write operation
        executors.execute(new Runnable() {

            @Override
            public void run() {
                SocketStoreClientFactory factory = null;
                try {
                    // wait for the rebalancing to begin
                    List<VoldemortServer> serverList = Lists.newArrayList(serverMap.get(0), serverMap.get(2));
                    while (!rebalancingComplete.get()) {
                        Iterator<VoldemortServer> serverIterator = serverList.iterator();
                        while (serverIterator.hasNext()) {
                            VoldemortServer server = serverIterator.next();
                            if (ByteUtils.getString(server.getMetadataStore().get(MetadataStore.SERVER_STATE_KEY, null).get(0).getValue(), "UTF-8").compareTo(VoldemortState.REBALANCING_MASTER_SERVER.toString()) == 0) {
                                logger.info("Server " + server.getIdentityNode().getId() + " transitioned into REBALANCING MODE");
                                serverIterator.remove();
                            }
                        }
                        if (serverList.size() == 0) {
                            rebalancingStarted.set(true);
                            break;
                        }
                    }
                    if (!rebalancingComplete.get()) {
                        factory = new SocketStoreClientFactory(new ClientConfig().setBootstrapUrls(getBootstrapUrl(updatedCurrentCluster, 0)).setEnableLazy(false).setSocketTimeout(120, TimeUnit.SECONDS));
                        final StoreClient<String, String> storeClientRW = new DefaultStoreClient<String, String>(testStoreNameRW, null, factory, 3);
                        // these proxy writes go in with a zero vector clock
                        for (ByteArray movingKey : movingKeysList) {
                            try {
                                if (rebalancingComplete.get()) {
                                    break;
                                }
                                String keyStr = ByteUtils.getString(movingKey.get(), "UTF-8");
                                String valStr = "proxy_write";
                                storeClientRW.put(keyStr, valStr);
                                baselineTuples.put(keyStr, valStr);
                                // all these keys will have a [2:1] vector
                                // clock, as node 2 is the pseudo-master in
                                // both moves
                                baselineVersions.get(keyStr).incrementVersion(2, System.currentTimeMillis());
                                proxyWritesDone.set(true);
                            } catch (InvalidMetadataException e) {
                                // let this go
                                logger.error("Encountered an invalid metadata exception.. ", e);
                            }
                        }
                    }
                } catch (Exception e) {
                    logger.error("Exception in proxy put thread", e);
                    exceptions.add(e);
                } finally {
                    if (factory != null)
                        factory.close();
                    latch.countDown();
                }
            }
        });
        executors.execute(new Runnable() {

            @Override
            public void run() {
                try {
                    rebalanceKit.rebalance();
                } catch (Exception e) {
                    logger.error("Error in rebalancing... ", e);
                    exceptions.add(e);
                } finally {
                    rebalancingComplete.set(true);
                    latch.countDown();
                }
            }
        });
        latch.await();
        executors.shutdown();
        executors.awaitTermination(300, TimeUnit.SECONDS);
        assertEquals("Client did not see all servers transition into rebalancing state", true, rebalancingStarted.get());
        assertEquals("Not enough time to begin proxy writing", true, proxyWritesDone.get());
        checkEntriesPostRebalance(updatedCurrentCluster, finalCluster, Lists.newArrayList(rwStoreDefWithReplication), Arrays.asList(0, 1, 2), baselineTuples, baselineVersions);
        checkConsistentMetadata(finalCluster, serverList);
        // check that no exceptions were thrown
        if (exceptions.size() > 0) {
            for (Exception e : exceptions) {
                e.printStackTrace();
            }
            fail("Should not see any exceptions.");
        }
        // check that the proxy writes were made to the original donor, node 1;
        // using the current time as each entry's version makes this clock
        // dominate whatever cluster.xml clock the servers currently hold
        List<ClockEntry> clockEntries = new ArrayList<ClockEntry>(serverList.size());
        for (Integer nodeid : serverList) {
            clockEntries.add(new ClockEntry(nodeid.shortValue(), System.currentTimeMillis()));
        }
        VectorClock clusterXmlClock = new VectorClock(clockEntries, System.currentTimeMillis());
        for (Integer nodeid : serverList) {
            adminClient.metadataMgmtOps.updateRemoteCluster(nodeid, currentCluster, clusterXmlClock);
        }
        adminClient.setAdminClientCluster(currentCluster);
        checkForTupleEquivalence(adminClient, 1, testStoreNameRW, movingKeysList, baselineTuples, baselineVersions);
        // stop servers
        try {
            stopServer(serverList);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } catch (AssertionError ae) {
        logger.error("Assertion broken in testProxyPutDuringRebalancing ", ae);
        throw ae;
    }
}
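Both rebalance tests end by rolling cluster.xml back with a vector clock that carries one entry per server and uses the current time as each entry's version, so the rollback clock compares as newer than anything the servers hold. A sketch of that pattern in isolation; note the assumption (ours, not the tests') that VectorClock expects its entries in ascending node-id order:

import java.util.ArrayList;
import java.util.List;

import voldemort.versioning.ClockEntry;
import voldemort.versioning.VectorClock;

public class DominatingClockSketch {

    // nodeIds should be passed in ascending order; we assume VectorClock
    // expects its ClockEntry list sorted by node id
    static VectorClock dominatingClock(List<Integer> nodeIds) {
        long now = System.currentTimeMillis();
        List<ClockEntry> entries = new ArrayList<ClockEntry>(nodeIds.size());
        for (Integer nodeId : nodeIds) {
            // the wall-clock time is far larger than any realistic write
            // counter, so each entry should dominate its counterpart
            entries.add(new ClockEntry(nodeId.shortValue(), now));
        }
        return new VectorClock(entries, now);
    }
}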