use of voldemort.versioning.VectorClock in project voldemort by voldemort.
the class ThreadPoolRoutedStore method put.
/**
 * Writes {@code versioned} under {@code key} to the nodes selected by the
 * routing strategy.
 * <p>
 * The write runs in two phases. First, a single blocking put is attempted
 * against each available node in order until one accepts it; that node
 * becomes the "master". Second, the same versioned copy is put to every
 * remaining node through the executor. The method then waits via
 * {@code blockOnPut} and fails if fewer than the store's required writes
 * succeeded. On success the caller's vector clock is incremented in place
 * for the master node.
 *
 * @param key the key to write; checked with StoreUtils.assertValidKey
 * @param versioned the value and version to write; its version is cast to
 *        VectorClock and incremented in place once the write succeeds
 * @param transforms handed unchanged to every inner store's put
 * @throws InsufficientOperationalNodesException if fewer nodes are
 *         available than required writes, if no master write succeeds, or
 *         if too few writes succeed overall
 * @throws VoldemortApplicationException if the blocking master write
 *         throws one; it is rethrown as-is
 */
@Override
public void put(final ByteArray key, final Versioned<byte[]> versioned, final byte[] transforms) throws VoldemortException {
long startNs = System.nanoTime();
StoreUtils.assertValidKey(key);
// Route the key, then keep only what availableNodes(...) returns
final List<Node> nodes = availableNodes(routingStrategy.routeRequest(key.get()));
// quickly fail if there aren't enough nodes to meet the requirement
final int numNodes = nodes.size();
if (numNodes < this.storeDef.getRequiredWrites())
throw new InsufficientOperationalNodesException("Only " + numNodes + " nodes in preference list, but " + this.storeDef.getRequiredWrites() + " writes required.");
// A count of the number of successful operations
// (shared with the executor tasks below, hence atomic)
final AtomicInteger successes = new AtomicInteger(0);
// A list of thrown exceptions, indicating the number of failures
// (also appended to from the executor tasks below)
final List<Exception> failures = new CopyOnWriteArrayList<Exception>();
// If requiredWrites > 0 then do a single blocking write to the first
// live node in the preference list if this node throws an
// ObsoleteVersionException allow it to propagate
Node master = null;
int currentNode = 0;
Versioned<byte[]> versionedCopy = null;
// Walk the list in order until one node accepts the write; that node
// becomes the master and the loop stops.
for (; currentNode < numNodes; currentNode++) {
Node current = nodes.get(currentNode);
long startNsLocal = System.nanoTime();
try {
// The copy is produced by incremented(...) using this node's id; the
// copy from the node that succeeds is the one written to all others.
versionedCopy = incremented(versioned, current.getId());
innerStores.get(current.getId()).put(key, versionedCopy, transforms);
successes.getAndIncrement();
recordSuccess(current, startNsLocal);
master = current;
break;
} catch (UnreachableStoreException e) {
recordException(current, startNsLocal, e);
failures.add(e);
} catch (VoldemortApplicationException e) {
// Application-level errors abort the whole put immediately
throw e;
} catch (Exception e) {
// Any other failure is remembered and the next node is tried; note
// that recordException is not called on this path.
failures.add(e);
}
}
if (successes.get() < 1)
throw new InsufficientOperationalNodesException("No master node succeeded!", failures);
else
// Step past the master so the parallel phase starts at the next node
currentNode++;
// A semaphore indicating the number of completed operations.
// It starts with zero permits; each parallel operation releases one
// permit when it finishes, whether it succeeded or failed.
// NOTE(review): presumably blockOnPut acquires these permits to wait
// for completions - confirm against its implementation.
final Versioned<byte[]> finalVersionedCopy = versionedCopy;
final Semaphore semaphore = new Semaphore(0, false);
// Add the operations to the pool
// attempts counts how many parallel puts were submitted
int attempts = 0;
for (; currentNode < numNodes; currentNode++) {
attempts++;
final Node node = nodes.get(currentNode);
this.executor.execute(new Runnable() {
@Override
public void run() {
long startNsLocal = System.nanoTime();
try {
innerStores.get(node.getId()).put(key, finalVersionedCopy, transforms);
successes.incrementAndGet();
recordSuccess(node, startNsLocal);
} catch (UnreachableStoreException e) {
recordException(node, startNsLocal, e);
failures.add(e);
} catch (ObsoleteVersionException e) {
// ignore this completely here
// this means that a higher version was able
// to write on this node and should be termed as clean
// success.
// (it is counted as neither a success nor a failure here)
} catch (VoldemortApplicationException e) {
// Rethrown inside the executor task, not to the caller of put;
// the finally block below still releases the permit.
throw e;
} catch (Exception e) {
logger.warn("Error in PUT on node " + node.getId() + "(" + node.getHost() + ")", e);
failures.add(e);
} finally {
// signal that the operation is complete
semaphore.release();
}
}
});
}
// Block until we get enough completions
// The master write is already counted in successes, so at most
// preferredWrites - 1 of the submitted operations are awaited here.
int blockCount = Math.min(storeDef.getPreferredWrites() - 1, attempts);
// The result of blockOnPut is treated as "did not time out"
boolean noTimeout = blockOnPut(startNs, semaphore, 0, blockCount, successes, storeDef.getPreferredWrites());
if (successes.get() < storeDef.getRequiredWrites()) {
/*
 * We don't have enough required writes, but we haven't timed out
 * yet, so block a little more if there are healthy nodes that can
 * help us achieve our target.
 */
if (noTimeout) {
int startingIndex = blockCount - 1;
blockCount = Math.max(storeDef.getPreferredWrites() - 1, attempts);
blockOnPut(startNs, semaphore, startingIndex, blockCount, successes, storeDef.getRequiredWrites());
}
// Re-check after the optional second wait; give up if still short
if (successes.get() < storeDef.getRequiredWrites())
throw new InsufficientOperationalNodesException(successes.get() + " writes succeeded, but " + this.storeDef.getRequiredWrites() + " are required.", failures);
}
// Okay looks like it worked, increment the version for the caller
// (master is non-null here: a missing master already threw above)
VectorClock versionedClock = (VectorClock) versioned.getVersion();
versionedClock.incrementVersion(master.getId(), time.getMilliseconds());
}
use of voldemort.versioning.VectorClock in project voldemort by voldemort.
the class IncrementClock method execute.
/**
 * Advances the caller's vector clock for the master node and then queues
 * the completion event on the pipeline.
 * <p>
 * When trace logging is enabled, the version is logged both before and
 * after the increment.
 *
 * @param pipeline the pipeline whose operation name is used for tracing
 *        and which receives the completion event
 */
public void execute(Pipeline pipeline) {
    if (logger.isTraceEnabled()) {
        String beforeMessage = pipeline.getOperation().getSimpleName()
                               + " versioning data - was: "
                               + versioned.getVersion();
        logger.trace(beforeMessage);
    }

    // The operation went through, so bump the version the caller holds.
    VectorClock callerClock = (VectorClock) versioned.getVersion();
    callerClock.incrementVersion(pipelineData.getMaster().getId(),
                                 time.getMilliseconds());

    if (logger.isTraceEnabled()) {
        String afterMessage = pipeline.getOperation().getSimpleName()
                              + " versioned data - now: "
                              + versioned.getVersion();
        logger.trace(afterMessage);
    }

    pipeline.addEvent(completeEvent);
}
use of voldemort.versioning.VectorClock in project voldemort by voldemort.
the class PerformPutHintedHandoff method execute.
/**
 * Sends one PUT hint per delegated slop destination, then queues the
 * completion event on the pipeline.
 * <p>
 * The versioned copy is taken from the pipeline data when present.
 * Otherwise it is created once, on the first destination visited, by
 * incrementing the caller's clock with that destination's node id, and
 * then reused for every later destination.
 *
 * @param pipeline the pipeline that receives the completion event
 */
@Override
public void execute(Pipeline pipeline) {
    Versioned<byte[]> handoffValue = pipelineData.getVersionedCopy();

    for (Node destination : pipelineData.getSynchronizer().getDelegatedSlopDestinations()) {
        int destinationId = destination.getId();

        // No copy supplied by the pipeline data: build one from the caller's clock.
        if (handoffValue == null) {
            VectorClock callerClock = (VectorClock) versioned.getVersion();
            handoffValue = new Versioned<byte[]>(versioned.getValue(),
                                                 callerClock.incremented(destinationId,
                                                                         time.getMilliseconds()));
        }

        Version handoffVersion = handoffValue.getVersion();
        if (logger.isTraceEnabled()) {
            logger.trace("Performing parallel hinted handoff for node " + destination
                         + ", store " + pipelineData.getStoreName()
                         + " key " + key
                         + ", version " + handoffVersion);
        }

        Slop hint = new Slop(pipelineData.getStoreName(),
                             Slop.Operation.PUT,
                             key,
                             handoffValue.getValue(),
                             transforms,
                             destinationId,
                             new Date());
        hintedHandoff.sendHintParallel(destination, handoffVersion, hint);
    }

    pipeline.addEvent(completeEvent);
}
use of voldemort.versioning.VectorClock in project voldemort by voldemort.
the class RetentionEnforcingStore method filterExpiredEntries.
/**
 * Removes, in place, every value whose vector-clock timestamp falls
 * before the retention cutoff (current time minus the retention time).
 * When {@code deleteExpiredEntries} is set, each removed version is also
 * deleted from the inner store.
 *
 * @param key the key the values belong to; used for the optional delete
 * @param vals the values to filter; this list itself is modified
 * @return the same {@code vals} list, now holding only unexpired values
 */
private List<Versioned<byte[]>> filterExpiredEntries(ByteArray key, List<Versioned<byte[]>> vals) {
    for (Iterator<Versioned<byte[]>> entries = vals.iterator(); entries.hasNext();) {
        VectorClock entryClock = (VectorClock) entries.next().getVersion();

        // Still inside the retention window: keep it and move on.
        if (entryClock.getTimestamp() >= (time.getMilliseconds() - this.retentionTimeMs)) {
            continue;
        }

        // Expired: drop it from the result...
        entries.remove();
        // ...and, if configured, purge the stale version from the store.
        if (deleteExpiredEntries) {
            getInnerStore().delete(key, entryClock);
        }
    }
    return vals;
}
use of voldemort.versioning.VectorClock in project voldemort by voldemort.
the class RoutedStoreTest method testReadRepairWithFailuresZZZ.
/**
 * Test to ensure that read repair happens correctly across zones in case of
 * inconsistent writes in a 3 zone cluster.
 * <p>
 * Flow: write once with all nodes up, write again with nodes 0 and 1 marked
 * unavailable, bring them back, read through the routed store, and finally
 * check that every replicating node's inner store holds exactly one version
 * carrying the second value.
 *
 * @throws Exception if the store setup or the sleep is interrupted/fails
 */
@Test
public void testReadRepairWithFailuresZZZ() throws Exception {
    cluster = VoldemortTestConstants.getSixNodeClusterWith3Zones();
    HashMap<Integer, Integer> zoneReplicationFactor = Maps.newHashMap();
    // NOTE(review): zones 1 and 2 reuse zone 0's node count. Presumably all
    // zones in this cluster are the same size - confirm against the fixture.
    zoneReplicationFactor.put(0, cluster.getNumberOfNodesInZone(0));
    zoneReplicationFactor.put(1, cluster.getNumberOfNodesInZone(0));
    zoneReplicationFactor.put(2, cluster.getNumberOfNodesInZone(0));
    // PR = RR = 6
    // PW = RW = 4
    // Zone Reads = # Zones - 1
    // Zone Writes = 1
    // Threads = 1
    RoutedStore routedStore = getStore(cluster, 6, 4, cluster.getNumberOfZones() - 1, 1, 1, zoneReplicationFactor);
    Store<ByteArray, byte[], byte[]> store = new InconsistencyResolvingStore<ByteArray, byte[], byte[]>(routedStore, new VectorClockInconsistencyResolver<byte[]>());
    BaseStoreRoutingPlan routingPlan = new BaseStoreRoutingPlan(cluster, this.storeDef);
    List<Integer> replicatingNodes = routingPlan.getReplicationNodeList(aKey.get());
    try {
        // Do the initial put with all nodes up
        store.put(aKey, new Versioned<byte[]>(aValue), null);
        List<Version> initialVersions = store.getVersions(aKey);
        assertEquals(6, initialVersions.size());
        // Every node must report the same version after the first put
        Version mainVersion = initialVersions.get(0);
        for (int i = 1; i < initialVersions.size(); i++) {
            assertEquals(mainVersion, initialVersions.get(i));
        }
        // Do another put with all nodes in the zone 0 marked as
        // unavailable. This will force the put to use a different pseudo
        // master than before.
        byte[] anotherValue = "john".getBytes();
        // In this cluster, nodes 0 and 1 are in Zone 0. Mark them
        // unavailable
        recordException(failureDetector, cluster.getNodeById(0));
        recordException(failureDetector, cluster.getNodeById(1));
        // Clone so the put works on its own copy of the clock
        Version newVersion = ((VectorClock) mainVersion).clone();
        store.put(aKey, new Versioned<byte[]>(anotherValue, newVersion), null);
        waitForOperationToComplete(500);
        // Mark the nodes in Zone 0 as available and do a get. The Required
        // reads = 4 and Zone count reads = 2 will force the client to read
        // from all the zones and do the essential read repairs.
        recordSuccess(failureDetector, cluster.getNodeById(0));
        recordSuccess(failureDetector, cluster.getNodeById(1));
        List<Versioned<byte[]>> versioneds = store.get(aKey, null);
        assertEquals(1, versioneds.size());
        assertEquals(new ByteArray(anotherValue), new ByteArray(versioneds.get(0).getValue()));
        // Read repairs are done asynchronously, so we sleep for a short
        // period. It may be a good idea to use a synchronous executor
        // service.
        Thread.sleep(500);
        for (Map.Entry<Integer, Store<ByteArray, byte[], byte[]>> innerStoreEntry : routedStore.getInnerStores().entrySet()) {
            // Only look at the nodes in the pref list
            if (replicatingNodes.contains(innerStoreEntry.getKey())) {
                List<Versioned<byte[]>> innerVersioneds = innerStoreEntry.getValue().get(aKey, null);
                // Check this node's own result, not the routed result
                // verified above, so a node that was not repaired fails
                // with a clear assertion.
                assertEquals(1, innerVersioneds.size());
                assertEquals(new ByteArray(anotherValue), new ByteArray(innerVersioneds.get(0).getValue()));
            }
        }
    } catch (VoldemortException ve) {
        fail("Unexpected error occurred : " + ve);
    }
}
Aggregations