use of org.infinispan.commands.statetransfer.StateResponseCommand in project infinispan by infinispan.
the class OutboundTransferTask method sendEntries.
private CompletionStage<Void> sendEntries(List<InternalCacheEntry<Object, Object>> entries, boolean isLast) {
   Map<Integer, StateChunk> chunks = new HashMap<>();
   for (InternalCacheEntry<Object, Object> ice : entries) {
      int segmentId = keyPartitioner.getSegment(ice.getKey());
      if (segments.contains(segmentId)) {
         StateChunk chunk = chunks.computeIfAbsent(segmentId, segment -> new StateChunk(segment, new ArrayList<>(), isLast));
         chunk.getCacheEntries().add(ice);
      }
   }
   if (isLast) {
      for (PrimitiveIterator.OfInt iter = segments.iterator(); iter.hasNext(); ) {
         int segmentId = iter.nextInt();
         chunks.computeIfAbsent(segmentId, segment -> new StateChunk(segment, Collections.emptyList(), true));
      }
   }
   if (chunks.isEmpty())
      return CompletableFutures.completedNull();
   if (log.isTraceEnabled()) {
      if (isLast) {
         log.tracef("Sending last chunk to node %s containing %d cache entries from segments %s", destination, entries.size(), segments);
      } else {
         log.tracef("Sending to node %s %d cache entries from segments %s", destination, entries.size(), chunks.keySet());
      }
   }
   StateResponseCommand cmd = commandsFactory.buildStateResponseCommand(topologyId, chunks.values(), applyState, pushTransfer);
   try {
      return rpcManager.invokeCommand(destination, cmd, SingleResponseCollector.validOnly(), rpcOptions)
                       .handle((response, throwable) -> {
                          if (throwable == null) {
                             onChunkReplicated.accept(chunks.values());
                             return null;
                          }
                          logSendException(throwable);
                          cancel();
                          return null;
                       });
   } catch (IllegalLifecycleStateException e) {
      // Manager is shutting down, ignore the error
      cancel();
   } catch (Exception e) {
      logSendException(e);
      cancel();
   }
   return CompletableFutures.completedNull();
}
use of org.infinispan.commands.statetransfer.StateResponseCommand in project infinispan by infinispan.
the class BaseTxStateTransferOverwriteTest method doTestWhereCommitOccursAfterStateTransferBeginsBeforeCompletion.
/**
 * When L1 is enabled, this test should not be run when a previous value is present, as that would cause timeouts.
 * Due to how locking works with L1, this scenario cannot occur when a previous value exists.
 */
protected void doTestWhereCommitOccursAfterStateTransferBeginsBeforeCompletion(final TestWriteOperation op) throws Exception {
   if (l1Enabled() && op.getPreviousValue() != null) {
      fail("This test cannot be run with L1 when a previous value is set");
   }
   // Test scenario:
   // cache0,1,2 are in the cluster, an owner leaves
   // Key k is in the cache, and is transferred to the non owner
   // A user operation also modifies key k causing an invalidation
   // on the non owner which is getting the state transfer
   final AdvancedCache<Object, Object> primaryOwnerCache = cache(0, cacheName).getAdvancedCache();
   final AdvancedCache<Object, Object> backupOwnerCache = cache(1, cacheName).getAdvancedCache();
   final AdvancedCache<Object, Object> nonOwnerCache = cache(2, cacheName).getAdvancedCache();
   final MagicKey key = new MagicKey(primaryOwnerCache, backupOwnerCache);
   // Prepare for replace/remove: put a previous value in cache0
   final Object previousValue = op.getPreviousValue();
   if (previousValue != null) {
      primaryOwnerCache.put(key, previousValue);
      assertEquals(previousValue, primaryOwnerCache.get(key));
      log.tracef("Previous value inserted: %s = %s", key, previousValue);
      assertEquals(previousValue, nonOwnerCache.get(key));
      if (l1Enabled()) {
         assertIsInL1(nonOwnerCache, key);
      }
   }
   int preJoinTopologyId = primaryOwnerCache.getDistributionManager().getCacheTopology().getTopologyId();
   // Block any state response commands on cache0
   CheckPoint checkPoint = new CheckPoint();
   ControlledRpcManager blockingRpcManager0 = ControlledRpcManager.replaceRpcManager(primaryOwnerCache);
   ControlledRpcManager blockingRpcManager2 = ControlledRpcManager.replaceRpcManager(nonOwnerCache);
   // The execution of the write/prepare/commit commands is controlled with the BlockingInterceptor
   blockingRpcManager0.excludeCommands(BackupWriteCommand.class, PrepareCommand.class, CommitCommand.class, TxCompletionNotificationCommand.class);
   blockingRpcManager2.excludeCommands(BackupAckCommand.class);
   // Block the rebalance confirmation on cache0
   int rebalanceTopologyId = preJoinTopologyId + 2;
   blockRebalanceConfirmation(primaryOwnerCache.getCacheManager(), checkPoint, rebalanceTopologyId);
   assertEquals(primaryOwnerCache.getCacheManager().getCoordinator(), primaryOwnerCache.getCacheManager().getAddress());
   // Remove the leaver
   log.trace("Stopping the cache");
   backupOwnerCache.getCacheManager().stop();
   // Wait for the write CH to contain the joiner everywhere
   eventuallyEquals(2, () -> primaryOwnerCache.getRpcManager().getMembers().size());
   eventuallyEquals(2, () -> nonOwnerCache.getRpcManager().getMembers().size());
   assertEquals(primaryOwnerCache.getCacheManager().getCoordinator(), primaryOwnerCache.getCacheManager().getAddress());
   // Wait for both nodes to start state transfer
   if (transactional) {
      blockingRpcManager0.expectCommand(StateTransferGetTransactionsCommand.class).send().receiveAll();
      blockingRpcManager2.expectCommand(StateTransferGetTransactionsCommand.class).send().receiveAll();
   }
   ControlledRpcManager.BlockedRequest<StateTransferStartCommand> blockedStateRequest0 = blockingRpcManager0.expectCommand(StateTransferStartCommand.class);
   ControlledRpcManager.BlockedRequest<StateTransferStartCommand> blockedStateRequest2 = blockingRpcManager2.expectCommand(StateTransferStartCommand.class);
   // Unblock the state request from node 2
   // Don't wait for response, because node 2 might be sending the first state response on the request thread
   blockedStateRequest2.send().receiveAllAsync();
   // Wait for cache0 to collect the state to send to node 2 (including our previous value).
   ControlledRpcManager.BlockedRequest<StateResponseCommand> blockedStateResponse0 = blockingRpcManager0.expectCommand(StateResponseCommand.class);
   // Every PutKeyValueCommand will be blocked before committing the entry on cache1
   CyclicBarrier beforeCommitCache1Barrier = new CyclicBarrier(2);
   BlockingInterceptor<?> blockingInterceptor1 = new BlockingInterceptor<>(beforeCommitCache1Barrier, op.getCommandClass(), true, false);
   extractInterceptorChain(nonOwnerCache).addInterceptorAfter(blockingInterceptor1, EntryWrappingInterceptor.class);
   // Put/Replace/Remove from cache0 with cache0 as primary owner, cache1 will become a backup owner for the retry
   // The put command will be blocked on cache1 just before committing the entry.
   Future<Object> future = fork(() -> op.perform(primaryOwnerCache, key));
   // Wait for the entry to be wrapped on node 2
   // The replicated command could be either a non-tx BackupWriteCommand or a PrepareCommand
   beforeCommitCache1Barrier.await(10, TimeUnit.SECONDS);
   // Remove the interceptor so we don't mess up any other state transfer puts
   removeAllBlockingInterceptorsFromCache(nonOwnerCache);
   // Allow the state to be applied on cache1 (writing the old value for our entry)
   blockedStateResponse0.send().receiveAll();
   // Wait for second in line to finish applying the state, but don't allow the rebalance confirmation to be processed.
   // (It would change the topology and it would trigger a retry for the command.)
   // Don't wait for response, because node 2 might be sending the first state response on the request thread
   blockedStateRequest0.send().receiveAllAsync();
   blockingRpcManager2.expectCommand(StateResponseCommand.class).send().receiveAll();
   checkPoint.awaitStrict("pre_rebalance_confirmation_" + rebalanceTopologyId + "_from_" + primaryOwnerCache.getCacheManager().getAddress(), 10, SECONDS);
   // Now allow the command to commit on cache1
   beforeCommitCache1Barrier.await(10, TimeUnit.SECONDS);
   // Wait for the command to finish and check that it didn't fail
   Object result = future.get(10, TimeUnit.SECONDS);
   assertEquals(op.getReturnValue(), result);
   log.tracef("%s operation is done", op);
   // Allow the rebalance confirmation to proceed and wait for the topology to change everywhere
   checkPoint.trigger("resume_rebalance_confirmation_" + rebalanceTopologyId + "_from_" + primaryOwnerCache.getCacheManager().getAddress());
   checkPoint.trigger("resume_rebalance_confirmation_" + rebalanceTopologyId + "_from_" + nonOwnerCache.getCacheManager().getAddress());
   TestingUtil.waitForNoRebalance(primaryOwnerCache, nonOwnerCache);
   switch (op) {
      case REMOVE:
      case REMOVE_EXACT:
         break;
      default:
         assertIsInContainerImmortal(primaryOwnerCache, key);
         assertIsInContainerImmortal(nonOwnerCache, key);
         break;
   }
   // Check the value to make sure data container contains correct value
   assertEquals(op.getValue(), primaryOwnerCache.get(key));
   assertEquals(op.getValue(), nonOwnerCache.get(key));
}
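The test's write is coordinated through a two-party CyclicBarrier handshake: the forked operation parks at the first await() inside the BlockingInterceptor, the test thread performs its state-transfer steps, and a second await() releases the write. Here is a standalone sketch of that handshake using only java.util.concurrent; it contains no Infinispan types, and the names are purely illustrative.

import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class BarrierHandshake {
   public static void main(String[] args) throws Exception {
      CyclicBarrier beforeCommit = new CyclicBarrier(2);
      ExecutorService pool = Executors.newSingleThreadExecutor();
      // "Forked" operation: pauses just before it would commit, like the blocked write command
      Future<String> write = pool.submit(() -> {
         beforeCommit.await(10, TimeUnit.SECONDS);  // first rendezvous: signal the blocking point was reached
         beforeCommit.await(10, TimeUnit.SECONDS);  // second rendezvous: wait to be released
         return "committed";
      });
      // Test thread: the first rendezvous means the operation reached the blocking point
      beforeCommit.await(10, TimeUnit.SECONDS);
      System.out.println("operation is paused, run checks here");
      // The second rendezvous releases the operation, then we verify its result
      beforeCommit.await(10, TimeUnit.SECONDS);
      System.out.println(write.get(10, TimeUnit.SECONDS));
      pool.shutdown();
   }
}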
use of org.infinispan.commands.statetransfer.StateResponseCommand in project infinispan by infinispan.
the class StateResponseOrderingTest method testStateResponseWhileRestartingBrokenTransfers.
public void testStateResponseWhileRestartingBrokenTransfers() throws Throwable {
   // The initial topology is different from the other method's
   consistentHashFactory.setOwnerIndexes(new int[][] { { 1, 2, 3 }, { 2, 1, 3 } });
   consistentHashFactory.triggerRebalance(cache(0));
   // waitForStableTopology doesn't work here, since the cache looks already "balanced"
   // So we wait for the primary owner of segment 1 to change
   eventuallyEquals(address(2), () -> advancedCache(0).getDistributionManager().getReadConsistentHash().locatePrimaryOwnerForSegment(1));
   // See https://issues.jboss.org/browse/ISPN-3120?focusedCommentId=12777231
   // Start with segment 0 owned by [cache1, cache2, cache3], and segment 1 owned by [cache2, cache1, cache3]
   // Trigger a rebalance with cache0 becoming an owner for both segments
   // Wait for either cache1 or cache2 to send a StateResponseCommand
   // Block the state response on cache0
   // Kill the node that didn't receive the request
   // Block new state requests from cache0 so that the killed node's segment doesn't have a transfer task
   // Unblock the first state response
   // Check that the StateResponseCommand hasn't marked state transfer as completed
   // Unblock the new state request
   // Wait for the state transfer to end and check that state hasn't been lost
   StateSequencer sequencer = new StateSequencer();
   sequencer.logicalThread("st", "st:block_first_state_response", "st:kill_node", "st:block_second_state_request",
         "st:resume_first_state_response", "st:after_first_state_response", "st:check_incomplete",
         "st:resume_second_state_request");
   final AtomicReference<Address> firstResponseSender = new AtomicReference<>();
   CommandMatcher firstStateResponseMatcher = new CommandMatcher() {
      CommandMatcher realMatcher = matchCommand(StateResponseCommand.class).matchCount(0).build();

      @Override
      public boolean accept(ReplicableCommand command) {
         if (!realMatcher.accept(command))
            return false;
         firstResponseSender.set(((StateResponseCommand) command).getOrigin());
         return true;
      }
   };
   advanceOnInboundRpc(sequencer, cache(0), firstStateResponseMatcher)
         .before("st:block_first_state_response", "st:resume_first_state_response")
         .after("st:after_first_state_response");
   CommandMatcher secondStateRequestMatcher = new CommandMatcher() {
      private final AtomicInteger counter = new AtomicInteger();

      @Override
      public boolean accept(ReplicableCommand command) {
         if (command instanceof StateTransferGetTransactionsCommand) {
            // Command 2 is the first sent after the node is killed
            if (counter.getAndIncrement() == 2)
               return true;
            log.debugf("Not blocking command %s", command);
         }
         return false;
      }
   };
   advanceOnOutboundRpc(sequencer, cache(0), secondStateRequestMatcher)
         .before("st:block_second_state_request", "st:resume_second_state_request");
   DistributionManager dm0 = advancedCache(0).getDistributionManager();
   StateTransferManager stm0 = TestingUtil.extractComponentRegistry(cache(0)).getStateTransferManager();
   MagicKey k1 = new MagicKey("k1", cache(1));
   assertEquals(Arrays.asList(address(1), address(2), address(3)), dm0.getCacheTopology().getDistribution(k1).readOwners());
   cache(0).put(k1, "v1");
   MagicKey k2 = new MagicKey("k2", cache(2));
   assertEquals(Arrays.asList(address(2), address(1), address(3)), dm0.getCacheTopology().getDistribution(k2).readOwners());
   cache(0).put(k2, "v2");
   // Start the rebalance
   consistentHashFactory.setOwnerIndexes(new int[][] { { 0, 1, 2 }, { 0, 2, 1 } });
   consistentHashFactory.triggerRebalance(cache(0));
   // Wait for cache0 to receive the state response
   sequencer.enter("st:kill_node");
   assertNotNull(dm0.getCacheTopology().getPendingCH());
   // No need to update the owner indexes, the CH factory only knows about the cache members
   int nodeToKeep = managerIndex(firstResponseSender.get());
   int nodeToKill = nodeToKeep == 1 ? 2 : 1;
   log.debugf("Blocked state response from %s, killing %s", firstResponseSender.get(), manager(nodeToKill));
   cache(nodeToKill).stop();
   eventuallyEquals(3, () -> dm0.getCacheTopology().getMembers().size());
   sequencer.exit("st:kill_node");
   sequencer.enter("st:check_incomplete");
   assertTrue(stm0.isStateTransferInProgress());
   sequencer.exit("st:check_incomplete");
   // Only the 3 live caches are in the collection, wait for the rehash to end
   waitForNoRebalance(cache(0), cache(nodeToKeep), cache(3));
   assertTrue(dm0.getCacheTopology().isReadOwner(k1));
   assertTrue(dm0.getCacheTopology().isReadOwner(k2));
   assertEquals("v1", cache(0).get(k1));
   assertEquals("v2", cache(0).get(k2));
}
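Both anonymous CommandMatchers above follow the same idea: keep a counter (or delegate to a base matcher) and accept only one specific occurrence of a command, so the sequencer blocks exactly the invocation the test cares about. A generic sketch of that "match only the Nth occurrence" predicate in plain Java follows; the names are illustrative and none of this is Infinispan API.

import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;

public class NthMatchPredicate {
   // Wraps a base predicate and accepts only the nth (0-based) input that the base predicate matches
   static <T> Predicate<T> nthMatch(Predicate<T> base, int n) {
      AtomicInteger counter = new AtomicInteger();
      return value -> base.test(value) && counter.getAndIncrement() == n;
   }

   public static void main(String[] args) {
      // Accept only the third matching "state request", like the counter check above
      Predicate<String> thirdStateRequest = nthMatch(s -> s.startsWith("StateTransfer"), 2);
      String[] commands = { "StateTransferGetTransactionsCommand", "PutKeyValueCommand",
            "StateTransferGetTransactionsCommand", "StateTransferGetTransactionsCommand" };
      for (String cmd : commands) {
         System.out.println(cmd + " blocked=" + thirdStateRequest.test(cmd));
      }
   }
}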
use of org.infinispan.commands.statetransfer.StateResponseCommand in project infinispan by infinispan.
the class ConditionalOperationPrimaryOwnerFailTest method testEntryNotWrapped.
public void testEntryNotWrapped() throws Throwable {
   assertClusterSize("Wrong cluster size!", 3);
   final Object key = new MagicKey(cache(0), cache(1));
   final Cache<Object, Object> futureBackupOwnerCache = cache(2);
   cache(0).put(key, INITIAL_VALUE);
   final PerCacheInboundInvocationHandler spyHandler = spyInvocationHandler(futureBackupOwnerCache);
   final EntryFactory spyEntryFactory = spyEntryFactory(futureBackupOwnerCache);
   // it blocks the StateResponseCommand.class
   final CountDownLatch latch1 = new CountDownLatch(1);
   final CountDownLatch latch2 = new CountDownLatch(1);
   doAnswer(invocation -> {
      CacheRpcCommand command = (CacheRpcCommand) invocation.getArguments()[0];
      if (command instanceof StateResponseCommand) {
         log.debugf("Blocking command %s", command);
         latch2.countDown();
         latch1.await();
      }
      return invocation.callRealMethod();
   }).when(spyHandler).handle(any(CacheRpcCommand.class), any(Reply.class), any(DeliverOrder.class));
   doAnswer(invocation -> {
      InvocationContext context = (InvocationContext) invocation.getArguments()[0];
      log.debugf("wrapEntryForWriting invoked with %s", context);
      CompletionStage<Void> stage = (CompletionStage<Void>) invocation.callRealMethod();
      CompletionStages.join(stage);
      assertNull(context.lookupEntry(key), "Entry should not be wrapped!");
      return stage;
   }).when(spyEntryFactory).wrapEntryForWriting(any(InvocationContext.class), any(), anyInt(), anyBoolean(), anyBoolean(), any());
   Future<?> killMemberResult = fork(() -> killMember(1));
   // await until the key is received from state transfer (the command is blocked now...)
   latch2.await(30, TimeUnit.SECONDS);
   futureBackupOwnerCache.put(key, FINAL_VALUE);
   latch1.countDown();
   killMemberResult.get(30, TimeUnit.SECONDS);
}
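The first doAnswer above pairs a Mockito spy with a two-latch handshake: the stubbed handler signals that the StateResponseCommand has arrived (latch2), then parks until the test releases it (latch1). A self-contained sketch of that shape is shown below, assuming Mockito is on the classpath; the Handler class is an illustrative stand-in for PerCacheInboundInvocationHandler, not a real Infinispan type.

import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class BlockingSpyExample {
   // Illustrative component standing in for the real inbound invocation handler
   static class Handler {
      void handle(String command) {
         System.out.println("handling " + command);
      }
   }

   public static void main(String[] args) throws Exception {
      CountDownLatch release = new CountDownLatch(1);   // like latch1: released by the test when it is ready
      CountDownLatch blocked = new CountDownLatch(1);   // like latch2: signals the command is now blocked
      Handler spyHandler = spy(new Handler());
      // Same shape as the first doAnswer above: block one specific command, let everything else through
      doAnswer(invocation -> {
         String command = (String) invocation.getArguments()[0];
         if (command.equals("StateResponseCommand")) {
            blocked.countDown();
            release.await(30, TimeUnit.SECONDS);
         }
         return invocation.callRealMethod();
      }).when(spyHandler).handle(anyString());

      Thread t = new Thread(() -> spyHandler.handle("StateResponseCommand"));
      t.start();
      blocked.await(30, TimeUnit.SECONDS);  // wait until the command is parked inside the spy
      System.out.println("command is blocked, run the concurrent operation here");
      release.countDown();                  // let the command resume
      t.join();
   }
}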
use of org.infinispan.commands.statetransfer.StateResponseCommand in project infinispan by infinispan.
the class PushTransferTest method testNodeJoin.
public void testNodeJoin() throws Exception {
   List<MagicKey> keys = init();
   EmbeddedCacheManager cm4 = addClusterEnabledCacheManager(TestDataSCI.INSTANCE, null, TRANSPORT_FLAGS);
   cm4.defineConfiguration(CACHE_NAME, defaultConfig.build());
   int startTopologyId = c1.getAdvancedCache().getDistributionManager().getCacheTopology().getTopologyId();
   BlockingLocalTopologyManager bltm = BlockingLocalTopologyManager.replaceTopologyManager(cm4, CACHE_NAME);
   CountDownLatch statePushedLatch = new CountDownLatch(1);
   CountDownLatch stateAppliedLatch = new CountDownLatch(1);
   TestingUtil.addCacheStartingHook(cm4, (name, cr) -> {
      PerCacheInboundInvocationHandler originalHandler = cr.getComponent(PerCacheInboundInvocationHandler.class);
      AbstractDelegatingHandler newHandler = new AbstractDelegatingHandler(originalHandler) {
         @Override
         public void handle(CacheRpcCommand command, Reply reply, DeliverOrder order) {
            // StateResponseCommand is topology-aware, so handle() just queues it on the remote executor
            if (command instanceof StateResponseCommand) {
               log.tracef("State received on %s", cm4.getAddress());
               statePushedLatch.countDown();
            }
            originalHandler.handle(command, response -> {
               log.tracef("State applied on %s", cm4.getAddress());
               stateAppliedLatch.countDown();
               reply.reply(response);
            }, order);
         }
      };
      BasicComponentRegistry bcr = cr.getComponent(BasicComponentRegistry.class);
      bcr.replaceComponent(PerCacheInboundInvocationHandler.class.getName(), newHandler, false);
      cr.rewire();
      cr.cacheComponents();
   });
   Future<Cache> c4Future = fork(() -> cm4.getCache(CACHE_NAME));
   // Any StateResponseCommand should be delayed until node 4 has the TRANSITORY topology
   assertTrue(statePushedLatch.await(10, TimeUnit.SECONDS));
   assertFalse(stateAppliedLatch.await(100, TimeUnit.MILLISECONDS));
   // Finish the rebalance, unblocking the StateResponseCommand(s)
   bltm.confirmTopologyUpdate(CacheTopology.Phase.TRANSITORY);
   assertEquals(0, stateAppliedLatch.getCount());
   bltm.confirmTopologyUpdate(CacheTopology.Phase.NO_REBALANCE);
   Cache c4 = c4Future.get(30, TimeUnit.SECONDS);
   TestingUtil.blockUntilViewsReceived(30000, false, c1, c2, c3, c4);
   TestingUtil.waitForNoRebalance(c1, c2, c3, c4);
   for (MagicKey key : keys) {
      int copies = Stream.of(c1, c2, c3, c4).mapToInt(c -> c.getAdvancedCache().getDataContainer().containsKey(key) ? 1 : 0).sum();
      assertEquals("Key " + key + " has incorrect number of copies", 2, copies);
   }
}
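The cache-starting hook above works by decorating the inbound invocation handler: the wrapper observes the incoming StateResponseCommand (counting down statePushedLatch) and then delegates to the original handler with a wrapped reply that fires stateAppliedLatch once processing finishes. The following plain-Java sketch shows that decorator-with-callback shape; the Handler interface and names are illustrative stand-ins for Infinispan's PerCacheInboundInvocationHandler and Reply, not real API.

import java.util.concurrent.CountDownLatch;
import java.util.function.Consumer;

public class DelegatingHandlerExample {
   // Illustrative stand-in for command handling with an asynchronous reply callback
   interface Handler {
      void handle(String command, Consumer<Object> reply);
   }

   public static void main(String[] args) {
      CountDownLatch statePushed = new CountDownLatch(1);
      CountDownLatch stateApplied = new CountDownLatch(1);
      Handler original = (command, reply) -> reply.accept("applied " + command);
      // Decorator: observe the command before delegating, and wrap the reply to observe completion
      Handler decorated = (command, reply) -> {
         if (command.equals("StateResponseCommand")) {
            statePushed.countDown();            // state has arrived, but has not yet been applied
         }
         original.handle(command, response -> {
            stateApplied.countDown();           // the original handler finished processing
            reply.accept(response);
         });
      };
      decorated.handle("StateResponseCommand", response -> System.out.println(response));
      System.out.println("pushed=" + (statePushed.getCount() == 0) + ", applied=" + (stateApplied.getCount() == 0));
   }
}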