Use of org.apache.ignite.failure.FailureContext in project ignite by apache.
The class GridDhtPartitionSupplier, method handleDemandMessage.
/**
* For each demand message, the method looks up (or creates) a supply context and starts iterating entries across the requested partitions.
* Each entry produced by the iterator is placed into a prepared supply message.
*
* If the supply message size in bytes becomes greater than {@link IgniteConfiguration#getRebalanceBatchSize()},
* the method sends the message to the demander node and saves the partial state of the iterated entries in the supply context,
* then restores the context when a new demand message with the same context id arrives.
*
* @param topicId Id of the topic used for the supply-demand communication.
* @param nodeId Id of the node which sent the demand message.
* @param demandMsg Demand message.
*/
public void handleDemandMessage(int topicId, UUID nodeId, GridDhtPartitionDemandMessage demandMsg) {
assert demandMsg != null;
assert nodeId != null;
T3<UUID, Integer, AffinityTopologyVersion> contextId = new T3<>(nodeId, topicId, demandMsg.topologyVersion());
if (demandMsg.rebalanceId() < 0) {
// Demand node requested context cleanup.
synchronized (scMap) {
SupplyContext sctx = scMap.get(contextId);
if (sctx != null && sctx.rebalanceId == -demandMsg.rebalanceId()) {
clearContext(scMap.remove(contextId), log);
if (log.isDebugEnabled())
log.debug("Supply context cleaned [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", supplyContext=" + sctx + "]");
} else {
if (log.isDebugEnabled())
log.debug("Stale supply context cleanup message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", supplyContext=" + sctx + "]");
}
return;
}
}
ClusterNode demanderNode = grp.shared().discovery().node(nodeId);
if (demanderNode == null) {
if (log.isDebugEnabled())
log.debug("Demand message rejected (demander left cluster) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
return;
}
IgniteRebalanceIterator iter = null;
SupplyContext sctx = null;
Set<Integer> remainingParts = null;
GridDhtPartitionSupplyMessage supplyMsg = new GridDhtPartitionSupplyMessage(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled());
try {
synchronized (scMap) {
sctx = scMap.remove(contextId);
if (sctx != null && demandMsg.rebalanceId() < sctx.rebalanceId) {
// Stale message, put the context back and exit.
scMap.put(contextId, sctx);
if (log.isDebugEnabled())
log.debug("Stale demand message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", actualContext=" + sctx + "]");
return;
}
}
// Demand request should not contain empty partitions if no supply context is associated with it.
if (sctx == null && (demandMsg.partitions() == null || demandMsg.partitions().isEmpty())) {
if (log.isDebugEnabled())
log.debug("Empty demand message (no context and partitions) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
return;
}
if (log.isDebugEnabled())
log.debug("Demand message accepted [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
assert !(sctx != null && !demandMsg.partitions().isEmpty());
long maxBatchesCnt = /* Each thread should gain prefetched batches. */
grp.preloader().batchesPrefetchCount() * grp.shared().gridConfig().getRebalanceThreadPoolSize();
if (sctx == null) {
if (log.isDebugEnabled())
log.debug("Starting supplying rebalancing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", fullPartitions=" + S.compact(demandMsg.partitions().fullSet()) + ", histPartitions=" + S.compact(demandMsg.partitions().historicalSet()) + "]");
} else
maxBatchesCnt = 1;
if (sctx == null || sctx.iterator == null) {
remainingParts = new HashSet<>(demandMsg.partitions().fullSet());
CachePartitionPartialCountersMap histMap = demandMsg.partitions().historicalMap();
for (int i = 0; i < histMap.size(); i++) {
int p = histMap.partitionAt(i);
remainingParts.add(p);
}
iter = grp.offheap().rebalanceIterator(demandMsg.partitions(), demandMsg.topologyVersion());
for (Integer part : demandMsg.partitions().fullSet()) {
if (iter.isPartitionMissing(part))
continue;
GridDhtLocalPartition loc = top.localPartition(part, demandMsg.topologyVersion(), false);
assert loc != null && loc.state() == GridDhtPartitionState.OWNING : "Partition should be in OWNING state: " + loc;
supplyMsg.addEstimatedKeysCount(loc.dataStore().fullSize());
}
for (int i = 0; i < histMap.size(); i++) {
int p = histMap.partitionAt(i);
if (iter.isPartitionMissing(p))
continue;
supplyMsg.addEstimatedKeysCount(histMap.updateCounterAt(i) - histMap.initialUpdateCounterAt(i));
}
} else {
iter = sctx.iterator;
remainingParts = sctx.remainingParts;
}
final int msgMaxSize = grp.preloader().batchSize();
long batchesCnt = 0;
CacheDataRow prevRow = null;
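// Main supply loop: stream entries from the rebalance iterator into supply messages, flushing a message once it reaches the configured batch size.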
while (iter.hasNext()) {
CacheDataRow row = iter.peek();
// Prevent mvcc entry history splitting into separate batches.
boolean canFlushHistory = !grp.mvccEnabled() || prevRow != null && ((grp.sharedGroup() && row.cacheId() != prevRow.cacheId()) || !row.key().equals(prevRow.key()));
if (canFlushHistory && supplyMsg.messageSize() >= msgMaxSize) {
if (++batchesCnt >= maxBatchesCnt) {
saveSupplyContext(contextId, iter, remainingParts, demandMsg.rebalanceId());
reply(topicId, demanderNode, demandMsg, supplyMsg, contextId);
return;
} else {
if (!reply(topicId, demanderNode, demandMsg, supplyMsg, contextId))
return;
supplyMsg = new GridDhtPartitionSupplyMessage(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled());
}
}
row = iter.next();
prevRow = row;
int part = row.partition();
GridDhtLocalPartition loc = top.localPartition(part, demandMsg.topologyVersion(), false);
assert (loc != null && loc.state() == OWNING && loc.reservations() > 0) || iter.isPartitionMissing(part) : "Partition should be in OWNING state and have at least 1 reservation: " + loc;
if (iter.isPartitionMissing(part) && remainingParts.contains(part)) {
supplyMsg.missed(part);
remainingParts.remove(part);
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_MISSED))
grp.addRebalanceMissEvent(part);
if (log.isDebugEnabled())
log.debug("Requested partition is marked as missing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", p=" + part + "]");
continue;
}
if (!remainingParts.contains(part))
continue;
GridCacheEntryInfo info = extractEntryInfo(row);
if (info == null)
continue;
supplyMsg.addEntry0(part, iter.historical(part), info, grp.shared(), grp.cacheObjectContext());
if (iter.isPartitionDone(part)) {
supplyMsg.last(part, loc.updateCounter());
remainingParts.remove(part);
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_SUPPLIED))
grp.addRebalanceSupplyEvent(part);
}
}
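// The iterator is exhausted: mark each remaining partition as either fully supplied or missed.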
Iterator<Integer> remainingIter = remainingParts.iterator();
while (remainingIter.hasNext()) {
int p = remainingIter.next();
if (iter.isPartitionDone(p)) {
GridDhtLocalPartition loc = top.localPartition(p, demandMsg.topologyVersion(), false);
assert loc != null : "Supply partition is gone: grp=" + grp.cacheOrGroupName() + ", p=" + p;
supplyMsg.last(p, loc.updateCounter());
remainingIter.remove();
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_SUPPLIED))
grp.addRebalanceSupplyEvent(p);
} else if (iter.isPartitionMissing(p)) {
supplyMsg.missed(p);
remainingIter.remove();
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_MISSED))
grp.addRebalanceMissEvent(p);
}
}
assert remainingParts.isEmpty() : "Partitions after rebalance should be either done or missing: " + remainingParts;
if (sctx != null)
clearContext(sctx, log);
else
iter.close();
reply(topicId, demanderNode, demandMsg, supplyMsg, contextId);
if (log.isInfoEnabled())
log.info("Finished supplying rebalancing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
} catch (Throwable t) {
if (iter != null && !iter.isClosed()) {
try {
iter.close();
} catch (IgniteCheckedException e) {
t.addSuppressed(e);
}
}
if (grp.shared().kernalContext().isStopping())
return;
// Sending supply messages with an error requires the newer (V2) message protocol.
boolean sendErrMsg = demanderNode.version().compareTo(GridDhtPartitionSupplyMessageV2.AVAILABLE_SINCE) >= 0;
if (t instanceof IgniteSpiException) {
if (log.isDebugEnabled())
log.debug("Failed to send message to node (current node is stopping?) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", msg=" + t.getMessage() + ']');
sendErrMsg = false;
} else
U.error(log, "Failed to continue supplying [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t);
try {
if (sctx != null)
clearContext(sctx, log);
} catch (Throwable t1) {
U.error(log, "Failed to cleanup supplying context [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t1);
}
if (!sendErrMsg)
return;
boolean fallbackToFullRebalance = X.hasCause(t, IgniteHistoricalIteratorException.class);
try {
GridDhtPartitionSupplyMessage errMsg;
if (fallbackToFullRebalance) {
// Mark the last checkpoint as not applicable for WAL rebalance.
grp.shared().database().lastCheckpointInapplicableForWalRebalance(grp.groupId());
// Mark all remaining partitions as missed to trigger full rebalance.
if (iter == null && F.isEmpty(remainingParts)) {
remainingParts = new HashSet<>(demandMsg.partitions().fullSet());
remainingParts.addAll(demandMsg.partitions().historicalSet());
}
for (int p : Optional.ofNullable(remainingParts).orElseGet(Collections::emptySet)) supplyMsg.missed(p);
errMsg = supplyMsg;
} else {
errMsg = new GridDhtPartitionSupplyMessageV2(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled(), t);
}
reply(topicId, demanderNode, demandMsg, errMsg, contextId);
} catch (Throwable t1) {
U.error(log, "Failed to send supply error message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t1);
}
// If fallback to full rebalance is possible, the demander will retry with a full rebalance request instead of triggering the failure handler here.
if (!fallbackToFullRebalance) {
grp.shared().kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, new IgniteCheckedException("Failed to continue supplying [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t)));
}
}
}
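The catch block above is the producer side of Ignite's failure handling: the supplier wraps the original Throwable into a FailureContext with FailureType.CRITICAL_ERROR and hands it to the kernal context's failure processor. What happens next depends on the FailureHandler configured on the node. Below is a minimal sketch of a custom handler that logs critical failures and delegates the stop decision to the standard StopNodeFailureHandler; the class name LoggingFailureHandler and the logging details are illustrative additions, not part of the Ignite sources shown on this page.

import org.apache.ignite.Ignite;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.failure.AbstractFailureHandler;
import org.apache.ignite.failure.FailureContext;
import org.apache.ignite.failure.FailureType;
import org.apache.ignite.failure.StopNodeFailureHandler;

/** Illustrative handler: log CRITICAL_ERROR failures, then let the standard handler stop the node. */
public class LoggingFailureHandler extends AbstractFailureHandler {
    /** Standard handler that performs the actual node stop. */
    private final StopNodeFailureHandler delegate = new StopNodeFailureHandler();

    /** {@inheritDoc} */
    @Override protected boolean handle(Ignite ignite, FailureContext failureCtx) {
        if (failureCtx.type() == FailureType.CRITICAL_ERROR)
            ignite.log().error("Critical failure detected: " + failureCtx, failureCtx.error());

        // Delegate the invalidate/stop decision to the standard handler.
        return delegate.onFailure(ignite, failureCtx);
    }

    /** Registers the handler on the node configuration. */
    public static IgniteConfiguration configure() {
        return new IgniteConfiguration().setFailureHandler(new LoggingFailureHandler());
    }
}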
Use of org.apache.ignite.failure.FailureContext in project ignite by apache.
The class RendezvousAffinityFunction, method assignPartition.
/**
* Returns collection of nodes (primary first) for specified partition.
*
* @param part Partition.
* @param nodes Nodes.
* @param backups Number of backups.
* @param neighborhoodCache Neighborhood.
* @return Assignment.
*/
public List<ClusterNode> assignPartition(int part, List<ClusterNode> nodes, int backups, @Nullable Map<UUID, Collection<ClusterNode>> neighborhoodCache) {
if (nodes.size() <= 1)
return nodes;
IgniteBiTuple<Long, ClusterNode>[] hashArr = (IgniteBiTuple<Long, ClusterNode>[]) new IgniteBiTuple[nodes.size()];
for (int i = 0; i < nodes.size(); i++) {
ClusterNode node = nodes.get(i);
Object nodeHash = resolveNodeHash(node);
long hash = hash(nodeHash.hashCode(), part);
hashArr[i] = F.t(hash, node);
}
final int primaryAndBackups = backups == Integer.MAX_VALUE ? nodes.size() : Math.min(backups + 1, nodes.size());
Iterable<ClusterNode> sortedNodes = new LazyLinearSortedContainer(hashArr, primaryAndBackups);
// REPLICATED cache case
if (backups == Integer.MAX_VALUE)
return replicatedAssign(nodes, sortedNodes);
Iterator<ClusterNode> it = sortedNodes.iterator();
List<ClusterNode> res = new ArrayList<>(primaryAndBackups);
Collection<ClusterNode> allNeighbors = new HashSet<>();
ClusterNode primary = it.next();
res.add(primary);
if (exclNeighbors)
allNeighbors.addAll(neighborhoodCache.get(primary.id()));
// Select backups.
if (backups > 0) {
while (it.hasNext() && res.size() < primaryAndBackups) {
ClusterNode node = it.next();
try {
if ((backupFilter != null && backupFilter.apply(primary, node)) || (affinityBackupFilter != null && affinityBackupFilter.apply(node, res)) || (affinityBackupFilter == null && backupFilter == null)) {
if (exclNeighbors) {
if (!allNeighbors.contains(node)) {
res.add(node);
allNeighbors.addAll(neighborhoodCache.get(node.id()));
}
} else
res.add(node);
}
} catch (Exception ex) {
ignite.context().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, ex));
}
}
}
if (res.size() < primaryAndBackups && nodes.size() >= primaryAndBackups && exclNeighbors) {
// Iterate again in case no nodes pass the exclude-neighbors criteria for backups.
it = sortedNodes.iterator();
it.next();
while (it.hasNext() && res.size() < primaryAndBackups) {
ClusterNode node = it.next();
if (!res.contains(node))
res.add(node);
}
if (!exclNeighborsWarn) {
LT.warn(log, "Affinity function excludeNeighbors property is ignored " + "because the topology does not have enough nodes to assign backups.");
exclNeighborsWarn = true;
}
}
assert res.size() <= primaryAndBackups;
return res;
}
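For context, the assignment policy above is usually driven from cache configuration: the excludeNeighbors flag asks for backups to avoid the primary's physical host, and the fallback loop above relaxes that constraint (with a one-time warning) when the topology is too small. The sketch below shows only the configuration side; the cache name and backup count are arbitrary examples.

import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
import org.apache.ignite.configuration.CacheConfiguration;

public class AffinityConfigSketch {
    /** Cache configuration whose backups are kept off the primary's host when the topology allows it. */
    public static CacheConfiguration<Integer, String> cacheCfg() {
        RendezvousAffinityFunction aff = new RendezvousAffinityFunction();

        // Best effort only: with too few hosts the function falls back to
        // same-host backups and logs the warning shown in assignPartition.
        aff.setExcludeNeighbors(true);

        return new CacheConfiguration<Integer, String>("partitioned")
            .setBackups(1)
            .setAffinity(aff);
    }
}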
Use of org.apache.ignite.failure.FailureContext in project ignite by apache.
The class IndexingDefragmentation, method defragmentTable.
/**
* Defragments a single table: copies the rows of every inline index into the new (defragmented) index trees.
*
* @return {@code true} when the table was processed.
*/
private boolean defragmentTable(CacheGroupContext newCtx, IntMap<LinkMap> mappingByPartition, CheckpointTimeoutLock cpLock, Runnable cancellationChecker, int pageSize, PageMemoryEx oldCachePageMem, PageMemory newCachePageMemory, long cpLockThreshold, AtomicLong lastCpLockTs, TableIndexes indexes) throws IgniteCheckedException {
cpLock.checkpointReadLock();
try {
TreeIterator treeIterator = new TreeIterator(pageSize);
GridCacheContext<?, ?> cctx = indexes.cctx;
cancellationChecker.run();
for (InlineIndex oldIdx : indexes.idxs) {
InlineIndexRowHandler oldRowHnd = oldIdx.segment(0).rowHandler();
SortedIndexDefinition idxDef = (SortedIndexDefinition) indexing.indexDefinition(oldIdx.id());
InlineIndexImpl newIdx = new DefragIndexFactory(newCtx.offheap(), newCachePageMemory, oldIdx).createIndex(cctx, idxDef).unwrap(InlineIndexImpl.class);
int segments = oldIdx.segmentsCount();
for (int i = 0; i < segments; ++i) {
treeIterator.iterate(oldIdx.segment(i), oldCachePageMem, (theTree, io, pageAddr, idx) -> {
cancellationChecker.run();
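// Periodically release and immediately reacquire the checkpoint read lock so a long defragmentation pass does not block checkpoints.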
if (System.currentTimeMillis() - lastCpLockTs.get() >= cpLockThreshold) {
cpLock.checkpointReadUnlock();
cpLock.checkpointReadLock();
lastCpLockTs.set(System.currentTimeMillis());
}
assert 1 == io.getVersion() : "IO version " + io.getVersion() + " is not supported by current defragmentation algorithm." + " Please implement copying of tree in a new format.";
BPlusIO<IndexRow> h2IO = DefragIndexFactory.wrap(io, oldRowHnd);
IndexRow row = theTree.getRow(h2IO, pageAddr, idx);
if (row instanceof DefragIndexRowImpl) {
DefragIndexRowImpl r = (DefragIndexRowImpl) row;
CacheDataRow cacheDataRow = r.cacheDataRow();
int partition = cacheDataRow.partition();
long link = r.link();
LinkMap map = mappingByPartition.get(partition);
long newLink = map.get(link);
// Use old row handler, as MetaInfo is copied from old tree.
DefragIndexRowImpl newRow = DefragIndexRowImpl.create(oldRowHnd, newLink, r, ((MvccIO) io).storeMvccInfo());
newIdx.putIndexRow(newRow);
}
return true;
});
}
}
return true;
} catch (Throwable t) {
newCtx.cacheObjectContext().kernalContext().failure().process(new FailureContext(CRITICAL_ERROR, t));
throw t;
} finally {
cpLock.checkpointReadUnlock();
}
}
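The iterate callback above uses a simple time-slicing idiom: once cpLockThreshold elapses, it releases the checkpoint read lock and immediately reacquires it so a pending checkpoint can make progress. A self-contained sketch of the same idiom with a plain java.util.concurrent lock follows; the class and method names are illustrative, and the real code uses Ignite's internal CheckpointTimeoutLock rather than a generic Lock.

import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;

public final class LockTimeSlicer {
    private LockTimeSlicer() {
        // No instances.
    }

    /**
     * Briefly releases {@code lock} if more than {@code thresholdMs} passed since the last
     * acquisition, letting waiting writers (the checkpointer, in the code above) make progress.
     */
    public static void maybeYield(Lock lock, AtomicLong lastAcquireTs, long thresholdMs) {
        if (System.currentTimeMillis() - lastAcquireTs.get() >= thresholdMs) {
            lock.unlock();
            lock.lock();

            lastAcquireTs.set(System.currentTimeMillis());
        }
    }
}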
Use of org.apache.ignite.failure.FailureContext in project ignite by apache.
The class InlineIndexImpl, method putx.
/**
* Puts the given row into the index segment.
*
* @param idxRow Row to put.
* @param segment Segment number.
* @param flag If {@code true}, uses {@code putx} (the previous row is not returned); otherwise uses {@code put} and checks whether a previous row existed.
* @return {@code true} if an existing row was replaced.
*/
private boolean putx(IndexRowImpl idxRow, int segment, boolean flag) throws IgniteCheckedException {
try {
boolean replaced;
if (flag)
replaced = segments[segment].putx(idxRow);
else {
IndexRow prevRow0 = segments[segment].put(idxRow);
replaced = prevRow0 != null;
}
return replaced;
} catch (Throwable t) {
cctx.kernalContext().failure().process(new FailureContext(CRITICAL_ERROR, t));
throw t;
}
}
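putx follows the same convention as the other snippets on this page: any Throwable escaping an index mutation is treated as critical, wrapped into a FailureContext and handed to the failure processor before being rethrown. As a minimal standalone illustration of the FailureContext class itself (the exception and its message are made up for the example):

import org.apache.ignite.failure.FailureContext;
import org.apache.ignite.failure.FailureType;

public class FailureContextDemo {
    public static void main(String[] args) {
        // FailureContext is an immutable pair of a failure type and its root cause.
        FailureContext ctx = new FailureContext(
            FailureType.CRITICAL_ERROR,
            new IllegalStateException("index tree corrupted"));

        System.out.println(ctx.type());  // The failure type (CRITICAL_ERROR here).
        System.out.println(ctx.error()); // The original cause.
    }
}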
Use of org.apache.ignite.failure.FailureContext in project ignite by apache.
The class GridDhtTxLocal, method finishTx.
/**
* @param commit Commit flag.
* @param prepFut Prepare future.
* @param fut Finish future.
*/
private void finishTx(boolean commit, @Nullable IgniteInternalFuture prepFut, GridDhtTxFinishFuture fut) {
assert prepFut == null || prepFut.isDone();
boolean primarySync = syncMode() == PRIMARY_SYNC;
IgniteCheckedException err = null;
if (!commit) {
final IgniteInternalFuture<?> lockFut = tryRollbackAsync();
if (lockFut != null) {
if (lockFut instanceof DhtLockFuture)
((DhtLockFuture<?>) lockFut).onError(rollbackException());
else if (!lockFut.isDone()) {
/*
* Prevents race with {@link GridDhtTransactionalCacheAdapter#lockAllAsync
* (GridCacheContext, ClusterNode, GridNearLockRequest, CacheEntryPredicate[])}
*/
final IgniteInternalFuture finalPrepFut = prepFut;
lockFut.listen(new IgniteInClosure<IgniteInternalFuture<?>>() {
@Override
public void apply(IgniteInternalFuture<?> ignored) {
finishTx(false, finalPrepFut, fut);
}
});
return;
}
}
}
if (!commit && prepFut != null) {
try {
prepFut.get();
} catch (IgniteCheckedException e) {
if (log.isDebugEnabled())
log.debug("Failed to prepare transaction [tx=" + this + ", e=" + e + ']');
} finally {
prepFut = null;
}
}
try {
if (prepFut != null)
// Check for errors.
prepFut.get();
boolean finished = localFinish(commit, false);
if (!finished)
err = new IgniteCheckedException("Failed to finish transaction [commit=" + commit + ", tx=" + CU.txString(this) + ']');
} catch (IgniteCheckedException e) {
logTxFinishErrorSafe(log, commit, e);
// Treat heuristic exception as critical.
if (X.hasCause(e, IgniteTxHeuristicCheckedException.class))
cctx.kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, e));
err = e;
} catch (Throwable t) {
fut.onDone(t);
throw t;
}
if (primarySync)
sendFinishReply(err);
if (err != null)
fut.rollbackOnError(err);
else
fut.finish(commit);
}
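finishTx sits beneath the public transaction API: when the finish step fails with an IgniteTxHeuristicCheckedException in its cause chain, the error is treated as critical and routed to the failure processor, while other checked errors only fail the finish future. From user code this machinery is exercised through the regular transaction lifecycle; a hedged sketch follows, where the cache name "accounts" and the money-transfer logic are invented for illustration.

import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.transactions.Transaction;
import org.apache.ignite.transactions.TransactionConcurrency;
import org.apache.ignite.transactions.TransactionIsolation;

public class TxFinishExample {
    /** Moves {@code amount} between two accounts inside a pessimistic transaction. Assumes both keys exist. */
    public static void transfer(Ignite ignite, int fromId, int toId, long amount) {
        IgniteCache<Integer, Long> accounts = ignite.cache("accounts");

        try (Transaction tx = ignite.transactions().txStart(
            TransactionConcurrency.PESSIMISTIC, TransactionIsolation.REPEATABLE_READ)) {

            long from = accounts.get(fromId);
            long to = accounts.get(toId);

            accounts.put(fromId, from - amount);
            accounts.put(toId, to + amount);

            // Commit drives the prepare/finish machinery shown above; heuristic
            // finish failures are reported to the node's failure processor.
            tx.commit();
        }
        // If commit() was not reached, closing the transaction rolls it back.
    }
}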