Use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.
The class GridDhtPartitionSupplier, method handleDemandMessage.
/**
* For each demand message, the method looks up (or creates) a supply context and iterates entries across the requested partitions.
* Each entry from the iterator is placed into a prepared supply message.
*
* Once the supply message size in bytes exceeds {@link IgniteConfiguration#getRebalanceBatchSize()},
* the method sends the message to the demand node and saves the partial state of the iterated entries in the supply context,
* then restores the context when a new demand message with the same context id arrives.
*
* @param topicId Id of the topic used for the supply-demand communication.
* @param nodeId Id of the node which sent the demand message.
* @param demandMsg Demand message.
*/
public void handleDemandMessage(int topicId, UUID nodeId, GridDhtPartitionDemandMessage demandMsg) {
assert demandMsg != null;
assert nodeId != null;
T3<UUID, Integer, AffinityTopologyVersion> contextId = new T3<>(nodeId, topicId, demandMsg.topologyVersion());
if (demandMsg.rebalanceId() < 0) {
// Demand node requested context cleanup.
synchronized (scMap) {
SupplyContext sctx = scMap.get(contextId);
if (sctx != null && sctx.rebalanceId == -demandMsg.rebalanceId()) {
clearContext(scMap.remove(contextId), log);
if (log.isDebugEnabled())
log.debug("Supply context cleaned [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", supplyContext=" + sctx + "]");
} else {
if (log.isDebugEnabled())
log.debug("Stale supply context cleanup message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", supplyContext=" + sctx + "]");
}
return;
}
}
ClusterNode demanderNode = grp.shared().discovery().node(nodeId);
if (demanderNode == null) {
if (log.isDebugEnabled())
log.debug("Demand message rejected (demander left cluster) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
return;
}
IgniteRebalanceIterator iter = null;
SupplyContext sctx = null;
Set<Integer> remainingParts = null;
GridDhtPartitionSupplyMessage supplyMsg = new GridDhtPartitionSupplyMessage(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled());
try {
synchronized (scMap) {
sctx = scMap.remove(contextId);
if (sctx != null && demandMsg.rebalanceId() < sctx.rebalanceId) {
// Stale message: put the context back and bail out.
scMap.put(contextId, sctx);
if (log.isDebugEnabled())
log.debug("Stale demand message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", actualContext=" + sctx + "]");
return;
}
}
// A demand request with no associated supply context must carry a non-empty partition set.
if (sctx == null && (demandMsg.partitions() == null || demandMsg.partitions().isEmpty())) {
if (log.isDebugEnabled())
log.debug("Empty demand message (no context and partitions) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
return;
}
if (log.isDebugEnabled())
log.debug("Demand message accepted [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
assert !(sctx != null && !demandMsg.partitions().isEmpty());
// Each thread should gain prefetched batches.
long maxBatchesCnt = grp.preloader().batchesPrefetchCount() * grp.shared().gridConfig().getRebalanceThreadPoolSize();
if (sctx == null) {
if (log.isDebugEnabled())
log.debug("Starting supplying rebalancing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", fullPartitions=" + S.compact(demandMsg.partitions().fullSet()) + ", histPartitions=" + S.compact(demandMsg.partitions().historicalSet()) + "]");
} else
maxBatchesCnt = 1;
if (sctx == null || sctx.iterator == null) {
remainingParts = new HashSet<>(demandMsg.partitions().fullSet());
CachePartitionPartialCountersMap histMap = demandMsg.partitions().historicalMap();
for (int i = 0; i < histMap.size(); i++) {
int p = histMap.partitionAt(i);
remainingParts.add(p);
}
iter = grp.offheap().rebalanceIterator(demandMsg.partitions(), demandMsg.topologyVersion());
for (Integer part : demandMsg.partitions().fullSet()) {
if (iter.isPartitionMissing(part))
continue;
GridDhtLocalPartition loc = top.localPartition(part, demandMsg.topologyVersion(), false);
assert loc != null && loc.state() == GridDhtPartitionState.OWNING : "Partition should be in OWNING state: " + loc;
supplyMsg.addEstimatedKeysCount(loc.dataStore().fullSize());
}
for (int i = 0; i < histMap.size(); i++) {
int p = histMap.partitionAt(i);
if (iter.isPartitionMissing(p))
continue;
supplyMsg.addEstimatedKeysCount(histMap.updateCounterAt(i) - histMap.initialUpdateCounterAt(i));
}
} else {
iter = sctx.iterator;
remainingParts = sctx.remainingParts;
}
final int msgMaxSize = grp.preloader().batchSize();
long batchesCnt = 0;
CacheDataRow prevRow = null;
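// Iterate the entries; whenever the prepared supply message grows past the configured batch size, flush it to the demander.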
while (iter.hasNext()) {
CacheDataRow row = iter.peek();
// Prevent mvcc entry history splitting into separate batches.
boolean canFlushHistory = !grp.mvccEnabled() || prevRow != null && ((grp.sharedGroup() && row.cacheId() != prevRow.cacheId()) || !row.key().equals(prevRow.key()));
if (canFlushHistory && supplyMsg.messageSize() >= msgMaxSize) {
if (++batchesCnt >= maxBatchesCnt) {
saveSupplyContext(contextId, iter, remainingParts, demandMsg.rebalanceId());
reply(topicId, demanderNode, demandMsg, supplyMsg, contextId);
return;
} else {
if (!reply(topicId, demanderNode, demandMsg, supplyMsg, contextId))
return;
supplyMsg = new GridDhtPartitionSupplyMessage(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled());
}
}
row = iter.next();
prevRow = row;
int part = row.partition();
GridDhtLocalPartition loc = top.localPartition(part, demandMsg.topologyVersion(), false);
assert (loc != null && loc.state() == OWNING && loc.reservations() > 0) || iter.isPartitionMissing(part) : "Partition should be in OWNING state and have at least 1 reservation: " + loc;
if (iter.isPartitionMissing(part) && remainingParts.contains(part)) {
supplyMsg.missed(part);
remainingParts.remove(part);
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_MISSED))
grp.addRebalanceMissEvent(part);
if (log.isDebugEnabled())
log.debug("Requested partition is marked as missing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", p=" + part + "]");
continue;
}
if (!remainingParts.contains(part))
continue;
GridCacheEntryInfo info = extractEntryInfo(row);
if (info == null)
continue;
supplyMsg.addEntry0(part, iter.historical(part), info, grp.shared(), grp.cacheObjectContext());
if (iter.isPartitionDone(part)) {
supplyMsg.last(part, loc.updateCounter());
remainingParts.remove(part);
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_SUPPLIED))
grp.addRebalanceSupplyEvent(part);
}
}
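// Iteration is exhausted: mark every remaining partition as either fully supplied or missing.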
Iterator<Integer> remainingIter = remainingParts.iterator();
while (remainingIter.hasNext()) {
int p = remainingIter.next();
if (iter.isPartitionDone(p)) {
GridDhtLocalPartition loc = top.localPartition(p, demandMsg.topologyVersion(), false);
assert loc != null : "Supply partition is gone: grp=" + grp.cacheOrGroupName() + ", p=" + p;
supplyMsg.last(p, loc.updateCounter());
remainingIter.remove();
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_SUPPLIED))
grp.addRebalanceSupplyEvent(p);
} else if (iter.isPartitionMissing(p)) {
supplyMsg.missed(p);
remainingIter.remove();
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_MISSED))
grp.addRebalanceMissEvent(p);
}
}
assert remainingParts.isEmpty() : "Partitions after rebalance should be either done or missing: " + remainingParts;
if (sctx != null)
clearContext(sctx, log);
else
iter.close();
reply(topicId, demanderNode, demandMsg, supplyMsg, contextId);
if (log.isInfoEnabled())
log.info("Finished supplying rebalancing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
} catch (Throwable t) {
if (iter != null && !iter.isClosed()) {
try {
iter.close();
} catch (IgniteCheckedException e) {
t.addSuppressed(e);
}
}
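// The node is stopping: abandon supplying silently.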
if (grp.shared().kernalContext().isStopping())
return;
// Sending supply messages with an error requires the new protocol.
boolean sendErrMsg = demanderNode.version().compareTo(GridDhtPartitionSupplyMessageV2.AVAILABLE_SINCE) >= 0;
if (t instanceof IgniteSpiException) {
if (log.isDebugEnabled())
log.debug("Failed to send message to node (current node is stopping?) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", msg=" + t.getMessage() + ']');
sendErrMsg = false;
} else
U.error(log, "Failed to continue supplying [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t);
try {
if (sctx != null)
clearContext(sctx, log);
} catch (Throwable t1) {
U.error(log, "Failed to cleanup supplying context [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t1);
}
if (!sendErrMsg)
return;
boolean fallbackToFullRebalance = X.hasCause(t, IgniteHistoricalIteratorException.class);
try {
GridDhtPartitionSupplyMessage errMsg;
if (fallbackToFullRebalance) {
// Mark the last checkpoint as not applicable for WAL rebalance.
grp.shared().database().lastCheckpointInapplicableForWalRebalance(grp.groupId());
// Mark all remaining partitions as missed to trigger full rebalance.
if (iter == null && F.isEmpty(remainingParts)) {
remainingParts = new HashSet<>(demandMsg.partitions().fullSet());
remainingParts.addAll(demandMsg.partitions().historicalSet());
}
for (int p : Optional.ofNullable(remainingParts).orElseGet(Collections::emptySet))
supplyMsg.missed(p);
errMsg = supplyMsg;
} else {
errMsg = new GridDhtPartitionSupplyMessageV2(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled(), t);
}
reply(topicId, demanderNode, demandMsg, errMsg, contextId);
} catch (Throwable t1) {
U.error(log, "Failed to send supply error message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t1);
}
// If fallback to full rebalance is possible, the demander restarts with a full rebalance instead of triggering the failure handler.
if (!fallbackToFullRebalance) {
grp.shared().kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, new IgniteCheckedException("Failed to continue supplying [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t)));
}
}
}
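The bookkeeping around stale demand messages above is the subtle part of this method. The following self-contained sketch distills that pattern under simplified types (all names here are illustrative, not Ignite API): contexts are keyed by (nodeId, topicId, topologyVersion), a demand message older than the stored context is rejected, and a negative rebalance id requests cleanup.
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

/** Minimal sketch of the supply-context bookkeeping; names are illustrative, not Ignite API. */
public class SupplyContextSketch {
    /** Context key: (nodeId, topicId, topologyVersion), mirroring the T3 tuple in the source. */
    record ContextId(UUID nodeId, int topicId, long topVer) { }

    /** Partial iteration state kept between demand messages. */
    record Context(long rebalanceId) { }

    private final Map<ContextId, Context> scMap = new HashMap<>();

    /** Returns {@code false} for stale demand messages, restoring the newer context. */
    boolean acceptDemand(ContextId id, long rebalanceId) {
        synchronized (scMap) {
            Context sctx = scMap.remove(id);

            if (sctx != null && rebalanceId < sctx.rebalanceId()) {
                // The stored context belongs to a newer rebalance: put it back and drop the message.
                scMap.put(id, sctx);

                return false;
            }

            return true;
        }
    }

    /** A negative rebalance id in a demand message requests cleanup of the matching context. */
    void cleanup(ContextId id, long rebalanceId) {
        synchronized (scMap) {
            Context sctx = scMap.get(id);

            if (sctx != null && sctx.rebalanceId() == -rebalanceId)
                scMap.remove(id);
        }
    }
}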
Use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.
The class IndexProcessor, method store.
/**
* Updates indexes with a new row. Note that the key is unique per cache, so if the cache contains multiple indexes,
* the key should be removed from all indexes other than the one being updated.
*
* @param idxs List of indexes to update.
* @param newRow Cache row to store in the index.
* @param prevRow Optional cache row that will be replaced with the new row.
* @param prevRowAvailable Whether the previous row is available.
*/
public void store(Collection<? extends Index> idxs, CacheDataRow newRow, @Nullable CacheDataRow prevRow, boolean prevRowAvailable) throws IgniteSpiException {
IgniteCheckedException err = null;
ddlLock.readLock().lock();
try {
for (Index idx : idxs)
err = updateIndex(idx, newRow, prevRow, prevRowAvailable, err);
if (err != null)
throw err;
} catch (IgniteCheckedException e) {
throw new IgniteSpiException("Failed to store row in index", e);
} finally {
ddlLock.readLock().unlock();
}
}
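The loop above threads an error accumulator through updateIndex so that one failing index does not stop the remaining indexes from being updated. Below is a self-contained sketch of that pattern, assuming updateIndex keeps the first error and continues; Index and the exception types are simplified stand-ins.
import java.util.Collection;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/** Sketch of the accumulate-then-rethrow update pattern; types and names are stand-ins. */
class IndexStoreSketch {
    interface Index {
        void onUpdate(Object newRow, Object prevRow) throws Exception;
    }

    /** Guards index structures against concurrent DDL, as in the source. */
    private final ReadWriteLock ddlLock = new ReentrantReadWriteLock();

    void store(Collection<Index> idxs, Object newRow, Object prevRow) {
        Exception err = null;

        ddlLock.readLock().lock();

        try {
            // Update every index even if an earlier one failed; only the first error is preserved.
            for (Index idx : idxs) {
                try {
                    idx.onUpdate(newRow, prevRow);
                }
                catch (Exception e) {
                    if (err == null)
                        err = e;
                }
            }

            if (err != null)
                throw new IllegalStateException("Failed to store row in index", err);
        }
        finally {
            ddlLock.readLock().unlock();
        }
    }
}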
Use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.
The class TcpDiscoverySpi, method initializeImpl.
/**
* Initializes the discovery SPI implementation (client or server) unless it is already initialized.
*/
private void initializeImpl() {
if (impl != null)
return;
sslEnable = ignite().configuration().getSslContextFactory() != null;
initFailureDetectionTimeout();
if (!forceSrvMode && (Boolean.TRUE.equals(ignite.configuration().isClientMode()))) {
if (ackTimeout == 0)
ackTimeout = DFLT_ACK_TIMEOUT_CLIENT;
if (sockTimeout == 0)
sockTimeout = DFLT_SOCK_TIMEOUT_CLIENT;
impl = new ClientImpl(this);
ctxInitLatch.countDown();
} else {
if (ackTimeout == 0)
ackTimeout = DFLT_ACK_TIMEOUT;
if (sockTimeout == 0)
sockTimeout = DFLT_SOCK_TIMEOUT;
impl = new ServerImpl(this);
}
metricsUpdateFreq = ignite.configuration().getMetricsUpdateFrequency();
if (!failureDetectionTimeoutEnabled()) {
assertParameter(sockTimeout > 0, "sockTimeout > 0");
assertParameter(ackTimeout > 0, "ackTimeout > 0");
assertParameter(maxAckTimeout > ackTimeout, "maxAckTimeout > ackTimeout");
assertParameter(reconCnt > 0, "reconnectCnt > 0");
}
assertParameter(netTimeout > 0, "networkTimeout > 0");
assertParameter(ipFinder != null, "ipFinder != null");
assertParameter(metricsUpdateFreq > 0, "metricsUpdateFreq > 0 (inited from igniteConfiguration.metricsUpdateFrequency)");
assertParameter(ipFinderCleanFreq > 0, "ipFinderCleanFreq > 0");
assertParameter(locPort > 1023, "localPort > 1023");
assertParameter(locPortRange >= 0, "localPortRange >= 0");
assertParameter(locPort + locPortRange <= 0xffff, "locPort + locPortRange <= 0xffff");
assertParameter(threadPri > 0, "threadPri > 0");
assertParameter(statsPrintFreq >= 0, "statsPrintFreq >= 0");
if (isSslEnabled()) {
try {
SSLContext sslCtx = ignite().configuration().getSslContextFactory().create();
sslSockFactory = sslCtx.getSocketFactory();
sslSrvSockFactory = sslCtx.getServerSocketFactory();
} catch (IgniteException e) {
throw new IgniteSpiException("Failed to create SSL context. SSL factory: " + ignite.configuration().getSslContextFactory(), e);
}
}
try {
locHost = U.resolveLocalHost(locAddr);
} catch (IOException e) {
throw new IgniteSpiException("Unknown local address: " + locAddr, e);
}
if (log.isDebugEnabled()) {
log.debug(configInfo("localHost", locHost.getHostAddress()));
log.debug(configInfo("localPort", locPort));
log.debug(configInfo("localPortRange", locPortRange));
log.debug(configInfo("threadPri", threadPri));
if (!failureDetectionTimeoutEnabled()) {
log.debug("Failure detection timeout is ignored because at least one of the parameters from this list" + " has been set explicitly: 'sockTimeout', 'ackTimeout', 'maxAckTimeout', 'reconnectCount'.");
log.debug(configInfo("networkTimeout", netTimeout));
log.debug(configInfo("sockTimeout", sockTimeout));
log.debug(configInfo("ackTimeout", ackTimeout));
log.debug(configInfo("maxAckTimeout", maxAckTimeout));
log.debug(configInfo("reconnectCount", reconCnt));
} else
log.debug(configInfo("failureDetectionTimeout", failureDetectionTimeout()));
log.debug(configInfo("ipFinder", ipFinder));
log.debug(configInfo("ipFinderCleanFreq", ipFinderCleanFreq));
log.debug(configInfo("metricsUpdateFreq", metricsUpdateFreq));
log.debug(configInfo("statsPrintFreq", statsPrintFreq));
}
// Warn on odd network timeout.
if (netTimeout < 3000)
U.warn(log, "Network timeout is too low (at least 3000 ms recommended): " + netTimeout);
if (ipFinder instanceof TcpDiscoveryMulticastIpFinder) {
TcpDiscoveryMulticastIpFinder mcastIpFinder = ((TcpDiscoveryMulticastIpFinder) ipFinder);
if (mcastIpFinder.getLocalAddress() == null)
mcastIpFinder.setLocalAddress(locAddr);
}
cfgNodeId = ignite.configuration().getNodeId();
}
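A hedged configuration sketch tying these checks together: explicitly setting any of the low-level timeouts disables the failure detection timeout, as the debug log above notes, and the values must satisfy the assertParameter constraints. The addresses and ports below are placeholders.
import java.util.Collections;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;

public class DiscoveryConfigExample {
    public static IgniteConfiguration configure() {
        TcpDiscoverySpi spi = new TcpDiscoverySpi();

        // Setting sockTimeout/ackTimeout explicitly causes failureDetectionTimeout to be ignored.
        spi.setSocketTimeout(5_000);
        spi.setAckTimeout(5_000);

        // Must satisfy the assertParameter checks, e.g. localPort > 1023 and locPort + locPortRange <= 0xffff.
        spi.setLocalPort(47500);
        spi.setLocalPortRange(10);

        spi.setIpFinder(new TcpDiscoveryVmIpFinder()
            .setAddresses(Collections.singleton("127.0.0.1:47500..47509")));

        return new IgniteConfiguration().setDiscoverySpi(spi);
    }
}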
Use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.
The class ServerImpl, method sendCustomEvent.
/**
* {@inheritDoc}
*/
@Override
public void sendCustomEvent(DiscoverySpiCustomMessage evt) {
try {
TcpDiscoveryCustomEventMessage msg;
if (((CustomMessageWrapper) evt).delegate() instanceof DiscoveryServerOnlyCustomMessage)
msg = new TcpDiscoveryServerOnlyCustomEventMessage(getLocalNodeId(), evt, U.marshal(spi.marshaller(), evt));
else
msg = new TcpDiscoveryCustomEventMessage(getLocalNodeId(), evt, U.marshal(spi.marshaller(), evt));
Span rootSpan = tracing.create(TraceableMessagesTable.traceName(msg.getClass()))
.addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.ID), () -> getLocalNodeId().toString())
.addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.CONSISTENT_ID), () -> locNode.consistentId().toString())
.addTag(SpanTags.MESSAGE_CLASS, () -> ((CustomMessageWrapper) evt).delegate().getClass().getSimpleName())
.addLog(() -> "Created");
// This root span will be the parent of spans on both the local and remote nodes.
msg.spanContainer().serializedSpanBytes(tracing.serialize(rootSpan));
msgWorker.addMessage(msg);
rootSpan.addLog(() -> "Sent").end();
} catch (IgniteCheckedException e) {
throw new IgniteSpiException("Failed to marshal custom event: " + evt, e);
}
}
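The catch clause here is this page's theme in miniature: a checked IgniteCheckedException from marshalling is rethrown as the unchecked IgniteSpiException that SPI callers expect. A minimal stand-alone sketch of that shape, with java.io serialization standing in for Ignite's marshaller:
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;

/** Sketch of the marshal-then-wrap pattern; java.io serialization stands in for Ignite's marshaller. */
final class MarshalSketch {
    static byte[] marshalCustomEvent(Serializable evt) {
        try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
             ObjectOutputStream out = new ObjectOutputStream(bos)) {
            out.writeObject(evt);
            out.flush();

            return bos.toByteArray();
        }
        catch (IOException e) {
            // The real code wraps IgniteCheckedException into IgniteSpiException here.
            throw new IllegalStateException("Failed to marshal custom event: " + evt, e);
        }
    }
}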
Use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.
The class ClientImpl, method sendCustomEvent.
/**
* {@inheritDoc}
*/
@Override
public void sendCustomEvent(DiscoverySpiCustomMessage evt) {
State state = this.state;
if (state == DISCONNECTED)
throw new IgniteClientDisconnectedException(null, "Failed to send custom message: client is disconnected.");
if (state == STOPPED || state == SEGMENTED || state == STARTING)
throw new IgniteException("Failed to send custom message: client is " + state.name().toLowerCase() + ".");
try {
TcpDiscoveryCustomEventMessage msg;
if (((CustomMessageWrapper) evt).delegate() instanceof DiscoveryServerOnlyCustomMessage)
msg = new TcpDiscoveryServerOnlyCustomEventMessage(getLocalNodeId(), evt, U.marshal(spi.marshaller(), evt));
else
msg = new TcpDiscoveryCustomEventMessage(getLocalNodeId(), evt, U.marshal(spi.marshaller(), evt));
Span rootSpan = tracing.create(TraceableMessagesTable.traceName(msg.getClass()))
.addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.ID), () -> getLocalNodeId().toString())
.addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.CONSISTENT_ID), () -> locNode.consistentId().toString())
.addTag(SpanTags.MESSAGE_CLASS, () -> ((CustomMessageWrapper) evt).delegate().getClass().getSimpleName())
.addLog(() -> "Created");
// This root span will be the parent of spans on both the local and remote nodes.
msg.spanContainer().serializedSpanBytes(tracing.serialize(rootSpan));
sockWriter.sendMessage(msg);
rootSpan.addLog(() -> "Sent").end();
} catch (IgniteCheckedException e) {
throw new IgniteSpiException("Failed to marshal custom event: " + evt, e);
}
}
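The client variant differs from the server one mainly in the state guard it runs first. A distilled sketch of that guard follows; the enum and exception types are simplified stand-ins for ClientImpl.State, IgniteClientDisconnectedException, and IgniteException.
/** Sketch of the client-side state guard preceding the send; types are stand-ins. */
public class ClientSendGuardSketch {
    enum State { CONNECTED, STARTING, DISCONNECTED, SEGMENTED, STOPPED }

    private volatile State state = State.CONNECTED;

    void checkSendAllowed() {
        // Read the volatile field once so both checks see a consistent value.
        State s = state;

        if (s == State.DISCONNECTED)
            throw new IllegalStateException("Failed to send custom message: client is disconnected.");

        if (s == State.STOPPED || s == State.SEGMENTED || s == State.STARTING)
            throw new IllegalStateException("Failed to send custom message: client is " + s.name().toLowerCase() + ".");
    }
}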