Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project ignite by apache: class CacheContinuousQueryHandler, method register.
/**
* {@inheritDoc}
*/
@Override
public RegisterStatus register(final UUID nodeId, final UUID routineId, final GridKernalContext ctx) throws IgniteCheckedException {
assert nodeId != null;
assert routineId != null;
assert ctx != null;
initLocalListener(locLsnr, ctx);
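// Lazily chain initialization to the P2P unmarshal future so the remote filter and transformer are deployed before first use.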
if (initFut == null) {
initFut = p2pUnmarshalFut.chain((fut) -> {
try {
fut.get();
initRemoteFilter(getEventFilter0(), ctx);
IgniteClosure trans = getTransformer0();
if (trans != null)
ctx.resource().injectGeneric(trans);
} catch (IgniteCheckedException | ExceptionInInitializerError e) {
throw new IgniteException("Failed to initialize a continuous query.", e);
}
return null;
});
}
if (initFut.error() != null)
throw new IgniteCheckedException("Failed to initialize a continuous query.", initFut.error());
entryBufs = new ConcurrentHashMap<>();
ackBuf = new CacheContinuousQueryAcknowledgeBuffer();
rcvs = new ConcurrentHashMap<>();
this.nodeId = nodeId;
this.routineId = routineId;
this.ctx = ctx;
final boolean loc = nodeId.equals(ctx.localNodeId());
assert !skipPrimaryCheck || loc;
log = ctx.log(CU.CONTINUOUS_QRY_LOG_CATEGORY);
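// Listener invoked by the cache on each update; it filters events and routes them to the local subscriber or into per-partition buffers for remote delivery.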
CacheContinuousQueryListener<K, V> lsnr = new CacheContinuousQueryListener<K, V>() {
@Override
public void onBeforeRegister() {
GridCacheContext<K, V> cctx = cacheContext(ctx);
if (cctx != null && !cctx.isLocal())
cctx.topology().readLock();
}
@Override
public void onAfterRegister() {
GridCacheContext<K, V> cctx = cacheContext(ctx);
if (cctx != null && !cctx.isLocal())
cctx.topology().readUnlock();
}
@Override
public void onRegister() {
GridCacheContext<K, V> cctx = cacheContext(ctx);
if (cctx != null && !cctx.isLocal())
locInitUpdCntrs = toCountersMap(cctx.topology().localUpdateCounters(false));
}
@Override
public boolean keepBinary() {
return keepBinary;
}
@Override
public void onEntryUpdated(final CacheContinuousQueryEvent<K, V> evt, boolean primary, final boolean recordIgniteEvt, GridDhtAtomicAbstractUpdateFuture fut) {
if (ignoreExpired && evt.getEventType() == EventType.EXPIRED)
return;
if (log.isDebugEnabled())
log.debug("Entry updated on affinity node [evt=" + evt + ", primary=" + primary + ']');
final GridCacheContext<K, V> cctx = cacheContext(ctx);
// Cache context is null if the cache was stopped.
if (cctx == null)
return;
if (!needNotify(false, cctx, -1, -1, evt))
return;
// skipPrimaryCheck is set only when listen locally for replicated cache events.
assert !skipPrimaryCheck || (cctx.isReplicated() && ctx.localNodeId().equals(nodeId));
if (asyncCb) {
ContinuousQueryAsyncClosure clsr = new ContinuousQueryAsyncClosure(primary, evt, recordIgniteEvt, fut);
ctx.pools().asyncCallbackPool().execute(clsr, evt.partitionId());
} else {
final boolean notify = filter(evt);
if (log.isDebugEnabled())
log.debug("Filter invoked for event [evt=" + evt + ", primary=" + primary + ", notify=" + notify + ']');
if (primary || skipPrimaryCheck)
onEntryUpdate(evt, notify, loc, recordIgniteEvt);
else
handleBackupEntry(cctx, evt.entry());
}
}
@Override
public void onUnregister() {
try {
CacheEntryEventFilter filter = getEventFilter();
if (filter instanceof PlatformContinuousQueryFilter)
((PlatformContinuousQueryFilter) filter).onQueryUnregister();
} catch (IgniteCheckedException e) {
if (log.isDebugEnabled()) {
log.debug("Failed to execute the onUnregister callback " + "on the continuoue query listener. " + "[nodeId=" + nodeId + ", routineId=" + routineId + ", cacheName=" + cacheName + ", err=" + e + "]");
}
}
}
@Override
public void cleanupOnAck(Map<Integer, Long> updateCntrs) {
for (Map.Entry<Integer, Long> e : updateCntrs.entrySet()) {
CacheContinuousQueryEventBuffer buf = entryBufs.get(e.getKey());
if (buf != null)
buf.cleanupOnAck(e.getValue());
}
}
@Override
public void flushOnExchangeDone(GridKernalContext ctx, AffinityTopologyVersion topVer) {
assert topVer != null;
try {
GridCacheContext<K, V> cctx = cacheContext(ctx);
ClusterNode node = ctx.discovery().node(nodeId);
for (Map.Entry<Integer, CacheContinuousQueryEventBuffer> bufE : entryBufs.entrySet()) {
CacheContinuousQueryEventBuffer buf = bufE.getValue();
Collection<CacheContinuousQueryEntry> entries = buf.flushOnExchange((cntr, filtered) -> createFilteredEntry(cctx.cacheId(), bufE.getKey(), topVer, cntr, filtered));
if (entries == null || node == null)
continue;
for (CacheContinuousQueryEntry e : entries) {
e.markBackup();
if (!e.isFiltered())
prepareEntry(cctx, nodeId, e);
}
ctx.continuous().addBackupNotification(nodeId, routineId, entries, topic);
}
} catch (IgniteCheckedException e) {
U.error(ctx.log(CU.CONTINUOUS_QRY_LOG_CATEGORY), "Failed to send backup event notification to node: " + nodeId, e);
}
}
@Override
public void acknowledgeBackupOnTimeout(GridKernalContext ctx) {
sendBackupAcknowledge(ackBuf.acknowledgeOnTimeout(), routineId, ctx);
}
@Override
public void skipUpdateEvent(CacheContinuousQueryEvent<K, V> evt, AffinityTopologyVersion topVer, boolean primary) {
assert evt != null;
CacheContinuousQueryEntry e = evt.entry();
e.markFiltered();
onEntryUpdated(evt, primary, false, null);
}
@Override
public CounterSkipContext skipUpdateCounter(final GridCacheContext cctx, @Nullable CounterSkipContext skipCtx, int part, long cntr, AffinityTopologyVersion topVer, boolean primary) {
if (skipCtx == null)
skipCtx = new CounterSkipContext(part, cntr, topVer);
if (!needNotify(true, cctx, part, cntr, null))
return skipCtx;
if (loc) {
assert !locOnly;
final Collection<CacheEntryEvent<? extends K, ? extends V>> evts = handleEvent(ctx, skipCtx.entry());
if (!evts.isEmpty()) {
if (asyncCb) {
ctx.pools().asyncCallbackPool().execute(new Runnable() {
@Override
public void run() {
try {
notifyLocalListener(evts, getTransformer());
} catch (IgniteCheckedException ex) {
U.error(ctx.log(CU.CONTINUOUS_QRY_LOG_CATEGORY), "Failed to notify local listener.", ex);
}
}
}, part);
} else
skipCtx.addProcessClosure(new Runnable() {
@Override
public void run() {
try {
notifyLocalListener(evts, getTransformer());
} catch (IgniteCheckedException ex) {
U.error(ctx.log(CU.CONTINUOUS_QRY_LOG_CATEGORY), "Failed to notify local listener.", ex);
}
}
});
}
return skipCtx;
}
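// Remote subscriber: route the skipped counter through the partition buffer so its ordering relative to real events is preserved.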
CacheContinuousQueryEventBuffer buf = partitionBuffer(cctx, part);
final Object entryOrList = buf.processEntry(skipCtx.entry(), !primary);
if (entryOrList != null) {
skipCtx.addProcessClosure(new Runnable() {
@Override
public void run() {
try {
ctx.continuous().addNotification(nodeId, routineId, entryOrList, topic, false, true);
} catch (ClusterTopologyCheckedException ex) {
if (log.isDebugEnabled())
log.debug("Failed to send event notification to node, node left cluster " + "[node=" + nodeId + ", err=" + ex + ']');
} catch (IgniteCheckedException ex) {
U.error(ctx.log(CU.CONTINUOUS_QRY_LOG_CATEGORY), "Failed to send event notification to node: " + nodeId, ex);
}
}
});
}
return skipCtx;
}
@Override
public void onPartitionEvicted(int part) {
entryBufs.remove(part);
}
@Override
public boolean oldValueRequired() {
return oldValRequired;
}
@Override
public boolean notifyExisting() {
return notifyExisting;
}
private String taskName() {
return ctx.security().enabled() ? ctx.task().resolveTaskName(taskHash) : null;
}
@Override
public boolean isPrimaryOnly() {
return locOnly && !skipPrimaryCheck;
}
/**
* Checks whether listeners need to be notified.
*
* @param skipEvt {@code True} if this is a skip counter event.
* @param cctx Cache context.
* @param part Partition id.
* @param cntr Update counter.
* @param evt CQ event.
* @return {@code True} if the listener should be notified, {@code false} if the update predates the query start and must be skipped.
*/
private boolean needNotify(boolean skipEvt, GridCacheContext cctx, int part, long cntr, CacheContinuousQueryEvent evt) {
assert !skipEvt || evt == null;
// part == -1 && cntr == -1 means skip counter.
assert skipEvt || part == -1 && cntr == -1;
if (!cctx.mvccEnabled() || cctx.isLocal())
return true;
assert locInitUpdCntrs != null;
cntr = skipEvt ? cntr : evt.getPartitionUpdateCounter();
part = skipEvt ? part : evt.partitionId();
T2<Long, Long> initCntr = locInitUpdCntrs.get(part);
// Do not notify listener if entry was updated before the query is started.
return initCntr == null || cntr >= initCntr.get2();
}
};
CacheContinuousQueryManager mgr = manager(ctx);
if (mgr == null)
return RegisterStatus.DELAYED;
RegisterStatus regStatus = mgr.registerListener(routineId, lsnr, internal);
if (regStatus == RegisterStatus.REGISTERED)
initFut.listen(res -> sendQueryExecutedEvent());
return regStatus;
}
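The same handling pattern recurs throughout this page: ClusterTopologyCheckedException means the destination node left the cluster, so it is debug-logged and swallowed, while any other IgniteCheckedException is treated as a real error. Below is a minimal, self-contained sketch of that pattern; NotifySketch and sendNotification are hypothetical stand-ins for the internal ctx.continuous().addNotification(...) call used above.

import java.util.UUID;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteLogger;
import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException;

// Hedged sketch: 'sendNotification' is a hypothetical stand-in for the
// internal messaging call; only the exception handling is the point here.
final class NotifySketch {
    private final IgniteLogger log;

    NotifySketch(IgniteLogger log) {
        this.log = log;
    }

    void notifyNode(UUID nodeId, Object payload) {
        try {
            sendNotification(nodeId, payload);
        }
        catch (ClusterTopologyCheckedException e) {
            // Node left the cluster: benign, debug-log and drop.
            if (log.isDebugEnabled())
                log.debug("Failed to send notification, node left [node=" + nodeId + ", err=" + e + ']');
        }
        catch (IgniteCheckedException e) {
            // Any other checked failure is a real error.
            log.error("Failed to send notification to node: " + nodeId, e);
        }
    }

    private void sendNotification(UUID nodeId, Object payload) throws IgniteCheckedException {
        // Placeholder; assumed to throw ClusterTopologyCheckedException when the target left.
    }
}

Note the catch order: ClusterTopologyCheckedException extends IgniteCheckedException, so the more specific clause must come first.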
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project ignite by apache: class GridCacheDistributedQueryManager, method sendQueryResponse.
/**
* Sends cache query response.
*
* @param nodeId Node to send response.
* @param res Cache query response.
* @param timeout Message timeout.
* @return {@code true} if response was sent, {@code false} otherwise.
*/
private boolean sendQueryResponse(UUID nodeId, GridCacheQueryResponse res, long timeout) {
ClusterNode node = cctx.node(nodeId);
if (node == null)
return false;
int attempt = 1;
IgniteCheckedException err = null;
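// Retry loop: a topology change aborts immediately (the target node left), any other failure is retried up to RESEND_ATTEMPTS with a RESEND_FREQ pause.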
while (!Thread.currentThread().isInterrupted()) {
try {
if (log.isDebugEnabled())
log.debug("Send query response: " + res);
Object topic = topic(nodeId, res.requestId());
cctx.io().sendOrderedMessage(node, topic, res, GridIoPolicy.QUERY_POOL, timeout > 0 ? timeout : Long.MAX_VALUE);
return true;
} catch (ClusterTopologyCheckedException ignored) {
if (log.isDebugEnabled())
log.debug("Failed to send query response since node left grid [nodeId=" + nodeId + ", res=" + res + "]");
return false;
} catch (IgniteCheckedException e) {
if (err == null)
err = e;
if (Thread.currentThread().isInterrupted())
break;
if (attempt < RESEND_ATTEMPTS) {
if (log.isDebugEnabled())
log.debug("Failed to send queries response (will try again) [nodeId=" + nodeId + ", res=" + res + ", attempt=" + attempt + ", err=" + e + "]");
if (!Thread.currentThread().isInterrupted())
try {
U.sleep(RESEND_FREQ);
} catch (IgniteInterruptedCheckedException e1) {
U.error(log, "Waiting for queries response resending was interrupted (response will not be sent) " + "[nodeId=" + nodeId + ", response=" + res + "]", e1);
return false;
}
} else {
U.error(log, "Failed to sender cache response [nodeId=" + nodeId + ", response=" + res + "]", err);
return false;
}
}
attempt++;
}
return false;
}
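The retry policy above is worth isolating: a topology exception aborts immediately (the target is gone), while any other checked failure is retried a bounded number of times with a pause. A sketch under assumed names, where trySend, ATTEMPTS, and PAUSE_MS stand in for the internal send call, RESEND_ATTEMPTS, and RESEND_FREQ, and plain Thread.sleep replaces U.sleep:

import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException;

// Hedged sketch of the bounded-retry send policy; 'trySend' is hypothetical.
final class RetrySendSketch {
    private static final int ATTEMPTS = 5;      // stands in for RESEND_ATTEMPTS
    private static final long PAUSE_MS = 3000L; // stands in for RESEND_FREQ

    boolean sendWithRetry() {
        for (int attempt = 1; attempt <= ATTEMPTS && !Thread.currentThread().isInterrupted(); attempt++) {
            try {
                trySend();
                return true;
            }
            catch (ClusterTopologyCheckedException e) {
                return false; // Target node left: retrying cannot succeed.
            }
            catch (IgniteCheckedException e) {
                if (attempt == ATTEMPTS)
                    return false; // Attempts exhausted; caller logs the failure.
                try {
                    Thread.sleep(PAUSE_MS);
                }
                catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    return false;
                }
            }
        }
        return false;
    }

    private void trySend() throws IgniteCheckedException {
        // Placeholder for the ordered-message send in the method above.
    }
}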
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project ignite by apache: class ClusterProcessor, method initDiagnosticListeners.
/**
* @throws IgniteCheckedException If failed.
*/
public void initDiagnosticListeners() throws IgniteCheckedException {
ctx.event().addLocalEventListener(new GridLocalEventListener() {
@Override
public void onEvent(Event evt) {
assert evt instanceof DiscoveryEvent;
assert evt.type() == EVT_NODE_FAILED || evt.type() == EVT_NODE_LEFT;
DiscoveryEvent discoEvt = (DiscoveryEvent) evt;
UUID nodeId = discoEvt.eventNode().id();
ConcurrentHashMap<Long, InternalDiagnosticFuture> futs = diagnosticFutMap.get();
if (futs != null) {
for (InternalDiagnosticFuture fut : futs.values()) {
if (fut.nodeId.equals(nodeId))
fut.onDone(new IgniteDiagnosticInfo("Target node failed: " + nodeId));
}
}
allNodesMetrics.remove(nodeId);
}
}, EVT_NODE_FAILED, EVT_NODE_LEFT);
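// Second listener: serve incoming diagnostic requests and route responses to their pending futures.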
ctx.io().addMessageListener(TOPIC_INTERNAL_DIAGNOSTIC, new GridMessageListener() {
@Override
public void onMessage(UUID nodeId, Object msg, byte plc) {
if (msg instanceof IgniteDiagnosticMessage) {
IgniteDiagnosticMessage msg0 = (IgniteDiagnosticMessage) msg;
if (msg0.request()) {
ClusterNode node = ctx.discovery().node(nodeId);
if (node == null) {
if (diagnosticLog.isDebugEnabled()) {
diagnosticLog.debug("Skip diagnostic request, sender node left " + "[node=" + nodeId + ", msg=" + msg + ']');
}
return;
}
byte[] diagRes;
IgniteClosure<GridKernalContext, IgniteDiagnosticInfo> c;
try {
c = msg0.unmarshal(marsh);
diagRes = marsh.marshal(c.apply(ctx));
} catch (Exception e) {
U.error(diagnosticLog, "Failed to run diagnostic closure: " + e, e);
try {
IgniteDiagnosticInfo errInfo = new IgniteDiagnosticInfo("Failed to run diagnostic closure: " + e);
diagRes = marsh.marshal(errInfo);
} catch (Exception e0) {
U.error(diagnosticLog, "Failed to marshal diagnostic closure result: " + e, e);
diagRes = null;
}
}
IgniteDiagnosticMessage res = IgniteDiagnosticMessage.createResponse(diagRes, msg0.futureId());
try {
ctx.io().sendToGridTopic(node, TOPIC_INTERNAL_DIAGNOSTIC, res, GridIoPolicy.SYSTEM_POOL);
} catch (ClusterTopologyCheckedException e) {
if (diagnosticLog.isDebugEnabled()) {
diagnosticLog.debug("Failed to send diagnostic response, node left " + "[node=" + nodeId + ", msg=" + msg + ']');
}
} catch (IgniteCheckedException e) {
U.error(diagnosticLog, "Failed to send diagnostic response [msg=" + msg0 + "]", e);
}
} else {
InternalDiagnosticFuture fut = diagnosticFuturesMap().get(msg0.futureId());
if (fut != null) {
IgniteDiagnosticInfo res;
try {
res = msg0.unmarshal(marsh);
if (res == null)
res = new IgniteDiagnosticInfo("Remote node failed to marshal response.");
} catch (Exception e) {
U.error(diagnosticLog, "Failed to unmarshal diagnostic response: " + e, e);
res = new IgniteDiagnosticInfo("Failed to unmarshal diagnostic response: " + e);
}
fut.onResponse(res);
} else
U.warn(diagnosticLog, "Failed to find diagnostic message future [msg=" + msg0 + ']');
}
} else
U.warn(diagnosticLog, "Received unexpected message: " + msg);
}
});
if (sndMetrics) {
ctx.io().addMessageListener(TOPIC_METRICS, new GridMessageListener() {
@Override
public void onMessage(UUID nodeId, Object msg, byte plc) {
if (msg instanceof ClusterMetricsUpdateMessage)
processMetricsUpdateMessage(nodeId, (ClusterMetricsUpdateMessage) msg);
else
U.warn(log, "Received unexpected message for TOPIC_METRICS: " + msg);
}
});
}
}
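The first listener registered above completes every pending diagnostic future for a node the moment discovery reports that node failed or left, so callers never block on a dead peer. A sketch of that fail-fast bookkeeping; PendingRequests and Pending are hypothetical names, and a plain CompletableFuture stands in for InternalDiagnosticFuture:

import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;

// Hedged sketch: complete all pending request futures that target a node
// once that node leaves, mirroring the discovery listener above.
final class PendingRequests {
    /** futureId -> pending request, analogous to diagnosticFutMap. */
    private final ConcurrentHashMap<Long, Pending> futs = new ConcurrentHashMap<>();

    static final class Pending {
        final UUID nodeId;
        final CompletableFuture<String> fut = new CompletableFuture<>();

        Pending(UUID nodeId) {
            this.nodeId = nodeId;
        }
    }

    CompletableFuture<String> register(long futId, UUID targetNodeId) {
        Pending p = new Pending(targetNodeId);
        futs.put(futId, p);
        return p.fut;
    }

    void onNodeLeft(UUID nodeId) {
        for (Pending p : futs.values()) {
            if (p.nodeId.equals(nodeId))
                p.fut.complete("Target node failed: " + nodeId);
        }
    }
}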
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project ignite by apache: class ClusterProcessor, method updateMetrics.
/**
*/
private void updateMetrics() {
if (ctx.isStopping() || ctx.clientDisconnected())
return;
ClusterNode oldest = ctx.discovery().oldestAliveServerNode(AffinityTopologyVersion.NONE);
if (oldest == null)
return;
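// Role split: the oldest alive server node aggregates and broadcasts all metrics; every other node pushes only its own metrics to it.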
if (ctx.localNodeId().equals(oldest.id())) {
IgniteClusterNode locNode = (IgniteClusterNode) ctx.discovery().localNode();
locNode.setMetrics(metricsProvider.metrics());
locNode.setCacheMetrics(metricsProvider.cacheMetrics());
ClusterNodeMetrics metrics = new ClusterNodeMetrics(locNode.metrics(), locNode.cacheMetrics());
try {
byte[] metricsBytes = U.zip(U.marshal(ctx.config().getMarshaller(), metrics));
allNodesMetrics.put(ctx.localNodeId(), metricsBytes);
} catch (IgniteCheckedException e) {
U.warn(log, "Failed to marshal local node metrics: " + e, e);
}
ctx.discovery().metricsUpdateEvent(ctx.discovery().discoCache(), locNode);
Collection<ClusterNode> allNodes = ctx.discovery().allNodes();
ClusterMetricsUpdateMessage msg = new ClusterMetricsUpdateMessage(new HashMap<>(allNodesMetrics));
for (ClusterNode node : allNodes) {
if (ctx.localNodeId().equals(node.id()) || !ctx.discovery().alive(node.id()))
continue;
try {
ctx.io().sendToGridTopic(node, TOPIC_METRICS, msg, GridIoPolicy.SYSTEM_POOL);
} catch (ClusterTopologyCheckedException e) {
if (log.isDebugEnabled())
log.debug("Failed to send metrics update, node failed: " + e);
} catch (IgniteCheckedException e) {
U.warn(log, "Failed to send metrics update: " + e, e);
}
}
} else {
ClusterNodeMetrics metrics = new ClusterNodeMetrics(metricsProvider.metrics(), metricsProvider.cacheMetrics());
try {
byte[] metricsBytes = U.zip(U.marshal(ctx.config().getMarshaller(), metrics));
ClusterMetricsUpdateMessage msg = new ClusterMetricsUpdateMessage(metricsBytes);
ctx.io().sendToGridTopic(oldest, TOPIC_METRICS, msg, GridIoPolicy.SYSTEM_POOL);
} catch (ClusterTopologyCheckedException e) {
if (log.isDebugEnabled())
log.debug("Failed to send metrics update to oldest, node failed: " + e);
} catch (IgniteCheckedException e) {
LT.warn(log, e, "Failed to send metrics update to oldest: " + e, false, false);
}
}
}
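Both branches compress the marshalled metrics with U.zip before shipping them. The sketch below is not U.zip's actual implementation, only a plain-JDK illustration of the same compress-before-send step using a Deflater:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;

// Hedged sketch of compressing a serialized payload before putting it
// into a metrics message; U.zip may differ in format and defaults.
final class ZipSketch {
    static byte[] zip(byte[] marshalled) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(marshalled.length);
        try (DeflaterOutputStream out = new DeflaterOutputStream(bos, new Deflater(Deflater.BEST_SPEED))) {
            out.write(marshalled);
        }
        return bos.toByteArray();
    }
}

Shrinking the payload matters here because the coordinator rebroadcasts the metrics of every node to every other node on each update.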
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project ignite by apache: class GridJobProcessor, method handleException.
/**
* Handles errors that happened prior to job creation.
*
* @param node Sender node.
* @param req Job execution request.
* @param ex Exception that happened.
* @param endTime Job end time.
*/
private void handleException(ClusterNode node, GridJobExecuteRequest req, IgniteException ex, long endTime) {
UUID locNodeId = ctx.localNodeId();
ClusterNode sndNode = ctx.discovery().node(node.id());
if (sndNode == null) {
U.warn(log, "Failed to reply to sender node because it left grid [nodeId=" + node.id() + ", jobId=" + req.getJobId() + ']');
if (ctx.event().isRecordable(EVT_JOB_FAILED)) {
JobEvent evt = new JobEvent();
evt.jobId(req.getJobId());
evt.message("Job reply failed (original task node left grid): " + req.getJobId());
evt.node(ctx.discovery().localNode());
evt.taskName(req.getTaskName());
evt.taskClassName(req.getTaskClassName());
evt.taskSessionId(req.getSessionId());
evt.type(EVT_JOB_FAILED);
evt.taskNode(node);
evt.taskSubjectId(securitySubjectId(ctx));
// Record job reply failure.
ctx.event().record(evt);
}
return;
}
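// The sender is still alive: marshal the failure and send it back as a job execution response.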
try {
boolean loc = ctx.localNodeId().equals(sndNode.id()) && !ctx.config().isMarshalLocalJobs();
GridJobExecuteResponse jobRes = new GridJobExecuteResponse(locNodeId, req.getSessionId(), req.getJobId(),
    loc ? null : U.marshal(marsh, ex), ex,
    loc ? null : U.marshal(marsh, null), null,
    loc ? null : U.marshal(marsh, null), null,
    false, null);
if (req.isSessionFullSupport()) {
// Send response to designated job topic.
// Always go through communication to preserve order,
// if attributes are enabled.
// Job response topic.
Object topic = TOPIC_TASK.topic(req.getJobId(), locNodeId);
long timeout = endTime - U.currentTimeMillis();
if (timeout <= 0)
// Ignore the actual timeout and send response anyway.
timeout = 1;
// Send response to designated job topic.
// Always go through communication to preserve order.
ctx.io().sendOrderedMessage(sndNode, topic, jobRes, req.isInternal() ? MANAGEMENT_POOL : SYSTEM_POOL, timeout, false);
} else if (ctx.localNodeId().equals(sndNode.id()))
ctx.task().processJobExecuteResponse(ctx.localNodeId(), jobRes);
else
// Send response to common topic as unordered message.
ctx.io().sendToGridTopic(sndNode, TOPIC_TASK, jobRes, req.isInternal() ? MANAGEMENT_POOL : SYSTEM_POOL);
} catch (IgniteCheckedException e) {
// The only option here is to log, as we must assume that resending will fail too.
if ((e instanceof ClusterTopologyCheckedException) || isDeadNode(node.id()))
// Avoid stack trace for left nodes.
U.error(log, "Failed to reply to sender node because it left grid [nodeId=" + node.id() + ", jobId=" + req.getJobId() + ']');
else {
assert sndNode != null;
U.error(log, "Error sending reply for job [nodeId=" + sndNode.id() + ", jobId=" + req.getJobId() + ']', e);
}
if (ctx.event().isRecordable(EVT_JOB_FAILED)) {
JobEvent evt = new JobEvent();
evt.jobId(req.getJobId());
evt.message("Failed to send reply for job: " + req.getJobId());
evt.node(ctx.discovery().localNode());
evt.taskName(req.getTaskName());
evt.taskClassName(req.getTaskClassName());
evt.taskSessionId(req.getSessionId());
evt.type(EVT_JOB_FAILED);
evt.taskNode(node);
evt.taskSubjectId(securitySubjectId(ctx));
// Record job reply failure.
ctx.event().record(evt);
}
}
}
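One detail worth isolating: before the ordered send, the remaining time to the job deadline is clamped so an already-expired deadline still yields a positive timeout and the response goes out anyway. A one-method sketch (TimeoutClamp is a hypothetical name):

// Hedged sketch of the deadline clamp used before sendOrderedMessage above.
final class TimeoutClamp {
    static long remaining(long endTime, long now) {
        long timeout = endTime - now;
        return timeout <= 0 ? 1 : timeout; // expired deadline: send anyway with a minimal timeout
    }
}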