use of org.apache.ignite.internal.processors.tracing.Span in project ignite by apache.
the class ServerImpl method spiStop0.
/**
* Stops SPI finally or stops SPI for restart.
* <p>
* On a final stop ({@code disconnect == false}), and only if the message worker thread is alive, a
* node-left message is queued and the method waits up to {@code spi.netTimeout} milliseconds for the
* SPI state to become {@code LEFT}; the stop proceeds whether or not that happens. On a disconnect the
* state is set to {@code DISCONNECTING} first and, after the teardown, an {@code EVT_NODE_FAILED}
* notification is passed to the discovery listener for every remote node that was visible in the ring.
* In both cases the state ends up as {@code DISCONNECTED}.
*
* @param disconnect {@code True} if SPI is being disconnected.
* @throws IgniteSpiException If failed.
*/
private void spiStop0(boolean disconnect) throws IgniteSpiException {
if (log.isDebugEnabled()) {
if (disconnect)
log.debug("Disconnecting SPI.");
else
log.debug("Preparing to start local node stop procedure.");
}
// A disconnect is reflected in the SPI state before anything is torn down.
if (disconnect) {
synchronized (mux) {
spiState = DISCONNECTING;
}
}
if (msgWorker != null && msgWorker.runner() != null && msgWorker.runner().isAlive() && !disconnect) {
// Send node left message only if it is final stop.
TcpDiscoveryNodeLeftMessage nodeLeftMsg = new TcpDiscoveryNodeLeftMessage(locNode.id());
// Root tracing span for the message, tagged with the local node id and consistent id.
Span rootSpan = tracing.create(TraceableMessagesTable.traceName(nodeLeftMsg.getClass())).addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.ID), () -> locNode.id().toString()).addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.CONSISTENT_ID), () -> locNode.consistentId().toString()).addLog(() -> "Created");
// The serialized span is attached to the message before it is queued.
nodeLeftMsg.spanContainer().serializedSpanBytes(tracing.serialize(rootSpan));
msgWorker.addMessage(nodeLeftMsg);
rootSpan.addLog(() -> "Sent").end();
synchronized (mux) {
// Wait for the state to become LEFT, bounded by the network timeout.
long timeout = spi.netTimeout;
long thresholdNanos = System.nanoTime() + U.millisToNanos(timeout);
while (spiState != LEFT && timeout > 0) {
try {
mux.wait(timeout);
// Recompute what is left of the budget from the fixed deadline, so that an early wake-up
// does not restart the full timeout.
timeout = U.nanosToMillis(thresholdNanos - System.nanoTime());
} catch (InterruptedException ignored) {
// Restore the interrupt flag and stop waiting; the stop procedure itself continues.
Thread.currentThread().interrupt();
break;
}
}
if (spiState == LEFT) {
if (log.isDebugEnabled())
log.debug("Verification for local node leave has been received from coordinator" + " (continuing stop procedure).");
} else if (log.isInfoEnabled()) {
log.info("No verification for local node leave has been received from coordinator" + " (will stop node anyway).");
}
}
}
// Stop the TCP server (if any) and drop the reference.
if (tcpSrvr != null)
tcpSrvr.stop();
tcpSrvr = null;
// Copy the socket readers while holding the lock; interrupt and join them outside of it.
Collection<SocketReader> tmp;
synchronized (mux) {
tmp = U.arrayList(readers);
}
U.interrupt(tmp);
U.joinThreads(tmp, log);
// Stop the remaining workers: IP finder cleaner, message worker, per-client message workers.
U.interrupt(ipFinderCleaner);
U.join(ipFinderCleaner, log);
U.cancel(msgWorker);
U.join(msgWorker, log);
for (ClientMessageWorker clientWorker : clientMsgWorkers.values()) {
if (clientWorker != null) {
U.interrupt(clientWorker.runner());
U.join(clientWorker.runner(), log);
}
}
clientMsgWorkers.clear();
IgniteUtils.shutdownNow(ServerImpl.class, utilityPool, log);
U.interrupt(statsPrinter);
U.join(statsPrinter, log);
// Stays null on a final stop, in which case no FAILED events are fired below.
Collection<TcpDiscoveryNode> nodes = null;
if (!disconnect)
spi.printStopInfo();
else {
spi.getSpiContext().deregisterPorts();
// Taken before the ring is cleared below.
nodes = ring.visibleNodes();
}
// The topology version is likewise read before the ring is cleared.
long topVer = ring.topologyVersion();
ring.clear();
if (nodes != null) {
// This is restart/disconnection and we need to fire FAIL event for each remote node.
DiscoverySpiListener lsnr = spi.lsnr;
if (lsnr != null) {
Collection<ClusterNode> processed = new HashSet<>(nodes.size());
for (TcpDiscoveryNode n : nodes) {
if (n.isLocal())
continue;
assert n.visible();
processed.add(n);
// Topology reported with this event: the captured nodes minus those already reported as failed
// (the node being reported now included).
List<ClusterNode> top = U.arrayList(nodes, F.notIn(processed));
// Each reported failure gets its own, incremented topology version.
topVer++;
Map<Long, Collection<ClusterNode>> hist = updateTopologyHistory(topVer, Collections.unmodifiableList(top));
// NOTE(review): get() is invoked on the value returned by onDiscovery - presumably this blocks until
// the listener has processed the notification; confirm against the listener contract.
lsnr.onDiscovery(new DiscoveryNotification(EVT_NODE_FAILED, topVer, n, top, hist, null, null)).get();
}
}
}
printStatistics();
spi.stats.clear();
synchronized (mux) {
// Clear stored data.
leavingNodes.clear();
failedNodes.clear();
// Final state is the same for a final stop and for a disconnect.
spiState = DISCONNECTED;
}
}
use of org.apache.ignite.internal.processors.tracing.Span in project ignite by apache.
the class RunningQueryManager method unregister.
/**
 * Unregisters a running query.
 *
 * @param qryId Id of the query, as given by the {@link #register register} method.
 * @param failReason Exception that caused the query execution to fail, or {@code null} if the query succeeded.
 */
public void unregister(long qryId, @Nullable Throwable failReason) {
    if (qryId <= 0)
        return;

    GridRunningQueryInfo info = runs.remove(qryId);

    // Nothing was removed: this is a repeated attempt to unregister the same query.
    if (info == null)
        return;

    final boolean succeeded = failReason == null;
    final Span span = info.span();

    try {
        if (!succeeded)
            span.addTag(ERROR, failReason::getMessage);

        // Query history and metrics are collected for SQL queries only.
        if (isSqlQuery(info)) {
            info.runningFuture().onDone();

            qryHistTracker.collectHistory(info, !succeeded);

            if (succeeded)
                successQrsCnt.increment();
            else {
                failedQrsCnt.increment();

                // A cancelled query is counted as failed and, in addition, as cancelled.
                if (QueryUtils.wasCancelled(failReason))
                    canceledQrsCnt.increment();
            }
        }

        // Performance statistics are recorded only when enabled and when a start timestamp is present.
        if (ctx.performanceStatistics().enabled() && info.startTimeNanos() > 0) {
            ctx.performanceStatistics().query(
                info.queryType(),
                info.query(),
                info.requestId(),
                info.startTime(),
                System.nanoTime() - info.startTimeNanos(),
                succeeded);
        }
    }
    finally {
        // The query span is always ended, even if the bookkeeping above throws.
        span.end();
    }
}
use of org.apache.ignite.internal.processors.tracing.Span in project ignite by apache.
the class InboundConnectionHandler method onMessage.
/**
* {@inheritDoc}
* <p>
* Handles one inbound message on a session. A session without a connection key is passed to
* {@code onFirstMessage}; channel-index connections, recovery acknowledgements and dummy connections
* are handled here and return early; every other message is counted in the metrics and passed on to
* the listener.
*/
@Override
public void onMessage(final GridNioSession ses, Message msg) {
// Record the reception on the current tracing span.
Span span = MTC.span();
span.addLog(() -> "Communication received");
span.addTag(SpanTags.MESSAGE, () -> traceName(msg));
ConnectionKey connKey = ses.meta(CONN_IDX_META);
// No connection key is attached to the session yet.
if (connKey == null) {
assert ses.accepted() : ses;
if (!connectGate.tryEnter()) {
if (log.isDebugEnabled())
log.debug("Close incoming connection, failed to enter gateway.");
// Reply with NODE_STOPPING and close the session once the send future completes.
ses.send(new RecoveryLastReceivedMessage(NODE_STOPPING)).listen(fut -> ses.close());
return;
}
try {
onFirstMessage(ses, msg);
} finally {
// Always leave the gate that was entered above.
connectGate.leave();
}
} else {
if (isChannelConnIdx(connKey.connectionIndex())) {
// Channel connection: create the channel if no channel future is attached to the session yet.
if (ses.meta(CHANNEL_FUT_META) == null)
nioSrvWrapper.onChannelCreate((GridSelectorNioSessionImpl) ses, connKey, msg);
else {
GridFutureAdapter<Channel> fut = ses.meta(CHANNEL_FUT_META);
GridSelectorNioSessionImpl ses0 = (GridSelectorNioSessionImpl) ses;
// NOTE(review): presumably keeps the underlying socket open when the session is closed, so that the
// channel handed to the future below stays usable - confirm in GridSelectorNioSessionImpl.
ses0.closeSocketOnSessionClose(false);
// Complete the channel future with the close error, or with the session's channel on success.
ses0.close().listen(f -> {
if (f.error() != null) {
fut.onDone(f.error());
return;
}
fut.onDone(ses0.key().channel());
});
}
return;
}
Object consistentId = ses.meta(CONSISTENT_ID_META);
assert consistentId != null;
if (msg instanceof RecoveryLastReceivedMessage) {
// Acknowledgement from the remote side: counted in metrics and applied to the outbound recovery
// descriptor (if any); it is not passed to the listener.
metricsLsnr.onMessageReceived(msg, consistentId);
GridNioRecoveryDescriptor recovery = ses.outRecoveryDescriptor();
if (recovery != null) {
RecoveryLastReceivedMessage msg0 = (RecoveryLastReceivedMessage) msg;
if (log.isDebugEnabled()) {
log.debug("Received recovery acknowledgement [rmtNode=" + connKey.nodeId() + ", connIdx=" + connKey.connectionIndex() + ", rcvCnt=" + msg0.received() + ']');
}
recovery.ackReceived(msg0.received());
}
return;
} else {
GridNioRecoveryDescriptor recovery = ses.inRecoveryDescriptor();
if (recovery != null) {
long rcvCnt = recovery.onReceived();
// Send an acknowledgement each time the received counter is a multiple of the configured threshold.
if (rcvCnt % cfg.ackSendThreshold() == 0) {
if (log.isDebugEnabled()) {
log.debug("Send recovery acknowledgement [rmtNode=" + connKey.nodeId() + ", connIdx=" + connKey.connectionIndex() + ", rcvCnt=" + rcvCnt + ']');
}
ses.systemMessage(new RecoveryLastReceivedMessage(rcvCnt));
recovery.lastAcknowledged(rcvCnt);
}
} else if (connKey.dummy()) {
// Dummy connection key without a recovery descriptor: the message must be a NodeIdMessage; the node id
// it carries completes the connection-check future and the session is then closed.
assert msg instanceof NodeIdMessage : msg;
TcpCommunicationNodeConnectionCheckFuture fut = ses.meta(SES_FUT_META);
assert fut != null : msg;
fut.onConnected(U.bytesToUuid(((NodeIdMessage) msg).nodeIdBytes(), 0));
nioSrvWrapper.nio().closeFromWorkerThread(ses);
return;
}
}
metricsLsnr.onMessageReceived(msg, consistentId);
// Closure handed to the listener together with the message: the session's message tracker when a queue
// limit is configured, NOOP otherwise.
IgniteRunnable c;
if (cfg.messageQueueLimit() > 0) {
GridNioMessageTracker tracker = ses.meta(TRACKER_META);
if (tracker == null) {
// Create the tracker on first use and attach it to the session.
GridNioMessageTracker old = ses.addMeta(TRACKER_META, tracker = new GridNioMessageTracker(ses, cfg.messageQueueLimit()));
assert old == null;
}
tracker.onMessageReceived();
c = tracker;
} else
c = NOOP;
lsnr.onMessage(connKey.nodeId(), msg, c);
}
}
use of org.apache.ignite.internal.processors.tracing.Span in project ignite by apache.
the class GridMapQueryExecutor method onQueryRequest.
/**
 * Handles a map query request. The set of index segments to query is derived from the requested
 * partitions; the first segment is processed on the calling thread, every further segment is handed to
 * {@code ctx.closure().runLocal(..., QUERY_POOL)}. Any error is reported back to the requesting node
 * through {@code sendError}.
 *
 * @param node Node.
 * @param req Query request.
 * @throws IgniteCheckedException On error.
 */
public void onQueryRequest(final ClusterNode node, final GridH2QueryRequest req) throws IgniteCheckedException {
    int[] explicitParts = req.queryPartitions();
    final Map<UUID, int[]> partsMap = req.partitions();

    // Explicit query partitions win; otherwise fall back to the local node's entry of the partitions map.
    final int[] parts;

    if (explicitParts != null)
        parts = explicitParts;
    else if (partsMap != null)
        parts = partsMap.get(ctx.localNodeId());
    else
        parts = null;

    boolean distributedJoins = req.isFlagSet(GridH2QueryRequest.FLAG_DISTRIBUTED_JOINS);
    boolean enforceJoinOrder = req.isFlagSet(GridH2QueryRequest.FLAG_ENFORCE_JOIN_ORDER);
    boolean explain = req.isFlagSet(GridH2QueryRequest.FLAG_EXPLAIN);
    boolean replicated = req.isFlagSet(GridH2QueryRequest.FLAG_REPLICATED);
    final boolean lazy = req.isFlagSet(GridH2QueryRequest.FLAG_LAZY);
    boolean treatReplicatedAsPartitioned = req.isFlagSet(GridH2QueryRequest.FLAG_REPLICATED_AS_PARTITIONED);

    try {
        Boolean dataPageScanEnabled = req.isDataPageScanEnabled();
        final List<Integer> cacheIds = req.caches();

        // Explain requests, replicated requests and requests without caches use a single segment.
        final int parallelism;

        if (explain || replicated || F.isEmpty(cacheIds))
            parallelism = 1;
        else
            parallelism = CU.firstPartitioned(ctx.cache().context(), cacheIds).config().getQueryParallelism();

        BitSet segments = new BitSet(parallelism);

        if (parts == null)
            segments.set(0, parallelism);
        else {
            for (int part : parts)
                segments.set(calculateSegment(parallelism, part));
        }

        final Object[] params = req.parameters();

        // The request timeout is used when it is positive or explicitly set; otherwise the configured default.
        final int timeout;

        if (req.timeout() > 0 || req.explicitTimeout())
            timeout = req.timeout();
        else
            timeout = (int) h2.distributedConfiguration().defaultQueryTimeout();

        int firstSegment = segments.nextSetBit(0);

        // Every segment after the first one is submitted for asynchronous processing.
        for (int seg = segments.nextSetBit(firstSegment + 1); seg != -1; seg = segments.nextSetBit(seg + 1)) {
            assert !F.isEmpty(cacheIds);

            final int seg0 = seg;

            // Captured here so that the submitted task continues the caller's current span.
            Span span = MTC.span();

            ctx.closure().runLocal(() -> {
                try (TraceSurroundings ignored = MTC.supportContinual(span)) {
                    // Note: the literal false is passed where the synchronous call below passes 'replicated'.
                    onQueryRequest0(node, req.queryId(), req.requestId(), seg0, req.schemaName(), req.queries(),
                        cacheIds, req.topologyVersion(), partsMap, parts, req.pageSize(), distributedJoins,
                        enforceJoinOrder, false, timeout, params, lazy, req.mvccSnapshot(), dataPageScanEnabled,
                        treatReplicatedAsPartitioned);
                }
                catch (Throwable e) {
                    sendError(node, req.requestId(), e);
                }
            }, QUERY_POOL);
        }

        // The first segment is processed on the calling thread.
        onQueryRequest0(node, req.queryId(), req.requestId(), firstSegment, req.schemaName(), req.queries(),
            cacheIds, req.topologyVersion(), partsMap, parts, req.pageSize(), distributedJoins,
            enforceJoinOrder, replicated, timeout, params, lazy, req.mvccSnapshot(), dataPageScanEnabled,
            treatReplicatedAsPartitioned);
    }
    catch (Throwable e) {
        sendError(node, req.requestId(), e);
    }
}
Aggregations