Search in sources :

Example 11 with Span

use of org.apache.ignite.internal.processors.tracing.Span in project ignite by apache.

the class ServerImpl method spiStop0.

/**
 * Stops SPI finally or stops SPI for restart.
 * <p>
 * Steps performed, in order: on disconnect the state is switched to {@code DISCONNECTING}; on a final stop
 * (and only if the message worker thread is alive) a node-left message is handed to the message worker and the
 * method waits, for at most {@code spi.netTimeout} milliseconds, for the state to become {@code LEFT}; the TCP
 * server is stopped; socket readers, IP finder cleaner, message worker, client message workers, utility pool and
 * statistics printer are stopped and joined; the ring is cleared; on disconnect the discovery listener is
 * notified with {@code EVT_NODE_FAILED} for every non-local node that was visible; finally the stored
 * leaving/failed nodes are cleared and the state is set to {@code DISCONNECTED}.
 *
 * @param disconnect {@code True} if SPI is being disconnected.
 * @throws IgniteSpiException If failed.
 */
private void spiStop0(boolean disconnect) throws IgniteSpiException {
    if (log.isDebugEnabled()) {
        if (disconnect)
            log.debug("Disconnecting SPI.");
        else
            log.debug("Preparing to start local node stop procedure.");
    }
    // The state is changed under the same monitor that the wait loop and final cleanup below use.
    if (disconnect) {
        synchronized (mux) {
            spiState = DISCONNECTING;
        }
    }
    // The leave is announced only when the message worker thread is still alive and this is not a disconnect.
    if (msgWorker != null && msgWorker.runner() != null && msgWorker.runner().isAlive() && !disconnect) {
        // Send node left message only if it is final stop.
        TcpDiscoveryNodeLeftMessage nodeLeftMsg = new TcpDiscoveryNodeLeftMessage(locNode.id());
        // Root span for the node-left message, tagged with the local node id and consistent id.
        Span rootSpan = tracing.create(TraceableMessagesTable.traceName(nodeLeftMsg.getClass())).addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.ID), () -> locNode.id().toString()).addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.CONSISTENT_ID), () -> locNode.consistentId().toString()).addLog(() -> "Created");
        // The serialized span is attached to the message before the message is handed to the worker.
        nodeLeftMsg.spanContainer().serializedSpanBytes(tracing.serialize(rootSpan));
        msgWorker.addMessage(nodeLeftMsg);
        // The span is ended right after addMessage() returns, i.e. before the wait for LEFT below.
        rootSpan.addLog(() -> "Sent").end();
        synchronized (mux) {
            long timeout = spi.netTimeout;
            // Absolute deadline; the remaining wait time is recomputed from it after every wake-up.
            long thresholdNanos = System.nanoTime() + U.millisToNanos(timeout);
            while (spiState != LEFT && timeout > 0) {
                try {
                    mux.wait(timeout);
                    timeout = U.nanosToMillis(thresholdNanos - System.nanoTime());
                } catch (InterruptedException ignored) {
                    // Restore the interrupt flag and stop waiting; the stop procedure continues regardless.
                    Thread.currentThread().interrupt();
                    break;
                }
            }
            if (spiState == LEFT) {
                if (log.isDebugEnabled())
                    log.debug("Verification for local node leave has been received from coordinator" + " (continuing stop procedure).");
            } else if (log.isInfoEnabled()) {
                log.info("No verification for local node leave has been received from coordinator" + " (will stop node anyway).");
            }
        }
    }
    if (tcpSrvr != null)
        tcpSrvr.stop();
    tcpSrvr = null;
    Collection<SocketReader> tmp;
    // Readers are copied under the lock; interrupting and joining happens outside of it.
    synchronized (mux) {
        tmp = U.arrayList(readers);
    }
    U.interrupt(tmp);
    U.joinThreads(tmp, log);
    U.interrupt(ipFinderCleaner);
    U.join(ipFinderCleaner, log);
    U.cancel(msgWorker);
    U.join(msgWorker, log);
    for (ClientMessageWorker clientWorker : clientMsgWorkers.values()) {
        if (clientWorker != null) {
            U.interrupt(clientWorker.runner());
            U.join(clientWorker.runner(), log);
        }
    }
    clientMsgWorkers.clear();
    IgniteUtils.shutdownNow(ServerImpl.class, utilityPool, log);
    U.interrupt(statsPrinter);
    U.join(statsPrinter, log);
    // Stays null on a final stop; on disconnect it holds the visible nodes captured before the ring is cleared.
    Collection<TcpDiscoveryNode> nodes = null;
    if (!disconnect)
        spi.printStopInfo();
    else {
        spi.getSpiContext().deregisterPorts();
        nodes = ring.visibleNodes();
    }
    // Topology version is read before the ring is cleared; it is incremented per reported node below.
    long topVer = ring.topologyVersion();
    ring.clear();
    if (nodes != null) {
        // This is restart/disconnection and we need to fire FAIL event for each remote node.
        DiscoverySpiListener lsnr = spi.lsnr;
        if (lsnr != null) {
            // Nodes already reported as failed; used to filter the topology passed with each notification.
            Collection<ClusterNode> processed = new HashSet<>(nodes.size());
            for (TcpDiscoveryNode n : nodes) {
                if (n.isLocal())
                    continue;
                assert n.visible();
                processed.add(n);
                // Topology for this notification: captured nodes filtered by F.notIn(processed)
                // (presumably those not yet reported as failed - confirm against F.notIn).
                List<ClusterNode> top = U.arrayList(nodes, F.notIn(processed));
                topVer++;
                Map<Long, Collection<ClusterNode>> hist = updateTopologyHistory(topVer, Collections.unmodifiableList(top));
                // get() is called on the result of each notification before moving on to the next node.
                lsnr.onDiscovery(new DiscoveryNotification(EVT_NODE_FAILED, topVer, n, top, hist, null, null)).get();
            }
        }
    }
    printStatistics();
    spi.stats.clear();
    synchronized (mux) {
        // Clear stored data.
        leavingNodes.clear();
        failedNodes.clear();
        spiState = DISCONNECTED;
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) DiscoverySpiListener(org.apache.ignite.spi.discovery.DiscoverySpiListener) Span(org.apache.ignite.internal.processors.tracing.Span) Collection(java.util.Collection) DiscoveryNotification(org.apache.ignite.spi.discovery.DiscoveryNotification) TcpDiscoveryNodeLeftMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryNodeLeftMessage) TcpDiscoveryNode(org.apache.ignite.spi.discovery.tcp.internal.TcpDiscoveryNode) GridBoundedLinkedHashSet(org.apache.ignite.internal.util.GridBoundedLinkedHashSet) HashSet(java.util.HashSet) GridConcurrentHashSet(org.apache.ignite.internal.util.GridConcurrentHashSet)

Example 12 with Span

use of org.apache.ignite.internal.processors.tracing.Span in project ignite by apache.

the class RunningQueryManager method unregister.

/**
 * Removes a running query from the registry and records its outcome.
 * <p>
 * Non-positive ids and ids that are not (or no longer) registered are ignored. For a registered query the
 * query span is tagged with the error message on failure and is always ended before this method returns.
 * History and success/failure/cancel counters are updated for SQL queries only.
 *
 * @param qryId id of the query, which is given by {@link #register register} method.
 * @param failReason exception that caused query execution fail, or {@code null} if query succeeded.
 */
public void unregister(long qryId, @Nullable Throwable failReason) {
    if (qryId <= 0)
        return;

    GridRunningQueryInfo info = runs.remove(qryId);

    // Nothing is registered under this id, e.g. the query has already been unregistered.
    if (info == null)
        return;

    final boolean succeeded = failReason == null;
    final Span span = info.span();

    try {
        if (!succeeded)
            span.addTag(ERROR, failReason::getMessage);

        // History and metrics are collected for SQL queries only.
        if (isSqlQuery(info)) {
            info.runningFuture().onDone();

            qryHistTracker.collectHistory(info, !succeeded);

            if (succeeded)
                successQrsCnt.increment();
            else {
                failedQrsCnt.increment();

                // A cancelled query is counted both as failed and as cancelled; the cancel counter
                // depends on the failure reason only.
                if (QueryUtils.wasCancelled(failReason))
                    canceledQrsCnt.increment();
            }
        }

        if (ctx.performanceStatistics().enabled() && info.startTimeNanos() > 0) {
            ctx.performanceStatistics().query(
                info.queryType(),
                info.query(),
                info.requestId(),
                info.startTime(),
                System.nanoTime() - info.startTimeNanos(),
                succeeded);
        }
    }
    finally {
        span.end();
    }
}
Also used : Span(org.apache.ignite.internal.processors.tracing.Span)

Example 13 with Span

use of org.apache.ignite.internal.processors.tracing.Span in project ignite by apache.

the class InboundConnectionHandler method onMessage.

/**
 * {@inheritDoc}
 * <p>
 * Handles a message received on the given session. Depending on the session metadata the message is treated
 * as the first message of a connection, as a channel-connection message, as a recovery acknowledgement, as a
 * connection-check reply, or as a regular message that is passed on to the listener.
 */
@Override
public void onMessage(final GridNioSession ses, Message msg) {
    // Record the receipt on the span currently held by MTC.
    Span span = MTC.span();
    span.addLog(() -> "Communication received");
    span.addTag(SpanTags.MESSAGE, () -> traceName(msg));
    ConnectionKey connKey = ses.meta(CONN_IDX_META);
    // No connection key stored on the session yet: the message goes to onFirstMessage().
    if (connKey == null) {
        assert ses.accepted() : ses;
        // If the gateway cannot be entered, reply with NODE_STOPPING and close the session from the send listener.
        if (!connectGate.tryEnter()) {
            if (log.isDebugEnabled())
                log.debug("Close incoming connection, failed to enter gateway.");
            ses.send(new RecoveryLastReceivedMessage(NODE_STOPPING)).listen(fut -> ses.close());
            return;
        }
        try {
            onFirstMessage(ses, msg);
        } finally {
            // The gateway is always left, even if onFirstMessage() throws.
            connectGate.leave();
        }
    } else {
        // Channel connections are handled separately and never reach the listener below.
        if (isChannelConnIdx(connKey.connectionIndex())) {
            if (ses.meta(CHANNEL_FUT_META) == null)
                nioSrvWrapper.onChannelCreate((GridSelectorNioSessionImpl) ses, connKey, msg);
            else {
                GridFutureAdapter<Channel> fut = ses.meta(CHANNEL_FUT_META);
                GridSelectorNioSessionImpl ses0 = (GridSelectorNioSessionImpl) ses;
                // Close the session but ask it not to close the socket, then complete the stored future
                // with either the close error or the session's channel.
                ses0.closeSocketOnSessionClose(false);
                ses0.close().listen(f -> {
                    if (f.error() != null) {
                        fut.onDone(f.error());
                        return;
                    }
                    fut.onDone(ses0.key().channel());
                });
            }
            return;
        }
        Object consistentId = ses.meta(CONSISTENT_ID_META);
        assert consistentId != null;
        if (msg instanceof RecoveryLastReceivedMessage) {
            // Acknowledgement from the remote side: count it in metrics and, if an outgoing recovery
            // descriptor exists, pass the acknowledged count to it. The listener is not invoked.
            metricsLsnr.onMessageReceived(msg, consistentId);
            GridNioRecoveryDescriptor recovery = ses.outRecoveryDescriptor();
            if (recovery != null) {
                RecoveryLastReceivedMessage msg0 = (RecoveryLastReceivedMessage) msg;
                if (log.isDebugEnabled()) {
                    log.debug("Received recovery acknowledgement [rmtNode=" + connKey.nodeId() + ", connIdx=" + connKey.connectionIndex() + ", rcvCnt=" + msg0.received() + ']');
                }
                recovery.ackReceived(msg0.received());
            }
            return;
        } else {
            GridNioRecoveryDescriptor recovery = ses.inRecoveryDescriptor();
            if (recovery != null) {
                long rcvCnt = recovery.onReceived();
                // An acknowledgement is sent whenever the received count is a multiple of the configured threshold.
                if (rcvCnt % cfg.ackSendThreshold() == 0) {
                    if (log.isDebugEnabled()) {
                        log.debug("Send recovery acknowledgement [rmtNode=" + connKey.nodeId() + ", connIdx=" + connKey.connectionIndex() + ", rcvCnt=" + rcvCnt + ']');
                    }
                    ses.systemMessage(new RecoveryLastReceivedMessage(rcvCnt));
                    recovery.lastAcknowledged(rcvCnt);
                }
            } else if (connKey.dummy()) {
                // No incoming recovery descriptor and a dummy key: the message is expected to be a NodeIdMessage.
                // The stored connection-check future is completed with the node id decoded from the message
                // and the session is closed; the listener is not invoked.
                assert msg instanceof NodeIdMessage : msg;
                TcpCommunicationNodeConnectionCheckFuture fut = ses.meta(SES_FUT_META);
                assert fut != null : msg;
                fut.onConnected(U.bytesToUuid(((NodeIdMessage) msg).nodeIdBytes(), 0));
                nioSrvWrapper.nio().closeFromWorkerThread(ses);
                return;
            }
        }
        metricsLsnr.onMessageReceived(msg, consistentId);
        // Closure passed to the listener together with the message: the per-session tracker when a
        // message queue limit is configured, otherwise NOOP.
        IgniteRunnable c;
        if (cfg.messageQueueLimit() > 0) {
            GridNioMessageTracker tracker = ses.meta(TRACKER_META);
            if (tracker == null) {
                // The tracker is created on first use and stored in the session metadata.
                GridNioMessageTracker old = ses.addMeta(TRACKER_META, tracker = new GridNioMessageTracker(ses, cfg.messageQueueLimit()));
                assert old == null;
            }
            tracker.onMessageReceived();
            c = tracker;
        } else
            c = NOOP;
        lsnr.onMessage(connKey.nodeId(), msg, c);
    }
}
Also used : RecoveryLastReceivedMessage(org.apache.ignite.spi.communication.tcp.messages.RecoveryLastReceivedMessage) Channel(java.nio.channels.Channel) Span(org.apache.ignite.internal.processors.tracing.Span) GridNioMessageTracker(org.apache.ignite.internal.util.nio.GridNioMessageTracker) IgniteRunnable(org.apache.ignite.lang.IgniteRunnable) NodeIdMessage(org.apache.ignite.spi.communication.tcp.messages.NodeIdMessage) GridNioRecoveryDescriptor(org.apache.ignite.internal.util.nio.GridNioRecoveryDescriptor) GridSelectorNioSessionImpl(org.apache.ignite.internal.util.nio.GridSelectorNioSessionImpl)

Example 14 with Span

use of org.apache.ignite.internal.processors.tracing.Span in project ignite by apache.

the class GridMapQueryExecutor method onQueryRequest.

/**
 * Handles a map query request: works out which segments have to be queried and runs the query on each of them.
 * <p>
 * The first segment is processed in the calling thread. Every further segment is submitted through
 * {@code ctx.closure().runLocal(...)} to {@code QUERY_POOL}, continuing the caller's tracing span; those
 * submissions pass {@code false} in the argument position where the first segment passes the replicated flag.
 * Any {@link Throwable} raised while preparing or executing the request is reported to the requesting node
 * via {@code sendError}.
 *
 * @param node Node.
 * @param req Query request.
 * @throws IgniteCheckedException On error.
 */
public void onQueryRequest(final ClusterNode node, final GridH2QueryRequest req) throws IgniteCheckedException {
    int[] explicitParts = req.queryPartitions();
    final Map<UUID, int[]> partsMap = req.partitions();

    // Explicit query partitions win; otherwise use this node's entry of the partitions map, if there is a map.
    final int[] parts;

    if (explicitParts != null)
        parts = explicitParts;
    else if (partsMap != null)
        parts = partsMap.get(ctx.localNodeId());
    else
        parts = null;

    boolean distrJoins = req.isFlagSet(GridH2QueryRequest.FLAG_DISTRIBUTED_JOINS);
    boolean enforceOrder = req.isFlagSet(GridH2QueryRequest.FLAG_ENFORCE_JOIN_ORDER);
    boolean isExplain = req.isFlagSet(GridH2QueryRequest.FLAG_EXPLAIN);
    boolean isReplicated = req.isFlagSet(GridH2QueryRequest.FLAG_REPLICATED);
    final boolean lazyExec = req.isFlagSet(GridH2QueryRequest.FLAG_LAZY);
    boolean replicatedAsPartitioned = req.isFlagSet(GridH2QueryRequest.FLAG_REPLICATED_AS_PARTITIONED);

    try {
        Boolean pageScan = req.isDataPageScanEnabled();
        final List<Integer> cacheIds = req.caches();

        // A single segment is used for explain/replicated requests and when no caches are listed.
        final int parallelism;

        if (isExplain || isReplicated || F.isEmpty(cacheIds))
            parallelism = 1;
        else
            parallelism = CU.firstPartitioned(ctx.cache().context(), cacheIds).config().getQueryParallelism();

        BitSet segMask = new BitSet(parallelism);

        if (parts == null)
            segMask.set(0, parallelism);
        else {
            for (int part : parts)
                segMask.set(calculateSegment(parallelism, part));
        }

        final Object[] params = req.parameters();

        // The request timeout is used when it is positive or explicitly set, otherwise the configured default.
        final int timeout;

        if (req.timeout() > 0 || req.explicitTimeout())
            timeout = req.timeout();
        else
            timeout = (int) h2.distributedConfiguration().defaultQueryTimeout();

        int firstSeg = segMask.nextSetBit(0);

        // Every set segment after the first one is submitted to the query pool.
        for (int seg = segMask.nextSetBit(firstSeg + 1); seg != -1; seg = segMask.nextSetBit(seg + 1)) {
            assert !F.isEmpty(cacheIds);

            final int asyncSeg = seg;
            Span span = MTC.span();

            ctx.closure().runLocal(() -> {
                try (TraceSurroundings ignored = MTC.supportContinual(span)) {
                    onQueryRequest0(
                        node,
                        req.queryId(),
                        req.requestId(),
                        asyncSeg,
                        req.schemaName(),
                        req.queries(),
                        cacheIds,
                        req.topologyVersion(),
                        partsMap,
                        parts,
                        req.pageSize(),
                        distrJoins,
                        enforceOrder,
                        false,
                        timeout,
                        params,
                        lazyExec,
                        req.mvccSnapshot(),
                        pageScan,
                        replicatedAsPartitioned);
                }
                catch (Throwable e) {
                    sendError(node, req.requestId(), e);
                }
            }, QUERY_POOL);
        }

        // The first segment is processed in the current thread.
        onQueryRequest0(
            node,
            req.queryId(),
            req.requestId(),
            firstSeg,
            req.schemaName(),
            req.queries(),
            cacheIds,
            req.topologyVersion(),
            partsMap,
            parts,
            req.pageSize(),
            distrJoins,
            enforceOrder,
            isReplicated,
            timeout,
            params,
            lazyExec,
            req.mvccSnapshot(),
            pageScan,
            replicatedAsPartitioned);
    }
    catch (Throwable e) {
        sendError(node, req.requestId(), e);
    }
}
Also used : BitSet(java.util.BitSet) Span(org.apache.ignite.internal.processors.tracing.Span) TraceSurroundings(org.apache.ignite.internal.processors.tracing.MTC.TraceSurroundings) UUID(java.util.UUID)

Aggregations

Span (org.apache.ignite.internal.processors.tracing.Span)14 TraceSurroundings (org.apache.ignite.internal.processors.tracing.MTC.TraceSurroundings)4 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)3 IgniteSpiException (org.apache.ignite.spi.IgniteSpiException)3 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 ClusterNode (org.apache.ignite.cluster.ClusterNode)2 CustomMessageWrapper (org.apache.ignite.internal.managers.discovery.CustomMessageWrapper)2 DiscoveryServerOnlyCustomMessage (org.apache.ignite.internal.managers.discovery.DiscoveryServerOnlyCustomMessage)2 GridNearTxLocal (org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal)2 NoopSpan (org.apache.ignite.internal.processors.tracing.NoopSpan)2 TcpDiscoveryCustomEventMessage (org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryCustomEventMessage)2 TcpDiscoveryServerOnlyCustomEventMessage (org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryServerOnlyCustomEventMessage)2 Benchmark (org.openjdk.jmh.annotations.Benchmark)2 Channel (java.nio.channels.Channel)1 BitSet (java.util.BitSet)1 Collection (java.util.Collection)1 Collections.singletonList (java.util.Collections.singletonList)1 List (java.util.List)1 UUID (java.util.UUID)1