Search in sources :

Example 1 with SnapshotCheckResponseMessage

Use of org.voltdb.messaging.SnapshotCheckResponseMessage in the voltdb project by VoltDB.

From the class SnapshotDaemon, method initiateSnapshotSave.

/**
 * Checks with every live host whether the requested snapshot can be taken and,
 * if all of them agree, initiates {@code @SnapshotSave}.
 *
 * <p>The method proceeds in three steps:
 * <ol>
 *   <li>Reject the request if an earlier initiation is still awaiting its response
 *       (unless that earlier, non-blocking initiation has exceeded
 *       {@code INITIATION_RESPONSE_TIMEOUT_MS}, in which case it is forgotten).</li>
 *   <li>Send a {@code SnapshotCheckRequestMessage} to the snapshot IO agent of each
 *       live host and collect matching responses until all hosts have answered or
 *       a 10 second deadline passes.</li>
 *   <li>If every host answered and the combined result reports success, record the
 *       initiation time and hand the work to {@code m_initiator}.</li>
 * </ol>
 *
 * <p>On any failure a client response carrying {@code checkResult} is delivered via
 * {@code processClientResponse}. Note that this response is built with status
 * {@code ClientResponseImpl.SUCCESS}; the failure itself is conveyed by the
 * "FAILURE" rows of the result table.
 *
 * @param handle   client handle attached to the failure response and passed on to
 *                 {@code @SnapshotSave}
 * @param params   procedure parameters; {@code params[0]} must be the JSON request string
 * @param blocking whether the snapshot is blocking; a blocking initiation is never
 *                 timed out by the in-progress check
 */
private void initiateSnapshotSave(final long handle, final Object[] params, boolean blocking) {
    boolean success = true;
    VoltTable checkResult = SnapshotUtil.constructNodeResultsTable();
    final String jsString = String.class.cast(params[0]);
    if (m_lastInitiationTs != null) {
        final long elapsedMs = System.currentTimeMillis() - m_lastInitiationTs.getFirst();
        // Blocking snapshot may take a long time to finish, don't time it out if it's blocking
        if (!m_lastInitiationTs.getSecond() && elapsedMs > INITIATION_RESPONSE_TIMEOUT_MS) {
            SNAP_LOG.warn(String.format("A snapshot was initiated %d minutes ago and hasn't received a response yet.", TimeUnit.MILLISECONDS.toMinutes(elapsedMs)));
            m_lastInitiationTs = null;
        } else {
            checkResult.addRow(CoreUtils.getHostIdFromHSId(m_mb.getHSId()), CoreUtils.getHostnameOrAddress(), null, "FAILURE", "SNAPSHOT IN PROGRESS");
            success = false;
        }
    }
    if (success) {
        try {
            // Parse everything needed from the request up front so that a malformed
            // request is rejected before any host is contacted.
            final JSONObject jsObj = new JSONObject(jsString);
            final SnapshotPathType pathType = SnapshotPathType.valueOf(jsObj.getString(SnapshotUtil.JSON_PATH_TYPE));
            final String nonce = jsObj.getString(SnapshotUtil.JSON_NONCE);
            // The path is only compared (and therefore only required) for explicit PATH requests.
            final String requestedPath = (pathType == SnapshotPathType.SNAP_PATH)
                    ? jsObj.getString(SnapshotUtil.JSON_PATH)
                    : null;

            // Do scan work on all known live hosts
            final VoltMessage msg = new SnapshotCheckRequestMessage(jsString);
            final Set<Integer> liveHosts = VoltDB.instance().getHostMessenger().getLiveHostIds();
            for (int hostId : liveHosts) {
                m_mb.send(CoreUtils.getHSIdFromHostAndSite(hostId, HostMessenger.SNAPSHOT_IO_AGENT_ID), msg);
            }

            // Wait for responses from all hosts, bounded by a single overall deadline.
            final Map<Integer, VoltTable> responses = Maps.newHashMap();
            // 10s timeout
            final long timeoutMs = 10 * 1000;
            final long endTime = System.currentTimeMillis() + timeoutMs;
            // Each receive waits only for the time left until the deadline; waiting the
            // full timeout on every call would let the total wait grow with each message.
            long remainingMs = timeoutMs;
            SnapshotCheckResponseMessage response;
            while (remainingMs > 0
                    && (response = (SnapshotCheckResponseMessage) m_mb.recvBlocking(remainingMs)) != null) {
                final boolean matchesRequest;
                if (pathType == SnapshotPathType.SNAP_PATH) {
                    // If request was explicitly PATH check path too.
                    matchesRequest = nonce.equals(response.getNonce())
                            && response.getPath().equals(requestedPath);
                } else {
                    // If request is with type other than path just check type.
                    matchesRequest = nonce.equals(response.getNonce())
                            && response.getSnapshotPathType() == pathType;
                }
                if (matchesRequest) {
                    responses.put(CoreUtils.getHostIdFromHSId(response.m_sourceHSId), response.getResponse());
                }
                if (responses.size() == liveHosts.size()) {
                    break;
                }
                remainingMs = endTime - System.currentTimeMillis();
            }

            if (responses.size() != liveHosts.size()) {
                checkResult.addRow(CoreUtils.getHostIdFromHSId(m_mb.getHSId()), CoreUtils.getHostnameOrAddress(), null, "FAILURE", "TIMED OUT CHECKING SNAPSHOT FEASIBILITY");
                success = false;
            }
            if (success) {
                // TRAIL [TruncSnap:12] all participating nodes have initiated successfully
                // Call @SnapshotSave if check passed, return the failure otherwise
                checkResult = VoltTableUtil.unionTables(responses.values());
                if (SnapshotUtil.didSnapshotRequestSucceed(new VoltTable[] { checkResult })) {
                    m_lastInitiationTs = Pair.of(System.currentTimeMillis(), blocking);
                    m_initiator.initiateSnapshotDaemonWork("@SnapshotSave", handle, params);
                } else {
                    success = false;
                }
            }
        } catch (JSONException e) {
            success = false;
            checkResult.addRow(CoreUtils.getHostIdFromHSId(m_mb.getHSId()), CoreUtils.getHostnameOrAddress(), null, "FAILURE", "ERROR PARSING JSON");
            SNAP_LOG.warn("Error parsing JSON string: " + jsString, e);
        }
    }
    if (!success) {
        // NOTE(review): the status is SUCCESS even on failure; the failure is reported
        // through the rows of checkResult. Presumably intentional — confirm with callers.
        final ClientResponseImpl failureResponse = new ClientResponseImpl(ClientResponseImpl.SUCCESS, new VoltTable[] { checkResult }, null);
        failureResponse.setClientHandle(handle);
        processClientResponse(Callables.returning(failureResponse));
    }
}
Also used : JSONException(org.json_voltpatches.JSONException) SnapshotCheckRequestMessage(org.voltdb.messaging.SnapshotCheckRequestMessage) SnapshotPathType(org.voltdb.sysprocs.saverestore.SnapshotPathType) VoltMessage(org.voltcore.messaging.VoltMessage) SnapshotCheckResponseMessage(org.voltdb.messaging.SnapshotCheckResponseMessage) JSONObject(org.json_voltpatches.JSONObject)

Aggregations

JSONException (org.json_voltpatches.JSONException)1 JSONObject (org.json_voltpatches.JSONObject)1 VoltMessage (org.voltcore.messaging.VoltMessage)1 SnapshotCheckRequestMessage (org.voltdb.messaging.SnapshotCheckRequestMessage)1 SnapshotCheckResponseMessage (org.voltdb.messaging.SnapshotCheckResponseMessage)1 SnapshotPathType (org.voltdb.sysprocs.saverestore.SnapshotPathType)1