Search in sources :

Example 1 with NodeExistsException

use of org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException in project voltdb by VoltDB.

the class InvocationDispatcher method takeShutdownSaveSnapshot.

/**
 * Takes the blocking "startup" snapshot requested as part of an orderly
 * shutdown, and dispatches the shutdown task itself once the snapshot
 * has been saved successfully.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>On non-Pro editions the task parameters are reset and the task is
 *       dispatched directly, without taking a snapshot.</li>
 *   <li>The first parameter (a {@code Long} or a numeric {@code String}) is
 *       read as the expected ZooKeeper transaction id.</li>
 *   <li>That id is compared with the mzxid of the cluster operation mode
 *       node; a mismatch is reported as a failure.</li>
 *   <li>A shutdown save guard node is created in ZooKeeper; if it already
 *       exists the request is rejected as a concurrent invocation.</li>
 *   <li>The snapshot is requested, and the response handler dispatches the
 *       task through an alternate adapter when the snapshot succeeds.</li>
 * </ol>
 *
 * @param task    the invocation carrying the ZooKeeper transaction id as parameter 0
 * @param handler the handler of the originating client; its admin flag is
 *                propagated to the alternate handler used for the final dispatch
 * @param ccxn    the originating connection, used to transmit snapshot failures
 * @param user    the authenticated user forwarded to {@code dispatch}
 * @param bypass  the override check forwarded to {@code dispatch}
 * @return a failure response when the request is rejected, the result of
 *         {@code dispatch} on non-Pro editions, or {@code null} once the
 *         snapshot has been requested (and after a local crash was requested)
 */
private final ClientResponseImpl takeShutdownSaveSnapshot(final StoredProcedureInvocation task, final InvocationClientHandler handler, final Connection ccxn, final AuthUser user, OverrideCheck bypass) {
    // shutdown save snapshot is available for Pro edition only
    if (!MiscUtils.isPro()) {
        task.setParams();
        return dispatch(task, handler, ccxn, user, bypass, false);
    }
    Object p0 = task.getParams().getParam(0);
    final long zkTxnId;
    if (p0 instanceof Long) {
        zkTxnId = ((Long) p0).longValue();
    } else if (p0 instanceof String) {
        try {
            zkTxnId = Long.parseLong((String) p0);
        } catch (NumberFormatException e) {
            return gracefulFailureResponse("Incorrect argument type", task.clientHandle);
        }
    } else {
        return gracefulFailureResponse("Incorrect argument type", task.clientHandle);
    }
    VoltDBInterface voltdb = VoltDB.instance();
    if (!voltdb.isPreparingShuttingdown()) {
        log.warn("Ignoring shutdown save snapshot request as VoltDB is not shutting down");
        return unexpectedFailureResponse("Ignoring shutdown save snapshot request as VoltDB is not shutting down", task.clientHandle);
    }
    final ZooKeeper zk = voltdb.getHostMessenger().getZK();
    // network threads are blocked from making zookeeper calls
    Future<Long> fut = voltdb.getSES(true).submit(new Callable<Long>() {

        @Override
        public Long call() {
            try {
                Stat stat = zk.exists(VoltZK.operationMode, false);
                if (stat == null) {
                    VoltDB.crashLocalVoltDB("cluster operation mode zookeeper node does not exist");
                    return Long.MIN_VALUE;
                }
                return stat.getMzxid();
            } catch (KeeperException | InterruptedException e) {
                VoltDB.crashLocalVoltDB("Failed to stat the cluster operation zookeeper node", true, e);
                return Long.MIN_VALUE;
            }
        }
    });
    try {
        // The caller-supplied id must match the last modification of the operation mode node
        if (fut.get().longValue() != zkTxnId) {
            return unexpectedFailureResponse("Internal error: cannot write a startup snapshot because the " + "current system state is not consistent with an orderly shutdown. " + "Please try \"voltadmin shutdown --save\" again.", task.clientHandle);
        }
    } catch (InterruptedException | ExecutionException e1) {
        VoltDB.crashLocalVoltDB("Failed to stat the cluster operation zookeeper node", true, e1);
        return null;
    }
    NodeSettings paths = m_catalogContext.get().getNodeSettings();
    String data;
    try {
        data = new JSONStringer().object().keySymbolValuePair(SnapshotUtil.JSON_TERMINUS, zkTxnId).endObject().toString();
    } catch (JSONException e) {
        VoltDB.crashLocalVoltDB("Failed to create startup snapshot save command", true, e);
        return null;
    }
    log.info("Saving startup snapshot");
    consoleLog.info("Taking snapshot to save database contents");
    final SimpleClientResponseAdapter alternateAdapter = new SimpleClientResponseAdapter(ClientInterface.SHUTDONW_SAVE_CID, "Blocking Startup Snapshot Save");
    final InvocationClientHandler alternateHandler = new InvocationClientHandler() {

        @Override
        public boolean isAdmin() {
            return handler.isAdmin();
        }

        @Override
        public long connectionId() {
            return ClientInterface.SHUTDONW_SAVE_CID;
        }
    };
    // Remember the caller's handle: the task's handle is replaced below by the
    // one registered with the alternate adapter.
    final long sourceHandle = task.clientHandle;
    task.setClientHandle(alternateAdapter.registerCallback(SimpleClientResponseAdapter.NULL_CALLBACK));
    SnapshotUtil.SnapshotResponseHandler savCallback = new SnapshotUtil.SnapshotResponseHandler() {

        @Override
        public void handleResponse(ClientResponse r) {
            if (r == null) {
                String msg = "Snapshot save failed. The database is paused and the shutdown has been cancelled";
                transmitResponseMessage(gracefulFailureResponse(msg, sourceHandle), ccxn, sourceHandle);
                // Nothing more to do: there is no response to inspect and the
                // shutdown must not be dispatched.
                return;
            }
            if (r.getStatus() != ClientResponse.SUCCESS) {
                String msg = "Snapshot save failed: " + r.getStatusString() + ". The database is paused and the shutdown has been cancelled";
                ClientResponseImpl resp = new ClientResponseImpl(ClientResponse.GRACEFUL_FAILURE, r.getResults(), msg, sourceHandle);
                transmitResponseMessage(resp, ccxn, sourceHandle);
                // The failure was reported as a cancelled shutdown, so do not
                // fall through to the success path below.
                return;
            }
            consoleLog.info("Snapshot taken successfully");
            task.setParams();
            dispatch(task, alternateHandler, alternateAdapter, user, bypass, false);
        }
    };
    // network threads are blocked from making zookeeper calls
    final byte[] guardContent = data.getBytes(StandardCharsets.UTF_8);
    Future<Boolean> guardFuture = voltdb.getSES(true).submit(new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            try {
                ZKUtil.asyncMkdirs(zk, VoltZK.shutdown_save_guard, guardContent).get();
            } catch (NodeExistsException itIsOk) {
                // The guard already exists: another shutdown save is in flight
                return false;
            } catch (InterruptedException | KeeperException e) {
                VoltDB.crashLocalVoltDB("Failed to create shutdown save guard zookeeper node", true, e);
                return false;
            }
            return true;
        }
    });
    boolean created;
    try {
        created = guardFuture.get().booleanValue();
    } catch (InterruptedException | ExecutionException e) {
        VoltDB.crashLocalVoltDB("Failed to create shutdown save guard zookeeper node", true, e);
        return null;
    }
    if (!created) {
        // Respond with the caller's original handle; task.clientHandle now holds
        // the alternate adapter's handle, which the caller cannot correlate.
        return unexpectedFailureResponse("Internal error: detected concurrent invocations of \"voltadmin shutdown --save\"", sourceHandle);
    }
    voltdb.getClientInterface().bindAdapter(alternateAdapter, null);
    SnapshotUtil.requestSnapshot(sourceHandle, paths.resolve(paths.getSnapshoth()).toPath().toUri().toString(), SnapshotUtil.getShutdownSaveNonce(zkTxnId), true, SnapshotFormat.NATIVE, SnapshotPathType.SNAP_AUTO, data, savCallback, true);
    // The client is answered asynchronously, either by savCallback or by the dispatched task
    return null;
}
Also used : ClientResponse(org.voltdb.client.ClientResponse) SnapshotUtil(org.voltdb.sysprocs.saverestore.SnapshotUtil) NodeExistsException(org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException) Stat(org.apache.zookeeper_voltpatches.data.Stat) ExecutionException(java.util.concurrent.ExecutionException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JSONStringer(org.json_voltpatches.JSONStringer) JSONException(org.json_voltpatches.JSONException) JSONException(org.json_voltpatches.JSONException) NodeExistsException(org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException) KeeperException(org.apache.zookeeper_voltpatches.KeeperException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) NodeSettings(org.voltdb.settings.NodeSettings) ZooKeeper(org.apache.zookeeper_voltpatches.ZooKeeper) JSONObject(org.json_voltpatches.JSONObject)

Example 2 with NodeExistsException

use of org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException in project voltdb by VoltDB.

the class SnapshotDaemon method scheduleSnapshotForLater.

/**
 * Schedules a user snapshot request for later because the database was busy.
 * The request keeps being rescheduled for as long as the error response
 * returned by the database is "SNAPSHOT IN PROGRESS".
 *
 * <p>Since the snapshot is deferred, an immediate "queued" response is sent
 * to the client via a ZooKeeper node on the first attempt.
 *
 * @param requestObj     the snapshot request passed as the single parameter
 *                       of the reattempted snapshot save
 * @param requestId      the id appended to {@code VoltZK.user_snapshot_response}
 *                       to form the response node path
 * @param isFirstAttempt {@code true} when this is the first time the request is
 *                       deferred; only then is the queued response written
 * @throws Exception if writing the queued response to ZooKeeper fails
 */
private void scheduleSnapshotForLater(final String requestObj, final String requestId, final boolean isFirstAttempt) throws Exception {
    /*
     * Only need to send the queue response the first time we attempt to schedule the snapshot
     * for later. It may be necessary to reschedule via this function multiple times.
     */
    if (isFirstAttempt) {
        SNAP_LOG.info("A user snapshot request could not be immediately fulfilled and will be reattempted later");
        /*
         * Construct a result to send to the client right now via ZK
         * saying we queued it to run later
         */
        VoltTable result = SnapshotUtil.constructNodeResultsTable();
        result.addRow(-1, CoreUtils.getHostnameOrAddress(), "", "SUCCESS", "SNAPSHOT REQUEST QUEUED");
        final ClientResponseImpl queuedResponse = new ClientResponseImpl(ClientResponseImpl.SUCCESS, new VoltTable[] { result }, "Snapshot request could not be fulfilled because a snapshot " + "is in progress. It was queued for execution", 0);
        ByteBuffer buf = ByteBuffer.allocate(queuedResponse.getSerializedSize());
        m_zk.create(VoltZK.user_snapshot_response + requestId, queuedResponse.flattenToBuffer(buf).array(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    }
    /*
     * Now queue the request for later
     */
    final Runnable r = new Runnable() {

        @Override
        public void run() {
            try {
                /*
                 * Construct a callback to handle the response to the
                 * @SnapshotSave invocation that will reattempt the user snapshot
                 */
                final long handle = m_nextCallbackHandle++;
                m_procedureCallbacks.put(handle, new ProcedureCallback() {

                    @Override
                    public void clientCallback(ClientResponse clientResponse) {
                        m_lastInitiationTs = null;
                        try {
                            /*
                             * If there is an error then we are done
                             * attempting this user snapshot. The params must be bad
                             * or things are broken.
                             */
                            if (clientResponse.getStatus() != ClientResponse.SUCCESS) {
                                SNAP_LOG.error(clientResponse.getStatusString());
                                //Reset the watch, in case this is recoverable
                                userSnapshotRequestExistenceCheck(true);
                                return;
                            }
                            VoltTable[] results = clientResponse.getResults();
                            //Do this check to avoid an NPE
                            if (results == null || results.length == 0 || results[0].getRowCount() < 1) {
                                SNAP_LOG.error("Queued user snapshot request reattempt received an unexpected response" + " and will not be reattempted. The client response is (status: " + clientResponse.getStatus() + " " + clientResponse.getStatusString() + " result: " + (results != null && results.length > 0 ? results[0] : "null") + ")");
                                /*
                                 * Don't think this should happen, reset the watch to allow later requests
                                 */
                                userSnapshotRequestExistenceCheck(true);
                                return;
                            }
                            VoltTable result = results[0];
                            boolean snapshotInProgress = false;
                            boolean haveFailure = false;
                            // Classify every FAILURE row: "in progress" is retryable, anything else is not
                            while (result.advanceRow()) {
                                if (result.getString("RESULT").equals("FAILURE")) {
                                    if (result.getString("ERR_MSG").equals("SNAPSHOT IN PROGRESS")) {
                                        snapshotInProgress = true;
                                    } else {
                                        haveFailure = true;
                                    }
                                }
                            }
                            /*
                             * If a snapshot was in progress, reattempt later, otherwise,
                             * if there was a failure, abort the attempt and log.
                             */
                            if (snapshotInProgress) {
                                SNAP_LOG.info("Queued user snapshot was reattempted, but a snapshot was " + " still in progress. It will be reattempted.");
                                //Turtles all the way down
                                scheduleSnapshotForLater(requestObj, requestId, false);
                            } else if (haveFailure) {
                                SNAP_LOG.info("Queued user snapshot was attempted, but there was a failure.");
                                try {
                                    ClientResponseImpl rimpl = (ClientResponseImpl) clientResponse;
                                    saveResponseToZKAndReset(requestId, rimpl);
                                } catch (NodeExistsException e) {
                                    // used to pass null as request ID to avoid this check if the request ID
                                    // already existed, this gives us the same behavior with a pre-existing
                                    // request ID
                                }
                                //Log the details of the failure, after resetting the watch in case of some odd NPE
                                result.resetRowPosition();
                                SNAP_LOG.info(result);
                            } else {
                                try {
                                    SNAP_LOG.debug("Queued user snapshot was successfully requested, saving to path " + VoltZK.user_snapshot_response + requestId);
                                    /*
                                     * Snapshot was started no problem, reset the watch for new requests
                                     */
                                    ClientResponseImpl rimpl = (ClientResponseImpl) clientResponse;
                                    saveResponseToZKAndReset(requestId, rimpl);
                                } catch (NodeExistsException e) {
                                    // used to pass null as request ID to avoid this check if the request ID
                                    // already existed, this gives us the same behavior with a pre-existing
                                    // request ID
                                }
                                return;
                            }
                        } catch (Exception e) {
                            SNAP_LOG.error("Error processing procedure callback for user snapshot", e);
                            try {
                                userSnapshotRequestExistenceCheck(true);
                            } catch (Exception e1) {
                                VoltDB.crashLocalVoltDB("Error resetting watch for user snapshot requests", true, e1);
                            }
                        }
                    }
                });
                initiateSnapshotSave(handle, new Object[] { requestObj }, false);
            } catch (Exception e) {
                // Record why the reattempt could not be initiated before resetting the
                // watch; previously this failure was dropped without a trace.
                SNAP_LOG.error("Error initiating queued user snapshot request reattempt", e);
                try {
                    userSnapshotRequestExistenceCheck(true);
                } catch (Exception e1) {
                    VoltDB.crashLocalVoltDB("Error checking for existence of user snapshots", true, e1);
                }
            }
        }
    };
    m_es.schedule(r, m_userSnapshotRetryInterval, TimeUnit.SECONDS);
}
Also used : ProcedureCallback(org.voltdb.client.ProcedureCallback) ClientResponse(org.voltdb.client.ClientResponse) NodeExistsException(org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException) ByteBuffer(java.nio.ByteBuffer) JSONException(org.json_voltpatches.JSONException) NodeExistsException(org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException) KeeperException(org.apache.zookeeper_voltpatches.KeeperException) ExecutionException(java.util.concurrent.ExecutionException)

Example 3 with NodeExistsException

use of org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException in project voltdb by VoltDB.

the class SnapshotDaemon method leaderElection.

/**
 * Runs the snapshot leader election against ZooKeeper.
 *
 * <p>Each round checks for the truncation master node while installing a
 * watch that re-runs the election on the executor when that node is deleted.
 * If the node is absent this instance tries to create it as an ephemeral
 * node; on success it marks itself leader, activates the last known schedule
 * (when there is one) and calls {@code electedTruncationLeader()}. If another
 * participant creates the node first, the round is repeated.
 */
private void leaderElection() {
    loggingLog.info("Starting leader election for snapshot truncation daemon");
    try {
        for (;;) {
            // Re-run the election on the executor once the current leader's node goes away
            final Watcher masterWatcher = new Watcher() {

                @Override
                public void process(WatchedEvent watched) {
                    switch(watched.getType()) {
                        case NodeDeleted:
                            loggingLog.info("Detected the snapshot truncation leader's ephemeral node deletion");
                            final Runnable reelect = new Runnable() {

                                @Override
                                public void run() {
                                    leaderElection();
                                }
                            };
                            m_es.execute(reelect);
                            break;
                        default:
                            break;
                    }
                }
            };
            final Stat masterStat = m_zk.exists(VoltZK.snapshot_truncation_master, masterWatcher);
            if (masterStat != null) {
                loggingLog.info("Leader election concluded, a leader already exists");
                return;
            }
            try {
                m_zk.create(VoltZK.snapshot_truncation_master, null, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
                m_isAutoSnapshotLeader = true;
                if (m_lastKnownSchedule != null) {
                    makeActivePrivate(m_lastKnownSchedule);
                }
                electedTruncationLeader();
                return;
            } catch (NodeExistsException lostRace) {
                // Someone else created the node between exists() and create(); go around again
            }
        }
    } catch (Exception failure) {
        VoltDB.crashLocalVoltDB("Exception in snapshot daemon electing master via ZK", true, failure);
    }
}
Also used : WatchedEvent(org.apache.zookeeper_voltpatches.WatchedEvent) Stat(org.apache.zookeeper_voltpatches.data.Stat) NodeExistsException(org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException) Watcher(org.apache.zookeeper_voltpatches.Watcher) JSONException(org.json_voltpatches.JSONException) NodeExistsException(org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException) KeeperException(org.apache.zookeeper_voltpatches.KeeperException) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

ExecutionException (java.util.concurrent.ExecutionException): 3; KeeperException (org.apache.zookeeper_voltpatches.KeeperException): 3; NodeExistsException (org.apache.zookeeper_voltpatches.KeeperException.NodeExistsException): 3; JSONException (org.json_voltpatches.JSONException): 3; Stat (org.apache.zookeeper_voltpatches.data.Stat): 2; ClientResponse (org.voltdb.client.ClientResponse): 2; IOException (java.io.IOException): 1; ByteBuffer (java.nio.ByteBuffer): 1; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1; WatchedEvent (org.apache.zookeeper_voltpatches.WatchedEvent): 1; Watcher (org.apache.zookeeper_voltpatches.Watcher): 1; ZooKeeper (org.apache.zookeeper_voltpatches.ZooKeeper): 1; JSONObject (org.json_voltpatches.JSONObject): 1; JSONStringer (org.json_voltpatches.JSONStringer): 1; ProcedureCallback (org.voltdb.client.ProcedureCallback): 1; NodeSettings (org.voltdb.settings.NodeSettings): 1; SnapshotUtil (org.voltdb.sysprocs.saverestore.SnapshotUtil): 1