Search in sources :

Example 1 with SnapshotSchedule

Use of org.voltdb.catalog.SnapshotSchedule in project voltdb by VoltDB.

The method initialize of the class RealVoltDB.

    /**
     * Initialize all the global components, then initialize all the m_sites.
     * @param config configuration that gets passed in from the command line.
     */
public void initialize(Configuration config) {"PID of this Volt process is " + CLibrary.getpid());
    synchronized (m_startAndStopLock) {
        exitAfterMessage = false;
        // Handle multiple invocations of server thread in the same JVM.
        // by clearing static variables/properties which ModuleManager,
        // and Settings depend on
        m_isRunningWithOldVerb = config.m_startAction.isLegacy();
        // check that this is a 64 bit VM
        if (System.getProperty("").contains("64") == false) {
            hostLog.fatal("You are running on an unsupported (probably 32 bit) JVM. Exiting.");
        // print the ascii art!.
        // determine the edition
        // Check license availability
        // All above - not for init
        String edition = "Community Edition";
        if (config.m_startAction != StartAction.INITIALIZE) {
            consoleLog.l7dlog(Level.INFO,, null);
            // load license API
            if (config.m_pathToLicense == null) {
                m_licenseApi = MiscUtils.licenseApiFactory();
                if (m_licenseApi == null) {
                    hostLog.fatal("Unable to open license file in default directories");
            } else {
                m_licenseApi = MiscUtils.licenseApiFactory(config.m_pathToLicense);
                if (m_licenseApi == null) {
                    hostLog.fatal("Unable to open license file in provided path: " + config.m_pathToLicense);
            if (m_licenseApi == null) {
                hostLog.fatal("Please contact to request a license.");
                VoltDB.crashLocalVoltDB("Failed to initialize license verifier. " + "See previous log message for details.", false, null);
            if (config.m_isEnterprise) {
                if (m_licenseApi.isEnterprise())
                    edition = "Enterprise Edition";
                if (m_licenseApi.isPro())
                    edition = "Pro Edition";
                if (m_licenseApi.isEnterpriseTrial())
                    edition = "Enterprise Edition";
                if (m_licenseApi.isProTrial())
                    edition = "Pro Edition";
                if (m_licenseApi.isAWSMarketplace())
                    edition = "AWS Marketplace Pro Edition";
            // this also prints out the license type on the console
            // print out the licensee on the license
            if (config.m_isEnterprise) {
                String licensee = m_licenseApi.licensee();
                if ((licensee != null) && (licensee.length() > 0)) {
          "Licensed to: %s", licensee));
        // Replay command line args that we can see
        StringBuilder sb = new StringBuilder(2048).append("Command line arguments: ");
        sb.append(System.getProperty("", "[not available]"));;
        List<String> iargs = ManagementFactory.getRuntimeMXBean().getInputArguments();
        sb.delete(0, sb.length()).append("Command line JVM arguments:");
        for (String iarg : iargs) sb.append(" ").append(iarg);
        if (iargs.size() > 0)
  "No JVM command line args known.");
        sb.delete(0, sb.length()).append("Command line JVM classpath: ");
        sb.append(System.getProperty("java.class.path", "[not available]"));;
        if (config.m_startAction == StartAction.INITIALIZE) {
            if (config.m_forceVoltdbCreate) {
        // If there's no deployment provide a default and put it under voltdbroot.
        if (config.m_pathToDeployment == null) {
            try {
                config.m_pathToDeployment = setupDefaultDeployment(hostLog, config.m_voltdbRoot);
                config.m_deploymentDefault = true;
            } catch (IOException e) {
                VoltDB.crashLocalVoltDB("Failed to write default deployment.", false, null);
        ReadDeploymentResults readDepl = readPrimedDeployment(config);
        if (config.m_startAction == StartAction.INITIALIZE) {
            if (config.m_forceVoltdbCreate && m_nodeSettings.clean()) {
                String msg = "Archived previous snapshot directory to " + m_nodeSettings.getSnapshoth() + ".1";
            stageDeploymentFileForInitialize(config, readDepl.deployment);
  "Initialized VoltDB root directory " + config.m_voltdbRoot.getPath());
  "Initialized VoltDB root directory " + config.m_voltdbRoot.getPath());
        if (config.m_startAction.isLegacy()) {
            consoleLog.warn("The \"" + config.m_startAction.m_verb + "\" command is deprecated, please use \"init\" and \"start\" for your cluster operations.");
        // config UUID is part of the status tracker.
        m_statusTracker = new NodeStateTracker();
        final File stagedCatalogLocation = new VoltFile(RealVoltDB.getStagedCatalogPath(config.m_voltdbRoot.getAbsolutePath()));
        if (config.m_startAction.isLegacy()) {
            File rootFH = CatalogUtil.getVoltDbRoot(readDepl.deployment.getPaths());
            File inzFH = new VoltFile(rootFH, VoltDB.INITIALIZED_MARKER);
            if (inzFH.exists()) {
                VoltDB.crashLocalVoltDB("Cannot use legacy start action " + config.m_startAction + " on voltdbroot " + rootFH + " that was initialized with the init command");
            //Case where you give primed deployment with -d look in ../../ for initialized marker.
            //Also check if parents are config and voltdbroot
            File cfile = (new File(config.m_pathToDeployment)).getParentFile();
            if (cfile != null) {
                rootFH = cfile.getParentFile();
                if ("config".equals(cfile.getName()) && VoltDB.DBROOT.equals(rootFH.getName())) {
                    inzFH = new VoltFile(rootFH, VoltDB.INITIALIZED_MARKER);
                    if (inzFH.exists()) {
                        VoltDB.crashLocalVoltDB("Can not use legacy start action " + config.m_startAction + " on voltdbroot " + rootFH + " that was initialized with the init command");
            if (stagedCatalogLocation.isFile()) {
                hostLog.warn("Initialized schema is present, but is being ignored and may be removed.");
        } else {
            assert (config.m_startAction == StartAction.PROBE);
            if (stagedCatalogLocation.isFile()) {
                assert (config.m_pathToCatalog == null) : config.m_pathToCatalog;
                config.m_pathToCatalog = stagedCatalogLocation.getAbsolutePath();
        List<String> failed = m_nodeSettings.ensureDirectoriesExist();
        if (!failed.isEmpty()) {
            String msg = "Unable to access or create the following directories:\n  - " + Joiner.on("\n  - ").join(failed);
        if (config.m_hostCount == VoltDB.UNDEFINED) {
            config.m_hostCount = readDepl.deployment.getCluster().getHostcount();
        // set the mode first thing
        m_mode = OperationMode.INITIALIZING;
        m_config = config;
        m_startMode = null;
        // set a bunch of things to null/empty/new for tests
        // which reuse the process
        m_safeMpTxnId = Long.MAX_VALUE;
        m_lastSeenMpTxnId = Long.MIN_VALUE;
        m_clientInterface = null;
        m_adminListener = null;
        m_commandLog = new DummyCommandLog();
        m_snmp = new DummySnmpTrapSender();
        m_messenger = null;
        m_opsRegistrar = new OpsRegistrar();
        m_snapshotCompletionMonitor = null;
        m_catalogContext = null;
        m_partitionCountStats = null;
        m_ioStats = null;
        m_memoryStats = null;
        m_commandLogStats = null;
        m_statsManager = null;
        m_restoreAgent = null;
        m_recoveryStartTime = System.currentTimeMillis();
        m_hostIdWithStartupCatalog = 0;
        m_pathToStartupCatalog = m_config.m_pathToCatalog;
        m_replicationActive = new AtomicBoolean(false);
        m_configLogger = null;
        // set up site structure
        final int computationThreads = Math.max(2, CoreUtils.availableProcessors() / 4);
        m_computationService = CoreUtils.getListeningExecutorService("Computation service thread", computationThreads, m_config.m_computationCoreBindings);
        // Set std-out/err to use the UTF-8 encoding and fail if UTF-8 isn't supported
        try {
            System.setOut(new PrintStream(System.out, true, "UTF-8"));
            System.setErr(new PrintStream(System.err, true, "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            hostLog.fatal("Support for the UTF-8 encoding is required for VoltDB. This means you are likely running an unsupported JVM. Exiting.");
        m_snapshotCompletionMonitor = new SnapshotCompletionMonitor();
        // use CLI overrides for testing hotfix version compatibility
        if (m_config.m_versionStringOverrideForTest != null) {
            m_versionString = m_config.m_versionStringOverrideForTest;
        if (m_config.m_versionCompatibilityRegexOverrideForTest != null) {
            m_hotfixableRegexPattern = m_config.m_versionCompatibilityRegexOverrideForTest;
        if (m_config.m_buildStringOverrideForTest != null) {
            m_buildString = m_config.m_buildStringOverrideForTest;
        // Prime cluster settings from configuration parameters
        // evaluate properties with the following sources in terms of priority
        // 1) properties from command line options
        // 2) properties from the files
        // 3) properties from the deployment file
        // this reads the file config/
        ClusterSettings fromPropertyFile = ClusterSettings.create();
        // handle case we recover clusters that were elastically expanded
        if (m_config.m_startAction.doesRecover()) {
            m_config.m_hostCount = fromPropertyFile.hostcount();
        Map<String, String> fromCommandLine = m_config.asClusterSettingsMap();
        Map<String, String> fromDeploymentFile = CatalogUtil.asClusterSettingsMap(readDepl.deployment);
        ClusterSettings clusterSettings = ClusterSettings.create(fromCommandLine, fromPropertyFile.asMap(), fromDeploymentFile);
        // persist the merged settings;
        m_clusterSettings.set(clusterSettings, 1);
        MeshProber.Determination determination = buildClusterMesh(readDepl);
        if (m_config.m_startAction == StartAction.PROBE) {
            String action = "Starting a new database cluster";
            if (determination.startAction.doesRejoin()) {
                action = "Rejoining a running cluster";
            } else if (determination.startAction == StartAction.JOIN) {
                action = "Adding this node to a running cluster";
            } else if (determination.startAction.doesRecover()) {
                action = "Restarting the database cluster from the command logs";
        m_config.m_startAction = determination.startAction;
        m_config.m_hostCount = determination.hostCount;
        m_terminusNonce = determination.terminusNonce;
        // determine if this is a rejoining node
        // (used for license check and later the actual rejoin)
        m_rejoining = m_config.m_startAction.doesRejoin();
        m_rejoinDataPending = m_config.m_startAction.doesJoin();
        m_joining = m_config.m_startAction == StartAction.JOIN;
        if (m_rejoining || m_joining) {
        //Register dummy agents immediately
        //Start validating the build string in the background
        final Future<?> buildStringValidation = validateBuildString(getBuildString(), m_messenger.getZK());
        // race to create start action nodes and then verify their compatibility.
        m_messenger.getZK().create(VoltZK.start_action, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, new ZKUtil.StringCallback(), null);
        VoltZK.createStartActionNode(m_messenger.getZK(), m_messenger.getHostId(), m_config.m_startAction);
        // durable means commandlogging is enabled.
        boolean durable = readDeploymentAndCreateStarterCatalogContext(config);
        if (config.m_isEnterprise && m_config.m_startAction.doesRequireEmptyDirectories() && !config.m_forceVoltdbCreate && durable) {
        //If terminus is present we will recover from shutdown save so dont move.
        if (!durable && m_config.m_startAction.doesRecover() && determination.terminusNonce == null) {
            if (m_nodeSettings.clean()) {
                String msg = "Archiving old snapshots to " + m_nodeSettings.getSnapshoth() + ".1 and starting an empty database." + " Use voltadmin restore if you wish to restore an old database instance.";
        // wait to make sure every host actually *see* each other's ZK node state.
        final int numberOfNodes = m_messenger.getLiveHostIds().size();
        Map<Integer, HostInfo> hostInfos = m_messenger.waitForGroupJoin(numberOfNodes);
        Map<Integer, String> hostGroups = Maps.newHashMap();
        Map<Integer, Integer> sitesPerHostMap = Maps.newHashMap();
        hostInfos.forEach((k, v) -> {
            hostGroups.put(k, v.m_group);
            sitesPerHostMap.put(k, v.m_localSitesCount);
        if (m_messenger.isPaused() || m_config.m_isPaused) {
        // Create the thread pool here. It's needed by buildClusterMesh()
        m_periodicWorkThread = CoreUtils.getScheduledThreadPoolExecutor("Periodic Work", 1, CoreUtils.SMALL_STACK_SIZE);
        m_periodicPriorityWorkThread = CoreUtils.getScheduledThreadPoolExecutor("Periodic Priority Work", 1, CoreUtils.SMALL_STACK_SIZE);
        Class<?> snapshotIOAgentClass = MiscUtils.loadProClass("org.voltdb.SnapshotIOAgentImpl", "Snapshot", true);
        if (snapshotIOAgentClass != null) {
            try {
                m_snapshotIOAgent = (SnapshotIOAgent) snapshotIOAgentClass.getConstructor(HostMessenger.class, long.class).newInstance(m_messenger, m_messenger.getHSIdForLocalSite(HostMessenger.SNAPSHOT_IO_AGENT_ID));
                m_messenger.createMailbox(m_snapshotIOAgent.getHSId(), m_snapshotIOAgent);
            } catch (Exception e) {
                VoltDB.crashLocalVoltDB("Failed to instantiate snapshot IO agent", true, e);
        try {
            SimpleDateFormat sdf = new SimpleDateFormat("EEE MMM d, yyyy");
            JSONObject jo = new JSONObject();
            jo.put("trial", m_licenseApi.isAnyKindOfTrial());
            jo.put("hostcount", m_licenseApi.maxHostcount());
            jo.put("commandlogging", m_licenseApi.isCommandLoggingAllowed());
            jo.put("wanreplication", m_licenseApi.isDrReplicationAllowed());
            jo.put("expiration", sdf.format(m_licenseApi.expires().getTime()));
            m_licenseInformation = jo.toString();
        } catch (JSONException ex) {
        // Create the GlobalServiceElector.  Do this here so we can register the MPI with it
        // when we construct it below
        m_globalServiceElector = new GlobalServiceElector(m_messenger.getZK(), m_messenger.getHostId());
        // Start the GlobalServiceElector.  Not sure where this will actually belong.
        try {
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("Unable to start GlobalServiceElector", true, e);
        // Always create a mailbox for elastic join data transfer
        if (m_config.m_isEnterprise) {
            long elasticHSId = m_messenger.getHSIdForLocalSite(HostMessenger.REBALANCE_SITE_ID);
            m_messenger.createMailbox(elasticHSId, new SiteMailbox(m_messenger, elasticHSId));
        if (m_joining) {
            Class<?> elasticJoinCoordClass = MiscUtils.loadProClass("org.voltdb.join.ElasticJoinNodeCoordinator", "Elastic", false);
            try {
                Constructor<?> constructor = elasticJoinCoordClass.getConstructor(HostMessenger.class, String.class);
                m_joinCoordinator = (JoinCoordinator) constructor.newInstance(m_messenger, VoltDB.instance().getVoltDBRootPath());
            } catch (Exception e) {
                VoltDB.crashLocalVoltDB("Failed to instantiate join coordinator", true, e);
             * Construct all the mailboxes for things that need to be globally addressable so they can be published
             * in one atomic shot.
             * The starting state for partition assignments are statically derived from the host id generated
             * by host messenger and the k-factor/host count/sites per host. This starting state
             * is published to ZK as the topology metadata node.
             * On join and rejoin the node has to inspect the topology meta node to find out what is missing
             * and then update the topology listing itself as the replica for those partitions.
             * Then it does a compare and set of the topology.
             * Ning: topology may not reflect the true partitions in the cluster during join. So if another node
             * is trying to rejoin, it should rely on the cartographer's view to pick the partitions to replace.
        AbstractTopology topo = getTopology(config.m_startAction, hostGroups, sitesPerHostMap, m_joinCoordinator);
        m_partitionsToSitesAtStartupForExportInit = new ArrayList<>();
        try {
            // IV2 mailbox stuff
            m_configuredReplicationFactor = topo.getReplicationFactor();
            m_cartographer = new Cartographer(m_messenger, m_configuredReplicationFactor, m_catalogContext.cluster.getNetworkpartition());
            m_partitionZeroLeader = new Supplier<Boolean>() {

                public Boolean get() {
                    return m_cartographer.isPartitionZeroLeader();
            List<Integer> partitions = null;
            if (m_rejoining) {
                m_configuredNumberOfPartitions = m_cartographer.getPartitionCount();
                partitions = recoverPartitions(topo, hostGroups.get(m_messenger.getHostId()));
                if (partitions == null) {
                    partitions = m_cartographer.getIv2PartitionsToReplace(m_configuredReplicationFactor, m_catalogContext.getNodeSettings().getLocalSitesCount(), m_messenger.getHostId(), hostGroups);
                if (partitions.size() == 0) {
                    VoltDB.crashLocalVoltDB("The VoltDB cluster already has enough nodes to satisfy " + "the requested k-safety factor of " + m_configuredReplicationFactor + ".\n" + "No more nodes can join.", false, null);
            } else {
                m_configuredNumberOfPartitions = topo.getPartitionCount();
                partitions = topo.getPartitionIdList(m_messenger.getHostId());
            for (int ii = 0; ii < partitions.size(); ii++) {
                Integer partition = partitions.get(ii);
                m_iv2InitiatorStartingTxnIds.put(partition, TxnEgo.makeZero(partition).getTxnId());
            m_iv2Initiators = createIv2Initiators(partitions, m_config.m_startAction, m_partitionsToSitesAtStartupForExportInit);
            m_iv2InitiatorStartingTxnIds.put(MpInitiator.MP_INIT_PID, TxnEgo.makeZero(MpInitiator.MP_INIT_PID).getTxnId());
            // Pass the local HSIds to the MPI so it can farm out buddy sites
            // to the RO MP site pool
            List<Long> localHSIds = new ArrayList<>();
            for (Initiator ii : m_iv2Initiators.values()) {
            m_MPI = new MpInitiator(m_messenger, localHSIds, getStatsAgent());
            m_iv2Initiators.put(MpInitiator.MP_INIT_PID, m_MPI);
            // Make a list of HSIds to join
            Map<Integer, Long> partsToHSIdsToRejoin = new HashMap<>();
            for (Initiator init : m_iv2Initiators.values()) {
                if (init.isRejoinable()) {
                    partsToHSIdsToRejoin.put(init.getPartitionId(), init.getInitiatorHSId());
            OnDemandBinaryLogger.path = VoltDB.instance().getVoltDBRootPath();
            if (m_rejoining) {
      "Set recovering site count to " + partsToHSIdsToRejoin.size());
                m_joinCoordinator = new Iv2RejoinCoordinator(m_messenger, partsToHSIdsToRejoin.values(), VoltDB.instance().getVoltDBRootPath(), m_config.m_startAction == StartAction.LIVE_REJOIN);
                if (m_config.m_startAction == StartAction.LIVE_REJOIN) {
          "Using live rejoin.");
                } else {
          "Using blocking rejoin.");
            } else if (m_joining) {
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB(e.getMessage(), true, e);
        // do the many init tasks in the Inits class
        Inits inits = new Inits(m_statusTracker, this, 1, durable);
        // Need the catalog so that we know how many tables so we can guess at the necessary heap size
        // This is done under Inits.doInitializationWork(), so need to wait until we get here.
        // Current calculation needs pro/community knowledge, number of tables, and the sites/host,
        // which is the number of initiators (minus the possibly idle MPI initiator)
        checkHeapSanity(MiscUtils.isPro(), m_catalogContext.tables.size(), (m_iv2Initiators.size() - 1), m_configuredReplicationFactor);
        if (m_joining && getReplicationRole() == ReplicationRole.REPLICA) {
            VoltDB.crashLocalVoltDB("Elastic join is prohibited on a replica cluster.", false, null);
             * Construct an adhoc planner for the initial catalog
        final CatalogSpecificPlanner csp = new CatalogSpecificPlanner(m_catalogContext);
        // Initialize stats
        m_ioStats = new IOStats();
        getStatsAgent().registerStatsSource(StatsSelector.IOSTATS, 0, m_ioStats);
        m_memoryStats = new MemoryStats();
        getStatsAgent().registerStatsSource(StatsSelector.MEMORY, 0, m_memoryStats);
        getStatsAgent().registerStatsSource(StatsSelector.TOPO, 0, m_cartographer);
        m_partitionCountStats = new PartitionCountStats(m_cartographer);
        getStatsAgent().registerStatsSource(StatsSelector.PARTITIONCOUNT, 0, m_partitionCountStats);
        m_initiatorStats = new InitiatorStats(m_myHostId);
        m_liveClientsStats = new LiveClientsStats();
        getStatsAgent().registerStatsSource(StatsSelector.LIVECLIENTS, 0, m_liveClientsStats);
        m_latencyStats = new LatencyStats();
        getStatsAgent().registerStatsSource(StatsSelector.LATENCY, 0, m_latencyStats);
        m_latencyCompressedStats = new LatencyHistogramStats(m_myHostId);
        getStatsAgent().registerStatsSource(StatsSelector.LATENCY_COMPRESSED, 0, m_latencyCompressedStats);
        m_latencyHistogramStats = new LatencyUncompressedHistogramStats(m_myHostId);
        getStatsAgent().registerStatsSource(StatsSelector.LATENCY_HISTOGRAM, 0, m_latencyHistogramStats);
        BalancePartitionsStatistics rebalanceStats = new BalancePartitionsStatistics();
        getStatsAgent().registerStatsSource(StatsSelector.REBALANCE, 0, rebalanceStats);
        KSafetyStats kSafetyStats = new KSafetyStats();
        getStatsAgent().registerStatsSource(StatsSelector.KSAFETY, 0, kSafetyStats);
        m_cpuStats = new CpuStats();
        getStatsAgent().registerStatsSource(StatsSelector.CPU, 0, m_cpuStats);
        m_gcStats = new GcStats();
        getStatsAgent().registerStatsSource(StatsSelector.GC, 0, m_gcStats);
        // ENG-6321
        m_commandLogStats = new CommandLogStats(m_commandLog);
        getStatsAgent().registerStatsSource(StatsSelector.COMMANDLOG, 0, m_commandLogStats);
        // Dummy DRCONSUMER stats
             * Initialize the command log on rejoin and join before configuring the IV2
             * initiators.  This will prevent them from receiving transactions
             * which need logging before the internal file writers are
             * initialized.  Root cause of ENG-4136.
             * If sync command log is on, not initializing the command log before the initiators
             * are up would cause deadlock.
        if ((m_commandLog != null) && (m_commandLog.needsInitialization())) {
            consoleLog.l7dlog(Level.INFO,, null);
        } else {
            consoleLog.l7dlog(Level.INFO,, null);
        if (m_commandLog != null && (m_rejoining || m_joining)) {
            //On rejoin the starting IDs are all 0 so technically it will load any snapshot
            //but the newest snapshot will always be the truncation snapshot taken after rejoin
            //completes at which point the node will mark itself as actually recovered.
            // Use the partition count from the cluster config instead of the cartographer
            // here. Since the initiators are not started yet, the cartographer still doesn't
            // know about the new partitions at this point.
            m_commandLog.initForRejoin(m_catalogContext.cluster.getLogconfig().get("log").getLogsize(), Long.MIN_VALUE, m_configuredNumberOfPartitions, true, m_config.m_commandLogBinding, m_iv2InitiatorStartingTxnIds);
        // Create the client interface
        try {
            InetAddress clientIntf = null;
            InetAddress adminIntf = null;
            if (!m_config.m_externalInterface.trim().equals("")) {
                clientIntf = InetAddress.getByName(m_config.m_externalInterface);
                //client and admin interfaces are same by default.
                adminIntf = clientIntf;
            //If user has specified on command line host:port override client and admin interfaces.
            if (m_config.m_clientInterface != null && m_config.m_clientInterface.trim().length() > 0) {
                clientIntf = InetAddress.getByName(m_config.m_clientInterface);
            if (m_config.m_adminInterface != null && m_config.m_adminInterface.trim().length() > 0) {
                adminIntf = InetAddress.getByName(m_config.m_adminInterface);
            m_clientInterface = ClientInterface.create(m_messenger, m_catalogContext, getReplicationRole(), m_cartographer, clientIntf, config.m_port, adminIntf, config.m_adminPort, m_config.m_sslContext);
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB(e.getMessage(), true, e);
        // DR overflow directory
        if (VoltDB.instance().getLicenseApi().isDrReplicationAllowed()) {
            try {
                Class<?> ndrgwClass = null;
                ndrgwClass = Class.forName("org.voltdb.dr2.DRProducer");
                Constructor<?> ndrgwConstructor = ndrgwClass.getConstructor(File.class, File.class, boolean.class, boolean.class, boolean.class, int.class, int.class);
                m_producerDRGateway = (ProducerDRGateway) ndrgwConstructor.newInstance(new VoltFile(VoltDB.instance().getDROverflowPath()), new VoltFile(VoltDB.instance().getSnapshotPath()), (m_config.m_startAction.doesRecover() && (durable || determination.terminusNonce != null)), m_config.m_startAction.doesRejoin(), m_replicationActive.get(), m_configuredNumberOfPartitions, (m_catalogContext.getClusterSettings().hostcount() - m_config.m_missingHostCount));
            } catch (Exception e) {
                VoltDB.crashLocalVoltDB("Unable to load DR system", true, e);
        } else {
            // set up empty stats for the DR Producer
            getStatsAgent().registerStatsSource(StatsSelector.DRPRODUCERNODE, 0, new DRProducerStatsBase.DRProducerNodeStatsBase());
            getStatsAgent().registerStatsSource(StatsSelector.DRPRODUCERPARTITION, 0, new DRProducerStatsBase.DRProducerPartitionStatsBase());
        m_drRoleStats = new DRRoleStats(this);
        getStatsAgent().registerStatsSource(StatsSelector.DRROLE, 0, m_drRoleStats);
             * Configure and start all the IV2 sites
        try {
            final String serializedCatalog = m_catalogContext.catalog.serialize();
            for (Initiator iv2init : m_iv2Initiators.values()) {
                iv2init.configure(getBackendTargetType(), m_catalogContext, serializedCatalog, csp, m_configuredNumberOfPartitions, m_config.m_startAction, getStatsAgent(), m_memoryStats, m_commandLog, m_config.m_executionCoreBindings.poll(), shouldInitiatorCreateMPDRGateway(iv2init));
            // LeaderAppointer startup blocks if the initiators are not initialized.
            // So create the LeaderAppointer after the initiators.
            boolean expectSyncSnapshot = getReplicationRole() == ReplicationRole.REPLICA && config.m_startAction == StartAction.CREATE;
            m_leaderAppointer = new LeaderAppointer(m_messenger, m_configuredNumberOfPartitions, m_catalogContext.getDeployment().getCluster().getKfactor(), topo.topologyToJSON(), m_MPI, kSafetyStats, expectSyncSnapshot);
        } catch (Exception e) {
            Throwable toLog = e;
            if (e instanceof ExecutionException) {
                toLog = ((ExecutionException) e).getCause();
            VoltDB.crashLocalVoltDB("Error configuring IV2 initiator.", true, toLog);
        // Create the statistics manager and register it to JMX registry
        m_statsManager = null;
        try {
            final Class<?> statsManagerClass = MiscUtils.loadProClass("", "JMX", true);
            if (statsManagerClass != null && !DISABLE_JMX) {
                m_statsManager = (StatsManager) statsManagerClass.newInstance();
        } catch (Exception e) {
        //JMXStatsManager will log and we continue.
        try {
        } catch (Exception e) {
            hostLog.fatal("Error initializing snapshot completion monitor", e);
            VoltDB.crashLocalVoltDB("Error initializing snapshot completion monitor", true, e);
             * Make sure the build string successfully validated
             * before continuing to do operations
             * that might return wrong answers or lose data.
        try {
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("Failed to validate cluster build string", false, e);
        //so that the secondary connections can be created.
        if (m_joining) {
            int expectedHosts = m_configuredReplicationFactor + 1;
            m_messenger.waitForJoiningHostsToBeReady(expectedHosts, this.m_myHostId);
        } else if (!m_rejoining) {
            // initial start or recover
            int expectedHosts = m_catalogContext.getClusterSettings().hostcount() - m_config.m_missingHostCount;
        // Create secondary connections within partition group
        if (!m_joining && (m_cartographer.getPartitionCount()) != m_configuredNumberOfPartitions) {
            for (Map.Entry<Integer, ImmutableList<Long>> entry : getSiteTrackerForSnapshot().m_partitionsToSitesImmutable.entrySet()) {
       + " -- " + CoreUtils.hsIdCollectionToString(entry.getValue()));
            VoltDB.crashGlobalVoltDB("Mismatch between configured number of partitions (" + m_configuredNumberOfPartitions + ") and actual (" + m_cartographer.getPartitionCount() + ")", true, null);
        // print out a bunch of useful system info
        logDebuggingInfo(m_config.m_adminPort, m_config.m_httpPort, m_httpPortExtraLogMessage, m_jsonEnabled);
        // warn the user on the console if k=0 or if no command logging
        if (m_configuredReplicationFactor == 0) {
            consoleLog.warn("This is not a highly available cluster. K-Safety is set to 0.");
        boolean usingCommandLog = m_config.m_isEnterprise && (m_catalogContext.cluster.getLogconfig() != null) && (m_catalogContext.cluster.getLogconfig().get("log") != null) && m_catalogContext.cluster.getLogconfig().get("log").getEnabled();
        if (!usingCommandLog) {
            // figure out if using a snapshot schedule
            boolean usingPeridoicSnapshots = false;
            for (SnapshotSchedule ss : m_catalogContext.database.getSnapshotschedule()) {
                if (ss.getEnabled()) {
                    usingPeridoicSnapshots = true;
            // print the right warning depending on durability settings
            if (usingPeridoicSnapshots) {
                consoleLog.warn("Durability is limited to periodic snapshots. Command logging is off.");
            } else {
                consoleLog.warn("Durability is turned off. Command logging is off.");
        // warn if cluster is partitionable, but partition detection is off
        if ((m_catalogContext.cluster.getNetworkpartition() == false) && (m_configuredReplicationFactor > 0)) {
            hostLog.warn("Running a redundant (k-safe) cluster with network " + "partition detection disabled is not recommended for production use.");
        // we decided not to include the stronger language below for the 3.0 version (ENG-4215)
        //hostLog.warn("With partition detection disabled, data may be lost or " +
        //      "corrupted by certain classes of network failures.");
        assert (m_clientInterface != null);
        m_clientInterface.initializeSnapshotDaemon(m_messenger, m_globalServiceElector);
        // Start elastic join service
        try {
            if (m_config.m_isEnterprise && TheHashinator.getCurrentConfig().type == HashinatorType.ELASTIC) {
                Class<?> elasticServiceClass = MiscUtils.loadProClass("org.voltdb.join.ElasticJoinCoordinator", "Elastic join", false);
                if (elasticServiceClass == null) {
                    VoltDB.crashLocalVoltDB("Missing the ElasticJoinCoordinator class file in the enterprise " + "edition", false, null);
                Constructor<?> constructor = elasticServiceClass.getConstructor(HostMessenger.class, ClientInterface.class, Cartographer.class, BalancePartitionsStatistics.class, String.class, int.class, Supplier.class);
                m_elasticJoinService = (ElasticJoinService) constructor.newInstance(m_messenger, m_clientInterface, m_cartographer, rebalanceStats, VoltDB.instance().getCommandLogSnapshotPath(), m_catalogContext.getDeployment().getCluster().getKfactor(), m_clusterSettings);
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("Failed to instantiate elastic join service", false, e);
        // set additional restore agent stuff
        if (m_restoreAgent != null) {
            m_restoreAgent.setInitiator(new Iv2TransactionCreator(m_clientInterface));
        // Start the stats agent at the end, after everything has been constructed
        m_configLogger = new Thread(new ConfigLogging());
Also used : ClusterSettings(org.voltdb.settings.ClusterSettings) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ZKUtil(org.voltcore.zk.ZKUtil) InitiatorStats(org.voltdb.dtxn.InitiatorStats) SiteMailbox(org.voltcore.messaging.SiteMailbox) MpInitiator(org.voltdb.iv2.MpInitiator) SpInitiator(org.voltdb.iv2.SpInitiator) Initiator(org.voltdb.iv2.Initiator) BaseInitiator(org.voltdb.iv2.BaseInitiator) HostMessenger(org.voltcore.messaging.HostMessenger) KSafetyStats(org.voltdb.iv2.KSafetyStats) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) LeaderAppointer(org.voltdb.iv2.LeaderAppointer) UnsupportedEncodingException(java.io.UnsupportedEncodingException) MpInitiator(org.voltdb.iv2.MpInitiator) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) DummySnmpTrapSender(org.voltdb.snmp.DummySnmpTrapSender) JSONObject(org.json_voltpatches.JSONObject) Iv2RejoinCoordinator(org.voltdb.rejoin.Iv2RejoinCoordinator) VoltFile(org.voltdb.utils.VoltFile) File(java.io.File) InetAddress(java.net.InetAddress) Map(java.util.Map) CatalogMap(org.voltdb.catalog.CatalogMap) TreeMap(java.util.TreeMap) ImmutableMap(com.google_voltpatches.common.collect.ImmutableMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ImmutableList(com.google_voltpatches.common.collect.ImmutableList) BalancePartitionsStatistics(org.voltdb.join.BalancePartitionsStatistics) LatencyHistogramStats(org.voltdb.dtxn.LatencyHistogramStats) Cartographer(org.voltdb.iv2.Cartographer) LatencyStats(org.voltdb.dtxn.LatencyStats) SnapshotSchedule(org.voltdb.catalog.SnapshotSchedule) ExecutionException(java.util.concurrent.ExecutionException) PrintStream(java.io.PrintStream) JSONException(org.json_voltpatches.JSONException) IOException(java.io.IOException) MeshProber(org.voltdb.probe.MeshProber) SocketException(java.net.SocketException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) JSONException(org.json_voltpatches.JSONException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) KeeperException(org.apache.zookeeper_voltpatches.KeeperException) SettingsException(org.voltdb.settings.SettingsException)
VoltFile(org.voltdb.utils.VoltFile) LatencyUncompressedHistogramStats(org.voltdb.dtxn.LatencyUncompressedHistogramStats) HostInfo(org.voltcore.messaging.HostMessenger.HostInfo) SimpleDateFormat(java.text.SimpleDateFormat)

Example 2 with SnapshotSchedule

use of org.voltdb.catalog.SnapshotSchedule in project voltdb by VoltDB.

the class SystemInformation method populateDeploymentProperties.

/**
 * Builds a table of deployment-level properties as (name, value) rows.
 *
 * Every property is appended with {@code results.addRow(name, value)} to a
 * {@code VoltTable} created from {@code clusterInfoSchema}. Values are read from
 * the catalog objects and settings passed in, and from {@code VoltDB.instance()}.
 *
 * NOTE(review): the closing braces of this excerpt are not visible, so the extent
 * of each conditional below is inferred from indentation only.
 *
 * @param cluster catalog cluster; source of the deployment, JSON API flag,
 *        partition-detection flag, heartbeat timeout and command-log config
 * @param database catalog database; source of the snapshot schedule, connectors and users
 * @param clusterSettings supplies the "hostcount" value
 * @param nodeSettings supplies the "sitesperhost" value
 * @return the populated table
 */
public static VoltTable populateDeploymentProperties(Cluster cluster, Database database, ClusterSettings clusterSettings, NodeSettings nodeSettings) {
    VoltTable results = new VoltTable(clusterInfoSchema);
    // it would be awesome if these property names could come
    // from the RestApiDescription.xml (or the equivalent thereof) someday --izzy
    results.addRow("voltdbroot", VoltDB.instance().getVoltDBRootPath());
    Deployment deploy = cluster.getDeployment().get("deployment");
    results.addRow("hostcount", Integer.toString(clusterSettings.hostcount()));
    results.addRow("kfactor", Integer.toString(deploy.getKfactor()));
    results.addRow("sitesperhost", Integer.toString(nodeSettings.getLocalSitesCount()));
    // HTTP: "httpenabled" stays "false" when the configured port is -1 or
    // Integer.MAX_VALUE; the "httpport" row is only added for any other value.
    String http_enabled = "false";
    int http_port = VoltDB.instance().getConfig().m_httpPort;
    if (http_port != -1 && http_port != Integer.MAX_VALUE) {
        http_enabled = "true";
        results.addRow("httpport", Integer.toString(http_port));
    results.addRow("httpenabled", http_enabled);
    String json_enabled = "false";
    if (cluster.getJsonapi()) {
        json_enabled = "true";
    results.addRow("jsonenabled", json_enabled);
    // Snapshots: only the schedule named "default" is consulted. The detail rows
    // (path, prefix, frequency, retain) are added only when that schedule exists
    // and is enabled; "snapshotenabled" is reported either way.
    SnapshotSchedule snaps = database.getSnapshotschedule().get("default");
    String snap_enabled = "false";
    if (snaps != null && snaps.getEnabled()) {
        snap_enabled = "true";
        // frequency is rendered as value immediately followed by unit
        String snap_freq = Integer.toString(snaps.getFrequencyvalue()) + snaps.getFrequencyunit();
        results.addRow("snapshotpath", VoltDB.instance().getSnapshotPath());
        results.addRow("snapshotprefix", snaps.getPrefix());
        results.addRow("snapshotfrequency", snap_freq);
        results.addRow("snapshotretain", Integer.toString(snaps.getRetain()));
    results.addRow("snapshotenabled", snap_enabled);
    // Export: an "exportoverflowpath" row is added for each non-null, enabled
    // connector; the value does not depend on the connector.
    for (Connector export_conn : database.getConnectors()) {
        if (export_conn != null && export_conn.getEnabled()) {
            results.addRow("exportoverflowpath", VoltDB.instance().getExportOverflowPath());
    results.addRow("export", Boolean.toString(CatalogUtil.isExportEnabled()));
    String partition_detect_enabled = "false";
    if (cluster.getNetworkpartition()) {
        partition_detect_enabled = "true";
    results.addRow("partitiondetection", partition_detect_enabled);
    results.addRow("heartbeattimeout", Integer.toString(cluster.getHeartbeattimeout()));
    results.addRow("adminport", Integer.toString(VoltDB.instance().getConfig().m_adminPort));
    String command_log_enabled = "false";
    // log name is MAGIC, you knoooow
    // NOTE(review): unlike `snaps` above, `command_log` is dereferenced without a
    // null check -- confirm that a "log" entry always exists in the log config.
    CommandLog command_log = cluster.getLogconfig().get("log");
    if (command_log.getEnabled()) {
        command_log_enabled = "true";
        // mode is "sync" when the command log is synchronous, "async" otherwise
        String command_log_mode = "async";
        if (command_log.getSynchronous()) {
            command_log_mode = "sync";
        String command_log_path = VoltDB.instance().getCommandLogPath();
        String command_log_snaps = VoltDB.instance().getCommandLogSnapshotPath();
        String command_log_fsync_interval = Integer.toString(command_log.getFsyncinterval());
        String command_log_max_txns = Integer.toString(command_log.getMaxtxns());
        results.addRow("commandlogmode", command_log_mode);
        results.addRow("commandlogfreqtime", command_log_fsync_interval);
        results.addRow("commandlogfreqtxns", command_log_max_txns);
        results.addRow("commandlogpath", command_log_path);
        results.addRow("commandlogsnapshotpath", command_log_snaps);
    results.addRow("commandlogenabled", command_log_enabled);
    // Users are flattened into one string: each user contributes its escaped
    // name, then (if it has groups) ":" and a comma-separated list of escaped
    // group names, and finally ";".
    String users = "";
    for (User user : database.getUsers()) {
        users += addEscapes(user.getTypeName());
        if (user.getGroups() != null && user.getGroups().size() > 0) {
            users += ":";
            for (GroupRef gref : user.getGroups()) {
                users += addEscapes(gref.getGroup().getTypeName());
                users += ",";
            // drop the trailing "," left by the group loop
            users = users.substring(0, users.length() - 1);
        users += ";";
    results.addRow("users", users);
    // Add system setting information also
    // the attribute names follows the above naming rule
    Systemsettings sysSettings = deploy.getSystemsettings().get("systemsettings");
    results.addRow("elasticduration", Integer.toString(sysSettings.getElasticduration()));
    results.addRow("elasticthroughput", Integer.toString(sysSettings.getElasticthroughput()));
    results.addRow("snapshotpriority", Integer.toString(sysSettings.getSnapshotpriority()));
    results.addRow("temptablesmaxsize", Integer.toString(sysSettings.getTemptablemaxsize()));
    results.addRow("querytimeout", Integer.toString(sysSettings.getQuerytimeout()));
    return results;
Also used : Connector(org.voltdb.catalog.Connector) User(org.voltdb.catalog.User) Systemsettings(org.voltdb.catalog.Systemsettings) CommandLog(org.voltdb.catalog.CommandLog) Deployment(org.voltdb.catalog.Deployment) SnapshotSchedule(org.voltdb.catalog.SnapshotSchedule) GroupRef(org.voltdb.catalog.GroupRef) VoltTable(org.voltdb.VoltTable)

Example 3 with SnapshotSchedule

use of org.voltdb.catalog.SnapshotSchedule in project voltdb by VoltDB.

the class CatalogContext method getDebuggingInfoFromCatalog.

// Generate helpful status messages based on configuration present in the
// catalog.  Used to generate these messages at startup and after an
// @UpdateApplicationCatalog
/**
 * Collects human-readable status lines describing the current configuration.
 *
 * @param verbose when true, the topology lines "deployment1" and "deployment2"
 *        are also produced (extent of this branch inferred from indentation;
 *        the closing braces are not visible in this excerpt)
 * @return a sorted map from a short line identifier (for example "voltdbroot")
 *         to the message text
 */
SortedMap<String, String> getDebuggingInfoFromCatalog(boolean verbose) {
    SortedMap<String, String> logLines = new TreeMap<>();
    // topology
    Deployment deployment = cluster.getDeployment().iterator().next();
    int hostCount = m_dbSettings.getCluster().hostcount();
    if (verbose) {
        // Host id -> sites-per-host, read from ZooKeeper. A failed lookup is
        // logged and represented as null so the fallback messages are used.
        Map<Integer, Integer> sphMap;
        try {
            sphMap = m_messenger.getSitesPerHostMapFromZK();
        } catch (KeeperException | InterruptedException | JSONException e) {
            hostLog.warn("Failed to get sitesperhost information from Zookeeper", e);
            sphMap = null;
        int kFactor = deployment.getKfactor();
        if (sphMap == null) {
            logLines.put("deployment1", String.format("Cluster has %d hosts with leader hostname: \"%s\". [unknown] local sites count. K = %d.", hostCount, VoltDB.instance().getConfig().m_leader, kFactor));
            logLines.put("deployment2", "Unable to retrieve partition information from the cluster.");
        } else {
            // NOTE(review): the Integer returned by get() is unboxed to int; this
            // would throw if the map has no entry for the local host id -- confirm
            // the map always contains it.
            int localSitesCount = sphMap.get(m_messenger.getHostId());
            logLines.put("deployment1", String.format("Cluster has %d hosts with leader hostname: \"%s\". %d local sites count. K = %d.", hostCount, VoltDB.instance().getConfig().m_leader, localSitesCount, kFactor));
            // sum the site counts of every host in the map
            int totalSitesCount = 0;
            for (Map.Entry<Integer, Integer> e : sphMap.entrySet()) {
                totalSitesCount += e.getValue();
            // each partition is stored kFactor + 1 times, so the partition count
            // is the total number of sites divided by that (integer division)
            int replicas = kFactor + 1;
            int partitionCount = totalSitesCount / replicas;
            logLines.put("deployment2", String.format("The entire cluster has %d %s of%s %d logical partition%s.", replicas, replicas > 1 ? "copies" : "copy", partitionCount > 1 ? " each of the" : "", partitionCount, partitionCount > 1 ? "s" : ""));
    // voltdb root
    logLines.put("voltdbroot", "Using \"" + VoltDB.instance().getVoltDBRootPath() + "\" for voltdbroot directory.");
    // partition detection
    if (cluster.getNetworkpartition()) {
        logLines.put("partition-detection", "Detection of network partitions in the cluster is enabled.");
    } else {
        logLines.put("partition-detection", "Detection of network partitions in the cluster is not enabled.");
    // security info
    if (cluster.getSecurityenabled()) {
        logLines.put("sec-enabled", "Client authentication is enabled.");
    } else {
        logLines.put("sec-enabled", "Client authentication is not enabled. Anonymous clients accepted.");
    // auto snapshot info
    // Only the schedule named "default" is reported.
    SnapshotSchedule ssched = database.getSnapshotschedule().get("default");
    if (ssched == null || !ssched.getEnabled()) {
        logLines.put("snapshot-schedule1", "No schedule set for automated snapshots.");
    } else {
        // The unit is identified by the first character of the lower-cased unit
        // string; anything other than s/m/h leaves the "[unknown frequency]" text.
        final String frequencyUnitString = ssched.getFrequencyunit().toLowerCase();
        final char frequencyUnit = frequencyUnitString.charAt(0);
        String msg = "[unknown frequency]";
        // NOTE(review): no break statements are visible between the cases below.
        // As written, a match on 's' or 'm' falls through and msg ends up with
        // the " hours" text -- confirm against the upstream source.
        switch(frequencyUnit) {
            case 's':
                msg = String.valueOf(ssched.getFrequencyvalue()) + " seconds";
            case 'm':
                msg = String.valueOf(ssched.getFrequencyvalue()) + " minutes";
            case 'h':
                msg = String.valueOf(ssched.getFrequencyvalue()) + " hours";
        logLines.put("snapshot-schedule1", "Automatic snapshots enabled, saved to " + VoltDB.instance().getSnapshotPath() + " and named with prefix '" + ssched.getPrefix() + "'.");
        logLines.put("snapshot-schedule2", "Database will retain a history of " + ssched.getRetain() + " snapshots, generated every " + msg + ".");
    return logLines;
Also used : Deployment(org.voltdb.catalog.Deployment) JSONException(org.json_voltpatches.JSONException) TreeMap(java.util.TreeMap) SnapshotSchedule(org.voltdb.catalog.SnapshotSchedule) Map(java.util.Map) TreeMap(java.util.TreeMap) CatalogMap(org.voltdb.catalog.CatalogMap) SortedMap(java.util.SortedMap) KeeperException(org.apache.zookeeper_voltpatches.KeeperException)

Example 4 with SnapshotSchedule

use of org.voltdb.catalog.SnapshotSchedule in project voltdb by VoltDB.

the class ClientInterface method mayActivateSnapshotDaemon.

// if this ClientInterface's site ID is the lowest non-execution site ID
// in the cluster, make our SnapshotDaemon responsible for snapshots
public void mayActivateSnapshotDaemon() {
    // Look up the snapshot schedule named "default" in the current catalog
    // context; nothing is done when no such schedule exists.
    SnapshotSchedule schedule = m_catalogContext.get().database.getSnapshotschedule().get("default");
    if (schedule != null) {
        // Hand the schedule to the snapshot daemon and attach a listener to the
        // returned future.
        final ListenableFuture<Void> future = m_snapshotDaemon.mayGoActiveOrInactive(schedule);
        future.addListener(new Runnable() {

            public void run() {
                // NOTE(review): the try body is empty in this excerpt. The caught
                // exception types suggest a blocking call on `future` belongs
                // here -- confirm against the upstream source.
                try {
                } catch (InterruptedException e) {
                    // either failure is treated as fatal for the local node
                    VoltDB.crashLocalVoltDB("Failed to make SnapshotDaemon active", false, e);
                } catch (ExecutionException e) {
                    VoltDB.crashLocalVoltDB("Failed to make SnapshotDaemon active", false, e);
        }, CoreUtils.SAMETHREADEXECUTOR);
Also used : SnapshotSchedule(org.voltdb.catalog.SnapshotSchedule) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) ExecutionException(java.util.concurrent.ExecutionException)

Example 5 with SnapshotSchedule

use of org.voltdb.catalog.SnapshotSchedule in project voltdb by VoltDB.

the class CatalogUtil method setSnapshotInfo.

    /**
     * Set the auto-snapshot settings in the catalog from the deployment file
     * @param catalog The catalog to be updated.
     * @param snapshotSettings A reference to the <snapshot> element of the deployment.xml file.
     */
private static void setSnapshotInfo(Catalog catalog, SnapshotType snapshotSettings) {
    // Validates the frequency, prefix and retain values read from
    // snapshotSettings, logging an error and substituting a default for each
    // invalid one, and adds a schedule named "default" to the catalog database.
    // NOTE(review): the closing braces are not visible in this excerpt, so the
    // extent of each conditional is inferred from indentation only.
    Database db = catalog.getClusters().get("cluster").getDatabases().get("database");
    SnapshotSchedule schedule = db.getSnapshotschedule().add("default");
    // The frequency must end in a unit suffix: s, m or h. Anything else is
    // replaced by "10m".
    String frequency = snapshotSettings.getFrequency();
    if (!frequency.endsWith("s") && !frequency.endsWith("m") && !frequency.endsWith("h")) {
        hostLog.error("Snapshot frequency " + frequency + " needs to end with time unit specified" + " that is one of [s, m, h] (seconds, minutes, hours)" + " Defaulting snapshot frequency to 10m.");
        frequency = "10m";
    // Everything before the unit suffix must parse as an int; otherwise both
    // the frequency string and its numeric value fall back to 10 minutes.
    // NOTE(review): no range check on the parsed value is visible here, so zero
    // or a negative number would be accepted -- confirm this is intended.
    int frequencyInt = 0;
    String frequencySubstring = frequency.substring(0, frequency.length() - 1);
    try {
        frequencyInt = Integer.parseInt(frequencySubstring);
    } catch (Exception e) {
        hostLog.error("Frequency " + frequencySubstring + " is not an integer. Defaulting frequency to 10m.");
        frequency = "10m";
        frequencyInt = 10;
    // A missing or empty prefix is replaced by "SNAPSHOTNONCE".
    String prefix = snapshotSettings.getPrefix();
    if (prefix == null || prefix.isEmpty()) {
        hostLog.error("Snapshot prefix " + prefix + " is not a valid prefix. Using prefix of 'SNAPSHOTNONCE' ");
        prefix = "SNAPSHOTNONCE";
    // '-' and ',' are not allowed in a prefix; each occurrence becomes '_'.
    if (prefix.contains("-") || prefix.contains(",")) {
        String oldprefix = prefix;
        prefix = prefix.replaceAll("-", "_");
        prefix = prefix.replaceAll(",", "_");
        hostLog.error("Snapshot prefix " + oldprefix + " cannot include , or -." + " Using the prefix: " + prefix + " instead.");
    // At least one snapshot must be retained.
    int retain = snapshotSettings.getRetain();
    if (retain < 1) {
        hostLog.error("Snapshot retain value " + retain + " is not a valid value. Must be 1 or greater." + " Defaulting snapshot retain to 1.");
        retain = 1;
    // The unit stored on the schedule is the last character of the (possibly
    // defaulted) frequency string.
    // NOTE(review): within the visible lines only the frequency unit is written
    // to the schedule; frequencyInt, prefix and retain are computed but not
    // applied. The excerpt appears to end early -- confirm against the upstream
    // source.
    schedule.setFrequencyunit(frequency.substring(frequency.length() - 1, frequency.length()));
Also used : Database(org.voltdb.catalog.Database) SnapshotSchedule(org.voltdb.catalog.SnapshotSchedule) Constraint(org.voltdb.catalog.Constraint) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) JAXBException(javax.xml.bind.JAXBException) FileNotFoundException(java.io.FileNotFoundException) JSONException(org.json_voltpatches.JSONException) SAXException(org.xml.sax.SAXException) KeeperException(org.apache.zookeeper_voltpatches.KeeperException) MalformedURLException(java.net.MalformedURLException)


SnapshotSchedule (org.voltdb.catalog.SnapshotSchedule)6 KeeperException (org.apache.zookeeper_voltpatches.KeeperException)3 JSONException (org.json_voltpatches.JSONException)3 File (java.io.File)2 IOException (java.io.IOException)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 Map (java.util.Map)2 SortedMap (java.util.SortedMap)2 TreeMap (java.util.TreeMap)2 ExecutionException (java.util.concurrent.ExecutionException)2 CatalogMap (org.voltdb.catalog.CatalogMap)2 Deployment (org.voltdb.catalog.Deployment)2 ImmutableList (com.google_voltpatches.common.collect.ImmutableList)1 ImmutableMap (com.google_voltpatches.common.collect.ImmutableMap)1 FileNotFoundException (java.io.FileNotFoundException)1 PrintStream (java.io.PrintStream)1 InetAddress (java.net.InetAddress)1 MalformedURLException (java.net.MalformedURLException)1 SocketException (java.net.SocketException)1 URISyntaxException (java.net.URISyntaxException)1