Search in sources :

Example 1 with DeadServer

Use of org.apache.accumulo.core.master.thrift.DeadServer in the Apache Accumulo project.

Class MasterResource, method getTables.

/**
 * Builds the master summary that is served as a JSON object.
 *
 * <p>
 * When the monitor has no master information available, an empty {@link MasterInformation} is returned.
 *
 * @return master JSON object
 */
@GET
public static MasterInformation getTables() {
    final MasterMonitorInfo info = Monitor.getMmi();
    if (info == null) {
        // Nothing has been reported by the master yet
        return new MasterInformation();
    }

    // Garbage collector: "Down" when no status exists, the relevant timestamp when one is known, otherwise "Waiting"
    final GCStatus gc = Monitor.getGcStatus();
    final String gcState;
    String gcLabel = "";
    if (gc == null) {
        gcState = "Down";
    } else {
        long timestamp = 0;
        final boolean cycleStarted = gc.current.started != 0 || gc.currentLog.started != 0;
        if (cycleStarted) {
            timestamp = Math.max(gc.current.started, gc.currentLog.started);
            gcLabel = "Running";
        } else if (gc.lastLog.finished != 0) {
            timestamp = gc.lastLog.finished;
        }
        gcState = timestamp == 0 ? "Waiting" : String.valueOf(timestamp);
    }

    // Names of every known tablet server, live ones first and dead ones after
    final List<String> serverNames = new ArrayList<>();
    for (TabletServerStatus live : info.tServerInfo) {
        serverNames.add(live.name);
    }
    for (DeadServer dead : info.deadTabletServers) {
        serverNames.add(dead.server);
    }

    final List<String> masterLocations = Monitor.getContext().getInstance().getMasterLocations();
    final String masterHost;
    if (masterLocations.isEmpty()) {
        masterHost = "Down";
    } else {
        masterHost = AddressUtil.parseAddress(masterLocations.get(0), false).getHost();
    }

    final Integer onlineCount = info.tServerInfo.size();
    final Integer totalCount = serverNames.size();
    final Integer tabletCount = Monitor.getTotalTabletCount();
    final Integer unassignedCount = info.unassignedTablets;
    final long entryCount = Monitor.getTotalEntries();
    final double ingestRate = Monitor.getTotalIngestRate();
    final double scanRate = Monitor.getTotalScanRate();
    final double queryRate = Monitor.getTotalQueryRate();
    final long totalHoldTime = Monitor.getTotalHoldTime();
    final double loadAverage = ManagementFactory.getOperatingSystemMXBean().getSystemLoadAverage();
    final int tableCount = Monitor.getTotalTables();
    final int deadCount = info.deadTabletServers.size();
    final long lookupCount = Monitor.getTotalLookups();
    final long upMillis = System.currentTimeMillis() - Monitor.getStartTime();

    return new MasterInformation(masterHost, onlineCount, totalCount, gcState, tabletCount, unassignedCount,
        entryCount, ingestRate, scanRate, queryRate, totalHoldTime, loadAverage, tableCount, deadCount,
        lookupCount, upMillis, gcLabel, getGoalState(), getState(), getNumBadTservers(),
        getServersShuttingDown(), getDeadTservers(), getDeadLoggers());
}
Also used : MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) ArrayList(java.util.ArrayList) GCStatus(org.apache.accumulo.core.gc.thrift.GCStatus) DeadServer(org.apache.accumulo.core.master.thrift.DeadServer) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus) GET(javax.ws.rs.GET)

Example 2 with DeadServer

Use of org.apache.accumulo.core.master.thrift.DeadServer in the Apache Accumulo project.

Class StatusResource, method getTables.

/**
 * Generates the JSON object with the status of the master, the garbage collector, the tablet servers and the
 * recent log events.
 *
 * <p>
 * Master and tablet server status are reported as {@code ERROR} whenever the monitor has no master
 * information. The garbage collector status depends only on whether a GC status is available.
 *
 * @return Status report
 */
@GET
public StatusInformation getTables() {
    // Both default to ERROR and are only upgraded when master information is available
    Status masterStatus = Status.ERROR;
    Status tServerStatus = Status.ERROR;

    // NOTE(review): Monitor.getMmi() is read several times below; if the monitor can replace or clear it
    // between calls, capture it in a local first - confirm against Monitor.
    if (Monitor.getMmi() != null) {
        List<String> masters = Monitor.getContext().getInstance().getMasterLocations();
        masterStatus = masters.isEmpty() ? Status.ERROR : Status.OK;

        int tServerUp = Monitor.getMmi().getTServerInfoSize();
        int tServerDown = Monitor.getMmi().getDeadTabletServersSize();
        int tServerBad = Monitor.getMmi().getBadTServersSize();
        /*
         * With at least one tserver up: WARN if any server is dead or bad, OK otherwise. With no tserver up the
         * status stays ERROR.
         */
        if (tServerUp > 0) {
            tServerStatus = (tServerDown > 0 || tServerBad > 0) ? Status.WARN : Status.OK;
        }
    }

    // The GC status is evaluated the same way whether or not master information is present
    Status gcStatus = Monitor.getGcStatus() != null ? Status.OK : Status.ERROR;

    List<DedupedLogEvent> logs = LogService.getInstance().getEvents();
    boolean logsHaveError = false;
    for (DedupedLogEvent dedupedLogEvent : logs) {
        if (dedupedLogEvent.getEvent().getLevel().isGreaterOrEqual(Level.ERROR)) {
            logsHaveError = true;
            break;
        }
    }

    int numProblems = Monitor.getProblemSummary().entrySet().size();
    return new StatusInformation(masterStatus.toString(), gcStatus.toString(), tServerStatus.toString(), logs.size(), logsHaveError, numProblems);
}
Also used : TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus) ArrayList(java.util.ArrayList) DeadServer(org.apache.accumulo.core.master.thrift.DeadServer) DedupedLogEvent(org.apache.accumulo.server.monitor.DedupedLogEvent) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus) GET(javax.ws.rs.GET)

Example 3 with DeadServer

Use of org.apache.accumulo.core.master.thrift.DeadServer in the Apache Accumulo project.

Class GetMasterStats, method main.

/**
 * Fetches the current statistics from the master and prints them through {@code out}: master state, servers
 * shutting down, unassigned tablets, bad and dead tablet servers, bulk imports, per-table totals and
 * per-tablet-server details.
 *
 * <p>
 * The first argument of {@code out} looks like a nesting/indentation level - confirm against its definition.
 *
 * @param args
 *          not read by this method
 * @throws Exception
 *           if the statistics cannot be fetched for a reason other than the master not being the active service
 */
public static void main(String[] args) throws Exception {
    MasterMonitorInfo stats = null;
    Instance instance = HdfsZooInstance.getInstance();
    AccumuloServerContext context = new AccumuloServerContext(instance, new ServerConfigurationFactory(instance));
    while (true) {
        // Scoped per attempt so a failed reconnect never closes the previous attempt's client a second time
        MasterClientService.Iface client = null;
        try {
            client = MasterClient.getConnectionWithRetry(context);
            stats = client.getMasterStats(Tracer.traceInfo(), context.rpcCreds());
            break;
        } catch (ThriftNotActiveServiceException e) {
            // Let it loop, fetching a new location
            sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        } finally {
            if (client != null) {
                MasterClient.close(client);
            }
        }
    }
    out(0, "State: " + stats.state.name());
    out(0, "Goal State: " + stats.goalState.name());
    if (stats.serversShuttingDown != null && !stats.serversShuttingDown.isEmpty()) {
        out(0, "Servers to shutdown");
        for (String server : stats.serversShuttingDown) {
            out(1, "%s", server);
        }
    }
    out(0, "Unassigned tablets: %d", stats.unassignedTablets);
    if (stats.badTServers != null && !stats.badTServers.isEmpty()) {
        out(0, "Bad servers");
        for (Entry<String, Byte> entry : stats.badTServers.entrySet()) {
            out(1, "%s: %d", entry.getKey(), (int) entry.getValue());
        }
    }
    // The collections below are null-guarded like the others in this method; a missing one is reported as a count of 0
    out(0, "Dead tablet servers count: %s", stats.deadTabletServers == null ? 0 : stats.deadTabletServers.size());
    if (stats.deadTabletServers != null) {
        // One formatter for the whole listing instead of one per dead server
        SimpleDateFormat lastReportFormat = new SimpleDateFormat();
        for (DeadServer dead : stats.deadTabletServers) {
            out(1, "Dead tablet server: %s", dead.server);
            out(2, "Last report: %s", lastReportFormat.format(new Date(dead.lastStatus)));
            out(2, "Cause: %s", dead.status);
        }
    }
    out(0, "Bulk imports: %s", stats.bulkImports == null ? 0 : stats.bulkImports.size());
    if (stats.bulkImports != null) {
        for (BulkImportStatus bulk : stats.bulkImports) {
            out(1, "Import directory: %s", bulk.filename);
            out(2, "Bulk state %s", bulk.state);
            out(2, "Bulk start %s", bulk.startTime);
        }
    }
    if (stats.tableMap != null && !stats.tableMap.isEmpty()) {
        out(0, "Tables");
        for (Entry<String, TableInfo> entry : stats.tableMap.entrySet()) {
            TableInfo v = entry.getValue();
            out(1, "%s", entry.getKey());
            out(2, "Records: %d", v.recs);
            out(2, "Records in Memory: %d", v.recsInMemory);
            out(2, "Tablets: %d", v.tablets);
            out(2, "Online Tablets: %d", v.onlineTablets);
            out(2, "Ingest Rate: %.2f", v.ingestRate);
            out(2, "Query Rate: %.2f", v.queryRate);
        }
    }
    if (stats.tServerInfo != null && !stats.tServerInfo.isEmpty()) {
        out(0, "Tablet Servers");
        // Single reference time so every "Time Difference" is measured against the same instant
        long now = System.currentTimeMillis();
        for (TabletServerStatus server : stats.tServerInfo) {
            TableInfo summary = TableInfoUtil.summarizeTableStats(server);
            out(1, "Name: %s", server.name);
            out(2, "Ingest: %.2f", summary.ingestRate);
            out(2, "Last Contact: %s", server.lastContact);
            out(2, "OS Load Average: %.2f", server.osLoad);
            out(2, "Queries: %.2f", summary.queryRate);
            out(2, "Time Difference: %.1f", ((now - server.lastContact) / 1000.));
            out(2, "Total Records: %d", summary.recs);
            out(2, "Lookups: %d", server.lookups);
            if (server.holdTime > 0) {
                out(2, "Hold Time: %d", server.holdTime);
            }
            if (server.tableMap != null && !server.tableMap.isEmpty()) {
                out(2, "Tables");
                for (Entry<String, TableInfo> status : server.tableMap.entrySet()) {
                    TableInfo info = status.getValue();
                    out(3, "Table: %s", status.getKey());
                    out(4, "Tablets: %d", info.onlineTablets);
                    out(4, "Records: %d", info.recs);
                    out(4, "Records in Memory: %d", info.recsInMemory);
                    out(4, "Ingest: %.2f", info.ingestRate);
                    out(4, "Queries: %.2f", info.queryRate);
                    out(4, "Major Compacting: %d", info.majors == null ? 0 : info.majors.running);
                    out(4, "Queued for Major Compaction: %d", info.majors == null ? 0 : info.majors.queued);
                    out(4, "Minor Compacting: %d", info.minors == null ? 0 : info.minors.running);
                    out(4, "Queued for Minor Compaction: %d", info.minors == null ? 0 : info.minors.queued);
                }
            }
            out(2, "Recoveries: %d", server.logSorts == null ? 0 : server.logSorts.size());
            if (server.logSorts != null) {
                for (RecoveryStatus sort : server.logSorts) {
                    out(3, "File: %s", sort.name);
                    out(3, "Progress: %.2f%%", sort.progress * 100);
                    out(3, "Time running: %s", sort.runtime / 1000.);
                }
            }
            // NOTE(review): this repeats the master-level stats.bulkImports under every tablet server; a
            // per-server list may have been intended - confirm against TabletServerStatus before changing.
            out(3, "Bulk imports: %s", stats.bulkImports == null ? 0 : stats.bulkImports.size());
            if (stats.bulkImports != null) {
                for (BulkImportStatus bulk : stats.bulkImports) {
                    out(4, "Import file: %s", bulk.filename);
                    out(5, "Bulk state %s", bulk.state);
                    out(5, "Bulk start %s", bulk.startTime);
                }
            }
        }
    }
}
Also used : MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) AccumuloServerContext(org.apache.accumulo.server.AccumuloServerContext) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) Instance(org.apache.accumulo.core.client.Instance) HdfsZooInstance(org.apache.accumulo.server.client.HdfsZooInstance) ServerConfigurationFactory(org.apache.accumulo.server.conf.ServerConfigurationFactory) DeadServer(org.apache.accumulo.core.master.thrift.DeadServer) Date(java.util.Date) BulkImportStatus(org.apache.accumulo.core.master.thrift.BulkImportStatus) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) RecoveryStatus(org.apache.accumulo.core.master.thrift.RecoveryStatus) SimpleDateFormat(java.text.SimpleDateFormat) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Example 4 with DeadServer

Use of org.apache.accumulo.core.master.thrift.DeadServer in the Apache Accumulo project.

Class DeadServerList, method getList.

/**
 * Reads the dead server entries stored as children of {@code path} in ZooKeeper.
 *
 * <p>
 * Each child becomes a {@link DeadServer} built from the child name, the node's modification time and the
 * node data decoded as UTF-8. Failures are logged rather than thrown, so the returned list holds whatever
 * was collected before the failure (possibly nothing).
 *
 * @return the dead servers that could be read; never {@code null}
 */
public List<DeadServer> getList() {
    List<DeadServer> result = new ArrayList<>();
    IZooReaderWriter zoo = ZooReaderWriter.getInstance();
    try {
        List<String> children = zoo.getChildren(path);
        if (children != null) {
            for (String child : children) {
                Stat stat = new Stat();
                byte[] data;
                try {
                    data = zoo.getData(path + "/" + child, stat);
                } catch (NoNodeException nne) {
                    // The child disappeared between listing and reading it; skip it, since missing
                    // that server in the dead server list is harmless.
                    continue;
                }
                DeadServer server = new DeadServer(child, stat.getMtime(), new String(data, UTF_8));
                result.add(server);
            }
        }
    } catch (Exception ex) {
        if (ex instanceof InterruptedException) {
            // The broad catch would otherwise swallow the interruption; restore the flag for callers
            Thread.currentThread().interrupt();
        }
        log.error("{}", ex.getMessage(), ex);
    }
    return result;
}
Also used : Stat(org.apache.zookeeper.data.Stat) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) IZooReaderWriter(org.apache.accumulo.fate.zookeeper.IZooReaderWriter) ArrayList(java.util.ArrayList) DeadServer(org.apache.accumulo.core.master.thrift.DeadServer) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException)

Example 5 with DeadServer

Use of org.apache.accumulo.core.master.thrift.DeadServer in the Apache Accumulo project.

Class MasterResource, method getDeadTservers.

/**
 * Collects the dead tablet servers reported by the master into a list that is served as a JSON object.
 *
 * <p>
 * The list is empty when no master information is available.
 *
 * @return dead server list
 */
public static DeadServerList getDeadTservers() {
    final MasterMonitorInfo info = getMmi();
    final DeadServerList result = new DeadServerList();
    if (info != null) {
        // Copy every dead server reported by the master into the result
        for (DeadServer entry : info.deadTabletServers) {
            DeadServerInformation converted = new DeadServerInformation(entry.server, entry.lastStatus, entry.status);
            result.addDeadServer(converted);
        }
    }
    return result;
}
Also used : MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) DeadServerInformation(org.apache.accumulo.monitor.rest.tservers.DeadServerInformation) DeadServerList(org.apache.accumulo.monitor.rest.tservers.DeadServerList) DeadServer(org.apache.accumulo.core.master.thrift.DeadServer)

Aggregations

DeadServer (org.apache.accumulo.core.master.thrift.DeadServer): 6; MasterMonitorInfo (org.apache.accumulo.core.master.thrift.MasterMonitorInfo): 4; ArrayList (java.util.ArrayList): 3; TabletServerStatus (org.apache.accumulo.core.master.thrift.TabletServerStatus): 3; GET (javax.ws.rs.GET): 2; SimpleDateFormat (java.text.SimpleDateFormat): 1; Date (java.util.Date): 1; Instance (org.apache.accumulo.core.client.Instance): 1; ThriftNotActiveServiceException (org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException): 1; GCStatus (org.apache.accumulo.core.gc.thrift.GCStatus): 1; BulkImportStatus (org.apache.accumulo.core.master.thrift.BulkImportStatus): 1; MasterClientService (org.apache.accumulo.core.master.thrift.MasterClientService): 1; RecoveryStatus (org.apache.accumulo.core.master.thrift.RecoveryStatus): 1; TableInfo (org.apache.accumulo.core.master.thrift.TableInfo): 1; IZooReaderWriter (org.apache.accumulo.fate.zookeeper.IZooReaderWriter): 1; DeadLoggerInformation (org.apache.accumulo.monitor.rest.logs.DeadLoggerInformation): 1; DeadLoggerList (org.apache.accumulo.monitor.rest.logs.DeadLoggerList): 1; DeadServerInformation (org.apache.accumulo.monitor.rest.tservers.DeadServerInformation): 1; DeadServerList (org.apache.accumulo.monitor.rest.tservers.DeadServerList): 1; AccumuloServerContext (org.apache.accumulo.server.AccumuloServerContext): 1