use of org.apache.accumulo.server.master.LiveTServerSet.TServerConnection in project accumulo by apache.
the class Master method gatherTableInformation.
/**
 * Polls each server in {@code currentServers} for its {@link TabletServerStatus} using a fixed-size thread pool and returns the statuses that were
 * collected.
 *
 * <p>
 * A server whose status cannot be fetched has its counter in {@code badServers} incremented; once the counter exceeds {@code MAX_BAD_STATUS_COUNT} an
 * attempt is made to halt that server and its counter is dropped. After polling, {@code badServers} is pruned of servers that are no longer current or that
 * answered successfully.
 *
 * <p>
 * The method waits at most twice {@code Property.TSERV_CLIENT_TIMEOUT} for the polls to finish. Polls that complete after that are not reflected in the
 * returned map, which is a private snapshot that no pool thread writes to.
 *
 * @param currentServers
 *          the servers to poll
 * @return a sorted map from each server that answered in time to the status it reported
 */
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers) {
  long start = System.currentTimeMillis();
  int threads = Math.max(getConfiguration().getCount(Property.MASTER_STATUS_THREAD_POOL_SIZE), 1);
  ExecutorService tp = Executors.newFixedThreadPool(threads);
  // TreeMap is not thread-safe and the pool threads write to it concurrently, so every access
  // to this map (here and in the tasks) is guarded by the map's own monitor.
  final SortedMap<TServerInstance, TabletServerStatus> result = new TreeMap<>();
  for (final TServerInstance server : currentServers) {
    tp.submit(new Runnable() {
      @Override
      public void run() {
        try {
          Thread t = Thread.currentThread();
          String oldName = t.getName();
          try {
            // Rename the worker thread while it is talking to this server; restored in finally
            t.setName("Getting status from " + server);
            TServerConnection connection = tserverSet.getConnection(server);
            if (connection == null)
              throw new IOException("No connection to " + server);
            TabletServerStatus status = connection.getTableMap(false);
            synchronized (result) {
              result.put(server, status);
            }
          } finally {
            t.setName(oldName);
          }
        } catch (Exception ex) {
          log.error("unable to get tablet server status {} {}", server, ex.toString());
          log.debug("unable to get tablet server status {}", server, ex);
          if (badServers.get(server).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
            log.warn("attempting to stop {}", server);
            try {
              TServerConnection connection = tserverSet.getConnection(server);
              if (connection != null) {
                connection.halt(masterLock);
              }
            } catch (TTransportException e) {
              // ignore: it's probably down
            } catch (Exception e) {
              log.info("error talking to troublesome tablet server", e);
            }
            badServers.remove(server);
          }
        }
      }
    });
  }
  tp.shutdown();
  try {
    tp.awaitTermination(getConfiguration().getTimeInMillis(Property.TSERV_CLIENT_TIMEOUT) * 2, TimeUnit.MILLISECONDS);
  } catch (InterruptedException e) {
    log.debug("Interrupted while fetching status");
  }
  // Tasks may still be running if the wait timed out or was interrupted, so work from (and
  // return) a copy taken under the lock rather than the map they are still writing to.
  final SortedMap<TServerInstance, TabletServerStatus> snapshot;
  synchronized (result) {
    snapshot = new TreeMap<>(result);
  }
  synchronized (badServers) {
    badServers.keySet().retainAll(currentServers);
    badServers.keySet().removeAll(snapshot.keySet());
  }
  log.debug(String.format("Finished gathering information from %d servers in %.2f seconds", snapshot.size(), (System.currentTimeMillis() - start) / 1000.));
  return snapshot;
}
use of org.apache.accumulo.server.master.LiveTServerSet.TServerConnection in project accumulo by apache.
the class TabletGroupWatcher method flushChanges.
/**
 * Applies the tablet state changes accumulated by a scan: handles tablets assigned to dead servers, unsuspends tablets whose suspension server is gone, asks
 * the balancer to place unassigned tablets, records the chosen future locations, and finally sends the assignment requests to the tablet servers.
 *
 * @param currentTServers
 *          the live tablet servers and their status; balancer results naming a server outside this map are ignored
 * @param assignments
 *          output list that receives the new assignments chosen by the balancer; the entries of {@code assigned} are appended to it before requests are sent
 * @param assigned
 *          assignments that are sent to their tablet servers but are not passed to {@code store.setFutureLocations}
 * @param assignedToDeadServers
 *          tablets to suspend (when suspension is allowed) or unassign
 * @param logsForDeadServers
 *          write-ahead logs per dead server, passed to the store and then marked as closed
 * @param suspendedToGoneServers
 *          tablets to unsuspend
 * @param unassigned
 *          tablets nominated for assignment; handed to the balancer as an unmodifiable view
 * @throws DistributedStoreException
 *           declared by this method; propagated from the calls made here
 * @throws TException
 *           declared by this method; propagated from the calls made here
 * @throws WalMarkerException
 *           declared by this method; propagated from the calls made here
 */
private void flushChanges(SortedMap<TServerInstance, TabletServerStatus> currentTServers, List<Assignment> assignments, List<Assignment> assigned, List<TabletLocationState> assignedToDeadServers, Map<TServerInstance, List<Path>> logsForDeadServers, List<TabletLocationState> suspendedToGoneServers, Map<KeyExtent, TServerInstance> unassigned) throws DistributedStoreException, TException, WalMarkerException {
  boolean tabletsSuspendable = canSuspendTablets();
  if (!assignedToDeadServers.isEmpty()) {
    // Cap the number of entries logged so the message stays readable
    int maxServersToShow = min(assignedToDeadServers.size(), 100);
    Master.log.debug("{} assigned to dead servers: {}...", assignedToDeadServers.size(), assignedToDeadServers.subList(0, maxServersToShow));
    Master.log.debug("logs for dead servers: {}", logsForDeadServers);
    if (tabletsSuspendable) {
      store.suspend(assignedToDeadServers, logsForDeadServers, master.getSteadyTime());
    } else {
      store.unassign(assignedToDeadServers, logsForDeadServers);
    }
    this.master.markDeadServerLogsAsClosed(logsForDeadServers);
    this.master.nextEvent.event("Marked %d tablets as suspended because they don't have current servers", assignedToDeadServers.size());
  }
  if (!suspendedToGoneServers.isEmpty()) {
    // Report the list that is actually being unsuspended (previously this logged assignedToDeadServers)
    int maxServersToShow = min(suspendedToGoneServers.size(), 100);
    Master.log.debug("{} suspended to gone servers: {}...", suspendedToGoneServers.size(), suspendedToGoneServers.subList(0, maxServersToShow));
    store.unsuspend(suspendedToGoneServers);
  }
  if (!currentTServers.isEmpty()) {
    Map<KeyExtent, TServerInstance> assignedOut = new HashMap<>();
    final StringBuilder builder = new StringBuilder(64);
    this.master.tabletBalancer.getAssignments(Collections.unmodifiableSortedMap(currentTServers), Collections.unmodifiableMap(unassigned), assignedOut);
    for (Entry<KeyExtent, TServerInstance> assignment : assignedOut.entrySet()) {
      if (unassigned.containsKey(assignment.getKey())) {
        if (assignment.getValue() != null) {
          if (!currentTServers.containsKey(assignment.getValue())) {
            Master.log.warn("balancer assigned {} to a tablet server that is not current {} ignoring", assignment.getKey(), assignment.getValue());
            continue;
          }
          if (builder.length() > 0) {
            builder.append(ASSIGNMENT_BUFFER_SEPARATOR);
          }
          builder.append(assignment);
          // Don't let the log message get too gigantic
          if (builder.length() > ASSINGMENT_BUFFER_MAX_LENGTH) {
            builder.append("]");
            Master.log.debug("{} assigning tablets: [{}", store.name(), builder.toString());
            builder.setLength(0);
          }
          assignments.add(new Assignment(assignment.getKey(), assignment.getValue()));
        }
      } else {
        Master.log.warn("{} load balancer assigning tablet that was not nominated for assignment {}", store.name(), assignment.getKey());
      }
    }
    if (builder.length() > 0) {
      // Make sure to log any leftover assignments
      builder.append("]");
      Master.log.debug("{} assigning tablets: [{}", store.name(), builder.toString());
    }
    if (!unassigned.isEmpty() && assignedOut.isEmpty())
      Master.log.warn("Load balancer failed to assign any tablets");
  }
  if (!assignments.isEmpty()) {
    Master.log.info("Assigning {} tablets", assignments.size());
    store.setFutureLocations(assignments);
  }
  // Entries of 'assigned' are appended only after the future locations were stored, so they
  // get an assignment request below without being written by setFutureLocations.
  assignments.addAll(assigned);
  for (Assignment a : assignments) {
    TServerConnection conn = this.master.tserverSet.getConnection(a.server);
    if (conn != null) {
      conn.assignTablet(this.master.masterLock, a.tablet);
    } else {
      Master.log.warn("Could not connect to server {}", a.server);
    }
    // Invoked for every assignment, including those whose server could not be reached
    master.assignedTablet(a.tablet);
  }
}
use of org.apache.accumulo.server.master.LiveTServerSet.TServerConnection in project accumulo by apache.
the class TabletGroupWatcher method sendSplitRequest.
/**
 * Asks the server hosting a tablet to split it at the end points of a pending delete range.
 *
 * <p>
 * Nothing is sent unless the merge info is a delete in the {@code SPLITTING} state, the tablet is {@code HOSTED}, and the tablet's extent overlaps the
 * delete range. A request is issued for each end point of the range that lies inside the tablet and is not already one of the tablet's own boundaries.
 * Failures are logged and not propagated.
 *
 * @param info
 *          the merge/delete operation being processed
 * @param state
 *          the current state of the tablet
 * @param tls
 *          the tablet's extent and current location
 */
private void sendSplitRequest(MergeInfo info, TabletState state, TabletLocationState tls) {
  // Only an unsplit delete against an online tablet needs a split (merges don't split)
  boolean stillSplitting = info.getState().equals(MergeState.SPLITTING);
  if (!stillSplitting || !info.isDelete() || !state.equals(TabletState.HOSTED)) {
    return;
  }
  // The tablet must cover part of the range being deleted
  KeyExtent deleteRange = info.getExtent();
  if (!tls.extent.overlaps(deleteRange)) {
    return;
  }
  Text[] endPoints = {deleteRange.getPrevEndRow(), deleteRange.getEndRow()};
  for (Text endPoint : endPoints) {
    // Skip end points that are absent, outside this tablet, or already a tablet boundary
    boolean needsSplit = endPoint != null && tls.extent.contains(endPoint) && !endPoint.equals(tls.extent.getEndRow())
        && !endPoint.equals(tls.extent.getPrevEndRow());
    if (!needsSplit) {
      continue;
    }
    try {
      TServerConnection connection = this.master.tserverSet.getConnection(tls.current);
      if (connection == null) {
        Master.log.warn("Not connected to server {}", tls.current);
      } else {
        Master.log.info("Asking {} to split {} at {}", tls.current, tls.extent, endPoint);
        connection.splitTablet(this.master.masterLock, tls.extent, endPoint);
      }
    } catch (NotServingTabletException e) {
      Master.log.debug("Error asking tablet server to split a tablet: ", e);
    } catch (Exception e) {
      Master.log.warn("Error asking tablet server to split a tablet: ", e);
    }
  }
}
use of org.apache.accumulo.server.master.LiveTServerSet.TServerConnection in project accumulo by apache.
the class TabletGroupWatcher method sendChopRequest.
/**
 * Asks the server hosting a tablet to chop it as part of a merge.
 *
 * <p>
 * Nothing is sent unless the merge is in the {@code WAITING_FOR_CHOPPED} state, the tablet is {@code HOSTED}, it is not already chopped, and
 * {@code info.needsToBeChopped} accepts its extent. A {@link TException} raised while contacting the server is logged, with its cause, and not propagated.
 *
 * @param info
 *          the merge operation being processed
 * @param state
 *          the current state of the tablet
 * @param tls
 *          the tablet's extent, chopped flag and current location
 */
private void sendChopRequest(MergeInfo info, TabletState state, TabletLocationState tls) {
  // Don't bother if we're in the wrong state
  if (!info.getState().equals(MergeState.WAITING_FOR_CHOPPED))
    return;
  // Tablet must be online
  if (!state.equals(TabletState.HOSTED))
    return;
  // Tablet isn't already chopped
  if (tls.chopped)
    return;
  // Tablet ranges intersect
  if (info.needsToBeChopped(tls.extent)) {
    try {
      TServerConnection conn = this.master.tserverSet.getConnection(tls.current);
      if (conn != null) {
        Master.log.info("Asking {} to chop {}", tls.current, tls.extent);
        conn.chop(this.master.masterLock, tls.extent);
      } else {
        Master.log.warn("Could not connect to server {}", tls.current);
      }
    } catch (TException e) {
      // Pass the exception to the logger so the cause of the failure is not lost
      Master.log.warn("Communications error asking tablet server to chop a tablet", e);
    }
  }
}
use of org.apache.accumulo.server.master.LiveTServerSet.TServerConnection in project accumulo by apache.
the class ShutdownTServerTest method testSingleShutdown.
/**
 * Checks that {@code ShutdownTServer.isReady} keeps returning a positive wait while the tablet server still reports a table, and that the master is asked to
 * shut the server down on the first call only.
 */
@Test
public void testSingleShutdown() throws Exception {
  final TServerInstance tserver = EasyMock.createMock(TServerInstance.class);
  // Non-forced shutdown
  final ShutdownTServer shutdownOp = new ShutdownTServer(tserver, false);
  final Master master = EasyMock.createMock(Master.class);
  final long tid = 1L;
  final TServerConnection connection = EasyMock.createMock(TServerConnection.class);

  // The server reports one table; its contents are irrelevant to the test
  final TabletServerStatus status = new TabletServerStatus();
  status.tableMap = new HashMap<>();
  status.tableMap.put("a_table", new TableInfo());

  // First call: the master is expected to be told once to shut the server down
  master.shutdownTServer(tserver);
  EasyMock.expectLastCall().once();
  EasyMock.expect(master.onlineTabletServers()).andReturn(Collections.singleton(tserver));
  EasyMock.expect(master.getConnection(tserver)).andReturn(connection);
  EasyMock.expect(connection.getTableMap(false)).andReturn(status);
  EasyMock.replay(tserver, connection, master);

  // The server still has a table, so the FATE op must not be ready yet
  long waitTime = shutdownOp.isReady(tid, master);
  assertTrue("Expected wait to be greater than 0", waitTime > 0);
  EasyMock.verify(tserver, connection, master);

  // Second call: same expectations, except that shutdownTServer must not be called again
  EasyMock.reset(tserver, connection, master);
  EasyMock.expect(master.onlineTabletServers()).andReturn(Collections.singleton(tserver));
  EasyMock.expect(master.getConnection(tserver)).andReturn(connection);
  EasyMock.expect(connection.getTableMap(false)).andReturn(status);
  EasyMock.replay(tserver, connection, master);

  // Still not ready
  waitTime = shutdownOp.isReady(tid, master);
  assertTrue("Expected wait to be greater than 0", waitTime > 0);
  EasyMock.verify(tserver, connection, master);
}
Aggregations