use of org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection in project accumulo by apache.
the class Manager method gatherTableInformation.
/**
 * Polls every server in {@code currentServers} for its tablet server status and returns an
 * immutable snapshot of the responses that arrived in time.
 *
 * <p>
 * One task per server is submitted to an executor. After all tasks are submitted the method waits
 * up to {@code max(10000, rpcTimeout / 3)} milliseconds for them to finish and then calls
 * {@code shutdownNow()}. A server whose status request fails is counted in {@code badServers};
 * once its count exceeds {@code MAX_BAD_STATUS_COUNT} an attempt is made to halt it, limited by a
 * rate limiter created with {@code MAX_SHUTDOWNS_PER_SEC}.
 *
 * @param currentServers the servers to poll
 * @param balancerMap output map; receives one entry per server present in the returned snapshot
 * @return immutable sorted snapshot of the statuses gathered by this call
 */
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers, SortedMap<TabletServerId, TServerStatus> balancerMap) {
  final long rpcTimeout = getConfiguration().getTimeInMillis(Property.GENERAL_RPC_TIMEOUT);
  int threads = getConfiguration().getCount(Property.MANAGER_STATUS_THREAD_POOL_SIZE);
  ExecutorService tp = ThreadPools.createExecutorService(getConfiguration(), Property.MANAGER_STATUS_THREAD_POOL_SIZE, false);
  long start = System.currentTimeMillis();
  // Written concurrently by the status tasks, hence a concurrent sorted map.
  final SortedMap<TServerInstance, TabletServerStatus> result = new ConcurrentSkipListMap<>();
  final RateLimiter shutdownServerRateLimiter = RateLimiter.create(MAX_SHUTDOWNS_PER_SEC);
  for (TServerInstance serverInstance : currentServers) {
    final TServerInstance server = serverInstance;
    if (threads == 0) {
      // Since an unbounded thread pool is being used, rate limit how fast task are added to the
      // executor. This prevents the threads from growing large unless there are lots of
      // unresponsive tservers.
      sleepUninterruptibly(Math.max(1, rpcTimeout / 120_000), TimeUnit.MILLISECONDS);
    }
    tp.submit(() -> {
      try {
        Thread t = Thread.currentThread();
        String oldName = t.getName();
        try {
          // Rename the worker thread while it talks to this server; restored in the finally.
          String message = "Getting status from " + server;
          t.setName(message);
          long startForServer = System.currentTimeMillis();
          log.trace(message);
          TServerConnection connection1 = tserverSet.getConnection(server);
          if (connection1 == null) {
            throw new IOException("No connection to " + server);
          }
          TabletServerStatus status = connection1.getTableMap(false);
          result.put(server, status);
          long duration = System.currentTimeMillis() - startForServer;
          log.trace("Got status from {} in {} ms", server, duration);
        } finally {
          t.setName(oldName);
        }
      } catch (Exception ex) {
        log.error("unable to get tablet server status {} {}", server, ex.toString());
        log.debug("unable to get tablet server status {}", server, ex);
        // Attempt to halt the server only when the rate limiter grants a permit. Either way the
        // server's failure counter is cleared below, so its status is retried until the counter
        // exceeds MAX_BAD_STATUS_COUNT again.
        if (badServers.computeIfAbsent(server, k -> new AtomicInteger(0)).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
          if (shutdownServerRateLimiter.tryAcquire()) {
            log.warn("attempting to stop {}", server);
            try {
              TServerConnection connection2 = tserverSet.getConnection(server);
              if (connection2 != null) {
                connection2.halt(managerLock);
              }
            } catch (TTransportException e1) {
              // ignore: it's probably down
            } catch (Exception e2) {
              log.info("error talking to troublesome tablet server", e2);
            }
          } else {
            log.warn("Unable to shutdown {} as over the shutdown limit of {} per minute", server, MAX_SHUTDOWNS_PER_SEC * 60);
          }
          badServers.remove(server);
        }
      }
    });
  }
  tp.shutdown();
  try {
    tp.awaitTermination(Math.max(10000, rpcTimeout / 3), TimeUnit.MILLISECONDS);
  } catch (InterruptedException e) {
    log.debug("Interrupted while fetching status");
  }
  tp.shutdownNow();
  // Threads may still modify map after shutdownNow is called, so create an immutable snapshot.
  SortedMap<TServerInstance, TabletServerStatus> info = ImmutableSortedMap.copyOf(result);
  // Fill the balancer view from the snapshot gathered by this call so that it always describes
  // exactly the same servers and statuses as the returned map.
  info.forEach((tsi, status) -> balancerMap.put(new TabletServerIdImpl(tsi), TServerStatusImpl.fromThrift(status)));
  synchronized (badServers) {
    // Forget failure counts for servers that are no longer current or that answered this time.
    badServers.keySet().retainAll(currentServers);
    badServers.keySet().removeAll(info.keySet());
  }
  log.debug(String.format("Finished gathering information from %d of %d servers in %.2f seconds", info.size(), currentServers.size(), (System.currentTimeMillis() - start) / 1000.));
  return info;
}
use of org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection in project accumulo by apache.
the class TabletGroupWatcher method sendSplitRequest.
/**
 * Asks the server hosting a tablet to split it at the end points of a delete range.
 *
 * <p>
 * Nothing is sent unless the operation is a delete in the {@code SPLITTING} state, the tablet is
 * {@code HOSTED}, and the tablet's extent overlaps the operation's extent. Each end point of the
 * operation's extent that lies inside the tablet and is not already one of the tablet's own
 * boundaries results in one split request. Failures are logged and otherwise ignored.
 *
 * @param info the merge/delete operation being processed
 * @param state current state of the tablet
 * @param tls location and extent of the tablet
 */
private void sendSplitRequest(MergeInfo info, TabletState state, TabletLocationState tls) {
  // Guards, evaluated in order: still splitting, is a delete (merges don't split), tablet is online.
  if (!info.getState().equals(MergeState.SPLITTING) || !info.isDelete() || !state.equals(TabletState.HOSTED)) {
    return;
  }

  // Only tablets covering an end point of the delete are of interest.
  final KeyExtent deleteRange = info.getExtent();
  if (!tls.extent.overlaps(deleteRange)) {
    return;
  }

  final Text[] boundaries = {deleteRange.prevEndRow(), deleteRange.endRow()};
  for (Text boundary : boundaries) {
    final boolean insideTablet = boundary != null && tls.extent.contains(boundary);
    // A point that already is a boundary of the tablet needs no split.
    final boolean alreadyBoundary = insideTablet
        && (boundary.equals(tls.extent.endRow()) || boundary.equals(tls.extent.prevEndRow()));
    if (!insideTablet || alreadyBoundary) {
      continue;
    }

    try {
      final TServerConnection connection = manager.tserverSet.getConnection(tls.current);
      if (connection == null) {
        Manager.log.warn("Not connected to server {}", tls.current);
      } else {
        Manager.log.info("Asking {} to split {} at {}", tls.current, tls.extent, boundary);
        connection.splitTablet(tls.extent, boundary);
      }
    } catch (NotServingTabletException e) {
      Manager.log.debug("Error asking tablet server to split a tablet: ", e);
    } catch (Exception e) {
      Manager.log.warn("Error asking tablet server to split a tablet: ", e);
    }
  }
}
use of org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection in project accumulo by apache.
the class CopyFailed method isReady.
/**
 * Checks whether every online tablet server reports the given transaction as no longer active.
 *
 * <p>
 * A server with no available connection, or whose check throws a {@code TException}, is not
 * counted as finished.
 *
 * @param tid the transaction id to ask each server about
 * @param manager source of the online server set and of server connections
 * @return {@code 0} when all online servers reported the transaction inactive, otherwise
 *         {@code 500} (presumably a delay before the next check; confirm against the caller)
 */
@Override
public long isReady(long tid, Manager manager) {
  final Set<TServerInstance> online = manager.onlineTabletServers();
  final Set<TServerInstance> done = new HashSet<>();

  for (TServerInstance tserver : online) {
    try {
      final TServerConnection conn = manager.getConnection(tserver);
      if (conn == null || conn.isActive(tid)) {
        continue;
      }
      done.add(tserver);
    } catch (TException ex) {
      log.info("Ignoring error trying to check on tid " + FateTxId.formatTid(tid) + " from server " + tserver + ": " + ex);
    }
  }

  return done.containsAll(online) ? 0 : 500;
}
use of org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection in project accumulo by apache.
the class CompactionDriver method isReady.
/**
 * Checks whether all tablets in the requested row range of the table have reached
 * {@code compactId}, and asks the servers hosting lagging tablets to compact.
 *
 * @param tid the transaction id (not used by this check)
 * @param manager provides the context, ZooKeeper access and tablet server connections
 * @return {@code 0} when no tablet is left to wait for (or the table is the root table);
 *         otherwise a wait time of 500 times the largest per-server count of lagging tablets, at
 *         least twice the measured metadata scan time in milliseconds, and at most 30000
 * @throws AcceptableThriftTableOperationException if the compaction was canceled, the table is
 *         being deleted, the table no longer exists, or the table is offline with no hosted
 *         tablets left to compact
 * @throws Exception on failures reading ZooKeeper or the tablet metadata
 */
@Override
public long isReady(long tid, Manager manager) throws Exception {
  if (tableId.equals(RootTable.ID)) {
    // this code does not properly handle the root table. See #798
    return 0;
  }
  String zCancelID = createCompactionCancellationPath(manager.getInstanceID(), tableId);
  ZooReaderWriter zoo = manager.getContext().getZooReaderWriter();
  // Decode explicitly as UTF-8 instead of depending on the JVM's platform default charset.
  if (Long.parseLong(new String(zoo.getData(zCancelID), java.nio.charset.StandardCharsets.UTF_8)) >= compactId) {
    // compaction was canceled
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OTHER, TableOperationsImpl.COMPACTION_CANCELED_MSG);
  }
  String deleteMarkerPath = PreDeleteTable.createDeleteMarkerPath(manager.getInstanceID(), tableId);
  if (zoo.exists(deleteMarkerPath)) {
    // table is being deleted
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OTHER, TableOperationsImpl.TABLE_DELETED_MSG);
  }
  // Per-server count of hosted tablets that have not yet reached compactId.
  MapCounter<TServerInstance> serversToFlush = new MapCounter<>();
  long t1 = System.currentTimeMillis();
  int tabletsToWaitFor = 0;
  int tabletCount = 0;
  TabletsMetadata tablets = TabletsMetadata.builder(manager.getContext()).forTable(tableId).overlapping(startRow, endRow).fetch(LOCATION, PREV_ROW, COMPACT_ID).build();
  for (TabletMetadata tablet : tablets) {
    if (tablet.getCompactId().orElse(-1) < compactId) {
      tabletsToWaitFor++;
      if (tablet.hasCurrent()) {
        serversToFlush.increment(tablet.getLocation(), 1);
      }
    }
    tabletCount++;
  }
  long scanTime = System.currentTimeMillis() - t1;
  manager.getContext().clearTableListCache();
  if (tabletCount == 0 && !manager.getContext().tableNodeExists(tableId)) {
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.NOTFOUND, null);
  }
  if (serversToFlush.size() == 0 && manager.getContext().getTableState(tableId) == TableState.OFFLINE) {
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OFFLINE, null);
  }
  if (tabletsToWaitFor == 0) {
    return 0;
  }
  // Ask every server that hosts a lagging tablet to compact the range; failures are only logged.
  for (TServerInstance tsi : serversToFlush.keySet()) {
    try {
      final TServerConnection server = manager.getConnection(tsi);
      if (server != null) {
        server.compact(manager.getManagerLock(), tableId.canonical(), startRow, endRow);
      }
    } catch (TException ex) {
      LoggerFactory.getLogger(CompactionDriver.class).error(ex.toString());
    }
  }
  long sleepTime = 500;
  // make wait time depend on the server with the most to compact
  if (serversToFlush.size() > 0) {
    sleepTime = serversToFlush.max() * sleepTime;
  }
  // Never poll faster than twice the scan cost, and never wait longer than 30000.
  sleepTime = Math.max(2 * scanTime, sleepTime);
  sleepTime = Math.min(sleepTime, 30000);
  return sleepTime;
}
use of org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection in project accumulo by apache.
the class LiveTServerSetTest method testSessionIds.
/**
 * Verifies {@code LiveTServerSet.find} against a single known server at {@code localhost:1234}
 * created with the identifier {@code "5555"} (the session id, going by the test name): the
 * address alone and the address with the matching bracketed suffix both resolve to the server's
 * instance, while a different port or a different bracketed suffix yields {@code null}.
 */
@Test
public void testSessionIds() {
  // One known server, backed by a mocked connection.
  TServerConnection connection = EasyMock.createMock(TServerConnection.class);
  TServerInstance instance = new TServerInstance(HostAndPort.fromParts("localhost", 1234), "5555");
  TServerInfo knownServer = new TServerInfo(instance, connection);

  Map<String, TServerInfo> known = new HashMap<>();
  known.put("server1", knownServer);

  LiveTServerSet liveSet = new LiveTServerSet(EasyMock.createMock(ServerContext.class), EasyMock.createMock(Listener.class));

  // Lookup by address only.
  assertEquals(knownServer.instance, liveSet.find(known, "localhost:1234"));
  assertNull(liveSet.find(known, "localhost:4321"));

  // Lookup by address plus bracketed suffix.
  assertEquals(knownServer.instance, liveSet.find(known, "localhost:1234[5555]"));
  assertNull(liveSet.find(known, "localhost:1234[55755]"));
}
Aggregations