use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.
the class CopyFailed method isReady.
/**
 * Asks every online tablet server whether it still considers the given transaction active.
 *
 * <p>
 * A server counts as finished only when a connection to it was obtained and it answered that the
 * transaction is not active. Servers without a connection, and servers whose check failed with a
 * {@code TException}, are not counted as finished.
 *
 * @param tid
 *          id of the transaction to check
 * @param manager
 *          source of the online tablet servers and of the connections to them
 * @return 0 when every online tablet server is finished, otherwise 500 (presumably a delay in
 *         milliseconds before the check is repeated; confirm against the caller)
 */
@Override
public long isReady(long tid, Manager manager) {
  Set<TServerInstance> online = manager.onlineTabletServers();
  Set<TServerInstance> done = new HashSet<>();
  for (TServerInstance tserver : online) {
    try {
      TServerConnection connection = manager.getConnection(tserver);
      if (connection == null || connection.isActive(tid)) {
        // unreachable or still busy: leave it out of the finished set
        continue;
      }
      done.add(tserver);
    } catch (TException ex) {
      // best effort: a failed check simply leaves this server unfinished for this round
      log.info("Ignoring error trying to check on tid " + FateTxId.formatTid(tid) + " from server " + tserver + ": " + ex);
    }
  }
  return done.containsAll(online) ? 0 : 500;
}
use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.
the class LoadFiles method call.
/**
 * Hands every file found in the bulk directory to tablet servers for import, one file per task,
 * and retries the files that were not loaded. Whatever is still not loaded after the last attempt
 * is written, one path per line, to the failures file in the error directory.
 *
 * <p>
 * The number of attempts comes from {@code Property.MANAGER_BULK_RETRIES} and is at least one.
 * Before each attempt the method blocks until at least one tablet server is online. When
 * {@code Property.MANAGER_BULK_TSERVER_REGEX} is set, only servers whose host matches it are used;
 * if none match, the attempt submits no work but still counts.
 *
 * @param tid
 *          transaction id, used in log messages and passed to the tablet servers
 * @param manager
 *          provides configuration, the volume manager, the RPC context and the online servers
 * @return a {@code CompleteBulkImport} step built from the table id, source, bulk and error
 *         directories
 * @throws Exception
 *           if the error directory is not writable, or a file system call or task wait fails
 */
@Override
public Repo<Manager> call(final long tid, final Manager manager) throws Exception {
  manager.updateBulkImportStatus(source, BulkImportState.LOADING);
  ExecutorService pool = getThreadPool(manager);
  final AccumuloConfiguration conf = manager.getConfiguration();
  VolumeManager fs = manager.getVolumeManager();

  List<FileStatus> files = new ArrayList<>();
  for (FileStatus status : fs.listStatus(new Path(bulk))) {
    files.add(status);
  }
  log.debug(FateTxId.formatTid(tid) + " importing " + files.size() + " files");

  // Prove the error directory is writable by creating, then removing, a marker file.
  Path marker = new Path(this.errorDir, ".iswritable");
  boolean created = fs.createNewFile(marker);
  if (!created) {
    // Maybe this is a re-try... clear the flag and try again
    fs.delete(marker);
    created = fs.createNewFile(marker);
  }
  if (!created) {
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null,
        TableOperation.BULK_IMPORT, TableOperationExceptionType.BULK_BAD_ERROR_DIRECTORY,
        "Unable to write to " + this.errorDir);
  }
  fs.delete(marker);

  // Paths still waiting to be loaded; shrinks after every attempt.
  final Set<String> pending = Collections.synchronizedSet(new HashSet<>());
  for (FileStatus status : files) {
    pending.add(status.getPath().toString());
  }

  final int maxAttempts = Math.max(1, conf.getCount(Property.MANAGER_BULK_RETRIES));
  int attempt = 0;
  while (attempt < maxAttempts && !pending.isEmpty()) {
    List<Future<Void>> futures = new ArrayList<>();

    if (manager.onlineTabletServers().isEmpty()) {
      log.warn("There are no tablet server to process bulk import, waiting (tid = " + FateTxId.formatTid(tid) + ")");
    }
    while (manager.onlineTabletServers().isEmpty()) {
      sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
    }

    // Filled by the worker tasks, hence the synchronized wrapper.
    final List<String> loaded = Collections.synchronizedList(new ArrayList<>());

    // Pick the servers eligible for this attempt: all of them, or those matching the regex.
    final TServerInstance[] servers;
    String prop = conf.get(Property.MANAGER_BULK_TSERVER_REGEX);
    if (prop != null && !"".equals(prop)) {
      Pattern regex = Pattern.compile(prop);
      List<TServerInstance> matching = new ArrayList<>();
      for (TServerInstance candidate : manager.onlineTabletServers()) {
        if (regex.matcher(candidate.getHost()).matches()) {
          matching.add(candidate);
        }
      }
      if (matching.isEmpty()) {
        log.warn("There are no tablet servers online that match supplied regex: {}",
            conf.get(Property.MANAGER_BULK_TSERVER_REGEX));
      }
      servers = matching.toArray(new TServerInstance[0]);
    } else {
      servers = manager.onlineTabletServers().toArray(new TServerInstance[0]);
    }

    // Use the threadpool to assign files one-at-a-time to the server
    if (servers.length > 0) {
      for (final String file : pending) {
        futures.add(pool.submit(() -> {
          HostAndPort server = null;
          ClientService.Client client = null;
          try {
            // get a connection to a random tablet server, do not prefer cached connections
            // because this is running on the manager and there are lots of connections to tablet
            // servers serving the metadata tablets
            long timeout = manager.getConfiguration().getTimeInMillis(Property.MANAGER_BULK_TIMEOUT);
            server = servers[random.nextInt(servers.length)].getHostAndPort();
            client = ThriftUtil.getTServerClient(server, manager.getContext(), timeout);
            List<String> single = Collections.singletonList(file);
            log.debug("Asking " + server + " to bulk import " + file);
            List<String> rejected = client.bulkImportFiles(TraceUtil.traceInfo(),
                manager.getContext().rpcCreds(), tid, tableId.canonical(), single, errorDir,
                setTime);
            // an empty reply means the server reported no failure for this file
            if (rejected.isEmpty()) {
              loaded.add(file);
            }
          } catch (Exception ex) {
            // the file stays pending and is picked up by the next attempt, if any
            log.error("rpc failed server:" + server + ", tid:" + FateTxId.formatTid(tid) + " " + ex);
          } finally {
            ThriftUtil.returnClient(client, manager.getContext());
          }
          return null;
        }));
      }
    }

    // Wait for every task of this attempt before deciding what is left.
    for (Future<Void> future : futures) {
      future.get();
    }
    pending.removeAll(loaded);
    if (!pending.isEmpty()) {
      log.debug(FateTxId.formatTid(tid) + " attempt " + (attempt + 1) + " " + sampleList(pending, 10) + " failed");
      sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
    }
    attempt++;
  }

  // Record the files that never loaded, one path per line.
  FSDataOutputStream failFile = fs.overwrite(new Path(errorDir, BulkImport.FAILURES_TXT));
  try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(failFile, UTF_8))) {
    for (String unloaded : pending) {
      out.write(unloaded);
      out.write("\n");
    }
  }

  // return the next step, which will perform cleanup
  return new CompleteBulkImport(tableId, source, bulk, errorDir);
}
use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.
the class ManagerReplicationCoordinator method getServicerAddress.
/**
 * Authenticates the caller, picks one online tablet server at random and returns the replication
 * service address stored for it in ZooKeeper.
 *
 * @param remoteTableId
 *          table id on the remote side; only used in the authentication failure log message
 * @param creds
 *          credentials of the caller, checked against the manager's RPC credentials
 * @return the data stored under the chosen tablet server's replication node, decoded as UTF-8
 * @throws ReplicationCoordinatorException
 *           with {@code CANNOT_AUTHENTICATE} if authentication fails, {@code NO_AVAILABLE_SERVERS}
 *           if no tablet server is online, or {@code SERVICE_CONFIGURATION_UNAVAILABLE} if the
 *           ZooKeeper read fails or is interrupted
 * @throws TException
 *           declared by the interface; not thrown directly by the visible code
 */
@Override
public String getServicerAddress(String remoteTableId, TCredentials creds) throws ReplicationCoordinatorException, TException {
  try {
    security.authenticateUser(manager.getContext().rpcCreds(), creds);
  } catch (ThriftSecurityException e) {
    log.error("{} failed to authenticate for replication to {}", creds.getPrincipal(), remoteTableId);
    throw new ReplicationCoordinatorException(ReplicationCoordinatorErrorCode.CANNOT_AUTHENTICATE,
        "Could not authenticate " + creds.getPrincipal());
  }

  Set<TServerInstance> candidates = manager.onlineTabletServers();
  if (candidates.isEmpty()) {
    throw new ReplicationCoordinatorException(ReplicationCoordinatorErrorCode.NO_AVAILABLE_SERVERS,
        "No tservers are available for replication");
  }

  // Choose the server by a random position within the set.
  int position = random.nextInt(candidates.size());
  TServerInstance chosen = getRandomTServer(candidates, position);

  try {
    byte[] stored = reader.getData(
        manager.getZooKeeperRoot() + ReplicationConstants.ZOO_TSERVERS + "/" + chosen.getHostPort());
    return new String(stored, UTF_8);
  } catch (KeeperException | InterruptedException e) {
    log.error("Could not fetch replication service port for tserver", e);
    throw new ReplicationCoordinatorException(
        ReplicationCoordinatorErrorCode.SERVICE_CONFIGURATION_UNAVAILABLE,
        "Could not determine port for replication service running at " + chosen.getHostPort());
  }
}
use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.
the class CompactionDriver method isReady.
/**
 * Checks how far the compaction identified by {@code compactId} has progressed over the tablets
 * of {@code tableId} overlapping {@code startRow}..{@code endRow}, and asks the tablet servers
 * that still host lagging tablets to compact.
 *
 * <p>
 * A tablet is considered pending while its recorded compact id (or -1 when absent) is lower than
 * {@code compactId}.
 *
 * @param tid
 *          transaction id; not used by the visible code
 * @param manager
 *          provides the server context, ZooKeeper access, tablet server connections and the
 *          manager lock
 * @return 0 for the root table or when no tablet is pending; otherwise a wait value (presumably
 *         milliseconds before the next check; confirm against the caller). The value is 500 times
 *         the largest per-server pending count, raised to at least twice the metadata scan time,
 *         and capped at 30000
 * @throws Exception
 *           an {@code AcceptableThriftTableOperationException} when the compaction was canceled,
 *           the table is being deleted, the table no longer exists, or the table is offline with
 *           nothing to flush; other failures from ZooKeeper or the metadata scan propagate
 */
@Override
public long isReady(long tid, Manager manager) throws Exception {
  if (tableId.equals(RootTable.ID)) {
    // this code does not properly handle the root table. See #798
    return 0;
  }
  String zCancelID = createCompactionCancellationPath(manager.getInstanceID(), tableId);
  ZooReaderWriter zoo = manager.getContext().getZooReaderWriter();
  // Decode with an explicit charset so the result does not depend on the platform default.
  if (Long.parseLong(new String(zoo.getData(zCancelID), java.nio.charset.StandardCharsets.UTF_8)) >= compactId) {
    // compaction was canceled
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OTHER, TableOperationsImpl.COMPACTION_CANCELED_MSG);
  }
  String deleteMarkerPath = PreDeleteTable.createDeleteMarkerPath(manager.getInstanceID(), tableId);
  if (zoo.exists(deleteMarkerPath)) {
    // table is being deleted
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OTHER, TableOperationsImpl.TABLE_DELETED_MSG);
  }
  // Number of pending tablets per tablet server that currently hosts them.
  MapCounter<TServerInstance> serversToFlush = new MapCounter<>();
  long t1 = System.currentTimeMillis();
  int tabletsToWaitFor = 0;
  int tabletCount = 0;
  TabletsMetadata tablets = TabletsMetadata.builder(manager.getContext()).forTable(tableId).overlapping(startRow, endRow).fetch(LOCATION, PREV_ROW, COMPACT_ID).build();
  for (TabletMetadata tablet : tablets) {
    if (tablet.getCompactId().orElse(-1) < compactId) {
      tabletsToWaitFor++;
      // only tablets with a current location can be asked to compact
      if (tablet.hasCurrent()) {
        serversToFlush.increment(tablet.getLocation(), 1);
      }
    }
    tabletCount++;
  }
  long scanTime = System.currentTimeMillis() - t1;
  manager.getContext().clearTableListCache();
  if (tabletCount == 0 && !manager.getContext().tableNodeExists(tableId)) {
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.NOTFOUND, null);
  }
  if (serversToFlush.size() == 0 && manager.getContext().getTableState(tableId) == TableState.OFFLINE) {
    throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OFFLINE, null);
  }
  if (tabletsToWaitFor == 0) {
    return 0;
  }
  for (TServerInstance tsi : serversToFlush.keySet()) {
    try {
      final TServerConnection server = manager.getConnection(tsi);
      if (server != null) {
        server.compact(manager.getManagerLock(), tableId.canonical(), startRow, endRow);
      }
    } catch (TException ex) {
      // Best effort: keep going with the other servers, but keep the stack trace in the log.
      LoggerFactory.getLogger(CompactionDriver.class).error(ex.toString(), ex);
    }
  }
  long sleepTime = 500;
  // make wait time depend on the server with the most to compact
  if (serversToFlush.size() > 0) {
    sleepTime = serversToFlush.max() * sleepTime;
  }
  // never poll faster than twice the time the scan took, and never wait longer than 30000
  sleepTime = Math.max(2 * scanTime, sleepTime);
  sleepTime = Math.min(sleepTime, 30000);
  return sleepTime;
}
use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.
the class FindOfflineTablets method findOffline.
/**
 * Looks for offline tablets level by level and stops at the first level that reports any: first
 * the tablet state store for the root level, then the tablets recorded in the root table, then
 * the tablets recorded in the metadata table.
 *
 * <p>
 * Progress lines are printed to standard output. Tablet servers added or removed after scanning
 * started are logged as warnings.
 *
 * @param context
 *          server context used to reach the tablet state store and the metadata scanners
 * @param tableName
 *          table to restrict the check to, or {@code null} to check the whole metadata range;
 *          the root and metadata table names end the check after their own level
 * @return the count returned by {@code checkTablets} for the first level where it is positive;
 *         0 when the check ends at the root or metadata table level without finding any;
 *         otherwise the count returned for the metadata table scan
 * @throws TableNotFoundException
 *           declared by the method; presumably raised when {@code tableName} cannot be resolved
 *           (confirm against {@code getTableId})
 */
static int findOffline(ServerContext context, String tableName) throws TableNotFoundException {
  // Flipped to true once scanning starts, so changes seen during setup are not reported.
  final AtomicBoolean scanning = new AtomicBoolean(false);
  LiveTServerSet tservers = new LiveTServerSet(context, new Listener() {
    @Override
    public void update(LiveTServerSet current, Set<TServerInstance> deleted, Set<TServerInstance> added) {
      if (!deleted.isEmpty() && scanning.get()) {
        log.warn("Tablet servers deleted while scanning: {}", deleted);
      }
      if (!added.isEmpty() && scanning.get()) {
        log.warn("Tablet servers added while scanning: {}", added);
      }
    }
  });
  tservers.startListeningForTabletServerChanges();
  scanning.set(true);

  // NOTE(review): this iterator may also be closeable; the visible code cannot confirm it.
  Iterator<TabletLocationState> zooScanner = TabletStateStore.getStoreForLevel(DataLevel.ROOT, context).iterator();
  int offline = 0;
  System.out.println("Scanning zookeeper");
  if ((offline = checkTablets(context, zooScanner, tservers)) > 0) {
    return offline;
  }
  if (RootTable.NAME.equals(tableName)) {
    return 0;
  }

  System.out.println("Scanning " + RootTable.NAME);
  // Close the root table scanner when done, the same way the metadata scanner below is handled.
  try (MetaDataTableScanner rootScanner = new MetaDataTableScanner(context, TabletsSection.getRange(), RootTable.NAME)) {
    if ((offline = checkTablets(context, rootScanner, tservers)) > 0) {
      return offline;
    }
  }
  if (MetadataTable.NAME.equals(tableName)) {
    return 0;
  }

  System.out.println("Scanning " + MetadataTable.NAME);
  Range range = TabletsSection.getRange();
  if (tableName != null) {
    // narrow the scan to the metadata entries of the requested table
    TableId tableId = context.getTableId(tableName);
    range = new KeyExtent(tableId, null, null).toMetaRange();
  }
  try (MetaDataTableScanner metaScanner = new MetaDataTableScanner(context, range, MetadataTable.NAME)) {
    return checkTablets(context, metaScanner, tservers);
  }
}
Aggregations