Example 16 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class CopyFailed method isReady.

@Override
public long isReady(long tid, Manager manager) {
    Set<TServerInstance> finished = new HashSet<>();
    Set<TServerInstance> running = manager.onlineTabletServers();
    for (TServerInstance server : running) {
        try {
            TServerConnection client = manager.getConnection(server);
            if (client != null && !client.isActive(tid))
                finished.add(server);
        } catch (TException ex) {
            log.info("Ignoring error trying to check on tid " + FateTxId.formatTid(tid) + " from server " + server + ": " + ex);
        }
    }
    if (finished.containsAll(running))
        return 0;
    return 500;
}
Also used : TServerConnection(org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection) TException(org.apache.thrift.TException) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) HashSet(java.util.HashSet)
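
In the FATE framework the return value of isReady controls scheduling: 0 means the step may run now, while a positive value is a back-off in milliseconds before the step is polled again. A minimal sketch of that contract (not the actual FATE runner; Repo and Manager imports as used elsewhere in these examples):

// Sketch only: how a runner would interpret the return value of isReady() above.
static void waitUntilReady(Repo<Manager> step, long tid, Manager manager) throws Exception {
    long delay;
    while ((delay = step.isReady(tid, manager)) > 0) {
        Thread.sleep(delay); // 500 ms while any online tablet server still reports the tid as active
    }
    // every online tablet server has finished; the framework would now invoke call()
}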

Example 17 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class LoadFiles method call.

@Override
public Repo<Manager> call(final long tid, final Manager manager) throws Exception {
    manager.updateBulkImportStatus(source, BulkImportState.LOADING);
    ExecutorService executor = getThreadPool(manager);
    final AccumuloConfiguration conf = manager.getConfiguration();
    VolumeManager fs = manager.getVolumeManager();
    List<FileStatus> files = new ArrayList<>();
    Collections.addAll(files, fs.listStatus(new Path(bulk)));
    log.debug(FateTxId.formatTid(tid) + " importing " + files.size() + " files");
    Path writable = new Path(this.errorDir, ".iswritable");
    if (!fs.createNewFile(writable)) {
        // Maybe this is a re-try... clear the flag and try again
        fs.delete(writable);
        if (!fs.createNewFile(writable))
            throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.BULK_IMPORT, TableOperationExceptionType.BULK_BAD_ERROR_DIRECTORY, "Unable to write to " + this.errorDir);
    }
    fs.delete(writable);
    final Set<String> filesToLoad = Collections.synchronizedSet(new HashSet<>());
    for (FileStatus f : files) filesToLoad.add(f.getPath().toString());
    final int RETRIES = Math.max(1, conf.getCount(Property.MANAGER_BULK_RETRIES));
    for (int attempt = 0; attempt < RETRIES && !filesToLoad.isEmpty(); attempt++) {
        List<Future<Void>> results = new ArrayList<>();
        if (manager.onlineTabletServers().isEmpty())
            log.warn("There are no tablet server to process bulk import, waiting (tid = " + FateTxId.formatTid(tid) + ")");
        while (manager.onlineTabletServers().isEmpty()) {
            sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
        }
        // Use the threadpool to assign files one-at-a-time to the server
        final List<String> loaded = Collections.synchronizedList(new ArrayList<>());
        final TServerInstance[] servers;
        String prop = conf.get(Property.MANAGER_BULK_TSERVER_REGEX);
        if (prop == null || "".equals(prop)) {
            servers = manager.onlineTabletServers().toArray(new TServerInstance[0]);
        } else {
            Pattern regex = Pattern.compile(prop);
            List<TServerInstance> subset = new ArrayList<>();
            manager.onlineTabletServers().forEach(t -> {
                if (regex.matcher(t.getHost()).matches()) {
                    subset.add(t);
                }
            });
            if (subset.isEmpty()) {
                log.warn("There are no tablet servers online that match supplied regex: {}", conf.get(Property.MANAGER_BULK_TSERVER_REGEX));
            }
            servers = subset.toArray(new TServerInstance[0]);
        }
        if (servers.length > 0) {
            for (final String file : filesToLoad) {
                results.add(executor.submit(() -> {
                    ClientService.Client client = null;
                    HostAndPort server = null;
                    try {
                        // get a connection to a random tablet server, do not prefer cached connections
                        // because this is running on the manager and there are lots of connections to tablet
                        // servers serving the metadata tablets
                        long timeInMillis = manager.getConfiguration().getTimeInMillis(Property.MANAGER_BULK_TIMEOUT);
                        server = servers[random.nextInt(servers.length)].getHostAndPort();
                        client = ThriftUtil.getTServerClient(server, manager.getContext(), timeInMillis);
                        List<String> attempt1 = Collections.singletonList(file);
                        log.debug("Asking " + server + " to bulk import " + file);
                        List<String> fail = client.bulkImportFiles(TraceUtil.traceInfo(), manager.getContext().rpcCreds(), tid, tableId.canonical(), attempt1, errorDir, setTime);
                        if (fail.isEmpty()) {
                            loaded.add(file);
                        }
                    } catch (Exception ex) {
                        log.error("rpc failed server:" + server + ", tid:" + FateTxId.formatTid(tid) + " " + ex);
                    } finally {
                        ThriftUtil.returnClient(client, manager.getContext());
                    }
                    return null;
                }));
            }
        }
        for (Future<Void> f : results) {
            f.get();
        }
        filesToLoad.removeAll(loaded);
        if (!filesToLoad.isEmpty()) {
            log.debug(FateTxId.formatTid(tid) + " attempt " + (attempt + 1) + " " + sampleList(filesToLoad, 10) + " failed");
            sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        }
    }
    FSDataOutputStream failFile = fs.overwrite(new Path(errorDir, BulkImport.FAILURES_TXT));
    try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(failFile, UTF_8))) {
        for (String f : filesToLoad) {
            out.write(f);
            out.write("\n");
        }
    }
    // return the next step, which will perform cleanup
    return new CompleteBulkImport(tableId, source, bulk, errorDir);
}
Also used : VolumeManager(org.apache.accumulo.server.fs.VolumeManager) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) AcceptableThriftTableOperationException(org.apache.accumulo.core.clientImpl.AcceptableThriftTableOperationException) BufferedWriter(java.io.BufferedWriter) HostAndPort(org.apache.accumulo.core.util.HostAndPort) ArrayList(java.util.ArrayList) List(java.util.List) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Path(org.apache.hadoop.fs.Path) Pattern(java.util.regex.Pattern) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) AcceptableThriftTableOperationException(org.apache.accumulo.core.clientImpl.AcceptableThriftTableOperationException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) OutputStreamWriter(java.io.OutputStreamWriter)
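
Tablet server selection above is driven by Property.MANAGER_BULK_TSERVER_REGEX: an unset or empty value makes every online server eligible, otherwise only servers whose host matches the pattern are used. The same filtering step in isolation, as a sketch that takes a plain set of servers instead of the Manager:

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.accumulo.core.metadata.TServerInstance;

// Sketch: mirrors the regex filtering done in call() above.
static TServerInstance[] eligibleServers(Set<TServerInstance> online, String regexProp) {
    if (regexProp == null || regexProp.isEmpty()) {
        return online.toArray(new TServerInstance[0]);
    }
    Pattern regex = Pattern.compile(regexProp);
    List<TServerInstance> subset = new ArrayList<>();
    for (TServerInstance t : online) {
        if (regex.matcher(t.getHost()).matches()) {
            subset.add(t); // keep only servers whose host matches the configured pattern
        }
    }
    return subset.toArray(new TServerInstance[0]);
}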

Example 18 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class ManagerReplicationCoordinator method getServicerAddress.

@Override
public String getServicerAddress(String remoteTableId, TCredentials creds) throws ReplicationCoordinatorException, TException {
    try {
        security.authenticateUser(manager.getContext().rpcCreds(), creds);
    } catch (ThriftSecurityException e) {
        log.error("{} failed to authenticate for replication to {}", creds.getPrincipal(), remoteTableId);
        throw new ReplicationCoordinatorException(ReplicationCoordinatorErrorCode.CANNOT_AUTHENTICATE, "Could not authenticate " + creds.getPrincipal());
    }
    Set<TServerInstance> tservers = manager.onlineTabletServers();
    if (tservers.isEmpty()) {
        throw new ReplicationCoordinatorException(ReplicationCoordinatorErrorCode.NO_AVAILABLE_SERVERS, "No tservers are available for replication");
    }
    TServerInstance tserver = getRandomTServer(tservers, random.nextInt(tservers.size()));
    String replServiceAddr;
    try {
        replServiceAddr = new String(reader.getData(manager.getZooKeeperRoot() + ReplicationConstants.ZOO_TSERVERS + "/" + tserver.getHostPort()), UTF_8);
    } catch (KeeperException | InterruptedException e) {
        log.error("Could not fetch replication service port for tserver", e);
        throw new ReplicationCoordinatorException(ReplicationCoordinatorErrorCode.SERVICE_CONFIGURATION_UNAVAILABLE, "Could not determine port for replication service running at " + tserver.getHostPort());
    }
    return replServiceAddr;
}
Also used : ReplicationCoordinatorException(org.apache.accumulo.core.replication.thrift.ReplicationCoordinatorException) ThriftSecurityException(org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) KeeperException(org.apache.zookeeper.KeeperException)
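
getRandomTServer is not part of the snippet above; a plausible implementation simply walks the set to the index chosen with random.nextInt(tservers.size()). A minimal sketch, assuming any iteration order of the set is acceptable:

import java.util.Iterator;
import java.util.Set;

import org.apache.accumulo.core.metadata.TServerInstance;

// Sketch of the helper used above: returns the index-th server from the set.
static TServerInstance getRandomTServer(Set<TServerInstance> tservers, int index) {
    Iterator<TServerInstance> iter = tservers.iterator();
    for (int i = 0; i < index; i++) {
        iter.next();
    }
    return iter.next();
}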

Example 19 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class CompactionDriver method isReady.

@Override
public long isReady(long tid, Manager manager) throws Exception {
    if (tableId.equals(RootTable.ID)) {
        // this code does not properly handle the root table. See #798
        return 0;
    }
    String zCancelID = createCompactionCancellationPath(manager.getInstanceID(), tableId);
    ZooReaderWriter zoo = manager.getContext().getZooReaderWriter();
    if (Long.parseLong(new String(zoo.getData(zCancelID), UTF_8)) >= compactId) {
        // compaction was canceled
        throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OTHER, TableOperationsImpl.COMPACTION_CANCELED_MSG);
    }
    String deleteMarkerPath = PreDeleteTable.createDeleteMarkerPath(manager.getInstanceID(), tableId);
    if (zoo.exists(deleteMarkerPath)) {
        // table is being deleted
        throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OTHER, TableOperationsImpl.TABLE_DELETED_MSG);
    }
    MapCounter<TServerInstance> serversToFlush = new MapCounter<>();
    long t1 = System.currentTimeMillis();
    int tabletsToWaitFor = 0;
    int tabletCount = 0;
    TabletsMetadata tablets = TabletsMetadata.builder(manager.getContext()).forTable(tableId).overlapping(startRow, endRow).fetch(LOCATION, PREV_ROW, COMPACT_ID).build();
    for (TabletMetadata tablet : tablets) {
        if (tablet.getCompactId().orElse(-1) < compactId) {
            tabletsToWaitFor++;
            if (tablet.hasCurrent()) {
                serversToFlush.increment(tablet.getLocation(), 1);
            }
        }
        tabletCount++;
    }
    long scanTime = System.currentTimeMillis() - t1;
    manager.getContext().clearTableListCache();
    if (tabletCount == 0 && !manager.getContext().tableNodeExists(tableId))
        throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.NOTFOUND, null);
    if (serversToFlush.size() == 0 && manager.getContext().getTableState(tableId) == TableState.OFFLINE)
        throw new AcceptableThriftTableOperationException(tableId.canonical(), null, TableOperation.COMPACT, TableOperationExceptionType.OFFLINE, null);
    if (tabletsToWaitFor == 0)
        return 0;
    for (TServerInstance tsi : serversToFlush.keySet()) {
        try {
            final TServerConnection server = manager.getConnection(tsi);
            if (server != null)
                server.compact(manager.getManagerLock(), tableId.canonical(), startRow, endRow);
        } catch (TException ex) {
            LoggerFactory.getLogger(CompactionDriver.class).error(ex.toString());
        }
    }
    long sleepTime = 500;
    // make wait time depend on the server with the most to compact
    if (serversToFlush.size() > 0)
        sleepTime = serversToFlush.max() * sleepTime;
    sleepTime = Math.max(2 * scanTime, sleepTime);
    sleepTime = Math.min(sleepTime, 30000);
    return sleepTime;
}
Also used : TServerConnection(org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection) TException(org.apache.thrift.TException) TabletsMetadata(org.apache.accumulo.core.metadata.schema.TabletsMetadata) ZooReaderWriter(org.apache.accumulo.fate.zookeeper.ZooReaderWriter) MapCounter(org.apache.accumulo.core.util.MapCounter) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) AcceptableThriftTableOperationException(org.apache.accumulo.core.clientImpl.AcceptableThriftTableOperationException)
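
The back-off computed at the end of isReady scales with the busiest server and the metadata scan time, and is capped at 30 seconds. The same arithmetic in isolation, as a sketch:

// Sketch of the wait computation above: 500 ms per tablet still to compact on the
// busiest server, at least twice the metadata scan time, never more than 30 s.
static long compactionWaitMillis(long maxTabletsOnBusiestServer, long scanTimeMillis) {
    long sleepTime = 500;
    if (maxTabletsOnBusiestServer > 0) {
        sleepTime = maxTabletsOnBusiestServer * sleepTime;
    }
    sleepTime = Math.max(2 * scanTimeMillis, sleepTime);
    return Math.min(sleepTime, 30_000);
}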

Example 20 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class FindOfflineTablets method findOffline.

static int findOffline(ServerContext context, String tableName) throws TableNotFoundException {
    final AtomicBoolean scanning = new AtomicBoolean(false);
    LiveTServerSet tservers = new LiveTServerSet(context, new Listener() {

        @Override
        public void update(LiveTServerSet current, Set<TServerInstance> deleted, Set<TServerInstance> added) {
            if (!deleted.isEmpty() && scanning.get())
                log.warn("Tablet servers deleted while scanning: {}", deleted);
            if (!added.isEmpty() && scanning.get())
                log.warn("Tablet servers added while scanning: {}", added);
        }
    });
    tservers.startListeningForTabletServerChanges();
    scanning.set(true);
    Iterator<TabletLocationState> zooScanner = TabletStateStore.getStoreForLevel(DataLevel.ROOT, context).iterator();
    int offline = 0;
    System.out.println("Scanning zookeeper");
    if ((offline = checkTablets(context, zooScanner, tservers)) > 0)
        return offline;
    if (RootTable.NAME.equals(tableName))
        return 0;
    System.out.println("Scanning " + RootTable.NAME);
    Iterator<TabletLocationState> rootScanner = new MetaDataTableScanner(context, TabletsSection.getRange(), RootTable.NAME);
    if ((offline = checkTablets(context, rootScanner, tservers)) > 0)
        return offline;
    if (MetadataTable.NAME.equals(tableName))
        return 0;
    System.out.println("Scanning " + MetadataTable.NAME);
    Range range = TabletsSection.getRange();
    if (tableName != null) {
        TableId tableId = context.getTableId(tableName);
        range = new KeyExtent(tableId, null, null).toMetaRange();
    }
    try (MetaDataTableScanner metaScanner = new MetaDataTableScanner(context, range, MetadataTable.NAME)) {
        return checkTablets(context, metaScanner, tservers);
    }
}
Also used : TableId(org.apache.accumulo.core.data.TableId) Listener(org.apache.accumulo.server.manager.LiveTServerSet.Listener) Range(org.apache.accumulo.core.data.Range) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) LiveTServerSet(org.apache.accumulo.server.manager.LiveTServerSet) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) MetaDataTableScanner(org.apache.accumulo.server.manager.state.MetaDataTableScanner) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState)
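
When a table name is supplied, the metadata scan above is narrowed from the whole tablets section to that table's rows using a KeyExtent covering the entire table. A minimal sketch of that range selection, assuming the usual MetadataSchema.TabletsSection import:

import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;

// Sketch: mirrors the range selection in findOffline() above.
static Range metadataRangeFor(TableId tableId) {
    if (tableId == null) {
        return TabletsSection.getRange(); // every tablet's metadata
    }
    // a KeyExtent with null end row and null prev end row covers the whole table
    return new KeyExtent(tableId, null, null).toMetaRange();
}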

Aggregations

TServerInstance (org.apache.accumulo.core.metadata.TServerInstance) 89
KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent) 32
ArrayList (java.util.ArrayList) 31
Test (org.junit.Test) 30
HashMap (java.util.HashMap) 21
ServerContext (org.apache.accumulo.server.ServerContext) 18
TabletLocationState (org.apache.accumulo.core.metadata.TabletLocationState) 17
HostAndPort (org.apache.accumulo.core.util.HostAndPort) 14
HashSet (java.util.HashSet) 13
TabletServerStatus (org.apache.accumulo.core.master.thrift.TabletServerStatus) 13
TableId (org.apache.accumulo.core.data.TableId) 12
AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration) 11
List (java.util.List) 10
LiveTServerSet (org.apache.accumulo.server.manager.LiveTServerSet) 10
TException (org.apache.thrift.TException) 10
TreeMap (java.util.TreeMap) 9
UUID (java.util.UUID) 9
TreeSet (java.util.TreeSet) 8
Key (org.apache.accumulo.core.data.Key) 8
Value (org.apache.accumulo.core.data.Value) 8