Search in sources :

Example 31 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hadoop by apache.

The class CLI, method run:

public int run(String[] argv) throws Exception {
    int exitCode = -1;
    if (argv.length < 1) {
        displayUsage("");
        return exitCode;
    }
    // process arguments
    String cmd = argv[0];
    String submitJobFile = null;
    String jobid = null;
    String taskid = null;
    String historyFileOrJobId = null;
    String historyOutFile = null;
    String historyOutFormat = HistoryViewer.HUMAN_FORMAT;
    String counterGroupName = null;
    String counterName = null;
    JobPriority jp = null;
    String taskType = null;
    String taskState = null;
    int fromEvent = 0;
    int nEvents = 0;
    int jpvalue = 0;
    String configOutFile = null;
    boolean getStatus = false;
    boolean getCounter = false;
    boolean killJob = false;
    boolean listEvents = false;
    boolean viewHistory = false;
    boolean viewAllHistory = false;
    boolean listJobs = false;
    boolean listAllJobs = false;
    boolean listActiveTrackers = false;
    boolean listBlacklistedTrackers = false;
    boolean displayTasks = false;
    boolean killTask = false;
    boolean failTask = false;
    boolean setJobPriority = false;
    boolean logs = false;
    boolean downloadConfig = false;
    if ("-submit".equals(cmd)) {
        if (argv.length != 2) {
            displayUsage(cmd);
            return exitCode;
        }
        submitJobFile = argv[1];
    } else if ("-status".equals(cmd)) {
        if (argv.length != 2) {
            displayUsage(cmd);
            return exitCode;
        }
        jobid = argv[1];
        getStatus = true;
    } else if ("-counter".equals(cmd)) {
        if (argv.length != 4) {
            displayUsage(cmd);
            return exitCode;
        }
        getCounter = true;
        jobid = argv[1];
        counterGroupName = argv[2];
        counterName = argv[3];
    } else if ("-kill".equals(cmd)) {
        if (argv.length != 2) {
            displayUsage(cmd);
            return exitCode;
        }
        jobid = argv[1];
        killJob = true;
    } else if ("-set-priority".equals(cmd)) {
        if (argv.length != 3) {
            displayUsage(cmd);
            return exitCode;
        }
        jobid = argv[1];
        try {
            jp = JobPriority.valueOf(argv[2]);
        } catch (IllegalArgumentException iae) {
            try {
                jpvalue = Integer.parseInt(argv[2]);
            } catch (NumberFormatException ne) {
                LOG.info(ne);
                displayUsage(cmd);
                return exitCode;
            }
        }
        setJobPriority = true;
    } else if ("-events".equals(cmd)) {
        if (argv.length != 4) {
            displayUsage(cmd);
            return exitCode;
        }
        jobid = argv[1];
        fromEvent = Integer.parseInt(argv[2]);
        nEvents = Integer.parseInt(argv[3]);
        listEvents = true;
    } else if ("-history".equals(cmd)) {
        viewHistory = true;
        if (argv.length < 2 || argv.length > 7) {
            displayUsage(cmd);
            return exitCode;
        }
        // Some arguments are optional while others are not, and some require
        // second arguments.  Due to this, the indexing can vary depending on
        // what's specified and what's left out, as summarized in the below table:
        // [all] <jobHistoryFile|jobId> [-outfile <file>] [-format <human|json>]
        //   1                  2            3       4         5         6
        //   1                  2            3       4
        //   1                  2                              3         4
        //   1                  2
        //                      1            2       3         4         5
        //                      1            2       3
        //                      1                              2         3
        //                      1
        // "all" is optional, but comes first if specified
        int index = 1;
        if ("all".equals(argv[index])) {
            index++;
            viewAllHistory = true;
            if (argv.length == 2) {
                displayUsage(cmd);
                return exitCode;
            }
        }
        // Get the job history file or job id argument
        historyFileOrJobId = argv[index++];
        // "-outfile" is optional, but if specified requires a second argument
        if (argv.length > index + 1 && "-outfile".equals(argv[index])) {
            index++;
            historyOutFile = argv[index++];
        }
        // "-format" is optional, but if specified required a second argument
        if (argv.length > index + 1 && "-format".equals(argv[index])) {
            index++;
            historyOutFormat = argv[index++];
        }
        // Check for any extra arguments that don't belong here
        if (argv.length > index) {
            displayUsage(cmd);
            return exitCode;
        }
    } else if ("-list".equals(cmd)) {
        if (argv.length != 1 && !(argv.length == 2 && "all".equals(argv[1]))) {
            displayUsage(cmd);
            return exitCode;
        }
        if (argv.length == 2 && "all".equals(argv[1])) {
            listAllJobs = true;
        } else {
            listJobs = true;
        }
    } else if ("-kill-task".equals(cmd)) {
        if (argv.length != 2) {
            displayUsage(cmd);
            return exitCode;
        }
        killTask = true;
        taskid = argv[1];
    } else if ("-fail-task".equals(cmd)) {
        if (argv.length != 2) {
            displayUsage(cmd);
            return exitCode;
        }
        failTask = true;
        taskid = argv[1];
    } else if ("-list-active-trackers".equals(cmd)) {
        if (argv.length != 1) {
            displayUsage(cmd);
            return exitCode;
        }
        listActiveTrackers = true;
    } else if ("-list-blacklisted-trackers".equals(cmd)) {
        if (argv.length != 1) {
            displayUsage(cmd);
            return exitCode;
        }
        listBlacklistedTrackers = true;
    } else if ("-list-attempt-ids".equals(cmd)) {
        if (argv.length != 4) {
            displayUsage(cmd);
            return exitCode;
        }
        jobid = argv[1];
        taskType = argv[2];
        taskState = argv[3];
        displayTasks = true;
        if (!taskTypes.contains(org.apache.hadoop.util.StringUtils.toUpperCase(taskType))) {
            System.out.println("Error: Invalid task-type: " + taskType);
            displayUsage(cmd);
            return exitCode;
        }
        if (!taskStates.contains(org.apache.hadoop.util.StringUtils.toLowerCase(taskState))) {
            System.out.println("Error: Invalid task-state: " + taskState);
            displayUsage(cmd);
            return exitCode;
        }
    } else if ("-logs".equals(cmd)) {
        if (argv.length == 2 || argv.length == 3) {
            logs = true;
            jobid = argv[1];
            if (argv.length == 3) {
                taskid = argv[2];
            } else {
                taskid = null;
            }
        } else {
            displayUsage(cmd);
            return exitCode;
        }
    } else if ("-config".equals(cmd)) {
        downloadConfig = true;
        if (argv.length != 3) {
            displayUsage(cmd);
            return exitCode;
        }
        jobid = argv[1];
        configOutFile = argv[2];
    } else {
        displayUsage(cmd);
        return exitCode;
    }
    // initialize cluster
    cluster = createCluster();
    // Submit the request
    try {
        if (submitJobFile != null) {
            Job job = Job.getInstance(new JobConf(submitJobFile));
            job.submit();
            System.out.println("Created job " + job.getJobID());
            exitCode = 0;
        } else if (getStatus) {
            Job job = getJob(JobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                Counters counters = job.getCounters();
                System.out.println();
                System.out.println(job);
                if (counters != null) {
                    System.out.println(counters);
                } else {
                    System.out.println("Counters not available. Job is retired.");
                }
                exitCode = 0;
            }
        } else if (getCounter) {
            Job job = getJob(JobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                Counters counters = job.getCounters();
                if (counters == null) {
                    System.out.println("Counters not available for retired job " + jobid);
                    exitCode = -1;
                } else {
                    System.out.println(getCounter(counters, counterGroupName, counterName));
                    exitCode = 0;
                }
            }
        } else if (killJob) {
            Job job = getJob(JobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                JobStatus jobStatus = job.getStatus();
                if (jobStatus.getState() == JobStatus.State.FAILED) {
                    System.out.println("Could not mark the job " + jobid + " as killed, as it has already failed.");
                    exitCode = -1;
                } else if (jobStatus.getState() == JobStatus.State.KILLED) {
                    System.out.println("The job " + jobid + " has already been killed.");
                    exitCode = -1;
                } else if (jobStatus.getState() == JobStatus.State.SUCCEEDED) {
                    System.out.println("Could not kill the job " + jobid + ", as it has already succeeded.");
                    exitCode = -1;
                } else {
                    job.killJob();
                    System.out.println("Killed job " + jobid);
                    exitCode = 0;
                }
            }
        } else if (setJobPriority) {
            Job job = getJob(JobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                if (jp != null) {
                    job.setPriority(jp);
                } else {
                    job.setPriorityAsInteger(jpvalue);
                }
                System.out.println("Changed job priority.");
                exitCode = 0;
            }
        } else if (viewHistory) {
            // it's a Job ID
            if (historyFileOrJobId.endsWith(".jhist")) {
                viewHistory(historyFileOrJobId, viewAllHistory, historyOutFile, historyOutFormat);
                exitCode = 0;
            } else {
                Job job = getJob(JobID.forName(historyFileOrJobId));
                if (job == null) {
                    System.out.println("Could not find job " + jobid);
                } else {
                    String historyUrl = job.getHistoryUrl();
                    if (historyUrl == null || historyUrl.isEmpty()) {
                        System.out.println("History file for job " + historyFileOrJobId + " is currently unavailable.");
                    } else {
                        viewHistory(historyUrl, viewAllHistory, historyOutFile, historyOutFormat);
                        exitCode = 0;
                    }
                }
            }
        } else if (listEvents) {
            Job job = getJob(JobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                listEvents(job, fromEvent, nEvents);
                exitCode = 0;
            }
        } else if (listJobs) {
            listJobs(cluster);
            exitCode = 0;
        } else if (listAllJobs) {
            listAllJobs(cluster);
            exitCode = 0;
        } else if (listActiveTrackers) {
            listActiveTrackers(cluster);
            exitCode = 0;
        } else if (listBlacklistedTrackers) {
            listBlacklistedTrackers(cluster);
            exitCode = 0;
        } else if (displayTasks) {
            Job job = getJob(JobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                displayTasks(getJob(JobID.forName(jobid)), taskType, taskState);
                exitCode = 0;
            }
        } else if (killTask) {
            TaskAttemptID taskID = TaskAttemptID.forName(taskid);
            Job job = getJob(taskID.getJobID());
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else if (job.killTask(taskID, false)) {
                System.out.println("Killed task " + taskid);
                exitCode = 0;
            } else {
                System.out.println("Could not kill task " + taskid);
                exitCode = -1;
            }
        } else if (failTask) {
            TaskAttemptID taskID = TaskAttemptID.forName(taskid);
            Job job = getJob(taskID.getJobID());
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else if (job.killTask(taskID, true)) {
                System.out.println("Killed task " + taskID + " by failing it");
                exitCode = 0;
            } else {
                System.out.println("Could not fail task " + taskid);
                exitCode = -1;
            }
        } else if (logs) {
            JobID jobID = JobID.forName(jobid);
            if (getJob(jobID) == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                try {
                    TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskid);
                    LogParams logParams = cluster.getLogParams(jobID, taskAttemptID);
                    LogCLIHelpers logDumper = new LogCLIHelpers();
                    logDumper.setConf(getConf());
                    exitCode = logDumper.dumpAContainersLogs(logParams.getApplicationId(), logParams.getContainerId(), logParams.getNodeId(), logParams.getOwner());
                } catch (IOException e) {
                    if (e instanceof RemoteException) {
                        throw e;
                    }
                    System.out.println(e.getMessage());
                }
            }
        } else if (downloadConfig) {
            Job job = getJob(JobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                String jobFile = job.getJobFile();
                if (jobFile == null || jobFile.isEmpty()) {
                    System.out.println("Config file for job " + jobFile + " could not be found.");
                } else {
                    Path configPath = new Path(jobFile);
                    FileSystem fs = FileSystem.get(getConf());
                    fs.copyToLocalFile(configPath, new Path(configOutFile));
                    exitCode = 0;
                }
            }
        }
    } catch (RemoteException re) {
        IOException unwrappedException = re.unwrapRemoteException();
        if (unwrappedException instanceof AccessControlException) {
            System.out.println(unwrappedException.getMessage());
        } else {
            throw re;
        }
    } finally {
        cluster.close();
    }
    return exitCode;
}
Also used : Path(org.apache.hadoop.fs.Path) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) JobPriority(org.apache.hadoop.mapreduce.JobPriority) AccessControlException(org.apache.hadoop.security.AccessControlException) IOException(java.io.IOException) LogParams(org.apache.hadoop.mapreduce.v2.LogParams) JobStatus(org.apache.hadoop.mapreduce.JobStatus) FileSystem(org.apache.hadoop.fs.FileSystem) LogCLIHelpers(org.apache.hadoop.yarn.logaggregation.LogCLIHelpers) Counters(org.apache.hadoop.mapreduce.Counters) Job(org.apache.hadoop.mapreduce.Job) RemoteException(org.apache.hadoop.ipc.RemoteException) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapreduce.JobID)

Example 32 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hadoop by apache.

The class TestMiniMRProxyUser, method ___testInvalidProxyUser:

@Test
public void ___testInvalidProxyUser() throws Exception {
    UserGroupInformation ugi = UserGroupInformation.createProxyUser("u2", UserGroupInformation.getLoginUser());
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

        public Void run() throws Exception {
            try {
                mrRun();
                fail();
            } catch (RemoteException ex) {
            //nop
            } catch (Exception ex) {
                fail();
            }
            return null;
        }
    });
}
Also used : RemoteException(org.apache.hadoop.ipc.RemoteException) RemoteException(org.apache.hadoop.ipc.RemoteException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 33 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

The class ConnectionImplementation, method locateRegionInMeta:

/*
    * Search the hbase:meta table for the HRegionLocation
    * info that contains the table and row we're seeking.
    */
private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, boolean useCache, boolean retry, int replicaId) throws IOException {
    // we already have the region.
    if (useCache) {
        RegionLocations locations = getCachedLocation(tableName, row);
        if (locations != null && locations.getRegionLocation(replicaId) != null) {
            return locations;
        }
    }
    // build the key of the meta region we should be looking for.
    // the extra 9's on the end are necessary to allow "exact" matches
    // without knowing the precise region names.
    byte[] metaKey = HRegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
    Scan s = new Scan();
    s.setReversed(true);
    s.withStartRow(metaKey);
    s.addFamily(HConstants.CATALOG_FAMILY);
    s.setOneRowLimit();
    if (this.useMetaReplicas) {
        s.setConsistency(Consistency.TIMELINE);
    }
    int maxAttempts = (retry ? numTries : 1);
    for (int tries = 0; true; tries++) {
        if (tries >= maxAttempts) {
            throw new NoServerForRegionException("Unable to find region for " + Bytes.toStringBinary(row) + " in " + tableName + " after " + tries + " tries.");
        }
        if (useCache) {
            RegionLocations locations = getCachedLocation(tableName, row);
            if (locations != null && locations.getRegionLocation(replicaId) != null) {
                return locations;
            }
        } else {
            // If we are not supposed to be using the cache, delete any existing cached location
            // so it won't interfere.
            metaCache.clearCache(tableName, row);
        }
        // Query the meta region
        long pauseBase = this.pause;
        try {
            Result regionInfoRow = null;
            s.resetMvccReadPoint();
            try (ReversedClientScanner rcs = new ReversedClientScanner(conf, s, TableName.META_TABLE_NAME, this, rpcCallerFactory, rpcControllerFactory, getMetaLookupPool(), 0)) {
                regionInfoRow = rcs.next();
            }
            if (regionInfoRow == null) {
                throw new TableNotFoundException(tableName);
            }
            // convert the row result into the HRegionLocation we need!
            RegionLocations locations = MetaTableAccessor.getRegionLocations(regionInfoRow);
            if (locations == null || locations.getRegionLocation(replicaId) == null) {
                throw new IOException("HRegionInfo was null in " + tableName + ", row=" + regionInfoRow);
            }
            HRegionInfo regionInfo = locations.getRegionLocation(replicaId).getRegionInfo();
            if (regionInfo == null) {
                throw new IOException("HRegionInfo was null or empty in " + TableName.META_TABLE_NAME + ", row=" + regionInfoRow);
            }
            // possible we got a region of a different table...
            if (!regionInfo.getTable().equals(tableName)) {
                throw new TableNotFoundException("Table '" + tableName + "' was not found, got: " + regionInfo.getTable() + ".");
            }
            if (regionInfo.isSplit()) {
                throw new RegionOfflineException("the only available region for" + " the required row is a split parent," + " the daughters should be online soon: " + regionInfo.getRegionNameAsString());
            }
            if (regionInfo.isOffline()) {
                throw new RegionOfflineException("the region is offline, could" + " be caused by a disable table call: " + regionInfo.getRegionNameAsString());
            }
            ServerName serverName = locations.getRegionLocation(replicaId).getServerName();
            if (serverName == null) {
                throw new NoServerForRegionException("No server address listed " + "in " + TableName.META_TABLE_NAME + " for region " + regionInfo.getRegionNameAsString() + " containing row " + Bytes.toStringBinary(row));
            }
            if (isDeadServer(serverName)) {
                throw new RegionServerStoppedException("hbase:meta says the region " + regionInfo.getRegionNameAsString() + " is managed by the server " + serverName + ", but it is dead.");
            }
            // Instantiate the location
            cacheLocation(tableName, locations);
            return locations;
        } catch (TableNotFoundException e) {
            // from the HTable constructor.
            throw e;
        } catch (IOException e) {
            ExceptionUtil.rethrowIfInterrupt(e);
            if (e instanceof RemoteException) {
                e = ((RemoteException) e).unwrapRemoteException();
            }
            if (e instanceof CallQueueTooBigException) {
                // Give a special check on CallQueueTooBigException, see #HBASE-17114
                pauseBase = this.pauseForCQTBE;
            }
            if (tries < maxAttempts - 1) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("locateRegionInMeta parentTable=" + TableName.META_TABLE_NAME + ", metaLocation=" + ", attempt=" + tries + " of " + maxAttempts + " failed; retrying after sleep of " + ConnectionUtils.getPauseTime(pauseBase, tries) + " because: " + e.getMessage());
                }
            } else {
                throw e;
            }
            // Only relocate the parent region if necessary
            if (!(e instanceof RegionOfflineException || e instanceof NoServerForRegionException)) {
                relocateRegion(TableName.META_TABLE_NAME, metaKey, replicaId);
            }
        }
        try {
            Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
        } catch (InterruptedException e) {
            throw new InterruptedIOException("Giving up trying to location region in " + "meta: thread is interrupted.");
        }
    }
}
Also used : RegionLocations(org.apache.hadoop.hbase.RegionLocations) InterruptedIOException(java.io.InterruptedIOException) CallQueueTooBigException(org.apache.hadoop.hbase.CallQueueTooBigException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) ServerName(org.apache.hadoop.hbase.ServerName) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 34 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

The class AssignmentManager, method retrySendRegionOpen:

/**
   * At master failover, for pending_open region, make sure
   * sendRegionOpen RPC call is sent to the target regionserver
   */
private void retrySendRegionOpen(final RegionState regionState) {
    this.executorService.submit(new EventHandler(server, EventType.M_MASTER_RECOVERY) {

        @Override
        public void process() throws IOException {
            HRegionInfo hri = regionState.getRegion();
            ServerName serverName = regionState.getServerName();
            ReentrantLock lock = locker.acquireLock(hri.getEncodedName());
            try {
                for (int i = 1; i <= maximumAttempts; i++) {
                    if (!serverManager.isServerOnline(serverName) || server.isStopped() || server.isAborted()) {
                        // No need any more
                        return;
                    }
                    try {
                        if (!regionState.equals(regionStates.getRegionState(hri))) {
                            // Region is not in the expected state any more
                            return;
                        }
                        List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                        if (shouldAssignFavoredNodes(hri)) {
                            FavoredNodesManager fnm = ((MasterServices) server).getFavoredNodesManager();
                            favoredNodes = fnm.getFavoredNodesWithDNPort(hri);
                        }
                        serverManager.sendRegionOpen(serverName, hri, favoredNodes);
                        // we're done
                        return;
                    } catch (Throwable t) {
                        if (t instanceof RemoteException) {
                            t = ((RemoteException) t).unwrapRemoteException();
                        }
                        if (t instanceof FailedServerException && i < maximumAttempts) {
                            // retry too soon. Retry after the failed_server_expiry time
                            try {
                                Configuration conf = this.server.getConfiguration();
                                long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
                                if (LOG.isDebugEnabled()) {
                                    LOG.debug(serverName + " is on failed server list; waiting " + sleepTime + "ms", t);
                                }
                                Thread.sleep(sleepTime);
                                continue;
                            } catch (InterruptedException ie) {
                                LOG.warn("Failed to assign " + hri.getRegionNameAsString() + " since interrupted", ie);
                                regionStates.updateRegionState(hri, State.FAILED_OPEN);
                                Thread.currentThread().interrupt();
                                return;
                            }
                        }
                        if (serverManager.isServerOnline(serverName) && t instanceof java.net.SocketTimeoutException) {
                            // reset the try count
                            i--;
                        } else {
                            LOG.info("Got exception in retrying sendRegionOpen for " + regionState + "; try=" + i + " of " + maximumAttempts, t);
                        }
                        Threads.sleep(100);
                    }
                }
                // Run out of attempts
                regionStates.updateRegionState(hri, State.FAILED_OPEN);
            } finally {
                lock.unlock();
            }
        }
    });
}
Also used : ReentrantLock(java.util.concurrent.locks.ReentrantLock) Configuration(org.apache.hadoop.conf.Configuration) EventHandler(org.apache.hadoop.hbase.executor.EventHandler) FavoredNodesManager(org.apache.hadoop.hbase.favored.FavoredNodesManager) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) List(java.util.List) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 35 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

the class AssignmentManager method assign.

/**
   * Bulk assign regions to <code>destination</code>.
   * <p>
   * Per-region locks are taken for every region up front; regions that cannot be
   * forced offline (or whose last host is a dead, not-yet-processed server) are
   * dropped from the bulk RPC, have their lock released immediately, and are
   * retried individually via {@code invokeAssign} afterwards. The single bulk
   * OPEN RPC is retried up to {@code maximumAttempts} times, with the attempt
   * counter reset for transient conditions (server not yet up, socket timeout
   * against a still-online server).
   * @param destination server to receive the regions
   * @param regions Regions to assign.
   * @return true if successful; false if the master is stopping, the destination
   *         region server is gone, or the OPEN RPC ultimately failed
   * @throws InterruptedException if interrupted while sleeping between retries
   */
boolean assign(final ServerName destination, final List<HRegionInfo> regions) throws InterruptedException {
    long startTime = EnvironmentEdgeManager.currentTime();
    try {
        int regionCount = regions.size();
        if (regionCount == 0) {
            // Nothing to do; vacuously successful.
            return true;
        }
        LOG.info("Assigning " + regionCount + " region(s) to " + destination.toString());
        Set<String> encodedNames = new HashSet<>(regionCount);
        for (HRegionInfo region : regions) {
            encodedNames.add(region.getEncodedName());
        }
        List<HRegionInfo> failedToOpenRegions = new ArrayList<>();
        // Acquire one lock per region (keyed by encoded name) so no other
        // assignment path mutates these regions' states while we build plans.
        Map<String, Lock> locks = locker.acquireLocks(encodedNames);
        try {
            Map<String, RegionPlan> plans = new HashMap<>(regionCount);
            List<RegionState> states = new ArrayList<>(regionCount);
            for (HRegionInfo region : regions) {
                String encodedName = region.getEncodedName();
                // NOTE(review): regions that ARE disabled/disabling-in-RIT are
                // silently skipped (fall through to the lock release below) —
                // presumably handled by the disable flow; confirm upstream.
                if (!isDisabledorDisablingRegionInRIT(region)) {
                    RegionState state = forceRegionStateToOffline(region, false);
                    boolean onDeadServer = false;
                    if (state != null) {
                        if (regionStates.wasRegionOnDeadServer(encodedName)) {
                            // Dead-server regions are left for server-shutdown
                            // handling; don't double-assign them here.
                            LOG.info("Skip assigning " + region.getRegionNameAsString() + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName) + " is dead but not processed yet");
                            onDeadServer = true;
                        } else {
                            // Region successfully forced offline: include it in
                            // the bulk RPC and keep its lock until the finally.
                            RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
                            plans.put(encodedName, plan);
                            states.add(state);
                            continue;
                        }
                    }
                    // Reassign if the region wasn't on a dead server
                    if (!onDeadServer) {
                        LOG.info("failed to force region state to offline, " + "will reassign later: " + region);
                        // assign individually later
                        failedToOpenRegions.add(region);
                    }
                }
                // Release the lock, this region is excluded from bulk assign because
                // we can't update its state, or set its znode to offline.
                // (Removed from the map so the finally below won't unlock twice.)
                Lock lock = locks.remove(encodedName);
                lock.unlock();
            }
            if (server.isStopped()) {
                // Master shutting down; abandon the bulk assign.
                return false;
            }
            // Add region plans, so we can updateTimers when one region is opened so
            // that unnecessary timeout on RIT is reduced.
            this.addPlans(plans);
            // Build (region, favoredNodes) pairs for the bulk OPEN RPC and mark
            // each region PENDING_OPEN on the destination.
            List<Pair<HRegionInfo, List<ServerName>>> regionOpenInfos = new ArrayList<>(states.size());
            for (RegionState state : states) {
                HRegionInfo region = state.getRegion();
                regionStates.updateRegionState(region, State.PENDING_OPEN, destination);
                List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                if (shouldAssignFavoredNodes(region)) {
                    favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
                }
                regionOpenInfos.add(new Pair<>(region, favoredNodes));
            }
            // Move on to open regions.
            try {
                // Send OPEN RPC. If it fails on a IOE or RemoteException,
                // regions will be assigned individually.
                Configuration conf = server.getConfiguration();
                // Deadline for tolerating ServerNotRunningYetException; retries
                // past this point rethrow instead of waiting.
                long maxWaitTime = System.currentTimeMillis() + conf.getLong("hbase.regionserver.rpc.startup.waittime", 60000);
                for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
                    try {
                        List<RegionOpeningState> regionOpeningStateList = serverManager.sendRegionOpen(destination, regionOpenInfos);
                        // One opening state per submitted region, in order.
                        for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
                            RegionOpeningState openingState = regionOpeningStateList.get(k);
                            if (openingState != RegionOpeningState.OPENED) {
                                HRegionInfo region = regionOpenInfos.get(k).getFirst();
                                LOG.info("Got opening state " + openingState + ", will reassign later: " + region);
                                // Failed opening this region, reassign it later
                                forceRegionStateToOffline(region, true);
                                failedToOpenRegions.add(region);
                            }
                        }
                        // RPC itself succeeded; stop retrying.
                        break;
                    } catch (IOException e) {
                        // Unwrap so the instanceof checks below see the real
                        // server-side exception type.
                        if (e instanceof RemoteException) {
                            e = ((RemoteException) e).unwrapRemoteException();
                        }
                        if (e instanceof RegionServerStoppedException) {
                            LOG.warn("The region server was shut down, ", e);
                            // No need to retry, the region server is a goner.
                            return false;
                        } else if (e instanceof ServerNotRunningYetException) {
                            long now = System.currentTimeMillis();
                            if (now < maxWaitTime) {
                                if (LOG.isDebugEnabled()) {
                                    LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", e);
                                }
                                Thread.sleep(100);
                                // reset the try count
                                i--;
                                continue;
                            }
                            // Past the startup deadline: fall through and rethrow.
                        } else if (e instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(destination)) {
                            // open the region on the same server.
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Bulk assigner openRegion() to " + destination + " has timed out, but the regions might" + " already be opened on it.", e);
                            }
                            // wait and reset the re-try count, server might be just busy.
                            Thread.sleep(100);
                            i--;
                            continue;
                        } else if (e instanceof FailedServerException && i < maximumAttempts) {
                            // In case the server is in the failed server list, no point to
                            // retry too soon. Retry after the failed_server_expiry time
                            // (this branch does NOT reset i, so attempts are bounded).
                            long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
                            if (LOG.isDebugEnabled()) {
                                LOG.debug(destination + " is on failed server list; waiting " + sleepTime + "ms", e);
                            }
                            Thread.sleep(sleepTime);
                            continue;
                        }
                        // Not a recognized transient condition: escalate to the
                        // outer catch, which offlines everything and bails out.
                        throw e;
                    }
                }
            } catch (IOException e) {
                // Can be a socket timeout, EOF, NoRouteToHost, etc
                LOG.info("Unable to communicate with " + destination + " in order to assign regions, ", e);
                // Force every planned region back offline so later assignment
                // attempts start from a clean state.
                for (RegionState state : states) {
                    HRegionInfo region = state.getRegion();
                    forceRegionStateToOffline(region, true);
                }
                return false;
            }
        } finally {
            // Release the locks still held for the regions that were included
            // in the bulk RPC (excluded regions were unlocked in the loop).
            for (Lock lock : locks.values()) {
                lock.unlock();
            }
        }
        // Regions that missed the bulk RPC (or whose OPEN failed) get retried
        // one-by-one, unless they came online in the meantime.
        if (!failedToOpenRegions.isEmpty()) {
            for (HRegionInfo region : failedToOpenRegions) {
                if (!regionStates.isRegionOnline(region)) {
                    invokeAssign(region);
                }
            }
        }
        // wait for assignment completion
        // (System-table regions are excluded from the wait set.)
        ArrayList<HRegionInfo> userRegionSet = new ArrayList<>(regions.size());
        for (HRegionInfo region : regions) {
            if (!region.getTable().isSystemTable()) {
                userRegionSet.add(region);
            }
        }
        if (!waitForAssignment(userRegionSet, true, userRegionSet.size(), System.currentTimeMillis())) {
            LOG.debug("some user regions are still in transition: " + userRegionSet);
        }
        LOG.debug("Bulk assigning done for " + destination);
        // NOTE(review): returns true even when some regions were handed off to
        // individual reassignment above — "successful" means the bulk pass
        // completed, not that every region is open.
        return true;
    } finally {
        // Record wall-clock duration of the whole bulk assign in metrics,
        // regardless of outcome.
        metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTime() - startTime);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) HashSet(java.util.HashSet) Pair(org.apache.hadoop.hbase.util.Pair) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock) ServerName(org.apache.hadoop.hbase.ServerName) RegionOpeningState(org.apache.hadoop.hbase.regionserver.RegionOpeningState) RemoteException(org.apache.hadoop.ipc.RemoteException)

Aggregations

RemoteException (org.apache.hadoop.ipc.RemoteException)99 IOException (java.io.IOException)53 Test (org.junit.Test)39 Path (org.apache.hadoop.fs.Path)36 Configuration (org.apache.hadoop.conf.Configuration)20 FileNotFoundException (java.io.FileNotFoundException)19 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)13 FileSystem (org.apache.hadoop.fs.FileSystem)12 InterruptedIOException (java.io.InterruptedIOException)10 AccessControlException (org.apache.hadoop.security.AccessControlException)10 ServerName (org.apache.hadoop.hbase.ServerName)9 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)8 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)8 FileAlreadyExistsException (org.apache.hadoop.fs.FileAlreadyExistsException)7 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)7 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)7 EOFException (java.io.EOFException)6 ArrayList (java.util.ArrayList)6 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)6 HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)6