Example 46 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

the class TestFanOutOneBlockAsyncDFSOutput method testCreateParentFailed.

/**
   * This is important for fencing when recovering from an RS crash.
   */
@Test
public void testCreateParentFailed() throws IOException {
    Path f = new Path("/" + name.getMethodName() + "/test");
    EventLoop eventLoop = EVENT_LOOP_GROUP.next();
    try {
        FanOutOneBlockAsyncDFSOutputHelper.createOutput(FS, f, true, false, (short) 3, FS.getDefaultBlockSize(), eventLoop);
        fail("should fail with parent does not exist");
    } catch (RemoteException e) {
        LOG.info("expected exception caught", e);
        assertTrue(e.unwrapRemoteException() instanceof FileNotFoundException);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) EventLoop(io.netty.channel.EventLoop) FileNotFoundException(java.io.FileNotFoundException) RemoteException(org.apache.hadoop.ipc.RemoteException) Test(org.junit.Test)
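
The test above relies on the fact that a RemoteException carries the server-side exception class and can rebuild a local instance of it via unwrapRemoteException(). Below is a minimal standalone sketch of that unwrap-and-check pattern; the class and the handle() helper are illustrative, not part of the HBase test:

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.ipc.RemoteException;

public class RemoteExceptionUnwrapSketch {
    static void handle(RemoteException re) throws IOException {
        // unwrapRemoteException() reconstructs the original server-side exception
        // when its class is available on the client classpath.
        IOException unwrapped = re.unwrapRemoteException();
        if (unwrapped instanceof FileNotFoundException) {
            // The NameNode reported a missing parent/file; this is the expected failure here.
            System.out.println("Parent path does not exist: " + unwrapped.getMessage());
        } else {
            // Anything else is unexpected; rethrow the unwrapped cause.
            throw unwrapped;
        }
    }
}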

Example 47 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hive by apache.

the class LlapTaskCommunicator method registerRunningTaskAttempt.

@Override
public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec, Map<String, LocalResource> additionalResources, Credentials credentials, boolean credentialsChanged, int priority) {
    super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials, credentialsChanged, priority);
    int dagId = taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId();
    if (currentQueryIdentifierProto == null || (dagId != currentQueryIdentifierProto.getDagIndex())) {
        // TODO HiveQueryId extraction by parsing the Processor payload is ugly. This can be improved
        // once TEZ-2672 is fixed.
        String hiveQueryId;
        try {
            hiveQueryId = extractQueryId(taskSpec);
        } catch (IOException e) {
            throw new RuntimeException("Failed to extract query id from task spec: " + taskSpec, e);
        }
        Preconditions.checkNotNull(hiveQueryId, "Unexpected null query id");
        resetCurrentDag(dagId, hiveQueryId);
    }
    ContainerInfo containerInfo = getContainerInfo(containerId);
    String host;
    int port;
    if (containerInfo != null) {
        synchronized (containerInfo) {
            host = containerInfo.host;
            port = containerInfo.port;
        }
    } else {
        // TODO Handle this properly
        throw new RuntimeException("ContainerInfo not found for container: " + containerId + ", while trying to launch task: " + taskSpec.getTaskAttemptID());
    }
    LlapNodeId nodeId = LlapNodeId.getInstance(host, port);
    registerKnownNode(nodeId);
    entityTracker.registerTaskAttempt(containerId, taskSpec.getTaskAttemptID(), host, port);
    nodesForQuery.add(nodeId);
    sourceStateTracker.registerTaskForStateUpdates(host, port, taskSpec.getInputs());
    FragmentRuntimeInfo fragmentRuntimeInfo;
    try {
        fragmentRuntimeInfo = sourceStateTracker.getFragmentRuntimeInfo(taskSpec.getVertexName(), taskSpec.getTaskAttemptID().getTaskID().getId(), priority);
    } catch (Exception e) {
        LOG.error("Error while trying to get runtimeFragmentInfo for fragmentId={}, containerId={}, currentQI={}, currentQueryId={}", taskSpec.getTaskAttemptID(), containerId, currentQueryIdentifierProto, currentHiveQueryId, e);
        if (e instanceof RuntimeException) {
            throw (RuntimeException) e;
        } else {
            throw new RuntimeException(e);
        }
    }
    SubmitWorkRequestProto requestProto;
    try {
        requestProto = constructSubmitWorkRequest(containerId, taskSpec, fragmentRuntimeInfo, currentHiveQueryId);
    } catch (IOException e) {
        throw new RuntimeException("Failed to construct request", e);
    }
    // Have to register this up front right now. Otherwise, it's possible for the task to start
    // sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
    getContext().taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
    communicator.sendSubmitWork(requestProto, host, port, new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {

        @Override
        public void setResponse(SubmitWorkResponseProto response) {
            if (response.hasSubmissionState()) {
                LlapDaemonProtocolProtos.SubmissionStateProto ss = response.getSubmissionState();
                if (ss.equals(LlapDaemonProtocolProtos.SubmissionStateProto.REJECTED)) {
                    LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Service Busy");
                    getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.EXECUTOR_BUSY, "Service Busy");
                    return;
                }
            } else {
                // This should never happen as server always returns a valid status on success
                throw new RuntimeException("SubmissionState in response is expected!");
            }
            if (response.hasUniqueNodeId()) {
                entityTracker.registerTaskSubmittedToNode(taskSpec.getTaskAttemptID(), response.getUniqueNodeId());
            }
            LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
        }

        @Override
        public void indicateError(Throwable t) {
            Throwable originalError = t;
            if (t instanceof ServiceException) {
                ServiceException se = (ServiceException) t;
                t = se.getCause();
            }
            if (t instanceof RemoteException) {
                // All others from the remote service cause the task to FAIL.
                LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
                processSendError(originalError);
                getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.toString());
            } else {
                // Exception from the RPC layer - communication failure, consider as KILLED / service down.
                if (t instanceof IOException) {
                    LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Communication Error");
                    processSendError(originalError);
                    getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
                } else {
                    // Anything else is a FAIL.
                    LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
                    processSendError(originalError);
                    getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.getMessage());
                }
            }
        }
    });
}
Also used : FragmentRuntimeInfo(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) ServiceException(com.google.protobuf.ServiceException) RetriableException(org.apache.hadoop.ipc.RetriableException) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) RemoteException(org.apache.hadoop.ipc.RemoteException) TezException(org.apache.tez.dag.api.TezException) LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) SubmitWorkRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) LlapProtocolClientProxy(org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy)
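
The indicateError() callback above makes its FAIL-versus-KILL decision by first peeling a protobuf ServiceException down to its cause, then treating a RemoteException (the daemon received and failed the work) differently from a plain IOException (a transport failure, so the work may never have arrived). A minimal sketch of that classification follows; the Outcome enum and the classify() helper are illustrative, not part of the Hive/LLAP API:

import java.io.IOException;
import com.google.protobuf.ServiceException;
import org.apache.hadoop.ipc.RemoteException;

public class SubmitErrorClassifierSketch {
    enum Outcome { FAIL_TASK, KILL_TASK }

    static Outcome classify(Throwable t) {
        if (t instanceof ServiceException) {
            // The protobuf RPC layer wraps the real cause in a ServiceException.
            t = t.getCause();
        }
        if (t instanceof RemoteException) {
            // The daemon processed the request and failed it: fail the task.
            return Outcome.FAIL_TASK;
        }
        if (t instanceof IOException) {
            // Note: RemoteException extends IOException, so this branch only sees
            // plain transport errors; treat them as killed rather than failed.
            return Outcome.KILL_TASK;
        }
        // Anything else is treated as a task failure.
        return Outcome.FAIL_TASK;
    }
}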

Example 48 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project lucene-solr by apache.

the class HdfsLockFactory method obtainLock.

@Override
public Lock obtainLock(Directory dir, String lockName) throws IOException {
    if (!(dir instanceof HdfsDirectory)) {
        throw new UnsupportedOperationException("HdfsLockFactory can only be used with HdfsDirectory subclasses, got: " + dir);
    }
    final HdfsDirectory hdfsDir = (HdfsDirectory) dir;
    final Configuration conf = hdfsDir.getConfiguration();
    final Path lockPath = hdfsDir.getHdfsDirPath();
    final Path lockFile = new Path(lockPath, lockName);
    FSDataOutputStream file = null;
    final FileSystem fs = FileSystem.get(lockPath.toUri(), conf);
    while (true) {
        try {
            if (!fs.exists(lockPath)) {
                boolean success = fs.mkdirs(lockPath);
                if (!success) {
                    throw new RuntimeException("Could not create directory: " + lockPath);
                }
            } else {
                // just to check for safe mode
                fs.mkdirs(lockPath);
            }
            file = fs.create(lockFile, false);
            break;
        } catch (FileAlreadyExistsException e) {
            throw new LockObtainFailedException("Cannot obtain lock file: " + lockFile, e);
        } catch (RemoteException e) {
            if (e.getClassName().equals("org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
                log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
                try {
                    Thread.sleep(5000);
                } catch (InterruptedException e1) {
                    Thread.interrupted();
                }
                continue;
            }
            throw new LockObtainFailedException("Cannot obtain lock file: " + lockFile, e);
        } catch (IOException e) {
            throw new LockObtainFailedException("Cannot obtain lock file: " + lockFile, e);
        } finally {
            IOUtils.closeQuietly(file);
        }
    }
    return new HdfsLock(conf, lockFile);
}
Also used : Path(org.apache.hadoop.fs.Path) FileAlreadyExistsException(org.apache.hadoop.fs.FileAlreadyExistsException) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) RemoteException(org.apache.hadoop.ipc.RemoteException)
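
Examples 48 and 49 share the same safe-mode handling: a RemoteException whose getClassName() names the NameNode's SafeModeException is retried after a pause, anything else is rethrown or wrapped. Here is a minimal standalone sketch of that loop, assuming an illustrative mkdirsWithSafeModeRetry() helper and the same 5 second wait used in the examples:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.RemoteException;

public class SafeModeRetrySketch {
    private static final String SAFE_MODE_EXCEPTION =
        "org.apache.hadoop.hdfs.server.namenode.SafeModeException";

    static void mkdirsWithSafeModeRetry(FileSystem fs, Path dir) throws IOException {
        while (true) {
            try {
                fs.mkdirs(dir);
                return;
            } catch (RemoteException e) {
                if (!SAFE_MODE_EXCEPTION.equals(e.getClassName())) {
                    // Not a safe-mode problem; let the caller deal with it.
                    throw e;
                }
                try {
                    // The NameNode is still in safe mode; wait and try again.
                    Thread.sleep(5000);
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted while waiting for the NameNode to leave safe mode", ie);
                }
            }
        }
    }
}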

Example 49 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project lucene-solr by apache.

the class HdfsUpdateLog method init.

@Override
public void init(UpdateHandler uhandler, SolrCore core) {
    // ulogDir from CoreDescriptor overrides
    String ulogDir = core.getCoreDescriptor().getUlogDir();
    this.uhandler = uhandler;
    synchronized (fsLock) {
        // do not allow moving the tlog dir on reload
        if (fs == null) {
            if (ulogDir != null) {
                dataDir = ulogDir;
            }
            if (dataDir == null || dataDir.length() == 0) {
                dataDir = core.getDataDir();
            }
            if (!core.getDirectoryFactory().isAbsolute(dataDir)) {
                try {
                    dataDir = core.getDirectoryFactory().getDataHome(core.getCoreDescriptor());
                } catch (IOException e) {
                    throw new SolrException(ErrorCode.SERVER_ERROR, e);
                }
            }
            try {
                fs = FileSystem.get(new Path(dataDir).toUri(), getConf());
            } catch (IOException e) {
                throw new SolrException(ErrorCode.SERVER_ERROR, e);
            }
        } else {
            if (debug) {
                log.debug("UpdateHandler init: tlogDir=" + tlogDir + ", next id=" + id, " this is a reopen or double init ... nothing else to do.");
            }
            versionInfo.reload();
            return;
        }
    }
    tlogDir = new Path(dataDir, TLOG_NAME);
    while (true) {
        try {
            if (!fs.exists(tlogDir)) {
                boolean success = fs.mkdirs(tlogDir);
                if (!success) {
                    throw new RuntimeException("Could not create directory:" + tlogDir);
                }
            } else {
                // To check for safe mode
                fs.mkdirs(tlogDir);
            }
            break;
        } catch (RemoteException e) {
            if (e.getClassName().equals("org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
                log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
                try {
                    Thread.sleep(5000);
                } catch (InterruptedException e1) {
                    Thread.interrupted();
                }
                continue;
            }
            throw new RuntimeException("Problem creating directory: " + tlogDir, e);
        } catch (IOException e) {
            throw new RuntimeException("Problem creating directory: " + tlogDir, e);
        }
    }
    tlogFiles = getLogList(fs, tlogDir);
    // add 1 since we will create a new log for the next update
    id = getLastLogId() + 1;
    if (debug) {
        log.debug("UpdateHandler init: tlogDir=" + tlogDir + ", existing tlogs=" + Arrays.asList(tlogFiles) + ", next id=" + id);
    }
    TransactionLog oldLog = null;
    for (String oldLogName : tlogFiles) {
        Path f = new Path(tlogDir, oldLogName);
        try {
            oldLog = new HdfsTransactionLog(fs, f, null, true, tlogDfsReplication);
            // don't remove old logs on startup since more than one may be uncapped
            addOldLog(oldLog, false);
        } catch (Exception e) {
            INIT_FAILED_LOGS_COUNT.incrementAndGet();
            SolrException.log(log, "Failure to open existing log file (non fatal) " + f, e);
            try {
                fs.delete(f, false);
            } catch (IOException e1) {
                throw new RuntimeException(e1);
            }
        }
    }
    // Keep the two newest logs open at startup; at abnormal close more than one may have been left uncapped.
    for (TransactionLog ll : logs) {
        if (newestLogsOnStartup.size() < 2) {
            newestLogsOnStartup.addFirst(ll);
        } else {
            // We're never going to modify old non-recovery logs - no need to hold their output open
            log.info("Closing output for old non-recovery log " + ll);
            ll.closeOutput();
        }
    }
    try {
        versionInfo = new VersionInfo(this, numVersionBuckets);
    } catch (SolrException e) {
        log.error("Unable to use updateLog: " + e.getMessage(), e);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to use updateLog: " + e.getMessage(), e);
    }
    // Pull starting versions and recent deletes from the most recent (possibly non-complete) tlogs.
    try (RecentUpdates startingUpdates = getRecentUpdates()) {
        startingVersions = startingUpdates.getVersions(getNumRecordsToKeep());
        startingOperation = startingUpdates.getLatestOperation();
        // populate the recent deletes list (since we can't get that info from the index)
        for (int i = startingUpdates.deleteList.size() - 1; i >= 0; i--) {
            DeleteUpdate du = startingUpdates.deleteList.get(i);
            oldDeletes.put(new BytesRef(du.id), new LogPtr(-1, du.version));
        }
        // populate recent deleteByQuery commands
        for (int i = startingUpdates.deleteByQueryList.size() - 1; i >= 0; i--) {
            Update update = startingUpdates.deleteByQueryList.get(i);
            List<Object> dbq = (List<Object>) update.log.lookup(update.pointer);
            long version = (Long) dbq.get(1);
            String q = (String) dbq.get(2);
            trackDeleteByQuery(q, version);
        }
    }
    // initialize metrics
    core.getCoreMetricManager().registerMetricProducer(SolrInfoBean.Category.TLOG.toString(), this);
}
Also used : Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) RemoteException(org.apache.hadoop.ipc.RemoteException) FileNotFoundException(java.io.FileNotFoundException) SolrException(org.apache.solr.common.SolrException) AtomicLong(java.util.concurrent.atomic.AtomicLong) ArrayList(java.util.ArrayList) List(java.util.List) BytesRef(org.apache.lucene.util.BytesRef)

Example 50 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

the class AssignmentManager method retrySendRegionClose.

/**
   * At master failover, for a region in pending_close state, make sure the
   * sendRegionClose RPC call is sent to the target regionserver.
   */
private void retrySendRegionClose(final RegionState regionState) {
    this.executorService.submit(new EventHandler(server, EventType.M_MASTER_RECOVERY) {

        @Override
        public void process() throws IOException {
            HRegionInfo hri = regionState.getRegion();
            ServerName serverName = regionState.getServerName();
            ReentrantLock lock = locker.acquireLock(hri.getEncodedName());
            try {
                for (int i = 1; i <= maximumAttempts; i++) {
                    if (!serverManager.isServerOnline(serverName) || server.isStopped() || server.isAborted()) {
                        // No longer needed
                        return;
                    }
                    try {
                        if (!regionState.equals(regionStates.getRegionState(hri))) {
                            // Region is not in the expected state any more
                            return;
                        }
                        serverManager.sendRegionClose(serverName, hri, null);
                        // Done.
                        return;
                    } catch (Throwable t) {
                        if (t instanceof RemoteException) {
                            t = ((RemoteException) t).unwrapRemoteException();
                        }
                        if (t instanceof FailedServerException && i < maximumAttempts) {
                            // The server is on the failed server list; no point retrying too soon. Retry after the failed_server_expiry time.
                            try {
                                Configuration conf = this.server.getConfiguration();
                                long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
                                if (LOG.isDebugEnabled()) {
                                    LOG.debug(serverName + " is on failed server list; waiting " + sleepTime + "ms", t);
                                }
                                Thread.sleep(sleepTime);
                                continue;
                            } catch (InterruptedException ie) {
                                LOG.warn("Failed to unassign " + hri.getRegionNameAsString() + " since interrupted", ie);
                                regionStates.updateRegionState(hri, RegionState.State.FAILED_CLOSE);
                                Thread.currentThread().interrupt();
                                return;
                            }
                        }
                        if (serverManager.isServerOnline(serverName) && t instanceof java.net.SocketTimeoutException) {
                            // reset the try count
                            i--;
                        } else {
                            LOG.info("Got exception in retrying sendRegionClose for " + regionState + "; try=" + i + " of " + maximumAttempts, t);
                        }
                        Threads.sleep(100);
                    }
                }
                // Ran out of attempts
                regionStates.updateRegionState(hri, State.FAILED_CLOSE);
            } finally {
                lock.unlock();
            }
        }
    });
}
Also used : ReentrantLock(java.util.concurrent.locks.ReentrantLock) Configuration(org.apache.hadoop.conf.Configuration) EventHandler(org.apache.hadoop.hbase.executor.EventHandler) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) RemoteException(org.apache.hadoop.ipc.RemoteException)
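
The retry loop above first unwraps the RemoteException to recover the server-side cause, then, for a FailedServerException, waits roughly the failed_server_expiry window before the next attempt. A minimal sketch of that unwrap-and-backoff decision follows; shouldWaitBeforeRetry() is an illustrative helper, not HBase API:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ipc.FailedServerException;
import org.apache.hadoop.hbase.ipc.RpcClient;
import org.apache.hadoop.ipc.RemoteException;

public class RegionCloseRetrySketch {
    // Returns how long to sleep before the next attempt, in milliseconds.
    static long shouldWaitBeforeRetry(Throwable t, Configuration conf) {
        if (t instanceof RemoteException) {
            // Recover the original server-side exception type.
            t = ((RemoteException) t).unwrapRemoteException();
        }
        if (t instanceof FailedServerException) {
            // The RPC client has this server on its failed-server list; retrying
            // sooner than the expiry window would fail again immediately.
            return 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
                RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
        }
        // Other causes can be retried right away (the caller may still apply a small sleep).
        return 0;
    }
}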

Aggregations

RemoteException (org.apache.hadoop.ipc.RemoteException): 99
IOException (java.io.IOException): 53
Test (org.junit.Test): 39
Path (org.apache.hadoop.fs.Path): 36
Configuration (org.apache.hadoop.conf.Configuration): 20
FileNotFoundException (java.io.FileNotFoundException): 19
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 13
FileSystem (org.apache.hadoop.fs.FileSystem): 12
InterruptedIOException (java.io.InterruptedIOException): 10
AccessControlException (org.apache.hadoop.security.AccessControlException): 10
ServerName (org.apache.hadoop.hbase.ServerName): 9
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 8
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 8
FileAlreadyExistsException (org.apache.hadoop.fs.FileAlreadyExistsException): 7
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 7
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 7
EOFException (java.io.EOFException): 6
ArrayList (java.util.ArrayList): 6
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException): 6
HBaseIOException (org.apache.hadoop.hbase.HBaseIOException): 6