Search in sources :

Example 1 with DAGClientAMProtocolBlockingPB

use of org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB in project tez by apache.

The method getAMProxy of the class TezClientUtils.

/**
 * Creates an RPC proxy for the Tez AM listening on the given host and port.
 *
 * <p>A remote-user UGI named after the current user is created. When a client-to-AM token is
 * supplied it is converted for the AM's socket address and added to that UGI. The proxy itself
 * is created inside {@code doAs} on that UGI, after registering {@link ProtobufRpcEngine} as
 * the protocol engine for {@link DAGClientAMProtocolBlockingPB} on {@code conf}.
 *
 * @param conf configuration used to register the RPC engine and to build the proxy
 * @param amHost host name of the AM
 * @param amRpcPort RPC port of the AM
 * @param clientToAMToken YARN client-to-AM token, or {@code null} to connect without a token
 * @return the proxy returned by {@code RPC.getProxy}
 * @throws IOException if the proxy cannot be created, or if the calling thread is interrupted
 *         while creating it (the thread's interrupt status is restored before throwing)
 */
@Private
public static DAGClientAMProtocolBlockingPB getAMProxy(final Configuration conf, String amHost, int amRpcPort, org.apache.hadoop.yarn.api.records.Token clientToAMToken) throws IOException {
    final InetSocketAddress serviceAddr = NetUtils.createSocketAddrForHost(amHost, amRpcPort);
    UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(UserGroupInformation.getCurrentUser().getUserName());
    if (clientToAMToken != null) {
        Token<ClientToAMTokenIdentifier> token = ConverterUtils.convertFromYarn(clientToAMToken, serviceAddr);
        userUgi.addToken(token);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Connecting to Tez AM at " + serviceAddr);
    }
    try {
        return userUgi.doAs(new PrivilegedExceptionAction<DAGClientAMProtocolBlockingPB>() {

            @Override
            public DAGClientAMProtocolBlockingPB run() throws IOException {
                RPC.setProtocolEngine(conf, DAGClientAMProtocolBlockingPB.class, ProtobufRpcEngine.class);
                return (DAGClientAMProtocolBlockingPB) RPC.getProxy(DAGClientAMProtocolBlockingPB.class, 0, serviceAddr, conf);
            }
        });
    } catch (InterruptedException e) {
        // The interruption is reported as an IOException, so re-assert the interrupt status;
        // otherwise code further up the stack cannot tell the thread was asked to stop.
        Thread.currentThread().interrupt();
        throw new IOException("Failed to connect to AM", e);
    }
}
Also used : ClientToAMTokenIdentifier(org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier) DAGClientAMProtocolBlockingPB(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB) InetSocketAddress(java.net.InetSocketAddress) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) IOException(java.io.IOException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Private(org.apache.hadoop.classification.InterfaceAudience.Private)

Example 2 with DAGClientAMProtocolBlockingPB

use of org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB in project tez by apache.

The method startClientHeartbeat of the class TezClient.

/**
 * Starts the background task that periodically heartbeats to the AM.
 *
 * <p>Nothing is started in local mode or when the configured keep-alive timeout is not positive.
 * Otherwise a single daemon thread is created and {@code sendAMHeartbeat} is scheduled on it
 * with a fixed delay equal to the computed poll period.
 */
private void startClientHeartbeat() {
    final long keepAliveTimeoutMillis = TezCommonUtils.getAMClientHeartBeatTimeoutMillis(amConfig.getTezConfiguration());
    // NOTE(review): the original comment says polling happens at a minimum interval of one
    // second; that floor is presumably applied inside the helper, which is not visible here.
    final long heartbeatPeriodMillis = TezCommonUtils.getAMClientHeartBeatPollIntervalMillis(amConfig.getTezConfiguration(), keepAliveTimeoutMillis, 10);
    final boolean localMode = amConfig.getTezConfiguration().getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
    if (localMode || keepAliveTimeoutMillis <= 0) {
        return;
    }

    amKeepAliveService = Executors.newSingleThreadScheduledExecutor(
        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("AMKeepAliveThread #%d").build());

    final Runnable heartbeatTask = new Runnable() {

        // Proxy handed back by the previous heartbeat and passed into the next one.
        private DAGClientAMProtocolBlockingPB proxy;

        @Override
        public void run() {
            proxy = sendAMHeartbeat(proxy);
        }
    };
    amKeepAliveService.scheduleWithFixedDelay(heartbeatTask, heartbeatPeriodMillis, heartbeatPeriodMillis, TimeUnit.MILLISECONDS);
}
Also used : DAGClientAMProtocolBlockingPB(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder)

Example 3 with DAGClientAMProtocolBlockingPB

use of org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB in project tez by apache.

The method getAppMasterStatus of the class TezClient.

/**
 * Reports the status of the App Master that is executing the DAG.
 * In session mode this is the App Master hosting the session; in non-session mode it is the
 * App Master of the most recently submitted DAG.
 *
 * @return the App Master status; {@code INITIALIZING} is also returned when the application
 *         is running but its status could not be fetched from the AM
 * @throws TezException if the application report lookup fails with a YARN error
 * @throws IOException declared by the calls made while looking up the status
 */
public synchronized TezAppMasterStatus getAppMasterStatus() throws TezException, IOException {
    // The per-DAG app master case is supported as well: callers may run the same code in
    // that mode, and it is easy to give them a correct view there too.
    final ApplicationId appId = isSession ? sessionAppId : lastSubmittedAppId;
    Preconditions.checkState(appId != null, "Cannot get status without starting an application");
    try {
        final ApplicationReport report = frameworkClient.getApplicationReport(appId);
        switch (report.getYarnApplicationState()) {
            case NEW:
            case NEW_SAVING:
            case ACCEPTED:
            case SUBMITTED:
                return TezAppMasterStatus.INITIALIZING;
            case FAILED:
            case KILLED: {
                final String reportedDiagnostics = report.getDiagnostics();
                diagnostics = reportedDiagnostics;
                LOG.info("App did not succeed. Diagnostics: " + (reportedDiagnostics != null ? reportedDiagnostics : NO_CLUSTER_DIAGNOSTICS_MSG));
                return TezAppMasterStatus.SHUTDOWN;
            }
            case FINISHED:
                return TezAppMasterStatus.SHUTDOWN;
            case RUNNING:
                try {
                    final DAGClientAMProtocolBlockingPB amProxy = getAMProxy(appId);
                    if (amProxy == null) {
                        return TezAppMasterStatus.INITIALIZING;
                    }
                    final GetAMStatusResponseProto amStatus = amProxy.getAMStatus(null, GetAMStatusRequestProto.newBuilder().build());
                    return DagTypeConverters.convertTezAppMasterStatusFromProto(amStatus.getStatus());
                } catch (TezException | ServiceException e) {
                    // Best effort: log and fall through to the INITIALIZING result below.
                    LOG.info("Failed to retrieve AM Status via proxy", e);
                }
                break;
            default:
                // Any other state falls through to the INITIALIZING result below.
                break;
        }
    } catch (ApplicationNotFoundException e) {
        return TezAppMasterStatus.SHUTDOWN;
    } catch (YarnException e) {
        throw new TezException(e);
    }
    return TezAppMasterStatus.INITIALIZING;
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) TezException(org.apache.tez.dag.api.TezException) DAGClientAMProtocolBlockingPB(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB) ServiceException(com.google.protobuf.ServiceException) ApplicationNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) GetAMStatusResponseProto(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetAMStatusResponseProto) YarnException(org.apache.hadoop.yarn.exceptions.YarnException)

Example 4 with DAGClientAMProtocolBlockingPB

use of org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB in project tez by apache.

The method stop of the class TezClient.

/**
 * Stop the client. This closes the framework client used to talk to the YARN cluster.
 * If a session was started, the session DAG App Master is asked to shut down through its
 * proxy; when that request cannot be delivered, the application is killed via YARN instead.
 *
 * <p>Unless asynchronous stop is configured, a successful shutdown request is followed by a
 * wait (polling once a second, bounded by the hard-kill timeout) for the application to reach
 * a terminal state, after which the application is killed if it still has not terminated.
 * If the calling thread is interrupted during that wait, the wait ends early and the thread's
 * interrupt status is restored before this method returns.
 *
 * @throws TezException if a YARN call fails while shutting down or killing the session
 * @throws IOException declared by the calls made while stopping the client
 */
public synchronized void stop() throws TezException, IOException {
    // Set when the wait loop below is interrupted. The interrupt status is re-asserted only in
    // the outer finally so the remaining kill/close calls run with the thread state they had
    // before this change.
    boolean interrupted = false;
    try {
        if (amKeepAliveService != null) {
            amKeepAliveService.shutdownNow();
        }
        if (sessionStarted.get()) {
            LOG.info("Shutting down Tez Session" + ", sessionName=" + clientName + ", applicationId=" + sessionAppId);
            sessionStopped.set(true);
            boolean sessionShutdownSuccessful = false;
            try {
                DAGClientAMProtocolBlockingPB proxy = getAMProxy(sessionAppId);
                if (proxy != null) {
                    ShutdownSessionRequestProto request = ShutdownSessionRequestProto.newBuilder().build();
                    proxy.shutdownSession(null, request);
                    sessionShutdownSuccessful = true;
                    boolean asynchronousStop = amConfig.getTezConfiguration().getBoolean(TezConfiguration.TEZ_CLIENT_ASYNCHRONOUS_STOP, TezConfiguration.TEZ_CLIENT_ASYNCHRONOUS_STOP_DEFAULT);
                    if (!asynchronousStop) {
                        LOG.info("Waiting until application is in a final state");
                        long currentTimeMillis = System.currentTimeMillis();
                        long timeKillIssued = currentTimeMillis;
                        long killTimeOut = amConfig.getTezConfiguration().getLong(TezConfiguration.TEZ_CLIENT_HARD_KILL_TIMEOUT_MS, TezConfiguration.TEZ_CLIENT_HARD_KILL_TIMEOUT_MS_DEFAULT);
                        ApplicationReport appReport = frameworkClient.getApplicationReport(sessionAppId);
                        while ((currentTimeMillis < timeKillIssued + killTimeOut) && !isJobInTerminalState(appReport.getYarnApplicationState())) {
                            try {
                                Thread.sleep(1000L);
                            } catch (InterruptedException ie) {
                                // Stop waiting, but remember the interruption so it is not lost.
                                interrupted = true;
                                break;
                            }
                            currentTimeMillis = System.currentTimeMillis();
                            appReport = frameworkClient.getApplicationReport(sessionAppId);
                        }
                        // Timed out or interrupted while the app is still alive: hard kill.
                        if (!isJobInTerminalState(appReport.getYarnApplicationState())) {
                            frameworkClient.killApplication(sessionAppId);
                        }
                    }
                }
            } catch (TezException | ServiceException e) {
                LOG.info("Failed to shutdown Tez Session via proxy", e);
            } catch (ApplicationNotFoundException e) {
                LOG.info("Failed to kill nonexistent application " + sessionAppId, e);
            } catch (YarnException e) {
                throw new TezException(e);
            }
            if (!sessionShutdownSuccessful) {
                LOG.info("Could not connect to AM, killing session via YARN" + ", sessionName=" + clientName + ", applicationId=" + sessionAppId);
                try {
                    frameworkClient.killApplication(sessionAppId);
                } catch (ApplicationNotFoundException e) {
                    LOG.info("Failed to kill nonexistent application " + sessionAppId, e);
                } catch (YarnException e) {
                    throw new TezException(e);
                }
            }
        }
    } finally {
        try {
            if (frameworkClient != null) {
                frameworkClient.close();
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) TezException(org.apache.tez.dag.api.TezException) DAGClientAMProtocolBlockingPB(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB) ServiceException(com.google.protobuf.ServiceException) ApplicationNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) ShutdownSessionRequestProto(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.ShutdownSessionRequestProto)

Example 5 with DAGClientAMProtocolBlockingPB

use of org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB in project tez by apache.

The method submitDAGSession of the class TezClient.

/**
 * Submits a DAG to the running Tez session and returns a client for tracking it.
 *
 * <p>The DAG plan is built together with any pending additional local resources, which are
 * cleared once they have been added to the request. If the serialized request is larger than
 * {@code maxSubmitDAGRequestSizeThroughIPC}, it is written to a file under the session's
 * staging path and a request carrying only that file's path is sent instead. If no AM proxy
 * can be obtained, the session is stopped (best effort) and submission fails.
 *
 * @param dag the DAG to submit
 * @return a client for the submitted DAG
 * @throws TezException if submission fails; a {@code DAGSubmissionTimedOut} is thrown when no
 *         AM proxy could be obtained
 * @throws IOException on I/O failure, or if the calling thread is interrupted while waiting
 *         for the AM proxy (the thread's interrupt status is restored before throwing)
 */
private DAGClient submitDAGSession(DAG dag) throws TezException, IOException {
    Preconditions.checkState(isSession, "submitDAG with additional resources applies to only session mode. " + "In non-session mode please specify all resources in the initial configuration");
    verifySessionStateForSubmission();
    String dagId = null;
    String callerContextStr = "";
    if (dag.getCallerContext() != null) {
        callerContextStr = ", callerContext=" + dag.getCallerContext().contextAsSimpleString();
    }
    LOG.info("Submitting dag to TezSession" + ", sessionName=" + clientName + ", applicationId=" + sessionAppId + ", dagName=" + dag.getName() + callerContextStr);
    // Only FILE resources are accepted; iterating an empty map is a no-op, so no guard is needed.
    for (LocalResource lr : additionalLocalResources.values()) {
        Preconditions.checkArgument(lr.getType() == LocalResourceType.FILE, "LocalResourceType: " + lr.getType() + " is not supported, only " + LocalResourceType.FILE + " is supported");
    }
    Map<String, LocalResource> tezJarResources = getTezJarResources(sessionCredentials);
    DAGPlan dagPlan = TezClientUtils.prepareAndCreateDAGPlan(dag, amConfig, tezJarResources, usingTezArchiveDeploy, sessionCredentials, servicePluginsDescriptor, javaOptsChecker);
    SubmitDAGRequestProto.Builder requestBuilder = SubmitDAGRequestProto.newBuilder();
    requestBuilder.setDAGPlan(dagPlan);
    if (!additionalLocalResources.isEmpty()) {
        requestBuilder.setAdditionalAmResources(DagTypeConverters.convertFromLocalResources(additionalLocalResources));
    }
    additionalLocalResources.clear();
    // if request size exceeds maxSubmitDAGRequestSizeThroughIPC, we serialize them to HDFS
    SubmitDAGRequestProto request = requestBuilder.build();
    if (request.getSerializedSize() > maxSubmitDAGRequestSizeThroughIPC) {
        // The counter suffix gives each oversized request of this client its own file name.
        Path dagPlanPath = new Path(TezCommonUtils.getTezSystemStagingPath(amConfig.getTezConfiguration(), sessionAppId.toString()), TezConstants.TEZ_PB_PLAN_BINARY_NAME + serializedSubmitDAGPlanRequestCounter.incrementAndGet());
        try (FSDataOutputStream fsDataOutputStream = stagingFs.create(dagPlanPath, false)) {
            LOG.info("Send dag plan using YARN local resources since it's too large" + ", dag plan size=" + request.getSerializedSize() + ", max dag plan size through IPC=" + maxSubmitDAGRequestSizeThroughIPC + ", max IPC message size= " + amConfig.getTezConfiguration().getInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH_DEFAULT));
            request.writeTo(fsDataOutputStream);
            // Replace the full request with one that only points at the serialized file.
            request = requestBuilder.clear().setSerializedRequestPath(stagingFs.resolvePath(dagPlanPath).toString()).build();
        }
    }
    DAGClientAMProtocolBlockingPB proxy = null;
    try {
        proxy = waitForProxy();
    } catch (InterruptedException e) {
        // The interruption is reported as an IOException, so re-assert the interrupt status
        // for code further up the stack.
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while trying to create a connection to the AM", e);
    }
    if (proxy == null) {
        try {
            LOG.warn("DAG submission to session timed out, stopping session");
            stop();
        } catch (Throwable t) {
            // Deliberately best effort: the timeout below is the failure reported to the caller.
            LOG.info("Got an exception when trying to stop session", t);
        }
        throw new DAGSubmissionTimedOut("Could not submit DAG to Tez Session" + ", timed out after " + clientTimeout + " seconds");
    }
    try {
        SubmitDAGResponseProto response = proxy.submitDAG(null, request);
        // SubmitDAGResponseProto cannot be mocked
        if (response != null) {
            dagId = response.getDagId();
        }
    } catch (ServiceException e) {
        RPCUtil.unwrapAndThrowException(e);
    }
    LOG.info("Submitted dag to TezSession" + ", sessionName=" + clientName + ", applicationId=" + sessionAppId + ", dagId=" + dagId + ", dagName=" + dag.getName());
    return new DAGClientImpl(sessionAppId, dagId, amConfig.getTezConfiguration(), amConfig.getYarnConfiguration(), frameworkClient);
}
Also used : Path(org.apache.hadoop.fs.Path) DAGClientAMProtocolBlockingPB(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB) DAGSubmissionTimedOut(org.apache.tez.dag.api.DAGSubmissionTimedOut) IOException(java.io.IOException) DAGClientImpl(org.apache.tez.dag.api.client.DAGClientImpl) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) SubmitDAGResponseProto(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGResponseProto) ServiceException(com.google.protobuf.ServiceException) SubmitDAGRequestProto(org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGRequestProto) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream)

Aggregations

DAGClientAMProtocolBlockingPB (org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB): 7, ServiceException (com.google.protobuf.ServiceException): 3, IOException (java.io.IOException): 2, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 2, ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport): 2, ApplicationNotFoundException (org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException): 2, YarnException (org.apache.hadoop.yarn.exceptions.YarnException): 2, TezException (org.apache.tez.dag.api.TezException): 2, ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder): 1, RpcController (com.google.protobuf.RpcController): 1, InetSocketAddress (java.net.InetSocketAddress): 1, PrivilegedExceptionAction (java.security.PrivilegedExceptionAction): 1, Private (org.apache.hadoop.classification.InterfaceAudience.Private): 1, FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1, Path (org.apache.hadoop.fs.Path): 1, UserGroupInformation (org.apache.hadoop.security.UserGroupInformation): 1, ApplicationSubmissionContext (org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext): 1, LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 1, YarnClient (org.apache.hadoop.yarn.client.api.YarnClient): 1, ClientToAMTokenIdentifier (org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier): 1