Search in sources :

Example 6 with LlapTokenIdentifier

use of org.apache.hadoop.hive.llap.security.LlapTokenIdentifier in project hive by apache.

the class GenericUDTFGetSplits method getSplits.

public InputSplit[] getSplits(JobConf job, int numSplits, TezWork work, Schema schema, ApplicationId applicationId) throws IOException {
    DAG dag = DAG.create(work.getName());
    dag.setCredentials(job.getCredentials());
    DagUtils utils = DagUtils.getInstance();
    Context ctx = new Context(job);
    MapWork mapWork = (MapWork) work.getAllWork().get(0);
    // A bunch of things get set up in the context based on the conf, but we only need
    // the MR tmp directory for the following method.
    JobConf wxConf = utils.initializeVertexConf(job, ctx, mapWork);
    // TODO: should we also whitelist input formats here? from mapred.input.format.class
    Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), job);
    FileSystem fs = scratchDir.getFileSystem(job);
    try {
        LocalResource appJarLr = createJarLocalResource(utils.getExecJarPathLocal(ctx.getConf()), utils, job);
        LlapCoordinator coordinator = LlapCoordinator.getInstance();
        if (coordinator == null) {
            throw new IOException("LLAP coordinator is not initialized; must be running in HS2 with " + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
        }
        // Update the queryId to use the generated applicationId. See comment below about
        // why this is done.
        HiveConf.setVar(wxConf, HiveConf.ConfVars.HIVEQUERYID, applicationId.toString());
        Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, fs, ctx, false, work, work.getVertexType(mapWork), DagUtils.createTezLrMap(appJarLr, null));
        String vertexName = wx.getName();
        dag.addVertex(wx);
        utils.addCredentials(mapWork, dag);
        // We have the DAG; now proceed to get the splits.
        Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
        Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
        HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork);
        List<Event> eventList = splitGenerator.initialize();
        InputSplit[] result = new InputSplit[eventList.size() - 1];
        InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) eventList.get(0);
        List<TaskLocationHint> hints = configureEvent.getLocationHint().getTaskLocationHints();
        Preconditions.checkState(hints.size() == eventList.size() - 1);
        if (LOG.isDebugEnabled()) {
            LOG.debug("NumEvents=" + eventList.size() + ", NumSplits=" + result.length);
        }
        // This assumes LLAP cluster owner is always the HS2 user.
        String llapUser = UserGroupInformation.getLoginUser().getShortUserName();
        String queryUser = null;
        byte[] tokenBytes = null;
        LlapSigner signer = null;
        if (UserGroupInformation.isSecurityEnabled()) {
            signer = coordinator.getLlapSigner(job);
            // 1. Generate the token for query user (applies to all splits).
            queryUser = SessionState.getUserFromAuthenticator();
            if (queryUser == null) {
                queryUser = UserGroupInformation.getCurrentUser().getUserName();
                LOG.warn("Cannot determine the session user; using " + queryUser + " instead");
            }
            LlapTokenLocalClient tokenClient = coordinator.getLocalTokenClient(job, llapUser);
            // We put the query user, not LLAP user, into the message and token.
            Token<LlapTokenIdentifier> token = tokenClient.createToken(applicationId.toString(), queryUser, true);
            LOG.info("Created the token for remote user: {}", token);
            bos.reset();
            token.write(dos);
            tokenBytes = bos.toByteArray();
        } else {
            queryUser = UserGroupInformation.getCurrentUser().getUserName();
        }
        // Generate umbilical token (applies to all splits)
        Token<JobTokenIdentifier> umbilicalToken = JobTokenCreator.createJobToken(applicationId);
        LOG.info("Number of splits: " + (eventList.size() - 1));
        SignedMessage signedSvs = null;
        for (int i = 0; i < eventList.size() - 1; i++) {
            TaskSpec taskSpec = new TaskSpecBuilder().constructTaskSpec(dag, vertexName, eventList.size() - 1, applicationId, i);
            // 2. Generate the vertex/submit information for all events.
            if (i == 0) {
                // The queryId could either be picked up from the current request being processed, or
                // generated. The current request isn't exactly correct since the query is 'done' once we
                // return the results. Generating a new one has the added benefit of working once this
                // is moved out of a UDTF into a proper API.
                // Setting this to the generated AppId which is unique.
                // Despite the differences in TaskSpec, the vertex spec should be the same.
                signedSvs = createSignedVertexSpec(signer, taskSpec, applicationId, queryUser, applicationId.toString());
            }
            SubmitWorkInfo submitWorkInfo = new SubmitWorkInfo(applicationId, System.currentTimeMillis(), taskSpec.getVertexParallelism(), signedSvs.message, signedSvs.signature, umbilicalToken);
            byte[] submitWorkBytes = SubmitWorkInfo.toBytes(submitWorkInfo);
            // 3. Generate input event.
            SignedMessage eventBytes = makeEventBytes(wx, vertexName, eventList.get(i + 1), signer);
            // 4. Make location hints.
            SplitLocationInfo[] locations = makeLocationHints(hints.get(i));
            result[i] = new LlapInputSplit(i, submitWorkBytes, eventBytes.message, eventBytes.signature, locations, schema, llapUser, tokenBytes);
        }
        return result;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) SubmitWorkInfo(org.apache.hadoop.hive.llap.SubmitWorkInfo) LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) SplitLocationInfo(org.apache.hadoop.mapred.SplitLocationInfo) HiveSplitGenerator(org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator) TaskSpecBuilder(org.apache.tez.dag.api.TaskSpecBuilder) LlapSigner(org.apache.hadoop.hive.llap.security.LlapSigner) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) LlapTokenLocalClient(org.apache.hadoop.hive.llap.security.LlapTokenLocalClient) DagUtils(org.apache.hadoop.hive.ql.exec.tez.DagUtils) LlapInputSplit(org.apache.hadoop.hive.llap.LlapInputSplit) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) Context(org.apache.hadoop.hive.ql.Context) Path(org.apache.hadoop.fs.Path) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) SignedMessage(org.apache.hadoop.hive.llap.security.LlapSigner.SignedMessage) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) DAG(org.apache.tez.dag.api.DAG) IOException(java.io.IOException) LlapCoordinator(org.apache.hadoop.hive.llap.coordinator.LlapCoordinator) LoginException(javax.security.auth.login.LoginException) URISyntaxException(java.net.URISyntaxException) UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) Event(org.apache.tez.runtime.api.Event) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)
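
The token-serialization lines above use bos and dos, which are class fields of GenericUDTFGetSplits rather than locals of this method. A minimal self-contained sketch of the same round trip, using only the standard Hadoop Token writable API (the class name LlapTokenBytes is illustrative):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.hive.llap.security.LlapTokenIdentifier;
import org.apache.hadoop.security.token.Token;

// Sketch: serialize a token the way getSplits() does, and read it back.
public final class LlapTokenBytes {

    public static byte[] toBytes(Token<LlapTokenIdentifier> token) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        token.write(new DataOutputStream(bos));
        return bos.toByteArray();
    }

    public static Token<LlapTokenIdentifier> fromBytes(byte[] bytes) throws IOException {
        Token<LlapTokenIdentifier> token = new Token<>();
        token.readFields(new DataInputStream(new ByteArrayInputStream(bytes)));
        return token;
    }
}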

Example 7 with LlapTokenIdentifier

use of org.apache.hadoop.hive.llap.security.LlapTokenIdentifier in project hive by apache.

the class SecretManager method verifyToken.

/**
 * Verifies the token available as serialized bytes.
 */
public void verifyToken(byte[] tokenBytes) throws IOException {
    if (!UserGroupInformation.isSecurityEnabled())
        return;
    if (tokenBytes == null)
        throw new SecurityException("Token required for authentication");
    Token<LlapTokenIdentifier> token = new Token<>();
    token.readFields(new DataInputStream(new ByteArrayInputStream(tokenBytes)));
    verifyToken(token.decodeIdentifier(), token.getPassword());
}
Also used : LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) ByteArrayInputStream(java.io.ByteArrayInputStream) Token(org.apache.hadoop.security.token.Token) DataInputStream(java.io.DataInputStream)
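
A hedged sketch of a caller of verifyToken: the helper name SplitAuth and its wiring are illustrative, not Hive API, while SecretManager.verifyToken and LlapInputSplit.getTokenBytes are the methods shown in these examples.

import java.io.IOException;
import org.apache.hadoop.hive.llap.LlapInputSplit;
import org.apache.hadoop.hive.llap.security.SecretManager;

// Illustrative caller: reject a fragment whose split carries a bad or missing token.
final class SplitAuth {
    static void authenticate(SecretManager sm, LlapInputSplit split) throws IOException {
        // No-op when Kerberos security is off; throws SecurityException on a
        // missing token, and an IOException on an invalid one.
        sm.verifyToken(split.getTokenBytes());
    }
}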

Example 8 with LlapTokenIdentifier

use of org.apache.hadoop.hive.llap.security.LlapTokenIdentifier in project hive by apache.

the class LlapBaseInputFormat method getRecordReader.

@SuppressWarnings("unchecked")
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    LlapInputSplit llapSplit = (LlapInputSplit) split;
    // Set conf to use LLAP user rather than current user for LLAP Zk registry.
    HiveConf.setVar(job, HiveConf.ConfVars.LLAP_ZK_REGISTRY_USER, llapSplit.getLlapUser());
    SubmitWorkInfo submitWorkInfo = SubmitWorkInfo.fromBytes(llapSplit.getPlanBytes());
    LlapServiceInstance serviceInstance = getServiceInstance(job, llapSplit);
    String host = serviceInstance.getHost();
    int llapSubmitPort = serviceInstance.getRpcPort();
    LOG.info("Found service instance for host " + host + " with rpc port " + llapSubmitPort + " and outputformat port " + serviceInstance.getOutputFormatPort());
    byte[] llapTokenBytes = llapSplit.getTokenBytes();
    Token<LlapTokenIdentifier> llapToken = null;
    if (llapTokenBytes != null) {
        DataInputBuffer in = new DataInputBuffer();
        in.reset(llapTokenBytes, 0, llapTokenBytes.length);
        llapToken = new Token<LlapTokenIdentifier>();
        llapToken.readFields(in);
    }
    LlapRecordReaderTaskUmbilicalExternalResponder umbilicalResponder = new LlapRecordReaderTaskUmbilicalExternalResponder();
    LlapTaskUmbilicalExternalClient llapClient = new LlapTaskUmbilicalExternalClient(job, submitWorkInfo.getTokenIdentifier(), submitWorkInfo.getToken(), umbilicalResponder, llapToken);
    int attemptNum = 0;
    // Use task attempt number from conf if provided
    TaskAttemptID taskAttemptId = TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID));
    if (taskAttemptId != null) {
        attemptNum = taskAttemptId.getId();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Setting attempt number to " + attemptNum + " from task attempt ID in conf: " + job.get(MRJobConfig.TASK_ATTEMPT_ID));
        }
    }
    SubmitWorkRequestProto request = constructSubmitWorkRequestProto(submitWorkInfo, llapSplit.getSplitNum(), attemptNum, llapClient.getAddress(), submitWorkInfo.getToken(), llapSplit.getFragmentBytes(), llapSplit.getFragmentBytesSignature(), job);
    llapClient.submitWork(request, host, llapSubmitPort);
    Socket socket = new Socket(host, serviceInstance.getOutputFormatPort());
    LOG.debug("Socket connected");
    SignableVertexSpec vertex = SignableVertexSpec.parseFrom(submitWorkInfo.getVertexBinary());
    String fragmentId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber()).toString();
    OutputStream socketStream = socket.getOutputStream();
    LlapOutputSocketInitMessage.Builder builder = LlapOutputSocketInitMessage.newBuilder().setFragmentId(fragmentId);
    if (llapSplit.getTokenBytes() != null) {
        builder.setToken(ByteString.copyFrom(llapSplit.getTokenBytes()));
    }
    builder.build().writeDelimitedTo(socketStream);
    socketStream.flush();
    LOG.info("Registered id: " + fragmentId);
    @SuppressWarnings("rawtypes") LlapBaseRecordReader recordReader = new LlapBaseRecordReader(socket.getInputStream(), llapSplit.getSchema(), Text.class, job, llapClient, (java.io.Closeable) socket);
    umbilicalResponder.setRecordReader(recordReader);
    return recordReader;
}
Also used : LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) LlapOutputSocketInitMessage(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.LlapOutputSocketInitMessage) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) OutputStream(java.io.OutputStream) LlapServiceInstance(org.apache.hadoop.hive.llap.registry.LlapServiceInstance) ByteString(com.google.protobuf.ByteString) LlapTaskUmbilicalExternalClient(org.apache.hadoop.hive.llap.ext.LlapTaskUmbilicalExternalClient) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) SubmitWorkRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) Socket(java.net.Socket)
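
getRecordReader defaults the attempt number to 0 unless the JobConf carries a parseable task attempt ID. An illustrative sketch of pre-setting one (all ID values below are placeholders):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

// Sketch: make getRecordReader() use attempt number 2 instead of the default 0.
public final class AttemptIdSetup {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Serializes as attempt_<jtIdentifier>_<jobId>_m_<taskId>_<attemptId>.
        TaskAttemptID attempt = new TaskAttemptID("20230101120000", 1, TaskType.MAP, 0, 2);
        job.set(MRJobConfig.TASK_ATTEMPT_ID, attempt.toString());
        // getRecordReader() parses this back and calls getId(), which returns 2 here.
        System.out.println(TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID)).getId());
    }
}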

Example 9 with LlapTokenIdentifier

use of org.apache.hadoop.hive.llap.security.LlapTokenIdentifier in project hive by apache.

the class LlapTokenClient method getDelegationToken.

public Token<LlapTokenIdentifier> getDelegationToken(String appId) throws IOException {
    if (!UserGroupInformation.isSecurityEnabled())
        return null;
    Iterator<LlapServiceInstance> llaps = null;
    if (clientInstance == null) {
        assert client == null;
        llaps = getLlapServices(false).iterator();
        clientInstance = llaps.next();
    }
    ByteString tokenBytes = null;
    boolean hasRefreshed = false;
    while (true) {
        try {
            tokenBytes = getTokenBytes(appId);
            break;
        } catch (IOException | ServiceException ex) {
            LOG.error("Cannot get a token, trying a different instance", ex);
            client = null;
            clientInstance = null;
        }
        if (llaps == null || !llaps.hasNext()) {
            if (hasRefreshed) {
                // Only refresh once.
                throw new RuntimeException("Cannot find any LLAPs to get the token from");
            }
            llaps = getLlapServices(true).iterator();
            hasRefreshed = true;
        }
        clientInstance = llaps.next();
    }
    Token<LlapTokenIdentifier> token = extractToken(tokenBytes);
    if (LOG.isInfoEnabled()) {
        LOG.info("Obtained a LLAP delegation token from " + clientInstance + ": " + token);
    }
    return token;
}
Also used : LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) ServiceException(com.google.protobuf.ServiceException) ByteString(com.google.protobuf.ByteString) LlapServiceInstance(org.apache.hadoop.hive.llap.registry.LlapServiceInstance) IOException(java.io.IOException)
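
A hedged usage sketch: fetch the delegation token and attach it to the current UGI so later RPCs can pick it up. The single-Configuration LlapTokenClient constructor is an assumption here.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.llap.security.LlapTokenClient;
import org.apache.hadoop.hive.llap.security.LlapTokenIdentifier;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;

// Sketch: fetch an LLAP delegation token and add it to the caller's credentials.
public final class FetchLlapToken {
    public static void attach(Configuration conf, String appId) throws IOException {
        Token<LlapTokenIdentifier> token = new LlapTokenClient(conf).getDelegationToken(appId);
        // getDelegationToken() returns null when security is disabled.
        if (token != null) {
            UserGroupInformation.getCurrentUser().addToken(token);
        }
    }
}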

Example 10 with LlapTokenIdentifier

use of org.apache.hadoop.hive.llap.security.LlapTokenIdentifier in project hive by apache.

the class LlapTokenChecker method getLlapTokens.

static List<LlapTokenIdentifier> getLlapTokens(UserGroupInformation ugi, String clusterId) {
    List<LlapTokenIdentifier> tokens = null;
    for (TokenIdentifier id : ugi.getTokenIdentifiers()) {
        if (!LlapTokenIdentifier.KIND_NAME.equals(id.getKind()))
            continue;
        if (LOG.isDebugEnabled()) {
            LOG.debug("Token {}", id);
        }
        LlapTokenIdentifier llapId = (LlapTokenIdentifier) id;
        if (clusterId != null && !clusterId.equals(llapId.getClusterId()))
            continue;
        if (tokens == null) {
            tokens = new ArrayList<>();
        }
        tokens.add(llapId);
    }
    return tokens;
}
Also used : TokenIdentifier(org.apache.hadoop.security.token.TokenIdentifier) LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier)
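
Note that getLlapTokens returns null rather than an empty list when nothing matches, and it is a package-private static method. A sketch of a guarded caller, assuming it lives in the same package as LlapTokenChecker:

import java.util.List;
import org.apache.hadoop.hive.llap.security.LlapTokenIdentifier;
import org.apache.hadoop.security.UserGroupInformation;

// Sketch (same package as LlapTokenChecker assumed): guard against the null return.
final class TokenLister {
    static void list(UserGroupInformation ugi, String clusterId) {
        List<LlapTokenIdentifier> ids = LlapTokenChecker.getLlapTokens(ugi, clusterId);
        if (ids == null) {
            return;
        }
        for (LlapTokenIdentifier id : ids) {
            System.out.println("LLAP token for cluster " + id.getClusterId());
        }
    }
}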

Aggregations

LlapTokenIdentifier (org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) 16
IOException (java.io.IOException) 5
UserGroupInformation (org.apache.hadoop.security.UserGroupInformation) 5
ByteString (com.google.protobuf.ByteString) 3
LlapCoordinator (org.apache.hadoop.hive.llap.coordinator.LlapCoordinator) 3
Token (org.apache.hadoop.security.token.Token) 3
ServiceException (com.google.protobuf.ServiceException) 2
ByteArrayInputStream (java.io.ByteArrayInputStream) 2
DataInputStream (java.io.DataInputStream) 2
FileNotFoundException (java.io.FileNotFoundException) 2
URISyntaxException (java.net.URISyntaxException) 2
LoginException (javax.security.auth.login.LoginException) 2
FileSystem (org.apache.hadoop.fs.FileSystem) 2
Path (org.apache.hadoop.fs.Path) 2
LlapInputSplit (org.apache.hadoop.hive.llap.LlapInputSplit) 2
SubmitWorkInfo (org.apache.hadoop.hive.llap.SubmitWorkInfo) 2
LlapServiceInstance (org.apache.hadoop.hive.llap.registry.LlapServiceInstance) 2
LlapSigner (org.apache.hadoop.hive.llap.security.LlapSigner) 2
SignedMessage (org.apache.hadoop.hive.llap.security.LlapSigner.SignedMessage) 2
LlapTokenLocalClient (org.apache.hadoop.hive.llap.security.LlapTokenLocalClient) 2