
Example 1 with LlapDaemonInfo

Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in project hive by apache.

The class LlapBaseInputFormat, method getRecordReader.

@SuppressWarnings("unchecked")
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    LlapInputSplit llapSplit = (LlapInputSplit) split;
    // Set conf to use LLAP user rather than current user for LLAP Zk registry.
    HiveConf.setVar(job, HiveConf.ConfVars.LLAP_ZK_REGISTRY_USER, llapSplit.getLlapUser());
    SubmitWorkInfo submitWorkInfo = SubmitWorkInfo.fromBytes(llapSplit.getPlanBytes());
    // llapSplit.getLlapDaemonInfos() will never be empty as of now; this is also validated in GenericUDTFGetSplits while populating.
    final LlapDaemonInfo llapDaemonInfo = llapSplit.getLlapDaemonInfos()[0];
    final String host = llapDaemonInfo.getHost();
    final int outputPort = llapDaemonInfo.getOutputFormatPort();
    final int llapSubmitPort = llapDaemonInfo.getRpcPort();
    LOG.info("Will try to submit request to first Llap Daemon in the split - {}", llapDaemonInfo);
    byte[] llapTokenBytes = llapSplit.getTokenBytes();
    Token<LlapTokenIdentifier> llapToken = null;
    if (llapTokenBytes != null) {
        DataInputBuffer in = new DataInputBuffer();
        in.reset(llapTokenBytes, 0, llapTokenBytes.length);
        llapToken = new Token<LlapTokenIdentifier>();
        llapToken.readFields(in);
    }
    LlapRecordReaderTaskUmbilicalExternalResponder umbilicalResponder = new LlapRecordReaderTaskUmbilicalExternalResponder();
    LlapTaskUmbilicalExternalClient llapClient = new LlapTaskUmbilicalExternalClient(job, submitWorkInfo.getTokenIdentifier(), submitWorkInfo.getToken(), umbilicalResponder, llapToken);
    int attemptNum = 0;
    final int taskNum;
    // Use task attempt number, task number from conf if provided
    TaskAttemptID taskAttemptId = TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID));
    if (taskAttemptId != null) {
        attemptNum = taskAttemptId.getId();
        taskNum = taskAttemptId.getTaskID().getId();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Setting attempt number to: {}, task number to: {} from given taskAttemptId: {} in conf", attemptNum, taskNum, taskAttemptId);
        }
    } else {
        taskNum = llapSplit.getSplitNum();
    }
    SubmitWorkRequestProto request = constructSubmitWorkRequestProto(submitWorkInfo, taskNum, attemptNum, llapClient.getAddress(), submitWorkInfo.getToken(), llapSplit, job);
    SignableVertexSpec vertex = SignableVertexSpec.parseFrom(submitWorkInfo.getVertexBinary());
    String fragmentId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber()).toString();
    LOG.info("Submitting fragment:{} to llap [host = {}, port = {}] ", fragmentId, host, llapSubmitPort);
    llapClient.submitWork(request, host, llapSubmitPort);
    Socket socket = new Socket(host, outputPort);
    OutputStream socketStream = socket.getOutputStream();
    LlapOutputSocketInitMessage.Builder builder = LlapOutputSocketInitMessage.newBuilder().setFragmentId(fragmentId);
    if (llapSplit.getTokenBytes() != null) {
        builder.setToken(ByteString.copyFrom(llapSplit.getTokenBytes()));
    }
    LOG.info("Registering fragment:{} to llap [host = {}, output port = {}] to read output", fragmentId, host, outputPort);
    builder.build().writeDelimitedTo(socketStream);
    socketStream.flush();
    LOG.info("Registered id: " + fragmentId);
    @SuppressWarnings("rawtypes") LlapBaseRecordReader recordReader;
    if (useArrow) {
        if (allocator != null) {
            // Client provided their own allocator
            recordReader = new LlapArrowBatchRecordReader(socket.getInputStream(), llapSplit.getSchema(), ArrowWrapperWritable.class, job, llapClient, socket, allocator);
        } else {
            // Client did not provide their own allocator, use constructor for global allocator
            recordReader = new LlapArrowBatchRecordReader(socket.getInputStream(), llapSplit.getSchema(), ArrowWrapperWritable.class, job, llapClient, socket, arrowAllocatorLimit);
        }
    } else {
        recordReader = new LlapBaseRecordReader(socket.getInputStream(), llapSplit.getSchema(), BytesWritable.class, job, llapClient, (java.io.Closeable) socket);
    }
    umbilicalResponder.setRecordReader(recordReader);
    return recordReader;
}
Also used : LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) LlapOutputSocketInitMessage(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.LlapOutputSocketInitMessage) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) OutputStream(java.io.OutputStream) ArrowWrapperWritable(org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable) ByteString(com.google.protobuf.ByteString) LlapTaskUmbilicalExternalClient(org.apache.hadoop.hive.llap.ext.LlapTaskUmbilicalExternalClient) SubmitWorkRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) LlapDaemonInfo(org.apache.hadoop.hive.llap.ext.LlapDaemonInfo) BytesWritable(org.apache.hadoop.io.BytesWritable) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) Socket(java.net.Socket)
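
For context, the reader returned above is consumed through the standard org.apache.hadoop.mapred.RecordReader contract. The following is a minimal consumer sketch, not part of the example: how the InputSplit is obtained (typically from the get_splits() UDTF) and the no-arg LlapBaseInputFormat constructor are assumptions here.

import org.apache.hadoop.hive.llap.LlapBaseInputFormat;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class LlapSplitConsumerSketch {
    // Hypothetical helper: read every row of one split through the reader built by
    // getRecordReader() above. Key/value types match the non-Arrow branch
    // (NullWritable key, BytesWritable value).
    public static void readSplit(InputSplit split, JobConf job) throws java.io.IOException {
        LlapBaseInputFormat<BytesWritable> inputFormat = new LlapBaseInputFormat<>();
        RecordReader<NullWritable, BytesWritable> reader =
            inputFormat.getRecordReader(split, job, Reporter.NULL);
        try {
            NullWritable key = reader.createKey();
            BytesWritable value = reader.createValue();
            // Each next() call pulls one row from the daemon's output-format socket.
            while (reader.next(key, value)) {
                System.out.println("row bytes: " + value.getLength());
            }
        } finally {
            // Closes the socket and shuts down the umbilical client.
            reader.close();
        }
    }
}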

Example 2 with LlapDaemonInfo

Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in project hive by apache.

The class LlapInputSplit, method readFields.

@Override
public void readFields(DataInput in) throws IOException {
    splitNum = in.readInt();
    int length = in.readInt();
    planBytes = new byte[length];
    in.readFully(planBytes);
    length = in.readInt();
    fragmentBytes = new byte[length];
    in.readFully(fragmentBytes);
    length = in.readInt();
    if (length > 0) {
        fragmentBytesSignature = new byte[length];
        in.readFully(fragmentBytesSignature);
    }
    length = in.readInt();
    locations = new SplitLocationInfo[length];
    for (int i = 0; i < length; ++i) {
        locations[i] = new SplitLocationInfo(in.readUTF(), false);
    }
    llapDaemonInfos = new LlapDaemonInfo[in.readInt()];
    for (int i = 0; i < llapDaemonInfos.length; i++) {
        llapDaemonInfos[i] = new LlapDaemonInfo();
        llapDaemonInfos[i].readFields(in);
    }
    schema = new Schema();
    schema.readFields(in);
    llapUser = in.readUTF();
    length = in.readInt();
    if (length > 0) {
        tokenBytes = new byte[length];
        in.readFully(tokenBytes);
    }
    jwt = in.readUTF();
}
Also used : LlapDaemonInfo(org.apache.hadoop.hive.llap.ext.LlapDaemonInfo) SplitLocationInfo(org.apache.hadoop.mapred.SplitLocationInfo)
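
An external client typically receives each split as an opaque byte[] (for example from the get_splits() UDTF) and rebuilds it through this readFields() method. A minimal sketch, assuming LlapInputSplit has the usual no-arg Writable constructor:

import java.io.IOException;
import org.apache.hadoop.hive.llap.LlapInputSplit;
import org.apache.hadoop.io.DataInputBuffer;

public final class SplitDecodeSketch {
    // Hypothetical helper: deserialize an LlapInputSplit from raw bytes using the
    // readFields() implementation shown above.
    public static LlapInputSplit decode(byte[] splitBytes) throws IOException {
        DataInputBuffer in = new DataInputBuffer();
        in.reset(splitBytes, 0, splitBytes.length);
        LlapInputSplit split = new LlapInputSplit();
        split.readFields(in);
        return split;
    }
}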

Example 3 with LlapDaemonInfo

Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in project hive by apache.

The class LlapInputSplit, method write.

@Override
public void write(DataOutput out) throws IOException {
    out.writeInt(splitNum);
    out.writeInt(planBytes.length);
    out.write(planBytes);
    out.writeInt(fragmentBytes.length);
    out.write(fragmentBytes);
    if (fragmentBytesSignature != null) {
        out.writeInt(fragmentBytesSignature.length);
        out.write(fragmentBytesSignature);
    } else {
        out.writeInt(0);
    }
    out.writeInt(locations.length);
    for (int i = 0; i < locations.length; ++i) {
        out.writeUTF(locations[i].getLocation());
    }
    out.writeInt(llapDaemonInfos.length);
    for (LlapDaemonInfo llapDaemonInfo : llapDaemonInfos) {
        llapDaemonInfo.write(out);
    }
    schema.write(out);
    out.writeUTF(llapUser);
    if (tokenBytes != null) {
        out.writeInt(tokenBytes.length);
        out.write(tokenBytes);
    } else {
        out.writeInt(0);
    }
    if (jwt != null) {
        out.writeUTF(jwt);
    }
}
Also used : LlapDaemonInfo(org.apache.hadoop.hive.llap.ext.LlapDaemonInfo)
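
write() is the mirror image of readFields() from Example 2, so a round trip through Hadoop's in-memory buffers should reproduce the split. A small sketch of such a check (which fields you then compare is up to you):

import java.io.IOException;
import org.apache.hadoop.hive.llap.LlapInputSplit;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public final class SplitRoundTripSketch {
    // Hypothetical helper: serialize with write() and immediately deserialize with
    // readFields() to exercise both methods shown in Examples 2 and 3.
    public static LlapInputSplit roundTrip(LlapInputSplit original) throws IOException {
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), 0, out.getLength());
        LlapInputSplit copy = new LlapInputSplit();
        copy.readFields(in);
        return copy;
    }
}

Note that write() skips the jwt field when it is null while readFields() always reads it, so this round trip assumes jwt is non-null (Example 4 always populates it, using "" when there is no cloud deployment).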

Example 4 with LlapDaemonInfo

Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in project hive by apache.

The class GenericUDTFGetSplits, method getSplits.

// generateLightWeightSplits - if true then
// 1) schema and planBytes[] in each LlapInputSplit are not populated
// 2) schemaSplit(contains only schema) and planSplit(contains only planBytes[]) are populated in SplitResult
private SplitResult getSplits(JobConf job, TezWork work, Schema schema, ApplicationId extClientAppId, boolean generateLightWeightSplits) throws IOException {
    SplitResult splitResult = new SplitResult();
    splitResult.schemaSplit = new LlapInputSplit(0, new byte[0], new byte[0], new byte[0], new SplitLocationInfo[0], new LlapDaemonInfo[0], schema, "", new byte[0], "");
    if (schemaSplitOnly) {
        // schema only
        return splitResult;
    }
    DAG dag = DAG.create(work.getName());
    dag.setCredentials(job.getCredentials());
    DagUtils utils = DagUtils.getInstance();
    Context ctx = new Context(job);
    MapWork mapWork = (MapWork) work.getAllWork().get(0);
    // bunch of things get setup in the context based on conf but we need only the MR tmp directory
    // for the following method.
    JobConf wxConf = utils.initializeVertexConf(job, ctx, mapWork);
    // TODO: should we also whitelist input formats here? from mapred.input.format.class
    Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), job);
    try {
        LocalResource appJarLr = createJarLocalResource(utils.getExecJarPathLocal(ctx.getConf()), utils, job);
        LlapCoordinator coordinator = LlapCoordinator.getInstance();
        if (coordinator == null) {
            throw new IOException("LLAP coordinator is not initialized; must be running in HS2 with " + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
        }
        // Update the queryId to use the generated extClientAppId. See comment below about
        // why this is done.
        HiveConf.setVar(wxConf, HiveConf.ConfVars.HIVEQUERYID, extClientAppId.toString());
        Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, work, DagUtils.createTezLrMap(appJarLr, null));
        String vertexName = wx.getName();
        dag.addVertex(wx);
        utils.addCredentials(mapWork, dag, job);
        // we have the dag now proceed to get the splits:
        Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
        Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
        HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork, false, inputArgNumSplits);
        List<Event> eventList = splitGenerator.initialize();
        int numGroupedSplitsGenerated = eventList.size() - 1;
        InputSplit[] result = new InputSplit[numGroupedSplitsGenerated];
        InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) eventList.get(0);
        List<TaskLocationHint> hints = configureEvent.getLocationHint().getTaskLocationHints();
        Preconditions.checkState(hints.size() == numGroupedSplitsGenerated);
        if (LOG.isDebugEnabled()) {
            LOG.debug("NumEvents=" + eventList.size() + ", NumSplits=" + result.length);
        }
        // This assumes LLAP cluster owner is always the HS2 user.
        String llapUser = LlapRegistryService.currentUser();
        String queryUser = null;
        byte[] tokenBytes = null;
        LlapSigner signer = null;
        if (UserGroupInformation.isSecurityEnabled()) {
            signer = coordinator.getLlapSigner(job);
            // 1. Generate the token for query user (applies to all splits).
            queryUser = SessionState.getUserFromAuthenticator();
            if (queryUser == null) {
                queryUser = UserGroupInformation.getCurrentUser().getUserName();
                LOG.warn("Cannot determine the session user; using " + queryUser + " instead");
            }
            LlapTokenLocalClient tokenClient = coordinator.getLocalTokenClient(job, llapUser);
            // We put the query user, not LLAP user, into the message and token.
            Token<LlapTokenIdentifier> token = tokenClient.createToken(extClientAppId.toString(), queryUser, true);
            LOG.info("Created the token for remote user: {}", token);
            bos.reset();
            token.write(dos);
            tokenBytes = bos.toByteArray();
        } else {
            queryUser = UserGroupInformation.getCurrentUser().getUserName();
        }
        // Generate umbilical token (applies to all splits)
        Token<JobTokenIdentifier> umbilicalToken = JobTokenCreator.createJobToken(extClientAppId);
        LOG.info("Number of splits: " + numGroupedSplitsGenerated);
        SignedMessage signedSvs = null;
        byte[] submitWorkBytes = null;
        final byte[] emptySubmitWorkBytes = new byte[0];
        final Schema emptySchema = new Schema();
        for (int i = 0; i < numGroupedSplitsGenerated; i++) {
            TaskSpec taskSpec = new TaskSpecBuilder().constructTaskSpec(dag, vertexName, numGroupedSplitsGenerated, extClientAppId, i);
            // 2. Generate the vertex/submit information for all events.
            if (i == 0) {
                // The queryId could either be picked up from the current request being processed, or
                // generated. The current request isn't exactly correct since the query is 'done' once we
                // return the results. Generating a new one has the added benefit of working once this
                // is moved out of a UDTF into a proper API.
                // Setting this to the generated AppId which is unique.
                // Despite the differences in TaskSpec, the vertex spec should be the same.
                signedSvs = createSignedVertexSpec(signer, taskSpec, extClientAppId, queryUser, extClientAppId.toString());
                SubmitWorkInfo submitWorkInfo = new SubmitWorkInfo(extClientAppId, System.currentTimeMillis(), numGroupedSplitsGenerated, signedSvs.message, signedSvs.signature, umbilicalToken);
                submitWorkBytes = SubmitWorkInfo.toBytes(submitWorkInfo);
                if (generateLightWeightSplits) {
                    splitResult.planSplit = new LlapInputSplit(0, submitWorkBytes, new byte[0], new byte[0], new SplitLocationInfo[0], new LlapDaemonInfo[0], new Schema(), "", new byte[0], "");
                }
            }
            // 3. Generate input event.
            SignedMessage eventBytes = makeEventBytes(wx, vertexName, eventList.get(i + 1), signer);
            // 4. Make location hints.
            SplitLocationInfo[] locations = makeLocationHints(hints.get(i));
            // 5. populate info about llap daemons(to help client submit request and read data)
            LlapDaemonInfo[] llapDaemonInfos = populateLlapDaemonInfos(job, locations);
            // 6. Generate JWT for external clients if it's a cloud deployment
            // we inject extClientAppId in JWT which is same as what fragment contains.
            // extClientAppId in JWT and in fragment are compared on LLAP when a fragment is submitted.
            // see method ContainerRunnerImpl#verifyJwtForExternalClient
            String jwt = "";
            if (LlapUtil.isCloudDeployment(job)) {
                LlapExtClientJwtHelper llapExtClientJwtHelper = new LlapExtClientJwtHelper(job);
                jwt = llapExtClientJwtHelper.buildJwtForLlap(extClientAppId);
            }
            if (generateLightWeightSplits) {
                result[i] = new LlapInputSplit(i, emptySubmitWorkBytes, eventBytes.message, eventBytes.signature, locations, llapDaemonInfos, emptySchema, llapUser, tokenBytes, jwt);
            } else {
                result[i] = new LlapInputSplit(i, submitWorkBytes, eventBytes.message, eventBytes.signature, locations, llapDaemonInfos, schema, llapUser, tokenBytes, jwt);
            }
        }
        splitResult.actualSplits = result;
        return splitResult;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) SubmitWorkInfo(org.apache.hadoop.hive.llap.SubmitWorkInfo) LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) SplitLocationInfo(org.apache.hadoop.mapred.SplitLocationInfo) HiveSplitGenerator(org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Schema(org.apache.hadoop.hive.llap.Schema) TaskSpecBuilder(org.apache.tez.dag.api.TaskSpecBuilder) LlapSigner(org.apache.hadoop.hive.llap.security.LlapSigner) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) LlapTokenLocalClient(org.apache.hadoop.hive.llap.security.LlapTokenLocalClient) DagUtils(org.apache.hadoop.hive.ql.exec.tez.DagUtils) LlapInputSplit(org.apache.hadoop.hive.llap.LlapInputSplit) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) Context(org.apache.hadoop.hive.ql.Context) Path(org.apache.hadoop.fs.Path) LlapDaemonInfo(org.apache.hadoop.hive.llap.ext.LlapDaemonInfo) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) SignedMessage(org.apache.hadoop.hive.llap.security.LlapSigner.SignedMessage) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) DAG(org.apache.tez.dag.api.DAG) IOException(java.io.IOException) LlapCoordinator(org.apache.hadoop.hive.llap.coordinator.LlapCoordinator) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) LoginException(javax.security.auth.login.LoginException) URISyntaxException(java.net.URISyntaxException) UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Event(org.apache.tez.runtime.api.Event) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) LlapExtClientJwtHelper(org.apache.hadoop.hive.llap.security.LlapExtClientJwtHelper)
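
Putting the pieces together, the SplitResult returned above has up to three parts. The access below is illustrative only: the field names come from the method body above, everything else (the surrounding variables and the private-call context) is assumed.

// Illustrative sketch of consuming the SplitResult produced above when
// generateLightWeightSplits is true; 'job', 'work', 'schema' and 'extClientAppId'
// are assumed to be in scope.
SplitResult splitResult = getSplits(job, work, schema, extClientAppId, true);
// splitResult.schemaSplit  : only the result Schema is populated
// splitResult.planSplit    : only the submit-work (plan) bytes are populated
// splitResult.actualSplits : one split per grouped input split; with lightweight
//                            generation their plan bytes and schema are left empty
//                            and must be taken from the two splits above.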

Example 5 with LlapDaemonInfo

Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in project hive by apache.

The class GenericUDTFGetSplits, method populateLlapDaemonInfos.

private LlapDaemonInfo[] populateLlapDaemonInfos(JobConf job, SplitLocationInfo[] locations) throws IOException {
    LlapRegistryService registryService = LlapRegistryService.getClient(job);
    LlapServiceInstanceSet instanceSet = registryService.getInstances();
    Collection<LlapServiceInstance> llapServiceInstances = null;
    // this means a valid location, see makeLocationHints()
    if (locations.length == 1 && locations[0].getLocation() != null) {
        llapServiceInstances = instanceSet.getByHost(locations[0].getLocation());
    }
    // let's populate them all so that we can fetch data from any of them.
    if (CollectionUtils.isEmpty(llapServiceInstances)) {
        llapServiceInstances = instanceSet.getAll();
    }
    Preconditions.checkState(llapServiceInstances.size() > 0, "Unable to find any of the llap instances in zk registry");
    LlapDaemonInfo[] llapDaemonInfos = new LlapDaemonInfo[llapServiceInstances.size()];
    int count = 0;
    for (LlapServiceInstance inst : llapServiceInstances) {
        LlapDaemonInfo info;
        if (LlapUtil.isCloudDeployment(job)) {
            info = new LlapDaemonInfo(inst.getExternalHostname(), inst.getExternalClientsRpcPort(), inst.getOutputFormatPort());
        } else {
            info = new LlapDaemonInfo(inst.getHost(), inst.getRpcPort(), inst.getOutputFormatPort());
        }
        llapDaemonInfos[count++] = info;
    }
    return llapDaemonInfos;
}
Also used : LlapDaemonInfo(org.apache.hadoop.hive.llap.ext.LlapDaemonInfo) LlapServiceInstance(org.apache.hadoop.hive.llap.registry.LlapServiceInstance) LlapRegistryService(org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService) LlapServiceInstanceSet(org.apache.hadoop.hive.llap.registry.LlapServiceInstanceSet) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint)
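
The contract these examples rely on from LlapDaemonInfo is small: a (host, rpcPort, outputFormatPort) constructor, the matching getters, and Writable-style write/readFields. A minimal sketch of that contract, inferred only from the usage on this page (the real class in org.apache.hadoop.hive.llap.ext may differ in field order and details):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

// Hypothetical LlapDaemonInfoSketch: captures only what the five examples above use;
// not the actual Hive implementation.
public class LlapDaemonInfoSketch implements Writable {
    private String host;            // daemon host (external hostname in cloud deployments)
    private int rpcPort;            // port used to submit work fragments
    private int outputFormatPort;   // port the client connects to for reading output

    public LlapDaemonInfoSketch() {
        // no-arg constructor so readFields() can repopulate a fresh instance
    }

    public LlapDaemonInfoSketch(String host, int rpcPort, int outputFormatPort) {
        this.host = host;
        this.rpcPort = rpcPort;
        this.outputFormatPort = outputFormatPort;
    }

    public String getHost() { return host; }
    public int getRpcPort() { return rpcPort; }
    public int getOutputFormatPort() { return outputFormatPort; }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(host);
        out.writeInt(rpcPort);
        out.writeInt(outputFormatPort);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        host = in.readUTF();
        rpcPort = in.readInt();
        outputFormatPort = in.readInt();
    }

    @Override
    public String toString() {
        return "LlapDaemonInfo{host=" + host + ", rpcPort=" + rpcPort
            + ", outputFormatPort=" + outputFormatPort + "}";
    }
}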

Aggregations

LlapDaemonInfo (org.apache.hadoop.hive.llap.ext.LlapDaemonInfo) 5
LlapTokenIdentifier (org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) 2
SplitLocationInfo (org.apache.hadoop.mapred.SplitLocationInfo) 2
ByteString (com.google.protobuf.ByteString) 1
FileNotFoundException (java.io.FileNotFoundException) 1
IOException (java.io.IOException) 1
OutputStream (java.io.OutputStream) 1
Socket (java.net.Socket) 1
URISyntaxException (java.net.URISyntaxException) 1
LoginException (javax.security.auth.login.LoginException) 1
Path (org.apache.hadoop.fs.Path) 1
LlapInputSplit (org.apache.hadoop.hive.llap.LlapInputSplit) 1
Schema (org.apache.hadoop.hive.llap.Schema) 1
SubmitWorkInfo (org.apache.hadoop.hive.llap.SubmitWorkInfo) 1
LlapCoordinator (org.apache.hadoop.hive.llap.coordinator.LlapCoordinator) 1
LlapOutputSocketInitMessage (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.LlapOutputSocketInitMessage) 1
SignableVertexSpec (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) 1
SubmitWorkRequestProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) 1
LlapTaskUmbilicalExternalClient (org.apache.hadoop.hive.llap.ext.LlapTaskUmbilicalExternalClient) 1
LlapServiceInstance (org.apache.hadoop.hive.llap.registry.LlapServiceInstance) 1