
Example 1 with LlapTaskUmbilicalExternalClient

Use of org.apache.hadoop.hive.llap.ext.LlapTaskUmbilicalExternalClient in project hive by apache.

From the class LlapBaseInputFormat, method getRecordReader. The method deserializes the plan carried by an LlapInputSplit, submits the work fragment to an LLAP daemon through a LlapTaskUmbilicalExternalClient, registers the fragment on the daemon's output-format socket, and returns a record reader (Arrow batch or row based) over that socket.

@SuppressWarnings("unchecked")
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    LlapInputSplit llapSplit = (LlapInputSplit) split;
    // Set conf to use LLAP user rather than current user for LLAP Zk registry.
    HiveConf.setVar(job, HiveConf.ConfVars.LLAP_ZK_REGISTRY_USER, llapSplit.getLlapUser());
    SubmitWorkInfo submitWorkInfo = SubmitWorkInfo.fromBytes(llapSplit.getPlanBytes());
    // llapSplit.getLlapDaemonInfos() will never be empty as of now, also validated this in GenericUDTFGetSplits while populating.
    final LlapDaemonInfo llapDaemonInfo = llapSplit.getLlapDaemonInfos()[0];
    final String host = llapDaemonInfo.getHost();
    final int outputPort = llapDaemonInfo.getOutputFormatPort();
    final int llapSubmitPort = llapDaemonInfo.getRpcPort();
    LOG.info("Will try to submit request to first Llap Daemon in the split - {}", llapDaemonInfo);
    byte[] llapTokenBytes = llapSplit.getTokenBytes();
    Token<LlapTokenIdentifier> llapToken = null;
    if (llapTokenBytes != null) {
        DataInputBuffer in = new DataInputBuffer();
        in.reset(llapTokenBytes, 0, llapTokenBytes.length);
        llapToken = new Token<LlapTokenIdentifier>();
        llapToken.readFields(in);
    }
    LlapRecordReaderTaskUmbilicalExternalResponder umbilicalResponder = new LlapRecordReaderTaskUmbilicalExternalResponder();
    LlapTaskUmbilicalExternalClient llapClient = new LlapTaskUmbilicalExternalClient(job, submitWorkInfo.getTokenIdentifier(), submitWorkInfo.getToken(), umbilicalResponder, llapToken);
    int attemptNum = 0;
    final int taskNum;
    // Use task attempt number, task number from conf if provided
    TaskAttemptID taskAttemptId = TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID));
    if (taskAttemptId != null) {
        attemptNum = taskAttemptId.getId();
        taskNum = taskAttemptId.getTaskID().getId();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Setting attempt number to: {}, task number to: {} from given taskAttemptId: {} in conf", attemptNum, taskNum, taskAttemptId);
        }
    } else {
        taskNum = llapSplit.getSplitNum();
    }
    SubmitWorkRequestProto request = constructSubmitWorkRequestProto(submitWorkInfo, taskNum, attemptNum, llapClient.getAddress(), submitWorkInfo.getToken(), llapSplit, job);
    SignableVertexSpec vertex = SignableVertexSpec.parseFrom(submitWorkInfo.getVertexBinary());
    String fragmentId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber()).toString();
    LOG.info("Submitting fragment:{} to llap [host = {}, port = {}] ", fragmentId, host, llapSubmitPort);
    llapClient.submitWork(request, host, llapSubmitPort);
    Socket socket = new Socket(host, outputPort);
    OutputStream socketStream = socket.getOutputStream();
    LlapOutputSocketInitMessage.Builder builder = LlapOutputSocketInitMessage.newBuilder().setFragmentId(fragmentId);
    if (llapSplit.getTokenBytes() != null) {
        builder.setToken(ByteString.copyFrom(llapSplit.getTokenBytes()));
    }
    LOG.info("Registering fragment:{} to llap [host = {}, output port = {}] to read output", fragmentId, host, outputPort);
    builder.build().writeDelimitedTo(socketStream);
    socketStream.flush();
    LOG.info("Registered id: " + fragmentId);
    @SuppressWarnings("rawtypes") LlapBaseRecordReader recordReader;
    if (useArrow) {
        if (allocator != null) {
            // Client provided their own allocator
            recordReader = new LlapArrowBatchRecordReader(socket.getInputStream(), llapSplit.getSchema(), ArrowWrapperWritable.class, job, llapClient, socket, allocator);
        } else {
            // Client did not provide their own allocator, use constructor for global allocator
            recordReader = new LlapArrowBatchRecordReader(socket.getInputStream(), llapSplit.getSchema(), ArrowWrapperWritable.class, job, llapClient, socket, arrowAllocatorLimit);
        }
    } else {
        recordReader = new LlapBaseRecordReader(socket.getInputStream(), llapSplit.getSchema(), BytesWritable.class, job, llapClient, (java.io.Closeable) socket);
    }
    umbilicalResponder.setRecordReader(recordReader);
    return recordReader;
}
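
For context, here is a minimal usage sketch (not part of the Hive source above) showing how a client typically drives getRecordReader through the mapred InputFormat contract: plan the splits, then read each fragment. The conf key constants (URL_KEY, USER_KEY, PWD_KEY, QUERY_KEY), the HiveServer2 JDBC URL, and the query string are illustrative assumptions; check them against your Hive version.

import org.apache.hadoop.hive.llap.LlapBaseInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class LlapReadSketch {
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        // Assumed conf keys exposed by LlapBaseInputFormat; the values here are hypothetical.
        job.set(LlapBaseInputFormat.URL_KEY, "jdbc:hive2://hs2-host:10000/default");
        job.set(LlapBaseInputFormat.USER_KEY, "hive");
        job.set(LlapBaseInputFormat.PWD_KEY, "");
        job.set(LlapBaseInputFormat.QUERY_KEY, "select * from my_table");
        LlapBaseInputFormat inputFormat = new LlapBaseInputFormat();
        // getSplits() plans the query via HiveServer2 and returns one LlapInputSplit per
        // fragment; getRecordReader() then submits each fragment to an LLAP daemon as
        // shown in the method above and streams its output back over a socket.
        for (InputSplit split : inputFormat.getSplits(job, 1)) {
            RecordReader reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
            Object key = reader.createKey();
            Object value = reader.createValue();
            while (reader.next(key, value)) {
                // Each value is a Writable holding one row (or one Arrow batch).
                System.out.println(value);
            }
            reader.close();
        }
    }
}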
Also used: ByteString (com.google.protobuf.ByteString), OutputStream (java.io.OutputStream), Socket (java.net.Socket), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), BytesWritable (org.apache.hadoop.io.BytesWritable), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID), LlapTokenIdentifier (org.apache.hadoop.hive.llap.security.LlapTokenIdentifier), LlapDaemonInfo (org.apache.hadoop.hive.llap.ext.LlapDaemonInfo), LlapTaskUmbilicalExternalClient (org.apache.hadoop.hive.llap.ext.LlapTaskUmbilicalExternalClient), LlapOutputSocketInitMessage (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.LlapOutputSocketInitMessage), SignableVertexSpec (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec), SubmitWorkRequestProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto), ArrowWrapperWritable (org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable)
