Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in the Apache Hive project.
Class LlapBaseInputFormat, method getRecordReader.
/**
 * Submits the fragment described by {@code split} to the first LLAP daemon listed in the split and
 * returns a reader over that daemon's output socket.
 * <p>
 * The steps are: decode the plan and (optional) LLAP token carried by the split, submit the work
 * request through an umbilical client, then open a socket to the daemon's output-format port and
 * register the fragment id on it so the daemon knows which output to stream back.
 * <p>
 * If anything fails after the output socket has been opened, the socket is closed before the
 * exception is propagated so that the connection is not leaked.
 *
 * @param split    must be an {@code LlapInputSplit}; it is cast unconditionally
 * @param job      job configuration; the LLAP ZK registry user is overwritten on it from the split
 * @param reporter not used by this method
 * @return a record reader that reads from the daemon's output socket
 * @throws IOException if decoding the split, submitting the work, or connecting/registering fails
 */
@SuppressWarnings("unchecked")
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
  LlapInputSplit llapSplit = (LlapInputSplit) split;

  // Set conf to use LLAP user rather than current user for LLAP Zk registry.
  HiveConf.setVar(job, HiveConf.ConfVars.LLAP_ZK_REGISTRY_USER, llapSplit.getLlapUser());
  SubmitWorkInfo submitWorkInfo = SubmitWorkInfo.fromBytes(llapSplit.getPlanBytes());

  // llapSplit.getLlapDaemonInfos() will never be empty as of now, also validated this in GenericUDTFGetSplits while populating.
  final LlapDaemonInfo llapDaemonInfo = llapSplit.getLlapDaemonInfos()[0];
  final String host = llapDaemonInfo.getHost();
  final int outputPort = llapDaemonInfo.getOutputFormatPort();
  final int llapSubmitPort = llapDaemonInfo.getRpcPort();
  LOG.info("Will try to submit request to first Llap Daemon in the split - {}", llapDaemonInfo);

  // The LLAP token is optional; when present it is deserialized from the raw bytes in the split.
  byte[] llapTokenBytes = llapSplit.getTokenBytes();
  Token<LlapTokenIdentifier> llapToken = null;
  if (llapTokenBytes != null) {
    DataInputBuffer in = new DataInputBuffer();
    in.reset(llapTokenBytes, 0, llapTokenBytes.length);
    llapToken = new Token<LlapTokenIdentifier>();
    llapToken.readFields(in);
  }

  LlapRecordReaderTaskUmbilicalExternalResponder umbilicalResponder = new LlapRecordReaderTaskUmbilicalExternalResponder();
  // NOTE(review): llapClient is handed to the record reader on success, but nothing here releases
  // it if a later step throws - confirm whether it needs explicit cleanup on the failure path.
  LlapTaskUmbilicalExternalClient llapClient = new LlapTaskUmbilicalExternalClient(job, submitWorkInfo.getTokenIdentifier(), submitWorkInfo.getToken(), umbilicalResponder, llapToken);

  int attemptNum = 0;
  final int taskNum;
  // Use task attempt number, task number from conf if provided
  TaskAttemptID taskAttemptId = TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID));
  if (taskAttemptId != null) {
    attemptNum = taskAttemptId.getId();
    taskNum = taskAttemptId.getTaskID().getId();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Setting attempt number to: {}, task number to: {} from given taskAttemptId: {} in conf", attemptNum, taskNum, taskAttemptId);
    }
  } else {
    // No attempt id in the conf: fall back to the split number, attempt number stays 0.
    taskNum = llapSplit.getSplitNum();
  }

  SubmitWorkRequestProto request = constructSubmitWorkRequestProto(submitWorkInfo, taskNum, attemptNum, llapClient.getAddress(), submitWorkInfo.getToken(), llapSplit, job);
  SignableVertexSpec vertex = SignableVertexSpec.parseFrom(submitWorkInfo.getVertexBinary());
  String fragmentId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber()).toString();
  LOG.info("Submitting fragment:{} to llap [host = {}, port = {}] ", fragmentId, host, llapSubmitPort);
  llapClient.submitWork(request, host, llapSubmitPort);

  Socket socket = new Socket(host, outputPort);
  try {
    // Register the fragment id (and token, if any) on the output socket before reading from it.
    OutputStream socketStream = socket.getOutputStream();
    LlapOutputSocketInitMessage.Builder builder = LlapOutputSocketInitMessage.newBuilder().setFragmentId(fragmentId);
    if (llapTokenBytes != null) {
      builder.setToken(ByteString.copyFrom(llapTokenBytes));
    }
    LOG.info("Registering fragment:{} to llap [host = {}, output port = {}] to read output", fragmentId, host, outputPort);
    builder.build().writeDelimitedTo(socketStream);
    socketStream.flush();
    LOG.info("Registered id: {}", fragmentId);

    @SuppressWarnings("rawtypes") LlapBaseRecordReader recordReader;
    if (useArrow) {
      if (allocator != null) {
        // Client provided their own allocator
        recordReader = new LlapArrowBatchRecordReader(socket.getInputStream(), llapSplit.getSchema(), ArrowWrapperWritable.class, job, llapClient, socket, allocator);
      } else {
        // Client did not provide their own allocator, use constructor for global allocator
        recordReader = new LlapArrowBatchRecordReader(socket.getInputStream(), llapSplit.getSchema(), ArrowWrapperWritable.class, job, llapClient, socket, arrowAllocatorLimit);
      }
    } else {
      recordReader = new LlapBaseRecordReader(socket.getInputStream(), llapSplit.getSchema(), BytesWritable.class, job, llapClient, (java.io.Closeable) socket);
    }
    umbilicalResponder.setRecordReader(recordReader);
    return recordReader;
  } catch (IOException | RuntimeException e) {
    // The reader never reached the caller, so nobody else will close the socket.
    try {
      socket.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in the Apache Hive project.
Class LlapInputSplit, method readFields.
/**
 * Restores this split from {@code in}, reading the fields in the same order in which
 * {@code write(DataOutput)} emits them: split number, plan bytes, fragment bytes, fragment
 * signature, locations, LLAP daemon infos, schema, LLAP user, token bytes and JWT.
 * <p>
 * The fragment signature and the token are optional and are encoded with a length of 0 when
 * absent. In that case the corresponding field is reset to {@code null} so that an instance which
 * is reused for several reads does not keep the bytes of a previously read split.
 *
 * @param in the stream to read from
 * @throws IOException if the stream ends early or cannot be read
 */
@Override
public void readFields(DataInput in) throws IOException {
  splitNum = in.readInt();

  planBytes = new byte[in.readInt()];
  in.readFully(planBytes);

  fragmentBytes = new byte[in.readInt()];
  in.readFully(fragmentBytes);

  // Optional: length 0 means "no signature".
  int signatureLength = in.readInt();
  if (signatureLength > 0) {
    fragmentBytesSignature = new byte[signatureLength];
    in.readFully(fragmentBytesSignature);
  } else {
    fragmentBytesSignature = null;
  }

  int locationCount = in.readInt();
  locations = new SplitLocationInfo[locationCount];
  for (int i = 0; i < locationCount; ++i) {
    locations[i] = new SplitLocationInfo(in.readUTF(), false);
  }

  llapDaemonInfos = new LlapDaemonInfo[in.readInt()];
  for (int i = 0; i < llapDaemonInfos.length; i++) {
    llapDaemonInfos[i] = new LlapDaemonInfo();
    llapDaemonInfos[i].readFields(in);
  }

  schema = new Schema();
  schema.readFields(in);

  llapUser = in.readUTF();

  // Optional: length 0 means "no token".
  int tokenLength = in.readInt();
  if (tokenLength > 0) {
    tokenBytes = new byte[tokenLength];
    in.readFully(tokenBytes);
  } else {
    tokenBytes = null;
  }

  jwt = in.readUTF();
}
Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in the Apache Hive project.
Class LlapInputSplit, method write.
/**
 * Serializes this split to {@code out}.
 * <p>
 * Byte arrays are written as a length followed by the bytes. The optional fragment signature and
 * token are written with length 0 when they are {@code null}. The JWT is always written: the
 * reading side unconditionally expects a UTF string at the end of the record, so a {@code null}
 * JWT is encoded as the empty string instead of being skipped.
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void write(DataOutput out) throws IOException {
  out.writeInt(splitNum);

  out.writeInt(planBytes.length);
  out.write(planBytes);

  out.writeInt(fragmentBytes.length);
  out.write(fragmentBytes);

  if (fragmentBytesSignature != null) {
    out.writeInt(fragmentBytesSignature.length);
    out.write(fragmentBytesSignature);
  } else {
    out.writeInt(0);
  }

  out.writeInt(locations.length);
  for (SplitLocationInfo location : locations) {
    out.writeUTF(location.getLocation());
  }

  out.writeInt(llapDaemonInfos.length);
  for (LlapDaemonInfo llapDaemonInfo : llapDaemonInfos) {
    llapDaemonInfo.write(out);
  }

  schema.write(out);
  out.writeUTF(llapUser);

  if (tokenBytes != null) {
    out.writeInt(tokenBytes.length);
    out.write(tokenBytes);
  } else {
    out.writeInt(0);
  }

  // Skipping the field for a null JWT would leave readFields() reading past the end of the record.
  out.writeUTF(jwt != null ? jwt : "");
}
Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in the Apache Hive project.
Class GenericUDTFGetSplits, method getSplits.
/**
 * Builds the LLAP input splits for the given Tez work.
 * <p>
 * The returned result always has {@code schemaSplit} set to a split that carries only the schema.
 * When the {@code schemaSplitOnly} field is set, that is all that is produced. Otherwise a DAG with
 * a single vertex is built for the first work item, the split generator is run, and one
 * LlapInputSplit is created per generated split event.
 * <p>
 * generateLightWeightSplits - if true then
 * 1) schema and planBytes[] in each LlapInputSplit are not populated (empty placeholders are used)
 * 2) schemaSplit(contains only schema) and planSplit(contains only planBytes[]) are populated in SplitResult
 *
 * @param job                       job configuration; also the source of the DAG credentials
 * @param work                      Tez work; only the first entry of getAllWork() is used, cast to MapWork
 * @param schema                    schema placed in schemaSplit and, for non-lightweight splits, in every split
 * @param extClientAppId            generated application id, also used as the query id
 * @param generateLightWeightSplits see above
 * @return the schema split, the optional plan split and the actual splits
 * @throws IOException if the LLAP coordinator is not initialized, or wrapping any exception thrown
 *                     while building the DAG and the splits
 */
private SplitResult getSplits(JobConf job, TezWork work, Schema schema, ApplicationId extClientAppId, boolean generateLightWeightSplits) throws IOException {
SplitResult splitResult = new SplitResult();
// The schema split carries only the schema; every other field is an empty placeholder.
splitResult.schemaSplit = new LlapInputSplit(0, new byte[0], new byte[0], new byte[0], new SplitLocationInfo[0], new LlapDaemonInfo[0], schema, "", new byte[0], "");
if (schemaSplitOnly) {
// schema only
return splitResult;
}
DAG dag = DAG.create(work.getName());
dag.setCredentials(job.getCredentials());
DagUtils utils = DagUtils.getInstance();
Context ctx = new Context(job);
MapWork mapWork = (MapWork) work.getAllWork().get(0);
// bunch of things get setup in the context based on conf but we need only the MR tmp directory
// for the following method.
JobConf wxConf = utils.initializeVertexConf(job, ctx, mapWork);
// TODO: should we also whitelist input formats here? from mapred.input.format.class
Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), job);
try {
LocalResource appJarLr = createJarLocalResource(utils.getExecJarPathLocal(ctx.getConf()), utils, job);
LlapCoordinator coordinator = LlapCoordinator.getInstance();
if (coordinator == null) {
throw new IOException("LLAP coordinator is not initialized; must be running in HS2 with " + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
}
// Update the queryId to use the generated extClientAppId. See comment below about
// why this is done.
HiveConf.setVar(wxConf, HiveConf.ConfVars.HIVEQUERYID, extClientAppId.toString());
Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, work, DagUtils.createTezLrMap(appJarLr, null));
String vertexName = wx.getName();
dag.addVertex(wx);
utils.addCredentials(mapWork, dag, job);
// we have the dag now proceed to get the splits:
Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork, false, inputArgNumSplits);
List<Event> eventList = splitGenerator.initialize();
// The first event is the vertex-configuration event; each remaining event corresponds to one split.
int numGroupedSplitsGenerated = eventList.size() - 1;
InputSplit[] result = new InputSplit[numGroupedSplitsGenerated];
InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) eventList.get(0);
List<TaskLocationHint> hints = configureEvent.getLocationHint().getTaskLocationHints();
// One location hint is expected per split.
Preconditions.checkState(hints.size() == numGroupedSplitsGenerated);
if (LOG.isDebugEnabled()) {
LOG.debug("NumEvents=" + eventList.size() + ", NumSplits=" + result.length);
}
// This assumes LLAP cluster owner is always the HS2 user.
String llapUser = LlapRegistryService.currentUser();
String queryUser = null;
// tokenBytes and signer stay null when security is disabled.
byte[] tokenBytes = null;
LlapSigner signer = null;
if (UserGroupInformation.isSecurityEnabled()) {
signer = coordinator.getLlapSigner(job);
// 1. Generate the token for query user (applies to all splits).
queryUser = SessionState.getUserFromAuthenticator();
if (queryUser == null) {
queryUser = UserGroupInformation.getCurrentUser().getUserName();
LOG.warn("Cannot determine the session user; using " + queryUser + " instead");
}
LlapTokenLocalClient tokenClient = coordinator.getLocalTokenClient(job, llapUser);
// We put the query user, not LLAP user, into the message and token.
Token<LlapTokenIdentifier> token = tokenClient.createToken(extClientAppId.toString(), queryUser, true);
LOG.info("Created the token for remote user: {}", token);
// bos and dos are declared outside this method; presumably dos writes into bos - verify against
// their declarations.
bos.reset();
token.write(dos);
tokenBytes = bos.toByteArray();
} else {
queryUser = UserGroupInformation.getCurrentUser().getUserName();
}
// Generate umbilical token (applies to all splits)
Token<JobTokenIdentifier> umbilicalToken = JobTokenCreator.createJobToken(extClientAppId);
LOG.info("Number of splits: " + numGroupedSplitsGenerated);
SignedMessage signedSvs = null;
// Serialized submit-work info; computed once on the first iteration and shared by all splits.
byte[] submitWorkBytes = null;
// Placeholders used in each split when lightweight splits are requested.
final byte[] emptySubmitWorkBytes = new byte[0];
final Schema emptySchema = new Schema();
for (int i = 0; i < numGroupedSplitsGenerated; i++) {
TaskSpec taskSpec = new TaskSpecBuilder().constructTaskSpec(dag, vertexName, numGroupedSplitsGenerated, extClientAppId, i);
// 2. Generate the vertex/submit information for all events.
if (i == 0) {
// The queryId could either be picked up from the current request being processed, or
// generated. The current request isn't exactly correct since the query is 'done' once we
// return the results. Generating a new one has the added benefit of working once this
// is moved out of a UDTF into a proper API.
// Setting this to the generated AppId which is unique.
// Despite the differences in TaskSpec, the vertex spec should be the same.
signedSvs = createSignedVertexSpec(signer, taskSpec, extClientAppId, queryUser, extClientAppId.toString());
SubmitWorkInfo submitWorkInfo = new SubmitWorkInfo(extClientAppId, System.currentTimeMillis(), numGroupedSplitsGenerated, signedSvs.message, signedSvs.signature, umbilicalToken);
submitWorkBytes = SubmitWorkInfo.toBytes(submitWorkInfo);
if (generateLightWeightSplits) {
// The plan split carries only the plan bytes; every other field is an empty placeholder.
splitResult.planSplit = new LlapInputSplit(0, submitWorkBytes, new byte[0], new byte[0], new SplitLocationInfo[0], new LlapDaemonInfo[0], new Schema(), "", new byte[0], "");
}
}
// 3. Generate input event.
// Event 0 is the configure event, hence the i + 1 offset.
SignedMessage eventBytes = makeEventBytes(wx, vertexName, eventList.get(i + 1), signer);
// 4. Make location hints.
SplitLocationInfo[] locations = makeLocationHints(hints.get(i));
// 5. populate info about llap daemons(to help client submit request and read data)
LlapDaemonInfo[] llapDaemonInfos = populateLlapDaemonInfos(job, locations);
// 6. Generate JWT for external clients if it's a cloud deployment
// we inject extClientAppId in JWT which is same as what fragment contains.
// extClientAppId in JWT and in fragment are compared on LLAP when a fragment is submitted.
// see method ContainerRunnerImpl#verifyJwtForExternalClient
String jwt = "";
if (LlapUtil.isCloudDeployment(job)) {
LlapExtClientJwtHelper llapExtClientJwtHelper = new LlapExtClientJwtHelper(job);
jwt = llapExtClientJwtHelper.buildJwtForLlap(extClientAppId);
}
if (generateLightWeightSplits) {
// Lightweight split: plan bytes and schema are left empty; they travel in planSplit / schemaSplit.
result[i] = new LlapInputSplit(i, emptySubmitWorkBytes, eventBytes.message, eventBytes.signature, locations, llapDaemonInfos, emptySchema, llapUser, tokenBytes, jwt);
} else {
result[i] = new LlapInputSplit(i, submitWorkBytes, eventBytes.message, eventBytes.signature, locations, llapDaemonInfos, schema, llapUser, tokenBytes, jwt);
}
}
splitResult.actualSplits = result;
return splitResult;
} catch (Exception e) {
// NOTE(review): this also re-wraps IOExceptions thrown inside the try block (e.g. the
// "coordinator is not initialized" one above) in a second IOException.
throw new IOException(e);
}
}
Use of org.apache.hadoop.hive.llap.ext.LlapDaemonInfo in the Apache Hive project.
Class GenericUDTFGetSplits, method populateLlapDaemonInfos.
/**
 * Collects connection details (host, RPC port, output-format port) of the LLAP daemons a client
 * may contact for a split.
 * <p>
 * If exactly one location with a non-null host is given, the daemons registered on that host are
 * used. If that yields nothing, or no such location was given, all registered daemons are used.
 * For cloud deployments the external hostname and external-clients RPC port are reported instead
 * of the regular host and RPC port.
 *
 * @param job       configuration used to obtain the registry client and to detect cloud deployments
 * @param locations location hints of the split
 * @return one entry per selected daemon; never empty
 * @throws IOException declared for the registry lookups made here
 * @throws IllegalStateException if no LLAP instance is registered at all
 */
private LlapDaemonInfo[] populateLlapDaemonInfos(JobConf job, SplitLocationInfo[] locations) throws IOException {
  final LlapServiceInstanceSet registeredInstances = LlapRegistryService.getClient(job).getInstances();

  // A single location with a non-null host is a valid location, see makeLocationHints().
  final String hintedHost = locations.length == 1 ? locations[0].getLocation() : null;
  Collection<LlapServiceInstance> candidates = null;
  if (hintedHost != null) {
    candidates = registeredInstances.getByHost(hintedHost);
  }

  // Nothing found for the hinted host (or no usable hint): offer every daemon, data can be
  // fetched from any of them.
  if (CollectionUtils.isEmpty(candidates)) {
    candidates = registeredInstances.getAll();
  }
  Preconditions.checkState(!candidates.isEmpty(), "Unable to find any of the llap instances in zk registry");

  final LlapDaemonInfo[] daemonInfos = new LlapDaemonInfo[candidates.size()];
  int next = 0;
  for (LlapServiceInstance instance : candidates) {
    daemonInfos[next++] = LlapUtil.isCloudDeployment(job)
        ? new LlapDaemonInfo(instance.getExternalHostname(), instance.getExternalClientsRpcPort(), instance.getOutputFormatPort())
        : new LlapDaemonInfo(instance.getHost(), instance.getRpcPort(), instance.getOutputFormatPort());
  }
  return daemonInfos;
}
Aggregations