use of org.apache.tez.runtime.api.impl.TaskSpec in project hive by apache.
the class GenericUDTFGetSplits method getSplits.
public InputSplit[] getSplits(JobConf job, int numSplits, TezWork work, Schema schema) throws IOException {
DAG dag = DAG.create(work.getName());
dag.setCredentials(job.getCredentials());
DagUtils utils = DagUtils.getInstance();
Context ctx = new Context(job);
MapWork mapWork = (MapWork) work.getAllWork().get(0);
// bunch of things get setup in the context based on conf but we need only the MR tmp directory
// for the following method.
JobConf wxConf = utils.initializeVertexConf(job, ctx, mapWork);
// TODO: should we also whitelist input formats here? from mapred.input.format.class
Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), job);
FileSystem fs = scratchDir.getFileSystem(job);
try {
LocalResource appJarLr = createJarLocalResource(utils.getExecJarPathLocal(), utils, job);
Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, appJarLr, new ArrayList<LocalResource>(), fs, ctx, false, work, work.getVertexType(mapWork));
String vertexName = wx.getName();
dag.addVertex(wx);
utils.addCredentials(mapWork, dag);
// we have the dag now proceed to get the splits:
Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork);
List<Event> eventList = splitGenerator.initialize();
InputSplit[] result = new InputSplit[eventList.size() - 1];
InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) eventList.get(0);
List<TaskLocationHint> hints = configureEvent.getLocationHint().getTaskLocationHints();
Preconditions.checkState(hints.size() == eventList.size() - 1);
if (LOG.isDebugEnabled()) {
LOG.debug("NumEvents=" + eventList.size() + ", NumSplits=" + result.length);
}
LlapCoordinator coordinator = LlapCoordinator.getInstance();
if (coordinator == null) {
throw new IOException("LLAP coordinator is not initialized; must be running in HS2 with " + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
}
// See the discussion in the implementation as to why we generate app ID.
ApplicationId applicationId = coordinator.createExtClientAppId();
// This assumes LLAP cluster owner is always the HS2 user.
String llapUser = UserGroupInformation.getLoginUser().getShortUserName();
String queryUser = null;
byte[] tokenBytes = null;
LlapSigner signer = null;
if (UserGroupInformation.isSecurityEnabled()) {
signer = coordinator.getLlapSigner(job);
// 1. Generate the token for query user (applies to all splits).
queryUser = SessionState.getUserFromAuthenticator();
if (queryUser == null) {
queryUser = UserGroupInformation.getCurrentUser().getUserName();
LOG.warn("Cannot determine the session user; using " + queryUser + " instead");
}
LlapTokenLocalClient tokenClient = coordinator.getLocalTokenClient(job, llapUser);
// We put the query user, not LLAP user, into the message and token.
Token<LlapTokenIdentifier> token = tokenClient.createToken(applicationId.toString(), queryUser, true);
LOG.info("Created the token for remote user: {}", token);
bos.reset();
token.write(dos);
tokenBytes = bos.toByteArray();
} else {
queryUser = UserGroupInformation.getCurrentUser().getUserName();
}
LOG.info("Number of splits: " + (eventList.size() - 1));
SignedMessage signedSvs = null;
for (int i = 0; i < eventList.size() - 1; i++) {
TaskSpec taskSpec = new TaskSpecBuilder().constructTaskSpec(dag, vertexName, eventList.size() - 1, applicationId, i);
// 2. Generate the vertex/submit information for all events.
if (i == 0) {
// The queryId could either be picked up from the current request being processed, or
// generated. The current request isn't exactly correct since the query is 'done' once we
// return the results. Generating a new one has the added benefit of working once this
// is moved out of a UDTF into a proper API.
// Setting this to the generated AppId which is unique.
// Despite the differences in TaskSpec, the vertex spec should be the same.
signedSvs = createSignedVertexSpec(signer, taskSpec, applicationId, queryUser, applicationId.toString());
}
SubmitWorkInfo submitWorkInfo = new SubmitWorkInfo(applicationId, System.currentTimeMillis(), taskSpec.getVertexParallelism(), signedSvs.message, signedSvs.signature);
byte[] submitWorkBytes = SubmitWorkInfo.toBytes(submitWorkInfo);
// 3. Generate input event.
SignedMessage eventBytes = makeEventBytes(wx, vertexName, eventList.get(i + 1), signer);
// 4. Make location hints.
SplitLocationInfo[] locations = makeLocationHints(hints.get(i));
result[i] = new LlapInputSplit(i, submitWorkBytes, eventBytes.message, eventBytes.signature, locations, schema, llapUser, tokenBytes);
}
return result;
} catch (Exception e) {
throw new IOException(e);
}
}
use of org.apache.tez.runtime.api.impl.TaskSpec in project hive by apache.
the class TaskSpecBuilder method constructTaskSpec.
public TaskSpec constructTaskSpec(DAG dag, String vertexName, int numSplits, ApplicationId appId, int index) {
Vertex vertex = dag.getVertex(vertexName);
ProcessorDescriptor processorDescriptor = vertex.getProcessorDescriptor();
List<RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>> inputs = vertex.getInputs();
List<RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor>> outputs = vertex.getOutputs();
Preconditions.checkState(inputs.size() == 1);
Preconditions.checkState(outputs.size() == 1);
List<InputSpec> inputSpecs = new ArrayList<>();
for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> input : inputs) {
InputSpec inputSpec = new InputSpec(input.getName(), input.getIODescriptor(), 1);
inputSpecs.add(inputSpec);
}
List<OutputSpec> outputSpecs = new ArrayList<>();
for (RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor> output : outputs) {
OutputSpec outputSpec = new OutputSpec(output.getName(), output.getIODescriptor(), 1);
outputSpecs.add(outputSpec);
}
TezDAGID dagId = TezDAGID.getInstance(appId, 0);
TezVertexID vertexId = TezVertexID.getInstance(dagId, 0);
TezTaskID taskId = TezTaskID.getInstance(vertexId, index);
TezTaskAttemptID taskAttemptId = TezTaskAttemptID.getInstance(taskId, 0);
return new TaskSpec(taskAttemptId, dag.getName(), vertexName, numSplits, processorDescriptor, inputSpecs, outputSpecs, null);
}
use of org.apache.tez.runtime.api.impl.TaskSpec in project hive by apache.
the class TestConverters method testTaskSpecToFragmentSpec.
@Test(timeout = 10000)
public void testTaskSpecToFragmentSpec() {
ByteBuffer procBb = ByteBuffer.allocate(4);
procBb.putInt(0, 200);
UserPayload processorPayload = UserPayload.create(procBb);
ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create("fakeProcessorName").setUserPayload(processorPayload);
ByteBuffer input1Bb = ByteBuffer.allocate(4);
input1Bb.putInt(0, 300);
UserPayload input1Payload = UserPayload.create(input1Bb);
InputDescriptor id1 = InputDescriptor.create("input1ClassName").setUserPayload(input1Payload);
InputSpec inputSpec1 = new InputSpec("sourceVertexName1", id1, 33);
InputSpec inputSpec2 = new InputSpec("sourceVertexName2", id1, 44);
List<InputSpec> inputSpecList = Lists.newArrayList(inputSpec1, inputSpec2);
ByteBuffer output1Bb = ByteBuffer.allocate(4);
output1Bb.putInt(0, 400);
UserPayload output1Payload = UserPayload.create(output1Bb);
OutputDescriptor od1 = OutputDescriptor.create("output1ClassName").setUserPayload(output1Payload);
OutputSpec outputSpec1 = new OutputSpec("destVertexName1", od1, 55);
OutputSpec outputSpec2 = new OutputSpec("destVertexName2", od1, 66);
List<OutputSpec> outputSpecList = Lists.newArrayList(outputSpec1, outputSpec2);
ApplicationId appId = ApplicationId.newInstance(1000, 100);
TezDAGID tezDagId = TezDAGID.getInstance(appId, 300);
TezVertexID tezVertexId = TezVertexID.getInstance(tezDagId, 400);
TezTaskID tezTaskId = TezTaskID.getInstance(tezVertexId, 500);
TezTaskAttemptID tezTaskAttemptId = TezTaskAttemptID.getInstance(tezTaskId, 600);
TaskSpec taskSpec = new TaskSpec(tezTaskAttemptId, "dagName", "vertexName", 10, processorDescriptor, inputSpecList, outputSpecList, null);
QueryIdentifierProto queryIdentifierProto = QueryIdentifierProto.newBuilder().setApplicationIdString(appId.toString()).setAppAttemptNumber(333).setDagIndex(300).build();
SignableVertexSpec vertexProto = Converters.constructSignableVertexSpec(taskSpec, queryIdentifierProto, "", "", "hiveQueryId").build();
assertEquals("dagName", vertexProto.getDagName());
assertEquals("vertexName", vertexProto.getVertexName());
assertEquals("hiveQueryId", vertexProto.getHiveQueryId());
assertEquals(appId.toString(), vertexProto.getQueryIdentifier().getApplicationIdString());
assertEquals(tezDagId.getId(), vertexProto.getQueryIdentifier().getDagIndex());
assertEquals(333, vertexProto.getQueryIdentifier().getAppAttemptNumber());
assertEquals(tezVertexId.getId(), vertexProto.getVertexIndex());
assertEquals(processorDescriptor.getClassName(), vertexProto.getProcessorDescriptor().getClassName());
assertEquals(processorDescriptor.getUserPayload().getPayload(), vertexProto.getProcessorDescriptor().getUserPayload().getUserPayload().asReadOnlyByteBuffer());
assertEquals(2, vertexProto.getInputSpecsCount());
assertEquals(2, vertexProto.getOutputSpecsCount());
verifyInputSpecAndProto(inputSpec1, vertexProto.getInputSpecs(0));
verifyInputSpecAndProto(inputSpec2, vertexProto.getInputSpecs(1));
verifyOutputSpecAndProto(outputSpec1, vertexProto.getOutputSpecs(0));
verifyOutputSpecAndProto(outputSpec2, vertexProto.getOutputSpecs(1));
}
use of org.apache.tez.runtime.api.impl.TaskSpec in project hive by apache.
the class TestConverters method testFragmentSpecToTaskSpec.
@Test(timeout = 10000)
public void testFragmentSpecToTaskSpec() {
ByteBuffer procBb = ByteBuffer.allocate(4);
procBb.putInt(0, 200);
ByteBuffer input1Bb = ByteBuffer.allocate(4);
input1Bb.putInt(0, 300);
ByteBuffer output1Bb = ByteBuffer.allocate(4);
output1Bb.putInt(0, 400);
ApplicationId appId = ApplicationId.newInstance(1000, 100);
TezDAGID tezDagId = TezDAGID.getInstance(appId, 300);
TezVertexID tezVertexId = TezVertexID.getInstance(tezDagId, 400);
TezTaskID tezTaskId = TezTaskID.getInstance(tezVertexId, 500);
TezTaskAttemptID tezTaskAttemptId = TezTaskAttemptID.getInstance(tezTaskId, 600);
QueryIdentifierProto queryIdentifierProto = QueryIdentifierProto.newBuilder().setApplicationIdString(appId.toString()).setAppAttemptNumber(333).setDagIndex(tezDagId.getId()).build();
SignableVertexSpec.Builder builder = SignableVertexSpec.newBuilder();
builder.setQueryIdentifier(queryIdentifierProto);
builder.setHiveQueryId("hiveQueryId");
builder.setVertexIndex(tezVertexId.getId());
builder.setDagName("dagName");
builder.setVertexName("vertexName");
builder.setProcessorDescriptor(EntityDescriptorProto.newBuilder().setClassName("fakeProcessorName").setUserPayload(UserPayloadProto.newBuilder().setUserPayload(ByteString.copyFrom(procBb))));
builder.addInputSpecs(IOSpecProto.newBuilder().setConnectedVertexName("sourceVertexName1").setPhysicalEdgeCount(33).setIoDescriptor(EntityDescriptorProto.newBuilder().setClassName("input1ClassName").setUserPayload(UserPayloadProto.newBuilder().setUserPayload(ByteString.copyFrom(input1Bb)))));
builder.addInputSpecs(IOSpecProto.newBuilder().setConnectedVertexName("sourceVertexName2").setPhysicalEdgeCount(44).setIoDescriptor(EntityDescriptorProto.newBuilder().setClassName("input1ClassName").setUserPayload(UserPayloadProto.newBuilder().setUserPayload(ByteString.copyFrom(input1Bb)))));
builder.addOutputSpecs(IOSpecProto.newBuilder().setConnectedVertexName("destVertexName1").setPhysicalEdgeCount(55).setIoDescriptor(EntityDescriptorProto.newBuilder().setClassName("outputClassName").setUserPayload(UserPayloadProto.newBuilder().setUserPayload(ByteString.copyFrom(output1Bb)))));
builder.addOutputSpecs(IOSpecProto.newBuilder().setConnectedVertexName("destVertexName2").setPhysicalEdgeCount(66).setIoDescriptor(EntityDescriptorProto.newBuilder().setClassName("outputClassName").setUserPayload(UserPayloadProto.newBuilder().setUserPayload(ByteString.copyFrom(output1Bb)))));
SignableVertexSpec vertexProto = builder.build();
TaskSpec taskSpec = Converters.getTaskSpecfromProto(vertexProto, 0, 0, tezTaskAttemptId);
assertEquals("dagName", taskSpec.getDAGName());
assertEquals("vertexName", taskSpec.getVertexName());
assertEquals(tezTaskAttemptId, taskSpec.getTaskAttemptID());
assertEquals("fakeProcessorName", taskSpec.getProcessorDescriptor().getClassName());
byte[] serialized = new byte[taskSpec.getProcessorDescriptor().getUserPayload().getPayload().remaining()];
taskSpec.getProcessorDescriptor().getUserPayload().getPayload().get(serialized);
assertArrayEquals(procBb.array(), serialized);
assertEquals(2, taskSpec.getInputs().size());
assertEquals(2, taskSpec.getOutputs().size());
verifyInputSpecAndProto(taskSpec.getInputs().get(0), vertexProto.getInputSpecs(0));
verifyInputSpecAndProto(taskSpec.getInputs().get(1), vertexProto.getInputSpecs(1));
verifyOutputSpecAndProto(taskSpec.getOutputs().get(0), vertexProto.getOutputSpecs(0));
verifyOutputSpecAndProto(taskSpec.getOutputs().get(1), vertexProto.getOutputSpecs(1));
}
use of org.apache.tez.runtime.api.impl.TaskSpec in project hive by apache.
the class Converters method getTaskSpecfromProto.
public static TaskSpec getTaskSpecfromProto(SignableVertexSpec vectorProto, int fragmentNum, int attemptNum, TezTaskAttemptID attemptId) {
TezTaskAttemptID taskAttemptID = attemptId != null ? attemptId : createTaskAttemptId(vectorProto.getQueryIdentifier(), vectorProto.getVertexIndex(), fragmentNum, attemptNum);
ProcessorDescriptor processorDescriptor = null;
if (vectorProto.hasProcessorDescriptor()) {
processorDescriptor = convertProcessorDescriptorFromProto(vectorProto.getProcessorDescriptor());
}
List<InputSpec> inputSpecList = new ArrayList<InputSpec>(vectorProto.getInputSpecsCount());
if (vectorProto.getInputSpecsCount() > 0) {
for (IOSpecProto inputSpecProto : vectorProto.getInputSpecsList()) {
inputSpecList.add(getInputSpecFromProto(inputSpecProto));
}
}
List<OutputSpec> outputSpecList = new ArrayList<OutputSpec>(vectorProto.getOutputSpecsCount());
if (vectorProto.getOutputSpecsCount() > 0) {
for (IOSpecProto outputSpecProto : vectorProto.getOutputSpecsList()) {
outputSpecList.add(getOutputSpecFromProto(outputSpecProto));
}
}
List<GroupInputSpec> groupInputSpecs = new ArrayList<GroupInputSpec>(vectorProto.getGroupedInputSpecsCount());
if (vectorProto.getGroupedInputSpecsCount() > 0) {
for (GroupInputSpecProto groupInputSpecProto : vectorProto.getGroupedInputSpecsList()) {
groupInputSpecs.add(getGroupInputSpecFromProto(groupInputSpecProto));
}
}
TaskSpec taskSpec = new TaskSpec(taskAttemptID, vectorProto.getDagName(), vectorProto.getVertexName(), vectorProto.getVertexParallelism(), processorDescriptor, inputSpecList, outputSpecList, groupInputSpecs);
return taskSpec;
}
Aggregations