Use of org.apache.tez.dag.api.UserPayload in project hive by apache.
In class TestCustomPartitionVertex, method testGetBytePayload:
@Test(timeout = 5000)
public void testGetBytePayload() throws IOException {
  int numBuckets = 10;
  VertexManagerPluginContext context = mock(VertexManagerPluginContext.class);
  CustomVertexConfiguration vertexConf =
      new CustomVertexConfiguration(numBuckets, TezWork.VertexType.INITIALIZED_EDGES);
  DataOutputBuffer dob = new DataOutputBuffer();
  vertexConf.write(dob);
  UserPayload payload = UserPayload.create(ByteBuffer.wrap(dob.getData()));
  when(context.getUserPayload()).thenReturn(payload);
  CustomPartitionVertex vm = new CustomPartitionVertex(context);
  vm.initialize();
  // prepare empty routing table
  Multimap<Integer, Integer> routingTable = HashMultimap.<Integer, Integer>create();
  payload = vm.getBytePayload(routingTable);
  // get conf from user payload
  CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration();
  DataInputByteBuffer dibb = new DataInputByteBuffer();
  dibb.reset(payload.getPayload());
  edgeConf.readFields(dibb);
  assertEquals(numBuckets, edgeConf.getNumBuckets());
}
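The method under test, getBytePayload, is not shown here; it serializes the routing table into a UserPayload using the same Writable mechanics the test reads back. A minimal sketch, assuming a numBuckets field populated from the CustomVertexConfiguration during initialize() and a CustomEdgeConfiguration constructor taking the bucket count plus the routing table:

// Hedged sketch: numBuckets being a field set in initialize() would explain
// why the test gets 10 back even though the routing table itself is empty.
UserPayload getBytePayload(Multimap<Integer, Integer> routingTable) throws IOException {
  CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration(numBuckets, routingTable);
  DataOutputBuffer dob = new DataOutputBuffer();
  edgeConf.write(dob); // Writable round-trip, matched by readFields() in the test
  return UserPayload.create(ByteBuffer.wrap(dob.getData()));
}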
Use of org.apache.tez.dag.api.UserPayload in project hive by apache.
In class CustomPartitionVertex, method processAllEvents:
private void processAllEvents(String inputName,
    Multimap<Integer, InputSplit> bucketToGroupedSplitMap,
    boolean secondLevelGroupingDone) throws IOException {
  int totalInputsCount = 0;
  List<Integer> numSplitsForTask = new ArrayList<Integer>();
  for (Entry<Integer, Collection<InputSplit>> entry : bucketToGroupedSplitMap.asMap().entrySet()) {
    int bucketNum = entry.getKey();
    Collection<InputSplit> initialSplits = entry.getValue();
    finalSplits.addAll(initialSplits);
    for (InputSplit inputSplit : initialSplits) {
      bucketToTaskMap.put(bucketNum, taskCount);
      if (secondLevelGroupingDone) {
        TezGroupedSplit groupedSplit = (TezGroupedSplit) inputSplit;
        numSplitsForTask.add(groupedSplit.getGroupedSplits().size());
        totalInputsCount += groupedSplit.getGroupedSplits().size();
      } else {
        numSplitsForTask.add(1);
        totalInputsCount += 1;
      }
      taskCount++;
    }
  }
  inputNameInputSpecMap.put(inputName,
      InputSpecUpdate.createPerTaskInputSpecUpdate(numSplitsForTask));
  // Construct the EdgeManager descriptor to be used by all edges which need
  // the routing table.
  EdgeManagerPluginDescriptor hiveEdgeManagerDesc = null;
  if ((vertexType == VertexType.MULTI_INPUT_INITIALIZED_EDGES)
      || (vertexType == VertexType.INITIALIZED_EDGES)) {
    hiveEdgeManagerDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
    UserPayload payload = getBytePayload(bucketToTaskMap);
    hiveEdgeManagerDesc.setUserPayload(payload);
  }
  // Replace the edge manager for all vertices which have routing type custom.
  for (Entry<String, EdgeProperty> edgeEntry : context.getInputVertexEdgeProperties().entrySet()) {
    if (edgeEntry.getValue().getDataMovementType() == DataMovementType.CUSTOM
        && edgeEntry.getValue().getEdgeManagerDescriptor().getClassName()
            .equals(CustomPartitionEdge.class.getName())) {
      emMap.put(edgeEntry.getKey(), hiveEdgeManagerDesc);
    }
  }
  LOG.info("Task count is " + taskCount + " for input name: " + inputName);
  List<InputDataInformationEvent> taskEvents = Lists.newArrayListWithCapacity(totalInputsCount);
  // Re-serialize the splits after grouping.
  int count = 0;
  for (InputSplit inputSplit : finalSplits) {
    if (secondLevelGroupingDone) {
      TezGroupedSplit tezGroupedSplit = (TezGroupedSplit) inputSplit;
      for (InputSplit subSplit : tezGroupedSplit.getGroupedSplits()) {
        if (!(subSplit instanceof TezGroupedSplit)) {
          throw new IOException("Unexpected split type found: "
              + subSplit.getClass().getCanonicalName());
        }
        MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(subSplit);
        InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(
            count, serializedSplit.toByteString().asReadOnlyByteBuffer());
        diEvent.setTargetIndex(count);
        taskEvents.add(diEvent);
      }
    } else {
      MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(inputSplit);
      InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(
          count, serializedSplit.toByteString().asReadOnlyByteBuffer());
      diEvent.setTargetIndex(count);
      taskEvents.add(diEvent);
    }
    count++;
  }
  // Set the actual events for the tasks.
  LOG.info("For input name: " + inputName + " task events size is " + taskEvents.size());
  context.addRootInputEvents(inputName, taskEvents);
  if (!inputToGroupedSplitMap.isEmpty()) {
    for (Entry<String, Multimap<Integer, InputSplit>> entry : inputToGroupedSplitMap.entrySet()) {
      processAllSideEvents(entry.getKey(), entry.getValue());
    }
    setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
    inputToGroupedSplitMap.clear();
  }
  // Only done for a bucket map join alone, with no SMB join involved.
  if (numInputsAffectingRootInputSpecUpdate == 1) {
    setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
  }
}
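processAllEvents ships the routing table to CustomPartitionEdge through the descriptor's UserPayload. On the receiving side, the edge manager presumably reverses the round-trip in its initialize(); a sketch under that assumption (not a verbatim copy of the Hive source):

// Inside CustomPartitionEdge: read the CustomEdgeConfiguration back out of
// the UserPayload that processAllEvents attached to the edge descriptor.
@Override
public void initialize() {
  UserPayload payload = getContext().getUserPayload();
  CustomEdgeConfiguration conf = new CustomEdgeConfiguration();
  DataInputByteBuffer dibb = new DataInputByteBuffer();
  dibb.reset(payload.getPayload());
  try {
    conf.readFields(dibb);
  } catch (IOException e) {
    throw new RuntimeException("Failed to deserialize custom edge configuration", e);
  }
}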
Use of org.apache.tez.dag.api.UserPayload in project hive by apache.
In class HivePreWarmProcessor, method initialize:
@Override
public void initialize() throws Exception {
  // Decode the processor configuration that was shipped in the user payload.
  UserPayload userPayload = getContext().getUserPayload();
  this.conf = TezUtils.createConfFromUserPayload(userPayload);
}
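Going the other way, a Configuration is packed into a UserPayload with TezUtils.createUserPayloadFromConf when the DAG is assembled. A minimal sketch of that producing side (buildPreWarmDescriptor is a hypothetical helper, not from the Hive source):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.UserPayload;

// Hypothetical helper: serialize a Configuration into a UserPayload and attach
// it to the processor descriptor, so initialize() above can decode it again.
static ProcessorDescriptor buildPreWarmDescriptor(Configuration conf) throws IOException {
  UserPayload payload = TezUtils.createUserPayloadFromConf(conf);
  return ProcessorDescriptor.create(HivePreWarmProcessor.class.getName())
      .setUserPayload(payload);
}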
Use of org.apache.tez.dag.api.UserPayload in project hive by apache.
In class TestConverters, method testTaskSpecToFragmentSpec:
@Test(timeout = 10000)
public void testTaskSpecToFragmentSpec() {
  ByteBuffer procBb = ByteBuffer.allocate(4);
  procBb.putInt(0, 200);
  UserPayload processorPayload = UserPayload.create(procBb);
  ProcessorDescriptor processorDescriptor =
      ProcessorDescriptor.create("fakeProcessorName").setUserPayload(processorPayload);
  ByteBuffer input1Bb = ByteBuffer.allocate(4);
  input1Bb.putInt(0, 300);
  UserPayload input1Payload = UserPayload.create(input1Bb);
  InputDescriptor id1 = InputDescriptor.create("input1ClassName").setUserPayload(input1Payload);
  InputSpec inputSpec1 = new InputSpec("sourceVertexName1", id1, 33);
  InputSpec inputSpec2 = new InputSpec("sourceVertexName2", id1, 44);
  List<InputSpec> inputSpecList = Lists.newArrayList(inputSpec1, inputSpec2);
  ByteBuffer output1Bb = ByteBuffer.allocate(4);
  output1Bb.putInt(0, 400);
  UserPayload output1Payload = UserPayload.create(output1Bb);
  OutputDescriptor od1 = OutputDescriptor.create("output1ClassName").setUserPayload(output1Payload);
  OutputSpec outputSpec1 = new OutputSpec("destVertexName1", od1, 55);
  OutputSpec outputSpec2 = new OutputSpec("destVertexName2", od1, 66);
  List<OutputSpec> outputSpecList = Lists.newArrayList(outputSpec1, outputSpec2);
  ApplicationId appId = ApplicationId.newInstance(1000, 100);
  TezDAGID tezDagId = TezDAGID.getInstance(appId, 300);
  TezVertexID tezVertexId = TezVertexID.getInstance(tezDagId, 400);
  TezTaskID tezTaskId = TezTaskID.getInstance(tezVertexId, 500);
  TezTaskAttemptID tezTaskAttemptId = TezTaskAttemptID.getInstance(tezTaskId, 600);
  TaskSpec taskSpec = new TaskSpec(tezTaskAttemptId, "dagName", "vertexName", 10,
      processorDescriptor, inputSpecList, outputSpecList, null);
  QueryIdentifierProto queryIdentifierProto = QueryIdentifierProto.newBuilder()
      .setApplicationIdString(appId.toString())
      .setAppAttemptNumber(333)
      .setDagIndex(300)
      .build();
  SignableVertexSpec vertexProto = Converters.constructSignableVertexSpec(
      taskSpec, queryIdentifierProto, "", "", "hiveQueryId").build();
  assertEquals("dagName", vertexProto.getDagName());
  assertEquals("vertexName", vertexProto.getVertexName());
  assertEquals("hiveQueryId", vertexProto.getHiveQueryId());
  assertEquals(appId.toString(), vertexProto.getQueryIdentifier().getApplicationIdString());
  assertEquals(tezDagId.getId(), vertexProto.getQueryIdentifier().getDagIndex());
  assertEquals(333, vertexProto.getQueryIdentifier().getAppAttemptNumber());
  assertEquals(tezVertexId.getId(), vertexProto.getVertexIndex());
  assertEquals(processorDescriptor.getClassName(),
      vertexProto.getProcessorDescriptor().getClassName());
  assertEquals(processorDescriptor.getUserPayload().getPayload(),
      vertexProto.getProcessorDescriptor().getUserPayload().getUserPayload().asReadOnlyByteBuffer());
  assertEquals(2, vertexProto.getInputSpecsCount());
  assertEquals(2, vertexProto.getOutputSpecsCount());
  verifyInputSpecAndProto(inputSpec1, vertexProto.getInputSpecs(0));
  verifyInputSpecAndProto(inputSpec2, vertexProto.getInputSpecs(1));
  verifyOutputSpecAndProto(outputSpec1, vertexProto.getOutputSpecs(0));
  verifyOutputSpecAndProto(outputSpec2, vertexProto.getOutputSpecs(1));
}
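verifyInputSpecAndProto and verifyOutputSpecAndProto are helpers in the same test class and are not shown here. A plausible sketch of the input-side helper, reusing the proto accessors already visible above (the exact fields compared, and the IOSpecProto parameter type, are assumptions):

// Hedged sketch; the real helper may compare the fields differently.
private void verifyInputSpecAndProto(InputSpec inputSpec, IOSpecProto proto) {
  assertEquals(inputSpec.getSourceVertexName(), proto.getConnectedVertexName());
  assertEquals(inputSpec.getPhysicalEdgeCount(), proto.getPhysicalEdgeCount());
  assertEquals(inputSpec.getInputDescriptor().getClassName(),
      proto.getIoDescriptor().getClassName());
  assertEquals(inputSpec.getInputDescriptor().getUserPayload().getPayload(),
      proto.getIoDescriptor().getUserPayload().getUserPayload().asReadOnlyByteBuffer());
}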
Use of org.apache.tez.dag.api.UserPayload in project hive by apache.
In class Converters, method convertOutputDescriptorFromProto:
private static OutputDescriptor convertOutputDescriptorFromProto(EntityDescriptorProto proto) {
  String className = proto.getClassName();
  UserPayload payload = convertPayloadFromProto(proto);
  OutputDescriptor od = OutputDescriptor.create(className);
  setUserPayload(od, payload);
  return od;
}
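convertPayloadFromProto and setUserPayload are private helpers in Converters that this snippet leans on. A minimal sketch of their likely shape, assuming the proto nests the bytes in the same UserPayloadProto structure the test above reads (the actual implementation may also carry a payload version):

// Hedged sketches of the two helpers used above.
private static UserPayload convertPayloadFromProto(EntityDescriptorProto proto) {
  if (proto.hasUserPayload() && proto.getUserPayload().hasUserPayload()) {
    return UserPayload.create(
        proto.getUserPayload().getUserPayload().asReadOnlyByteBuffer());
  }
  return null;
}

private static void setUserPayload(EntityDescriptor<?> entity, UserPayload payload) {
  if (payload != null) {
    entity.setUserPayload(payload);
  }
}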