
Example 1 with UserPayload

Use of org.apache.tez.dag.api.UserPayload in project hive by apache.

From the class TestCustomPartitionVertex, method testGetBytePayload.

@Test(timeout = 5000)
public void testGetBytePayload() throws IOException {
    int numBuckets = 10;
    VertexManagerPluginContext context = mock(VertexManagerPluginContext.class);
    CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, TezWork.VertexType.INITIALIZED_EDGES);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    UserPayload payload = UserPayload.create(ByteBuffer.wrap(dob.getData()));
    when(context.getUserPayload()).thenReturn(payload);
    CustomPartitionVertex vm = new CustomPartitionVertex(context);
    vm.initialize();
    // prepare empty routing table
    Multimap<Integer, Integer> routingTable = HashMultimap.<Integer, Integer>create();
    payload = vm.getBytePayload(routingTable);
    // get conf from user payload
    CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration();
    DataInputByteBuffer dibb = new DataInputByteBuffer();
    dibb.reset(payload.getPayload());
    edgeConf.readFields(dibb);
    assertEquals(numBuckets, edgeConf.getNumBuckets());
}
Also used: VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext), UserPayload(org.apache.tez.dag.api.UserPayload), DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer), Test(org.junit.Test)
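
The round trip this test exercises is the standard Hadoop Writable pattern: write through a DataOutputBuffer, wrap the bytes in a UserPayload, then read them back through a DataInputByteBuffer. A minimal standalone sketch of that pattern (the helper class name is illustrative, not part of Hive or Tez):

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataInputByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;
import org.apache.tez.dag.api.UserPayload;

public class WritablePayloadRoundTrip {

    // Serialize any Writable into a UserPayload.
    public static UserPayload toPayload(Writable writable) throws IOException {
        DataOutputBuffer dob = new DataOutputBuffer();
        writable.write(dob);
        // Wrap only the valid region; getData() returns the whole backing array.
        return UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength()));
    }

    // Populate an empty Writable instance from a UserPayload.
    public static void fromPayload(UserPayload payload, Writable target) throws IOException {
        DataInputByteBuffer dibb = new DataInputByteBuffer();
        dibb.reset(payload.getPayload());
        target.readFields(dibb);
    }
}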

Example 2 with UserPayload

Use of org.apache.tez.dag.api.UserPayload in project hive by apache.

From the class CustomPartitionVertex, method processAllEvents.

private void processAllEvents(String inputName, Multimap<Integer, InputSplit> bucketToGroupedSplitMap, boolean secondLevelGroupingDone) throws IOException {
    int totalInputsCount = 0;
    List<Integer> numSplitsForTask = new ArrayList<Integer>();
    for (Entry<Integer, Collection<InputSplit>> entry : bucketToGroupedSplitMap.asMap().entrySet()) {
        int bucketNum = entry.getKey();
        Collection<InputSplit> initialSplits = entry.getValue();
        finalSplits.addAll(initialSplits);
        for (InputSplit inputSplit : initialSplits) {
            bucketToTaskMap.put(bucketNum, taskCount);
            if (secondLevelGroupingDone) {
                TezGroupedSplit groupedSplit = (TezGroupedSplit) inputSplit;
                numSplitsForTask.add(groupedSplit.getGroupedSplits().size());
                totalInputsCount += groupedSplit.getGroupedSplits().size();
            } else {
                numSplitsForTask.add(1);
                totalInputsCount += 1;
            }
            taskCount++;
        }
    }
    inputNameInputSpecMap.put(inputName, InputSpecUpdate.createPerTaskInputSpecUpdate(numSplitsForTask));
    // Construct the EdgeManager descriptor to be used by all edges which need
    // the routing table.
    EdgeManagerPluginDescriptor hiveEdgeManagerDesc = null;
    if ((vertexType == VertexType.MULTI_INPUT_INITIALIZED_EDGES) || (vertexType == VertexType.INITIALIZED_EDGES)) {
        hiveEdgeManagerDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
        UserPayload payload = getBytePayload(bucketToTaskMap);
        hiveEdgeManagerDesc.setUserPayload(payload);
    }
    // Replace the edge manager for all vertices which have routing type custom.
    for (Entry<String, EdgeProperty> edgeEntry : context.getInputVertexEdgeProperties().entrySet()) {
        if (edgeEntry.getValue().getDataMovementType() == DataMovementType.CUSTOM && edgeEntry.getValue().getEdgeManagerDescriptor().getClassName().equals(CustomPartitionEdge.class.getName())) {
            emMap.put(edgeEntry.getKey(), hiveEdgeManagerDesc);
        }
    }
    LOG.info("Task count is " + taskCount + " for input name: " + inputName);
    List<InputDataInformationEvent> taskEvents = Lists.newArrayListWithCapacity(totalInputsCount);
    // Re-serialize the splits after grouping.
    int count = 0;
    for (InputSplit inputSplit : finalSplits) {
        if (secondLevelGroupingDone) {
            TezGroupedSplit tezGroupedSplit = (TezGroupedSplit) inputSplit;
            for (InputSplit subSplit : tezGroupedSplit.getGroupedSplits()) {
                if (!(subSplit instanceof TezGroupedSplit)) {
                    throw new IOException("Unexpected split type found: " + subSplit.getClass().getCanonicalName());
                }
                MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(subSplit);
                InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count, serializedSplit.toByteString().asReadOnlyByteBuffer());
                diEvent.setTargetIndex(count);
                taskEvents.add(diEvent);
            }
        } else {
            MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(inputSplit);
            InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count, serializedSplit.toByteString().asReadOnlyByteBuffer());
            diEvent.setTargetIndex(count);
            taskEvents.add(diEvent);
        }
        count++;
    }
    // Set the actual events for the tasks.
    LOG.info("For input name: " + inputName + " task events size is " + taskEvents.size());
    context.addRootInputEvents(inputName, taskEvents);
    if (!inputToGroupedSplitMap.isEmpty()) {
        for (Entry<String, Multimap<Integer, InputSplit>> entry : inputToGroupedSplitMap.entrySet()) {
            processAllSideEvents(entry.getKey(), entry.getValue());
        }
        setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
        inputToGroupedSplitMap.clear();
    }
    // Only done when this is purely a bucket map join, with no SMB join.
    if (numInputsAffectingRootInputSpecUpdate == 1) {
        setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
    }
}
Also used: UserPayload(org.apache.tez.dag.api.UserPayload), ArrayList(java.util.ArrayList), TezGroupedSplit(org.apache.hadoop.mapred.split.TezGroupedSplit), ByteString(com.google.protobuf.ByteString), IOException(java.io.IOException), VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint), ArrayListMultimap(com.google.common.collect.ArrayListMultimap), HashMultimap(com.google.common.collect.HashMultimap), LinkedListMultimap(com.google.common.collect.LinkedListMultimap), Multimap(com.google.common.collect.Multimap), EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor), Collection(java.util.Collection), EdgeProperty(org.apache.tez.dag.api.EdgeProperty), InputSplit(org.apache.hadoop.mapred.InputSplit), InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent), MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)
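
Both this method and the test in Example 1 call getBytePayload, which is not shown here. A plausible reconstruction under two assumptions: CustomEdgeConfiguration implements Writable and has a (bucket count, routing table) constructor, and the bucket count is a field populated in initialize() from the CustomVertexConfiguration payload, which would explain why Example 1 reads back numBuckets == 10 even with an empty routing table:

import java.io.IOException;
import java.nio.ByteBuffer;
import com.google.common.collect.Multimap;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.tez.dag.api.UserPayload;

// Sketch only: numBuckets is assumed to be the vertex field set in
// initialize() from the CustomVertexConfiguration payload (see Example 1).
public UserPayload getBytePayload(Multimap<Integer, Integer> routingTable) throws IOException {
    // Assumed constructor: (bucket count, bucket-to-task routing table).
    CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration(numBuckets, routingTable);
    DataOutputBuffer dob = new DataOutputBuffer();
    edgeConf.write(dob);
    return UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength()));
}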

Example 3 with UserPayload

Use of org.apache.tez.dag.api.UserPayload in project hive by apache.

From the class HivePreWarmProcessor, method initialize.

@Override
public void initialize() throws Exception {
    UserPayload userPayload = getContext().getUserPayload();
    this.conf = TezUtils.createConfFromUserPayload(userPayload);
}
Also used: UserPayload(org.apache.tez.dag.api.UserPayload)
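
TezUtils also provides the inverse, createUserPayloadFromConf, so a Configuration set on the DAG client side survives the trip into the processor. A minimal sketch of the full round trip (the configuration key is illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.UserPayload;

public class ConfPayloadRoundTrip {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration(false);
        conf.set("example.key", "example.value"); // illustrative key, not a real Hive setting
        // Client side: serialize the Configuration into a UserPayload.
        UserPayload payload = TezUtils.createUserPayloadFromConf(conf);
        // Processor side: recover it, mirroring initialize() above.
        Configuration restored = TezUtils.createConfFromUserPayload(payload);
        System.out.println(restored.get("example.key")); // prints example.value
    }
}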

Example 4 with UserPayload

Use of org.apache.tez.dag.api.UserPayload in project hive by apache.

From the class TestConverters, method testTaskSpecToFragmentSpec.

@Test(timeout = 10000)
public void testTaskSpecToFragmentSpec() {
    ByteBuffer procBb = ByteBuffer.allocate(4);
    procBb.putInt(0, 200);
    UserPayload processorPayload = UserPayload.create(procBb);
    ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create("fakeProcessorName").setUserPayload(processorPayload);
    ByteBuffer input1Bb = ByteBuffer.allocate(4);
    input1Bb.putInt(0, 300);
    UserPayload input1Payload = UserPayload.create(input1Bb);
    InputDescriptor id1 = InputDescriptor.create("input1ClassName").setUserPayload(input1Payload);
    InputSpec inputSpec1 = new InputSpec("sourceVertexName1", id1, 33);
    InputSpec inputSpec2 = new InputSpec("sourceVertexName2", id1, 44);
    List<InputSpec> inputSpecList = Lists.newArrayList(inputSpec1, inputSpec2);
    ByteBuffer output1Bb = ByteBuffer.allocate(4);
    output1Bb.putInt(0, 400);
    UserPayload output1Payload = UserPayload.create(output1Bb);
    OutputDescriptor od1 = OutputDescriptor.create("output1ClassName").setUserPayload(output1Payload);
    OutputSpec outputSpec1 = new OutputSpec("destVertexName1", od1, 55);
    OutputSpec outputSpec2 = new OutputSpec("destVertexName2", od1, 66);
    List<OutputSpec> outputSpecList = Lists.newArrayList(outputSpec1, outputSpec2);
    ApplicationId appId = ApplicationId.newInstance(1000, 100);
    TezDAGID tezDagId = TezDAGID.getInstance(appId, 300);
    TezVertexID tezVertexId = TezVertexID.getInstance(tezDagId, 400);
    TezTaskID tezTaskId = TezTaskID.getInstance(tezVertexId, 500);
    TezTaskAttemptID tezTaskAttemptId = TezTaskAttemptID.getInstance(tezTaskId, 600);
    TaskSpec taskSpec = new TaskSpec(tezTaskAttemptId, "dagName", "vertexName", 10, processorDescriptor, inputSpecList, outputSpecList, null);
    QueryIdentifierProto queryIdentifierProto = QueryIdentifierProto.newBuilder().setApplicationIdString(appId.toString()).setAppAttemptNumber(333).setDagIndex(300).build();
    SignableVertexSpec vertexProto = Converters.constructSignableVertexSpec(taskSpec, queryIdentifierProto, "", "", "hiveQueryId").build();
    assertEquals("dagName", vertexProto.getDagName());
    assertEquals("vertexName", vertexProto.getVertexName());
    assertEquals("hiveQueryId", vertexProto.getHiveQueryId());
    assertEquals(appId.toString(), vertexProto.getQueryIdentifier().getApplicationIdString());
    assertEquals(tezDagId.getId(), vertexProto.getQueryIdentifier().getDagIndex());
    assertEquals(333, vertexProto.getQueryIdentifier().getAppAttemptNumber());
    assertEquals(tezVertexId.getId(), vertexProto.getVertexIndex());
    assertEquals(processorDescriptor.getClassName(), vertexProto.getProcessorDescriptor().getClassName());
    assertEquals(processorDescriptor.getUserPayload().getPayload(), vertexProto.getProcessorDescriptor().getUserPayload().getUserPayload().asReadOnlyByteBuffer());
    assertEquals(2, vertexProto.getInputSpecsCount());
    assertEquals(2, vertexProto.getOutputSpecsCount());
    verifyInputSpecAndProto(inputSpec1, vertexProto.getInputSpecs(0));
    verifyInputSpecAndProto(inputSpec2, vertexProto.getInputSpecs(1));
    verifyOutputSpecAndProto(outputSpec1, vertexProto.getOutputSpecs(0));
    verifyOutputSpecAndProto(outputSpec2, vertexProto.getOutputSpecs(1));
}
Also used: InputDescriptor(org.apache.tez.dag.api.InputDescriptor), UserPayload(org.apache.tez.dag.api.UserPayload), TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec), ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor), InputSpec(org.apache.tez.runtime.api.impl.InputSpec), ByteBuffer(java.nio.ByteBuffer), TezTaskID(org.apache.tez.dag.records.TezTaskID), OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor), TezDAGID(org.apache.tez.dag.records.TezDAGID), SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec), QueryIdentifierProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto), ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId), TezVertexID(org.apache.tez.dag.records.TezVertexID), OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec), TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID), Test(org.junit.Test)
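
One detail worth noting: the payload buffers above are filled with the absolute putInt(index, value) overload, which leaves the buffer position at 0, so UserPayload.create sees all four bytes. A minimal standalone sketch of the same write-then-read pattern:

import java.nio.ByteBuffer;
import org.apache.tez.dag.api.UserPayload;

public class IntPayloadRoundTrip {
    public static void main(String[] args) {
        ByteBuffer bb = ByteBuffer.allocate(4);
        // Absolute put: the position stays at 0, so the whole buffer is readable.
        bb.putInt(0, 200);
        UserPayload payload = UserPayload.create(bb);
        // Absolute get on the wrapped buffer recovers the value.
        int value = payload.getPayload().getInt(0);
        System.out.println(value); // prints 200
    }
}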

Example 5 with UserPayload

Use of org.apache.tez.dag.api.UserPayload in project hive by apache.

From the class Converters, method convertOutputDescriptorFromProto.

private static OutputDescriptor convertOutputDescriptorFromProto(EntityDescriptorProto proto) {
    String className = proto.getClassName();
    UserPayload payload = convertPayloadFromProto(proto);
    OutputDescriptor od = OutputDescriptor.create(className);
    setUserPayload(od, payload);
    return od;
}
Also used: UserPayload(org.apache.tez.dag.api.UserPayload), OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor), ByteString(com.google.protobuf.ByteString)
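
The convertPayloadFromProto helper called here is not shown. Its core job is turning the proto's ByteString payload into a UserPayload without copying; a minimal sketch, assuming the payload arrives as a ByteString (the class and method names below are illustrative):

import com.google.protobuf.ByteString;
import org.apache.tez.dag.api.UserPayload;

public final class PayloadConversion {
    private PayloadConversion() {}

    // Wrap a protobuf ByteString as a Tez UserPayload; asReadOnlyByteBuffer()
    // shares the underlying bytes rather than copying them.
    public static UserPayload payloadFromByteString(ByteString bytes) {
        return bytes == null ? null : UserPayload.create(bytes.asReadOnlyByteBuffer());
    }
}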

Aggregations

UserPayload (org.apache.tez.dag.api.UserPayload): 12
ByteString (com.google.protobuf.ByteString): 4
IOException (java.io.IOException): 4
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 3
HashMap (java.util.HashMap): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 2
InputDescriptor (org.apache.tez.dag.api.InputDescriptor): 2
OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor): 2
ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor): 2
Test (org.junit.Test): 2
ArrayListMultimap (com.google.common.collect.ArrayListMultimap): 1
HashMultimap (com.google.common.collect.HashMultimap): 1
LinkedListMultimap (com.google.common.collect.LinkedListMultimap): 1
Multimap (com.google.common.collect.Multimap): 1
FileNotFoundException (java.io.FileNotFoundException): 1
URISyntaxException (java.net.URISyntaxException): 1
ByteBuffer (java.nio.ByteBuffer): 1
ArrayList (java.util.ArrayList): 1
Collection (java.util.Collection): 1