Search in sources :

Example 11 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class ProtoConverters method convertRootInputDataInformationEventFromProto.

/**
 * Rebuilds an {@link InputDataInformationEvent} from its protobuf representation.
 *
 * <p>The source index and target index are copied from the proto. The user payload is passed on as
 * a read-only buffer when the proto carries one; otherwise {@code null} is passed as the payload.
 *
 * @param proto the protobuf message to convert
 * @return the event created from {@code proto}, with its target index set
 */
public static InputDataInformationEvent convertRootInputDataInformationEventFromProto(EventProtos.RootInputDataInformationEventProto proto) {
    final int sourceIndex = proto.getSourceIndex();
    // An absent payload is forwarded as null rather than as an empty buffer.
    final java.nio.ByteBuffer payload = proto.hasUserPayload() ? proto.getUserPayload().asReadOnlyByteBuffer() : null;
    final InputDataInformationEvent converted = InputDataInformationEvent.createWithSerializedPayload(sourceIndex, payload);
    converted.setTargetIndex(proto.getTargetIndex());
    return converted;
}
Also used : InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 12 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class MRInputAMSplitGenerator method initialize.

/**
 * Parses the MRInput user payload, generates the input splits in memory and returns the events
 * describing them.
 *
 * <p>The steps are:
 * <ol>
 *   <li>parse the input user payload and rebuild the {@link Configuration} from it;</li>
 *   <li>read from that configuration whether split payloads are sent in serialized form;</li>
 *   <li>compute the number of tasks to ask for as
 *       {@code (totalResource * waves) / taskResource}, truncated to an {@code int};</li>
 *   <li>generate the splits, passing the desired task count only when grouping is enabled
 *       ({@code 0} otherwise);</li>
 *   <li>build the resulting event list.</li>
 * </ol>
 * Each timed phase is logged at debug level.
 *
 * @return one {@link InputConfigureVertexTasksEvent} followed by one
 *         {@link InputDataInformationEvent} per split
 * @throws Exception if any of the called helpers fails
 */
@Override
public List<Event> initialize() throws Exception {
    StopWatch sw = new StopWatch().start();
    MRInputUserPayloadProto userPayloadProto = MRInputHelpers.parseMRInputPayload(getContext().getInputUserPayload());
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time to parse MRInput payload into prot: " + sw.now(TimeUnit.MILLISECONDS));
    }
    sw.reset().start();
    Configuration conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());
    sendSerializedEvents = conf.getBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Emitting serialized splits: " + sendSerializedEvents + " for input " + getContext().getInputName());
        LOG.debug("Time converting ByteString to configuration: " + sw.now(TimeUnit.MILLISECONDS));
    }
    sw.reset().start();
    int totalResource = getContext().getTotalAvailableResource().getMemory();
    int taskResource = getContext().getVertexTaskResource().getMemory();
    float waves = conf.getFloat(TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES, TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);
    // Float arithmetic; the result is truncated towards zero by the int cast.
    int numTasks = (int) ((totalResource * waves) / taskResource);
    boolean groupSplits = userPayloadProto.getGroupingEnabled();
    boolean sortSplits = userPayloadProto.getSortSplitsEnabled();
    LOG.info("Input " + getContext().getInputName() + " asking for " + numTasks + " tasks. Headroom: " + totalResource + ". Task Resource: " + taskResource + ". waves: " + waves + ". groupingEnabled: " + groupSplits + ". SortSplitsEnabled: " + sortSplits);
    // Read all credentials into the credentials instance stored in JobConf.
    JobConf jobConf = new JobConf(conf);
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    // The desired task count is only passed on when grouping is enabled.
    InputSplitInfoMem inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, groupSplits, sortSplits, groupSplits ? numTasks : 0);
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time to create splits to mem: " + sw.now(TimeUnit.MILLISECONDS));
    }
    return buildSplitEvents(sendSerializedEvents, inputSplitInfo);
}

/**
 * Builds the event list for the generated splits: a single vertex-configuration event first,
 * then one data-information event per split, indexed from 0 in iteration order.
 *
 * @param serialized whether each split is attached as a serialized payload ({@code true}) or as
 *        the split object itself ({@code false})
 * @param inputSplitInfo the in-memory split information to turn into events
 * @return the events, configuration event first
 */
private List<Event> buildSplitEvents(boolean serialized, InputSplitInfoMem inputSplitInfo) {
    List<Event> events = Lists.newArrayListWithCapacity(inputSplitInfo.getNumTasks() + 1);
    InputConfigureVertexTasksEvent configureVertexEvent = InputConfigureVertexTasksEvent.create(inputSplitInfo.getNumTasks(), VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate());
    events.add(configureVertexEvent);
    int count = 0;
    if (serialized) {
        MRSplitsProto splitsProto = inputSplitInfo.getSplitsProto();
        for (MRSplitProto mrSplit : splitsProto.getSplitsList()) {
            // The serialized split is handed over as a read-only ByteBuffer rather than a raw byte[].
            events.add(InputDataInformationEvent.createWithSerializedPayload(count++, mrSplit.toByteString().asReadOnlyByteBuffer()));
        }
    } else if (inputSplitInfo.holdsNewFormatSplits()) {
        for (org.apache.hadoop.mapreduce.InputSplit split : inputSplitInfo.getNewFormatSplits()) {
            events.add(InputDataInformationEvent.createWithObjectPayload(count++, split));
        }
    } else {
        for (org.apache.hadoop.mapred.InputSplit split : inputSplitInfo.getOldFormatSplits()) {
            events.add(InputDataInformationEvent.createWithObjectPayload(count++, split));
        }
    }
    return events;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) StopWatch(org.apache.tez.util.StopWatch) MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputSplitInfoMem(org.apache.tez.mapreduce.hadoop.InputSplitInfoMem) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) JobConf(org.apache.hadoop.mapred.JobConf) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 13 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project hive by apache.

the class HiveSplitGenerator method createEventList.

/**
 * Turns the generated split information into the list of events to emit.
 *
 * <p>The list starts with one {@link InputConfigureVertexTasksEvent}, followed by one
 * {@link InputDataInformationEvent} per split, indexed from 0 in iteration order.
 *
 * @param sendSerializedEvents {@code true} to attach each split as a serialized payload taken
 *        from the splits proto; {@code false} to attach the old-format split objects directly
 * @param inputSplitInfo the in-memory split information
 * @return the events, configuration event first
 */
private List<Event> createEventList(boolean sendSerializedEvents, InputSplitInfoMem inputSplitInfo) {
    final int taskCount = inputSplitInfo.getNumTasks();
    // One slot per task plus one for the vertex-configuration event.
    List<Event> result = Lists.newArrayListWithCapacity(taskCount + 1);
    result.add(InputConfigureVertexTasksEvent.create(taskCount, VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate()));

    int index = 0;
    if (!sendSerializedEvents) {
        for (org.apache.hadoop.mapred.InputSplit split : inputSplitInfo.getOldFormatSplits()) {
            result.add(InputDataInformationEvent.createWithObjectPayload(index++, split));
        }
        return result;
    }

    for (MRSplitProto serializedSplit : inputSplitInfo.getSplitsProto().getSplitsList()) {
        result.add(InputDataInformationEvent.createWithSerializedPayload(index++, serializedSplit.toByteString().asReadOnlyByteBuffer()));
    }
    return result;
}
Also used : MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputSplit(org.apache.hadoop.mapred.InputSplit) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 14 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project hive by apache.

the class GenericUDTFGetSplits method makeEventBytes.

/**
 * Wraps an input data event into a signable message and returns it, signed when a signer is given.
 *
 * <p>The vertex is required to have exactly one input; that input's name is used when creating
 * the signable event.
 *
 * @param wx the vertex whose single input the event belongs to
 * @param vertexName the vertex name passed on to the signable event
 * @param event the event to wrap; it is cast to {@link InputDataInformationEvent}
 * @param signer the signer to use, or {@code null} to return the serialized message without
 *        calling a signer
 * @return the message produced by the signer, or a {@link SignedMessage} holding only the
 *         serialized bytes when {@code signer} is {@code null}
 * @throws IOException declared for the serialization/signing calls
 */
private SignedMessage makeEventBytes(Vertex wx, String vertexName, Event event, LlapSigner signer) throws IOException {
    assert event instanceof InputDataInformationEvent;
    List<RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>> vertexInputs = TaskSpecBuilder.getVertexInputs(wx);
    Preconditions.checkState(vertexInputs.size() == 1);

    InputDataInformationEvent dataEvent = (InputDataInformationEvent) event;
    String inputName = vertexInputs.get(0).getName();
    Signable signable = NotTezEventHelper.createSignableNotTezEvent(dataEvent, vertexName, inputName);

    if (signer == null) {
        // No signer: return the serialized message as-is.
        SignedMessage unsigned = new SignedMessage();
        unsigned.message = signable.serialize();
        return unsigned;
    }
    return signer.serializeAndSign(signable);
}
Also used : RootInputLeafOutput(org.apache.tez.dag.api.RootInputLeafOutput) SignedMessage(org.apache.hadoop.hive.llap.security.LlapSigner.SignedMessage) Signable(org.apache.hadoop.hive.llap.security.LlapSigner.Signable) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 15 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project hive by apache.

the class NotTezEventHelper method toTezEvent.

/**
 * Converts a {@link NotTezEvent} into a {@link TezEvent} carrying an input data event.
 *
 * <p>Source and destination metadata both use the INPUT producer/consumer type and the vertex
 * name from {@code nte}; the source uses the literal {@code "NULL_VERTEX"} where the destination
 * uses the destination input name. The event itself is parsed from the embedded proto bytes, and
 * the resulting {@link TezEvent} is stamped with the current system time.
 *
 * @param nte the message to convert
 * @return the TezEvent with its destination info set
 * @throws InvalidProtocolBufferException if the embedded event bytes cannot be parsed
 */
public static TezEvent toTezEvent(NotTezEvent nte) throws InvalidProtocolBufferException {
    final EventMetaData.EventProducerConsumerType inputType = EventMetaData.EventProducerConsumerType.INPUT;
    final EventMetaData source = new EventMetaData(inputType, nte.getVertexName(), "NULL_VERTEX", null);
    final EventMetaData destination = new EventMetaData(inputType, nte.getVertexName(), nte.getDestInputName(), null);

    final RootInputDataInformationEventProto eventProto = RootInputDataInformationEventProto.parseFrom(nte.getInputEventProtoBytes());
    final InputDataInformationEvent dataEvent = ProtoConverters.convertRootInputDataInformationEventFromProto(eventProto);

    final TezEvent result = new TezEvent(dataEvent, source, System.currentTimeMillis());
    result.setDestinationInfo(destination);
    return result;
}
Also used : NotTezEvent(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.NotTezEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Aggregations

InputDataInformationEvent (org.apache.tez.runtime.api.events.InputDataInformationEvent): 22, Event (org.apache.tez.runtime.api.Event): 16, MRSplitProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto): 10, Test (org.junit.Test): 10, Configuration (org.apache.hadoop.conf.Configuration): 8, InputSplit (org.apache.hadoop.mapred.InputSplit): 7, JobConf (org.apache.hadoop.mapred.JobConf): 5, InputContext (org.apache.tez.runtime.api.InputContext): 5, InputConfigureVertexTasksEvent (org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent): 5, LinkedList (java.util.LinkedList): 4, AtomicLong (java.util.concurrent.atomic.AtomicLong): 4, Path (org.apache.hadoop.fs.Path): 4, LongWritable (org.apache.hadoop.io.LongWritable): 4, Text (org.apache.hadoop.io.Text): 4, SequenceFileInputFormat (org.apache.hadoop.mapred.SequenceFileInputFormat): 4, UserPayload (org.apache.tez.dag.api.UserPayload): 4, MRInputUserPayloadProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto): 4, MRSplitsProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto): 4, ByteString (com.google.protobuf.ByteString): 3, ArrayList (java.util.ArrayList): 3