Use of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto in project tez by apache.
The class MRInputAMSplitGenerator, method initialize. The initializer parses the MRInput user payload, sizes the vertex from the available resource headroom and the configured number of split waves, generates the input splits in memory, and emits them as one vertex-configuration event followed by one data-information event per split.
@Override
public List<Event> initialize() throws Exception {
  StopWatch sw = new StopWatch().start();
  MRInputUserPayloadProto userPayloadProto =
      MRInputHelpers.parseMRInputPayload(getContext().getInputUserPayload());
  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time to parse MRInput payload into proto: " + sw.now(TimeUnit.MILLISECONDS));
  }
  sw.reset().start();
  Configuration conf =
      TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Emitting serialized splits: " + sendSerializedEvents
        + " for input " + getContext().getInputName());
    LOG.debug("Time converting ByteString to configuration: " + sw.now(TimeUnit.MILLISECONDS));
  }
  sw.reset().start();
  int totalResource = getContext().getTotalAvailableResource().getMemory();
  int taskResource = getContext().getVertexTaskResource().getMemory();
  float waves = conf.getFloat(
      TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES,
      TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);
  // Size the vertex so that "waves" full waves of tasks fit into the available headroom.
  int numTasks = (int) ((totalResource * waves) / taskResource);
  boolean groupSplits = userPayloadProto.getGroupingEnabled();
  boolean sortSplits = userPayloadProto.getSortSplitsEnabled();
  LOG.info("Input " + getContext().getInputName() + " asking for " + numTasks
      + " tasks. Headroom: " + totalResource + ". Task Resource: " + taskResource
      + ". waves: " + waves + ". groupingEnabled: " + groupSplits
      + ". SortSplitsEnabled: " + sortSplits);
  // Read all credentials into the credentials instance stored in JobConf.
  JobConf jobConf = new JobConf(conf);
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
  InputSplitInfoMem inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(
      jobConf, groupSplits, sortSplits, groupSplits ? numTasks : 0);
  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time to create splits to mem: " + sw.now(TimeUnit.MILLISECONDS));
  }
  List<Event> events = Lists.newArrayListWithCapacity(inputSplitInfo.getNumTasks() + 1);
  InputConfigureVertexTasksEvent configureVertexEvent = InputConfigureVertexTasksEvent.create(
      inputSplitInfo.getNumTasks(),
      VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()),
      InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate());
  events.add(configureVertexEvent);
  if (sendSerializedEvents) {
    MRSplitsProto splitsProto = inputSplitInfo.getSplitsProto();
    int count = 0;
    for (MRSplitProto mrSplit : splitsProto.getSplitsList()) {
      // Unnecessary array copy, can be avoided by using ByteBuffer instead of a raw array.
      InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(
          count++, mrSplit.toByteString().asReadOnlyByteBuffer());
      events.add(diEvent);
    }
  } else {
    int count = 0;
    if (inputSplitInfo.holdsNewFormatSplits()) {
      for (org.apache.hadoop.mapreduce.InputSplit split : inputSplitInfo.getNewFormatSplits()) {
        InputDataInformationEvent diEvent =
            InputDataInformationEvent.createWithObjectPayload(count++, split);
        events.add(diEvent);
      }
    } else {
      for (org.apache.hadoop.mapred.InputSplit split : inputSplitInfo.getOldFormatSplits()) {
        InputDataInformationEvent diEvent =
            InputDataInformationEvent.createWithObjectPayload(count++, split);
        events.add(diEvent);
      }
    }
  }
  return events;
}
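When sendSerializedEvents is true, each InputDataInformationEvent carries one MRSplitProto serialized into a read-only ByteBuffer. Below is a minimal consumer-side sketch of turning that payload back into a proto, using only the generated protobuf API; the SplitPayloadReader class and its parseSplit method are hypothetical, not part of Tez.

import java.nio.ByteBuffer;
import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto;

// Hypothetical helper: parses the read-only ByteBuffer payload of an
// InputDataInformationEvent back into the MRSplitProto serialized above.
public class SplitPayloadReader {
  static MRSplitProto parseSplit(ByteBuffer payload) throws InvalidProtocolBufferException {
    // ByteString.copyFrom copies the buffer contents; the generated
    // parseFrom then deserializes the MRSplitProto message.
    return MRSplitProto.parseFrom(ByteString.copyFrom(payload));
  }
}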
Use of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto in project hive by apache.
The class HiveSplitGenerator, method createEventList. It converts an in-memory InputSplitInfoMem into the event list Tez expects: an InputConfigureVertexTasksEvent that sets the task count and location hints, followed by one InputDataInformationEvent per split, carried either as a serialized MRSplitProto or as an object payload.
private List<Event> createEventList(boolean sendSerializedEvents, InputSplitInfoMem inputSplitInfo) {
  List<Event> events = Lists.newArrayListWithCapacity(inputSplitInfo.getNumTasks() + 1);
  InputConfigureVertexTasksEvent configureVertexEvent = InputConfigureVertexTasksEvent.create(
      inputSplitInfo.getNumTasks(),
      VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()),
      InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate());
  events.add(configureVertexEvent);
  if (sendSerializedEvents) {
    MRSplitsProto splitsProto = inputSplitInfo.getSplitsProto();
    int count = 0;
    for (MRSplitProto mrSplit : splitsProto.getSplitsList()) {
      InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(
          count++, mrSplit.toByteString().asReadOnlyByteBuffer());
      events.add(diEvent);
    }
  } else {
    int count = 0;
    for (org.apache.hadoop.mapred.InputSplit split : inputSplitInfo.getOldFormatSplits()) {
      InputDataInformationEvent diEvent =
          InputDataInformationEvent.createWithObjectPayload(count++, split);
      events.add(diEvent);
    }
  }
  return events;
}
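For contrast, here is a producer-side sketch of what the proto holds. This is hypothetical and not taken from Hive or Tez (both obtain the proto from InputSplitInfoMem.getSplitsProto() rather than building it by hand), and it assumes MRSplitProto's split_class_name and split_bytes fields.

import com.google.protobuf.ByteString;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto;

// Hypothetical example: assembles an MRSplitsProto containing a single split.
public class SplitsProtoBuilderExample {
  static MRSplitsProto buildFor(String splitClassName, byte[] serializedSplit) {
    MRSplitProto split = MRSplitProto.newBuilder()
        .setSplitClassName(splitClassName)                    // class used to re-instantiate the split
        .setSplitBytes(ByteString.copyFrom(serializedSplit))  // the split's serialized bytes
        .build();
    return MRSplitsProto.newBuilder().addSplits(split).build();
  }
}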