use of org.apache.tez.dag.api.EdgeManagerPluginDescriptor in project hive by apache.
the class CustomPartitionVertex method processAllEvents.
private void processAllEvents(String inputName, Multimap<Integer, InputSplit> bucketToGroupedSplitMap, boolean secondLevelGroupingDone) throws IOException {
int totalInputsCount = 0;
List<Integer> numSplitsForTask = new ArrayList<Integer>();
for (Entry<Integer, Collection<InputSplit>> entry : bucketToGroupedSplitMap.asMap().entrySet()) {
int bucketNum = entry.getKey();
Collection<InputSplit> initialSplits = entry.getValue();
finalSplits.addAll(initialSplits);
for (InputSplit inputSplit : initialSplits) {
bucketToTaskMap.put(bucketNum, taskCount);
if (secondLevelGroupingDone) {
TezGroupedSplit groupedSplit = (TezGroupedSplit) inputSplit;
numSplitsForTask.add(groupedSplit.getGroupedSplits().size());
totalInputsCount += groupedSplit.getGroupedSplits().size();
} else {
numSplitsForTask.add(1);
totalInputsCount += 1;
}
taskCount++;
}
}
inputNameInputSpecMap.put(inputName, InputSpecUpdate.createPerTaskInputSpecUpdate(numSplitsForTask));
// Construct the EdgeManager descriptor to be used by all edges which need
// the routing table.
EdgeManagerPluginDescriptor hiveEdgeManagerDesc = null;
if ((vertexType == VertexType.MULTI_INPUT_INITIALIZED_EDGES) || (vertexType == VertexType.INITIALIZED_EDGES)) {
hiveEdgeManagerDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
UserPayload payload = getBytePayload(bucketToTaskMap);
hiveEdgeManagerDesc.setUserPayload(payload);
}
// Replace the edge manager for all vertices which have routing type custom.
for (Entry<String, EdgeProperty> edgeEntry : context.getInputVertexEdgeProperties().entrySet()) {
if (edgeEntry.getValue().getDataMovementType() == DataMovementType.CUSTOM && edgeEntry.getValue().getEdgeManagerDescriptor().getClassName().equals(CustomPartitionEdge.class.getName())) {
emMap.put(edgeEntry.getKey(), hiveEdgeManagerDesc);
}
}
LOG.info("Task count is " + taskCount + " for input name: " + inputName);
List<InputDataInformationEvent> taskEvents = Lists.newArrayListWithCapacity(totalInputsCount);
// Re-serialize the splits after grouping.
int count = 0;
for (InputSplit inputSplit : finalSplits) {
if (secondLevelGroupingDone) {
TezGroupedSplit tezGroupedSplit = (TezGroupedSplit) inputSplit;
for (InputSplit subSplit : tezGroupedSplit.getGroupedSplits()) {
if ((subSplit instanceof TezGroupedSplit) == false) {
throw new IOException("Unexpected split type found: " + subSplit.getClass().getCanonicalName());
}
MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(subSplit);
InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count, serializedSplit.toByteString().asReadOnlyByteBuffer());
diEvent.setTargetIndex(count);
taskEvents.add(diEvent);
}
} else {
MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(inputSplit);
InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count, serializedSplit.toByteString().asReadOnlyByteBuffer());
diEvent.setTargetIndex(count);
taskEvents.add(diEvent);
}
count++;
}
// Set the actual events for the tasks.
LOG.info("For input name: " + inputName + " task events size is " + taskEvents.size());
context.addRootInputEvents(inputName, taskEvents);
if (inputToGroupedSplitMap.isEmpty() == false) {
for (Entry<String, Multimap<Integer, InputSplit>> entry : inputToGroupedSplitMap.entrySet()) {
processAllSideEvents(entry.getKey(), entry.getValue());
}
setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
inputToGroupedSplitMap.clear();
}
// Only done when it is a bucket map join only no SMB.
if (numInputsAffectingRootInputSpecUpdate == 1) {
setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
}
}
use of org.apache.tez.dag.api.EdgeManagerPluginDescriptor in project hive by apache.
the class DagUtils method createEdgeProperty.
/*
* Helper function to create an edge property from an edge type.
*/
private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration conf) throws IOException {
MRHelpers.translateMRConfToTez(conf);
String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
String partitionerClassName = conf.get("mapred.partitioner.class");
Map<String, String> partitionerConf;
EdgeType edgeType = edgeProp.getEdgeType();
switch(edgeType) {
case BROADCAST_EDGE:
UnorderedKVEdgeConfig et1Conf = UnorderedKVEdgeConfig.newBuilder(keyClass, valClass).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
return et1Conf.createDefaultBroadcastEdgeProperty();
case CUSTOM_EDGE:
assert partitionerClassName != null;
partitionerConf = createPartitionerConf(partitionerClassName, conf);
UnorderedPartitionedKVEdgeConfig et2Conf = UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
EdgeManagerPluginDescriptor edgeDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration(edgeProp.getNumBuckets(), null);
DataOutputBuffer dob = new DataOutputBuffer();
edgeConf.write(dob);
byte[] userPayload = dob.getData();
edgeDesc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
return et2Conf.createDefaultCustomEdgeProperty(edgeDesc);
case CUSTOM_SIMPLE_EDGE:
assert partitionerClassName != null;
partitionerConf = createPartitionerConf(partitionerClassName, conf);
UnorderedPartitionedKVEdgeConfig et3Conf = UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
return et3Conf.createDefaultEdgeProperty();
case SIMPLE_EDGE:
default:
assert partitionerClassName != null;
partitionerConf = createPartitionerConf(partitionerClassName, conf);
OrderedPartitionedKVEdgeConfig et4Conf = OrderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), TezBytesComparator.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
return et4Conf.createDefaultEdgeProperty();
}
}
Aggregations