Search in sources :

Example 6 with VertexManagerPluginDescriptor

use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.

the class DagUtils method createEdge.

/**
 * Given a Vertex group and a vertex, createEdge will create a
 * {@link GroupInputEdge} between them.
 *
 * <p>The merged-input class attached to the edge depends on the edge type:
 * {@code ConcatenatedMergedKeyValueInput} for broadcast, custom, custom-simple,
 * one-to-one and cross-product edges, and {@code TezMergedLogicalInput} for
 * simple edges and any other edge type. For a custom edge the child vertex
 * additionally receives a {@code CustomPartitionVertex} vertex manager plugin
 * whose payload is the serialized {@code CustomVertexConfiguration}; for a
 * simple edge {@code setupAutoReducerParallelism} is invoked on the child.
 *
 * @param group The parent VertexGroup
 * @param vConf The job conf of one of the parent (grouped) vertices
 * @param w The child vertex
 * @param edgeProp the edge property of connection between the two
 * endpoints.
 * @param work the work passed to {@code tezWork.getVertexType} and
 * {@code createEdgeProperty}
 * @param tezWork the enclosing TezWork, used to look up the vertex type of
 * {@code work} and passed on to {@code createEdgeProperty}
 * @return the edge connecting {@code group} to {@code w}
 * @throws IOException declared by the configuration serialization and the
 * helper calls made here
 */
public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, TezEdgeProperty edgeProp, BaseWork work, TezWork tezWork) throws IOException {
    LOG.info("Creating Edge between " + group.getGroupName() + " and " + w.getName());
    Class<?> mergeInputClass;
    EdgeType edgeType = edgeProp.getEdgeType();
    switch(edgeType) {
        case CUSTOM_EDGE:
            {
                mergeInputClass = ConcatenatedMergedKeyValueInput.class;
                int numBuckets = edgeProp.getNumBuckets();
                CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, tezWork.getVertexType(work));
                DataOutputBuffer dob = new DataOutputBuffer();
                vertexConf.write(dob);
                VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
                // getData() exposes the whole backing array; only the first
                // getLength() bytes were actually written, so bound the payload
                // to that range instead of shipping the unused capacity.
                ByteBuffer userPayload = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
                desc.setUserPayload(UserPayload.create(userPayload));
                w.setVertexManagerPlugin(desc);
                break;
            }
        case BROADCAST_EDGE:
        case CUSTOM_SIMPLE_EDGE:
        case ONE_TO_ONE_EDGE:
        case XPROD_EDGE:
            mergeInputClass = ConcatenatedMergedKeyValueInput.class;
            break;
        case SIMPLE_EDGE:
            setupAutoReducerParallelism(edgeProp, w);
            // intentional fall-through: simple edges use the default merged input
        default:
            mergeInputClass = TezMergedLogicalInput.class;
            break;
    }
    return GroupInputEdge.create(group, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork), InputDescriptor.create(mergeInputClass.getName()));
}
Also used : ConcatenatedMergedKeyValueInput(org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) VertexManagerPluginDescriptor(org.apache.tez.dag.api.VertexManagerPluginDescriptor) EdgeType(org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType) ByteBuffer(java.nio.ByteBuffer)

Example 7 with VertexManagerPluginDescriptor

use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.

the class DagUtils method createVertexFromMergeWork.

/**
 * Creates the vertex for a merge join work.
 *
 * <p>The merge work is first registered via {@code Utilities.setMergeWork}.
 * If the main work is not a {@code MapWork}, the vertex is built by
 * {@code createVertexFromReduceWork}. Otherwise the vertex is built from the
 * main map work, one {@code MultiMRInput} data source is added per entry of
 * the base work list, and a {@code CustomPartitionVertex} vertex manager
 * plugin carrying the serialized {@code CustomVertexConfiguration} is
 * installed on it.
 *
 * @param conf the job configuration; modified in place (input format class,
 * event payload serialization flag, per-work merge prefix and input name)
 * @param mergeJoinWork the merge join work to build the vertex for
 * @param mrScratchDir scratch directory passed on to the helper calls
 * @param vertexType vertex type used for the map vertex and the custom
 * vertex configuration
 * @return the vertex created for {@code mergeJoinWork}
 * @throws Exception propagated from the helper calls made here
 */
private Vertex createVertexFromMergeWork(JobConf conf, MergeJoinWork mergeJoinWork, Path mrScratchDir, VertexType vertexType) throws Exception {
    Utilities.setMergeWork(conf, mergeJoinWork, mrScratchDir, false);
    if (!(mergeJoinWork.getMainWork() instanceof MapWork)) {
        return createVertexFromReduceWork(conf, (ReduceWork) mergeJoinWork.getMainWork(), mrScratchDir);
    }
    List<BaseWork> mapWorkList = mergeJoinWork.getBaseWorkList();
    MapWork mainMapWork = (MapWork) (mergeJoinWork.getMainWork());
    Vertex mergeVx = createVertexFromMapWork(conf, mainMapWork, mrScratchDir, vertexType);
    Class<?> inputFormatClass = conf.getClass("mapred.input.format.class", HiveInputFormat.class);
    if (inputFormatClass != BucketizedHiveInputFormat.class && inputFormatClass != HiveInputFormat.class) {
        // As of now only these two formats are supported.
        inputFormatClass = HiveInputFormat.class;
    }
    conf.setClass("mapred.input.format.class", inputFormatClass, InputFormat.class);
    // mapreduce.tez.input.initializer.serialize.event.payload should be set
    // to false when using this plug-in to avoid getting a serialized event at run-time.
    conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    for (BaseWork baseWork : mapWorkList) {
        MapWork mapWork = (MapWork) baseWork;
        // conf is re-pointed at the current work before its data source is built from it.
        conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
        conf.set(Utilities.INPUT_NAME, mapWork.getName());
        LOG.info("Going through each work and adding MultiMRInput");
        mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, inputFormatClass).build());
    }
    // To be populated for SMB joins only for all the small tables
    Map<String, Integer> inputToBucketMap = new HashMap<>();
    if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1 && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) {
        // This is an SMB join.
        for (BaseWork work : mapWorkList) {
            MapWork mw = (MapWork) work;
            Map<String, Operator<?>> aliasToWork = mw.getAliasToWork();
            Preconditions.checkState(aliasToWork.size() == 1, "More than 1 alias in SMB mapwork");
            inputToBucketMap.put(mw.getName(), mw.getWorks().get(0).getOpTraits().getNumBuckets());
        }
    }
    VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
    // the +1 to the size is because of the main work.
    CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf().getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(), mapWorkList.size() + 1, inputToBucketMap);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    // getData() exposes the whole backing array; only the first getLength()
    // bytes were actually written, so bound the payload to that range.
    desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength())));
    mergeVx.setVertexManagerPlugin(desc);
    return mergeVx;
}
Also used : Operator(org.apache.hadoop.hive.ql.exec.Operator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Vertex(org.apache.tez.dag.api.Vertex) PreWarmVertex(org.apache.tez.dag.api.PreWarmVertex) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) VertexManagerPluginDescriptor(org.apache.tez.dag.api.VertexManagerPluginDescriptor) CombineHiveInputFormat(org.apache.hadoop.hive.ql.io.CombineHiveInputFormat) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Example 8 with VertexManagerPluginDescriptor

use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.

the class DagUtils method setupAutoReducerParallelism.

/**
 * Installs a {@code ShuffleVertexManager} plugin on the given vertex when the
 * edge property reports auto-reduce; otherwise does nothing.
 *
 * <p>The plugin payload is built from a configuration that enables auto
 * parallelism and carries the edge's minimum reducer count and input size
 * per reducer.
 *
 * @param edgeProp edge property supplying the auto-reduce flag and its settings
 * @param v vertex that receives the vertex manager plugin
 * @throws IOException declared by the payload creation from the configuration
 */
private void setupAutoReducerParallelism(TezEdgeProperty edgeProp, Vertex v) throws IOException {
    if (!edgeProp.isAutoReduce()) {
        // Nothing to configure when auto-reduce is off.
        return;
    }
    Configuration managerSettings = new Configuration(false);
    managerSettings.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
    managerSettings.setInt(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM, edgeProp.getMinReducer());
    managerSettings.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE, edgeProp.getInputSizePerReducer());

    String managerClassName = ShuffleVertexManager.class.getName();
    VertexManagerPluginDescriptor managerDescriptor = VertexManagerPluginDescriptor.create(managerClassName);
    managerDescriptor.setUserPayload(TezUtils.createUserPayloadFromConf(managerSettings));
    v.setVertexManagerPlugin(managerDescriptor);
}
Also used : ShuffleVertexManager(org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) UserPayload(org.apache.tez.dag.api.UserPayload) VertexManagerPluginDescriptor(org.apache.tez.dag.api.VertexManagerPluginDescriptor)

Aggregations

VertexManagerPluginDescriptor (org.apache.tez.dag.api.VertexManagerPluginDescriptor)8 DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)5 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)3 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)3 PreWarmVertex (org.apache.tez.dag.api.PreWarmVertex)3 Vertex (org.apache.tez.dag.api.Vertex)3 ShuffleVertexManager (org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager)3 ByteBuffer (java.nio.ByteBuffer)2 HashMap (java.util.HashMap)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 Configuration (org.apache.hadoop.conf.Configuration)2 Operator (org.apache.hadoop.hive.ql.exec.Operator)2 EdgeType (org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType)2 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)2 UserPayload (org.apache.tez.dag.api.UserPayload)2 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)2 ConcatenatedMergedKeyValueInput (org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput)2 IOException (java.io.IOException)1 LinkedHashMap (java.util.LinkedHashMap)1 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)1