Search in sources :

Example 1 with VertexManagerPluginDescriptor

use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.

the class DagUtils method createEdge.

/**
 * Given a vertex group and a vertex, creates a {@link GroupInputEdge}
 * between them.
 *
 * <p>The merged-input class attached to the edge depends on the edge type:
 * broadcast, custom and custom-simple edges use
 * {@code ConcatenatedMergedKeyValueInput}; simple edges and any other type
 * use {@code TezMergedLogicalInput}. For custom edges the child vertex is
 * additionally given a {@code CustomPartitionVertex} vertex manager whose
 * payload is the serialized {@code CustomVertexConfiguration}. For simple
 * edges {@code setupAutoReducerParallelism} is applied to the child vertex.
 *
 * @param group The parent VertexGroup
 * @param vConf The job conf of one of the parent (grouped) vertices
 * @param w The child vertex
 * @param edgeProp the edge property of connection between the two
 * endpoints.
 * @param vertexType the vertex type stored in the custom vertex
 * configuration (only read for custom edges)
 * @return the group input edge connecting {@code group} to {@code w}
 * @throws IOException propagated from serializing the custom vertex
 * configuration or from the helper methods invoked here
 */
@SuppressWarnings("rawtypes")
public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, TezEdgeProperty edgeProp, VertexType vertexType) throws IOException {
    Class mergeInputClass;
    LOG.info("Creating Edge between " + group.getGroupName() + " and " + w.getName());
    EdgeType edgeType = edgeProp.getEdgeType();
    switch(edgeType) {
        case BROADCAST_EDGE:
            mergeInputClass = ConcatenatedMergedKeyValueInput.class;
            break;
        case CUSTOM_EDGE:
            {
                mergeInputClass = ConcatenatedMergedKeyValueInput.class;
                int numBuckets = edgeProp.getNumBuckets();
                CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, vertexType);
                DataOutputBuffer dob = new DataOutputBuffer();
                vertexConf.write(dob);
                VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
                // getData() exposes the whole backing array; only the first
                // getLength() bytes were actually written, so wrap just those
                // instead of shipping the unused tail of the buffer as payload.
                ByteBuffer userPayload = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
                desc.setUserPayload(UserPayload.create(userPayload));
                w.setVertexManagerPlugin(desc);
                break;
            }
        case CUSTOM_SIMPLE_EDGE:
            mergeInputClass = ConcatenatedMergedKeyValueInput.class;
            break;
        case SIMPLE_EDGE:
            setupAutoReducerParallelism(edgeProp, w);
            // fall through: simple edges share the default merged input below
        default:
            mergeInputClass = TezMergedLogicalInput.class;
            break;
    }
    return GroupInputEdge.create(group, w, createEdgeProperty(edgeProp, vConf), InputDescriptor.create(mergeInputClass.getName()));
}
Also used : ConcatenatedMergedKeyValueInput(org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) VertexManagerPluginDescriptor(org.apache.tez.dag.api.VertexManagerPluginDescriptor) EdgeType(org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType) ByteBuffer(java.nio.ByteBuffer)

Example 2 with VertexManagerPluginDescriptor

use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project tez by apache.

the class VertexImpl method assignVertexManager.

/**
 * Chooses and installs the {@code VertexManager} for this vertex.
 *
 * <p>Selection order:
 * <ol>
 *   <li>If recovery data says initialization should be skipped, a
 *       {@code NoOpVertexManager} is installed whose payload is the
 *       serialized {@code VertexConfigurationDoneEvent}.</li>
 *   <li>If the vertex plan carries a user vertex manager plugin, that
 *       plugin is used.</li>
 *   <li>Otherwise: {@code RootInputVertexManager} when there are inputs
 *       with initializers; {@code InputReadyVertexManager} when there is a
 *       one-to-one source edge and no custom edge;
 *       {@code ShuffleVertexManager} when there is a scatter-gather source
 *       edge and no custom edge; {@code ImmediateStartVertexManager} in
 *       all remaining cases.</li>
 * </ol>
 *
 * @throws TezException declared by the signature; propagated from the
 * calls made here
 * @throws TezUncheckedException if the reconfiguration event cannot be
 * written to the payload stream, or a source edge has an unknown data
 * movement type
 */
private void assignVertexManager() throws TezException {
    // VertexReconfigureDoneEvent will be logged
    if (recoveryData != null && recoveryData.shouldSkipInit()) {
        // Replace the original VertexManager with NoOpVertexManager if the reconfiguration is done in the last AM attempt
        VertexConfigurationDoneEvent reconfigureDoneEvent = recoveryData.getVertexConfigurationDoneEvent();
        if (LOG.isInfoEnabled()) {
            LOG.info("VertexManager reconfiguration is done in the last AM Attempt" + ", use NoOpVertexManager to replace it, vertexId=" + logIdentifier);
            LOG.info("VertexReconfigureDoneEvent=" + reconfigureDoneEvent);
        }
        NonSyncByteArrayOutputStream out = new NonSyncByteArrayOutputStream();
        try {
            reconfigureDoneEvent.toProtoStream(out);
        } catch (IOException e) {
            // Keep the underlying I/O failure as the cause so the root problem
            // is not lost when this surfaces as an unchecked exception.
            throw new TezUncheckedException("Unable to deserilize VertexReconfigureDoneEvent", e);
        }
        this.vertexManager = new VertexManager(VertexManagerPluginDescriptor.create(NoOpVertexManager.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(out.toByteArray()))), dagUgi, this, appContext, stateChangeNotifier);
        return;
    }
    // Record which data movement types occur among the incoming edges;
    // these flags drive the default manager choice below.
    boolean hasBipartite = false;
    boolean hasOneToOne = false;
    boolean hasCustom = false;
    if (sourceVertices != null) {
        for (Edge edge : sourceVertices.values()) {
            switch(edge.getEdgeProperty().getDataMovementType()) {
                case SCATTER_GATHER:
                    hasBipartite = true;
                    break;
                case ONE_TO_ONE:
                    hasOneToOne = true;
                    break;
                case BROADCAST:
                    // broadcast edges do not influence the manager choice
                    break;
                case CUSTOM:
                    hasCustom = true;
                    break;
                default:
                    throw new TezUncheckedException("Unknown data movement type: " + edge.getEdgeProperty().getDataMovementType());
            }
        }
    }
    boolean hasUserVertexManager = vertexPlan.hasVertexManagerPlugin();
    if (hasUserVertexManager) {
        // A plugin configured in the DAG plan always wins over the defaults.
        VertexManagerPluginDescriptor pluginDesc = DagTypeConverters.convertVertexManagerPluginDescriptorFromDAGPlan(vertexPlan.getVertexManagerPlugin());
        LOG.info("Setting user vertex manager plugin: " + pluginDesc.getClassName() + " on vertex: " + getLogIdentifier());
        vertexManager = new VertexManager(pluginDesc, dagUgi, this, appContext, stateChangeNotifier);
    } else if (inputsWithInitializers != null) {
        // No user plugin: pick a built-in manager based on inputs and edge types.
        LOG.info("Setting vertexManager to RootInputVertexManager for " + logIdentifier);
        vertexManager = new VertexManager(RootInputVertexManager.createConfigBuilder(vertexConf).build(), dagUgi, this, appContext, stateChangeNotifier);
    } else if (hasOneToOne && !hasCustom) {
        LOG.info("Setting vertexManager to InputReadyVertexManager for " + logIdentifier);
        vertexManager = new VertexManager(VertexManagerPluginDescriptor.create(InputReadyVertexManager.class.getName()), dagUgi, this, appContext, stateChangeNotifier);
    } else if (hasBipartite && !hasCustom) {
        LOG.info("Setting vertexManager to ShuffleVertexManager for " + logIdentifier);
        // shuffle vertex manager needs a conf payload
        vertexManager = new VertexManager(ShuffleVertexManager.createConfigBuilder(vertexConf).build(), dagUgi, this, appContext, stateChangeNotifier);
    } else {
        // schedule all tasks upon vertex start. Default behavior.
        LOG.info("Setting vertexManager to ImmediateStartVertexManager for " + logIdentifier);
        vertexManager = new VertexManager(VertexManagerPluginDescriptor.create(ImmediateStartVertexManager.class.getName()), dagUgi, this, appContext, stateChangeNotifier);
    }
}
Also used : TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) NonSyncByteArrayOutputStream(org.apache.tez.common.io.NonSyncByteArrayOutputStream) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) VertexManagerPluginDescriptor(org.apache.tez.dag.api.VertexManagerPluginDescriptor) IOException(java.io.IOException) InputReadyVertexManager(org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager) ShuffleVertexManager(org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager)

Example 3 with VertexManagerPluginDescriptor

use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.

the class DagUtils method setupQuickStart.

/**
 * Attaches a {@code ShuffleVertexManager} to {@code v} with both the minimum
 * and maximum source fraction set to 0, unless the edge is marked as
 * slow-start, in which case the vertex is left untouched.
 *
 * @param edgeProp edge property consulted for the slow-start flag
 * @param v vertex that receives the vertex manager plugin
 * @throws IOException propagated from building the user payload
 */
private void setupQuickStart(TezEdgeProperty edgeProp, Vertex v) throws IOException {
    if (edgeProp.isSlowStart()) {
        // Slow-start edges keep whatever vertex manager they already have.
        return;
    }
    Configuration shuffleConf = new Configuration(false);
    shuffleConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, 0);
    shuffleConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, 0);
    VertexManagerPluginDescriptor quickStartDesc = VertexManagerPluginDescriptor.create(ShuffleVertexManager.class.getName());
    quickStartDesc.setUserPayload(TezUtils.createUserPayloadFromConf(shuffleConf));
    v.setVertexManagerPlugin(quickStartDesc);
}
Also used : ShuffleVertexManager(org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) UserPayload(org.apache.tez.dag.api.UserPayload) VertexManagerPluginDescriptor(org.apache.tez.dag.api.VertexManagerPluginDescriptor)

Example 4 with VertexManagerPluginDescriptor

use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.

the class DagUtils method createVertex.

/**
 * Creates the vertex for a merge join work.
 *
 * <p>When the main work is a {@code MapWork}, the vertex is built from that
 * map work, one {@code MultiMRInput} data source is added per entry of the
 * base work list, and a {@code CustomPartitionVertex} vertex manager is
 * attached whose payload is the serialized {@code CustomVertexConfiguration}.
 * Otherwise the main work is treated as a {@code ReduceWork} and vertex
 * creation is delegated to the corresponding overload.
 *
 * <p>Note that {@code conf} is modified in place (input format class,
 * payload serialization flag, and per-work merge file prefix / input name).
 *
 * @return the vertex created for {@code mergeJoinWork}
 * @throws Exception propagated from the helpers invoked here
 */
private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx, VertexType vertexType) throws Exception {
    Utilities.setMergeWork(conf, mergeJoinWork, mrScratchDir, false);
    if (mergeJoinWork.getMainWork() instanceof MapWork) {
        List<BaseWork> mapWorkList = mergeJoinWork.getBaseWorkList();
        MapWork mainMapWork = (MapWork) (mergeJoinWork.getMainWork());
        Vertex mergeVx = createVertex(conf, mainMapWork, appJarLr, additionalLr, fs, mrScratchDir, ctx, vertexType);
        conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
        // mapreduce.tez.input.initializer.serialize.event.payload should be set
        // to false when using this plug-in to avoid getting a serialized event at run-time.
        conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
        for (BaseWork work : mapWorkList) {
            MapWork mapWork = (MapWork) work;
            conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
            conf.set(Utilities.INPUT_NAME, mapWork.getName());
            LOG.info("Going through each work and adding MultiMRInput");
            mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
        }
        VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
        // the +1 to the size is because of the main work.
        CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf().getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(), mapWorkList.size() + 1);
        DataOutputBuffer dob = new DataOutputBuffer();
        vertexConf.write(dob);
        // getData() exposes the whole backing array; only the first
        // getLength() bytes were actually written, so wrap just those
        // instead of shipping the unused tail of the buffer as payload.
        desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength())));
        mergeVx.setVertexManagerPlugin(desc);
        return mergeVx;
    } else {
        return createVertex(conf, (ReduceWork) mergeJoinWork.getMainWork(), appJarLr, additionalLr, fs, mrScratchDir, ctx);
    }
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) PreWarmVertex(org.apache.tez.dag.api.PreWarmVertex) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) VertexManagerPluginDescriptor(org.apache.tez.dag.api.VertexManagerPluginDescriptor) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Example 5 with VertexManagerPluginDescriptor

use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.

the class DagUtils method createVertex.

/**
 * Creates the vertex for a merge join work.
 *
 * <p>When the main work is a {@code MapWork}, the vertex is built from that
 * map work, one {@code MultiMRInput} data source is added per entry of the
 * base work list, and a {@code CustomPartitionVertex} vertex manager is
 * attached whose payload is the serialized {@code CustomVertexConfiguration}.
 * If the merge join operator has exactly one parent operator and non-null
 * operator traits, the join is treated as an SMB join and the configuration
 * also carries a map from each work's name to its bucket count.
 * Otherwise (main work is not a {@code MapWork}) the main work is treated as
 * a {@code ReduceWork} and vertex creation is delegated to the corresponding
 * overload.
 *
 * <p>Note that {@code conf} is modified in place (input format class,
 * payload serialization flag, and per-work merge file prefix / input name).
 *
 * @return the vertex created for {@code mergeJoinWork}
 * @throws Exception propagated from the helpers invoked here
 */
private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, FileSystem fs, Path mrScratchDir, Context ctx, VertexType vertexType, Map<String, LocalResource> localResources) throws Exception {
    Utilities.setMergeWork(conf, mergeJoinWork, mrScratchDir, false);
    if (mergeJoinWork.getMainWork() instanceof MapWork) {
        List<BaseWork> mapWorkList = mergeJoinWork.getBaseWorkList();
        MapWork mainMapWork = (MapWork) (mergeJoinWork.getMainWork());
        Vertex mergeVx = createVertex(conf, mainMapWork, fs, mrScratchDir, ctx, vertexType, localResources);
        conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
        // mapreduce.tez.input.initializer.serialize.event.payload should be set
        // to false when using this plug-in to avoid getting a serialized event at run-time.
        conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
        for (BaseWork work : mapWorkList) {
            MapWork mapWork = (MapWork) work;
            conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
            conf.set(Utilities.INPUT_NAME, mapWork.getName());
            LOG.info("Going through each work and adding MultiMRInput");
            mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
        }
        // To be populated for SMB joins only for all the small tables
        Map<String, Integer> inputToBucketMap = new HashMap<>();
        if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1 && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) {
            // This is an SMB join.
            for (BaseWork work : mapWorkList) {
                MapWork mw = (MapWork) work;
                Map<String, Operator<?>> aliasToWork = mw.getAliasToWork();
                Preconditions.checkState(aliasToWork.size() == 1, "More than 1 alias in SMB mapwork");
                inputToBucketMap.put(mw.getName(), mw.getWorks().get(0).getOpTraits().getNumBuckets());
            }
        }
        VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
        // the +1 to the size is because of the main work.
        CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf().getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(), mapWorkList.size() + 1, inputToBucketMap);
        DataOutputBuffer dob = new DataOutputBuffer();
        vertexConf.write(dob);
        // getData() exposes the whole backing array; only the first
        // getLength() bytes were actually written, so wrap just those
        // instead of shipping the unused tail of the buffer as payload.
        desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength())));
        mergeVx.setVertexManagerPlugin(desc);
        return mergeVx;
    } else {
        return createVertex(conf, (ReduceWork) mergeJoinWork.getMainWork(), fs, mrScratchDir, ctx, localResources);
    }
}
Also used : Operator(org.apache.hadoop.hive.ql.exec.Operator) Vertex(org.apache.tez.dag.api.Vertex) PreWarmVertex(org.apache.tez.dag.api.PreWarmVertex) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) VertexManagerPluginDescriptor(org.apache.tez.dag.api.VertexManagerPluginDescriptor) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Aggregations

VertexManagerPluginDescriptor (org.apache.tez.dag.api.VertexManagerPluginDescriptor)8 DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)5 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)3 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)3 PreWarmVertex (org.apache.tez.dag.api.PreWarmVertex)3 Vertex (org.apache.tez.dag.api.Vertex)3 ShuffleVertexManager (org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager)3 ByteBuffer (java.nio.ByteBuffer)2 HashMap (java.util.HashMap)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 Configuration (org.apache.hadoop.conf.Configuration)2 Operator (org.apache.hadoop.hive.ql.exec.Operator)2 EdgeType (org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType)2 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)2 UserPayload (org.apache.tez.dag.api.UserPayload)2 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)2 ConcatenatedMergedKeyValueInput (org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput)2 IOException (java.io.IOException)1 LinkedHashMap (java.util.LinkedHashMap)1 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)1