Search in sources :

Example 6 with VertexStatus

Use of org.apache.tez.dag.api.client.VertexStatus in the Apache Hive project.

From the class TezTask, method collectCommitInformation.

/**
 * Walks every unit of work in the given Tez plan and, for each one whose job configuration uses
 * the Iceberg no-job output committer and whose vertex has at least one data sink, stores commit
 * information in the session state through {@code SessionStateUtil.addCommitInfo}.
 *
 * <p>One entry is stored per target table found in the job configuration (keys starting with
 * {@code ICEBERG_SERIALIZED_TABLE_PREFIX}). Each entry carries the derived job id, the number of
 * succeeded tasks reported for the vertex, and all configuration entries whose key starts with
 * {@code ICEBERG_PROPERTY_PREFIX}.
 *
 * @param work the Tez work whose units are inspected
 * @throws IOException declared by this method; presumably raised by the vertex status lookup
 * @throws TezException declared by this method; presumably raised by the vertex status lookup
 */
private void collectCommitInformation(TezWork work) throws IOException, TezException {
    for (BaseWork baseWork : work.getAllWork()) {
        JobConf jobConf = workToConf.get(baseWork);
        Vertex vertex = workToVertex.get(baseWork);

        // A missing job configuration or a missing committer both count as "not Iceberg".
        Object committer = jobConf == null ? null : jobConf.getOutputCommitter();
        boolean icebergCommitter = committer != null
            && committer.getClass().getName().endsWith("HiveIcebergNoJobCommitter");

        // Only jobs with the Iceberg output committer and a data sink are of interest.
        if (!icebergCommitter || vertex.getDataSinks().isEmpty()) {
            continue;
        }

        VertexStatus status = dagClient.getVertexStatus(vertex.getName(), EnumSet.of(StatusGetOpts.GET_COUNTERS));

        // The vertex id looks like vertex_1617722404520_0001_1_00; its pieces are rearranged
        // through JOB_ID_TEMPLATE into a parsable JobID such as job_16177224045200_0001.
        String[] idSegments = status.getId().split("_");
        int vertexId = Integer.parseInt(idSegments[idSegments.length - 1]);
        String jobId = String.format(JOB_ID_TEMPLATE, idSegments[1], vertexId, idSegments[2]);

        List<String> targetTables = new ArrayList<>();
        Map<String, String> icebergProperties = new HashMap<>();
        for (Map.Entry<String, String> property : jobConf) {
            String key = property.getKey();
            if (key.startsWith(ICEBERG_SERIALIZED_TABLE_PREFIX)) {
                // The remainder of the key names a table this vertex wrote to.
                targetTables.add(key.substring(ICEBERG_SERIALIZED_TABLE_PREFIX.length()));
            } else if (key.startsWith(ICEBERG_PROPERTY_PREFIX)) {
                // Iceberg properties may be needed at job commit time but not be available then.
                icebergProperties.put(key, property.getValue());
            }
        }

        // Store the commit information once per target table.
        for (String table : targetTables) {
            SessionStateUtil.addCommitInfo(jobConf, table, jobId, status.getProgress().getSucceededTaskCount(), icebergProperties);
        }
    }
}
Also used : Metrics(org.apache.hadoop.hive.common.metrics.common.Metrics) MetricsConstant(org.apache.hadoop.hive.common.metrics.common.MetricsConstant) DAGClient(org.apache.tez.dag.api.client.DAGClient) Arrays(java.util.Arrays) TezCounter(org.apache.tez.common.counters.TezCounter) VertexStatus(org.apache.tez.dag.api.client.VertexStatus) LoggerFactory(org.slf4j.LoggerFactory) CallerContext(org.apache.tez.client.CallerContext) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) JSONObject(org.json.JSONObject) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) MergeJoinWork(org.apache.hadoop.hive.ql.plan.MergeJoinWork) CounterGroup(org.apache.tez.common.counters.CounterGroup) Vertex(org.apache.tez.dag.api.Vertex) EnumSet(java.util.EnumSet) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) EdgeType(org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType) Edge(org.apache.tez.dag.api.Edge) Collection(java.util.Collection) HiveConfUtil(org.apache.hadoop.hive.conf.HiveConfUtil) Set(java.util.Set) TezJobMonitor(org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor) DAG(org.apache.tez.dag.api.DAG) SessionNotRunning(org.apache.tez.dag.api.SessionNotRunning) SessionState(org.apache.hadoop.hive.ql.session.SessionState) List(java.util.List) MetastoreConf(org.apache.hadoop.hive.metastore.conf.MetastoreConf) ServerUtils(org.apache.hadoop.hive.common.ServerUtils) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) DAGAccessControls(org.apache.tez.common.security.DAGAccessControls) Optional(java.util.Optional) SessionStateUtil(org.apache.hadoop.hive.ql.session.SessionStateUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) 
TezConfiguration(org.apache.tez.dag.api.TezConfiguration) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) HashMap(java.util.HashMap) MappingInput(org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput) StageType(org.apache.hadoop.hive.ql.plan.api.StageType) ArrayList(java.util.ArrayList) Task(org.apache.hadoop.hive.ql.exec.Task) LinkedHashMap(java.util.LinkedHashMap) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) VertexGroup(org.apache.tez.dag.api.VertexGroup) TezWork(org.apache.hadoop.hive.ql.plan.TezWork) StringUtils(org.apache.hadoop.util.StringUtils) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) LinkedList(java.util.LinkedList) Nullable(javax.annotation.Nullable) Private(org.apache.hadoop.classification.InterfaceAudience.Private) Ref(org.apache.hive.common.util.Ref) Logger(org.slf4j.Logger) UnionWork(org.apache.hadoop.hive.ql.plan.UnionWork) HiveConf(org.apache.hadoop.hive.conf.HiveConf) IOException(java.io.IOException) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge) TezException(org.apache.tez.dag.api.TezException) Operator(org.apache.hadoop.hive.ql.exec.Operator) TezEdgeProperty(org.apache.hadoop.hive.ql.plan.TezEdgeProperty) JobConf(org.apache.hadoop.mapred.JobConf) TezCounters(org.apache.tez.common.counters.TezCounters) TezClient(org.apache.tez.client.TezClient) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) Vertex(org.apache.tez.dag.api.Vertex) VertexStatus(org.apache.tez.dag.api.client.VertexStatus) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) JobConf(org.apache.hadoop.mapred.JobConf) 
Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 7 with VertexStatus

Use of org.apache.tez.dag.api.client.VertexStatus in the Apache Hive project.

From the class DAGSummary, method print.

/**
 * Prints the task execution summary to the given console.
 *
 * <p>The title line is always printed. If {@code hiveCounters} is {@code null} nothing else is
 * printed. Otherwise a header is printed, followed by one summary line per vertex in sorted
 * vertex-name order, followed by a separator line. A vertex is skipped when it has no progress
 * entry or when no vertex status could be obtained for it.
 *
 * @param console the log helper that receives the output
 */
@Override
public void print(SessionState.LogHelper console) {
    console.printInfo("Task Execution Summary");

    // Without counters there is nothing meaningful to report.
    if (hiveCounters == null) {
        return;
    }

    // Per-vertex summary.
    printHeader(console);

    Set<StatusGetOpts> counterOptions = new HashSet<>(1);
    counterOptions.add(StatusGetOpts.GET_COUNTERS);

    // Copy the names into a TreeSet so vertices are reported in sorted order.
    SortedSet<String> sortedVertexNames = new TreeSet<>(progressMap.keySet());
    for (String vertexName : sortedVertexNames) {
        Progress vertexProgress = progressMap.get(vertexName);
        if (vertexProgress != null) {
            VertexStatus status = vertexStatus(counterOptions, vertexName);
            if (status != null) {
                console.printInfo(vertexSummary(vertexName, vertexProgress, status));
            }
        }
    }

    console.printInfo(FILE_HEADER_SEPARATOR);
}
Also used : Progress(org.apache.tez.dag.api.client.Progress) VertexStatus(org.apache.tez.dag.api.client.VertexStatus) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts)

Aggregations

VertexStatus (org.apache.tez.dag.api.client.VertexStatus)7 StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts)5 TezCounters (org.apache.tez.common.counters.TezCounters)4 TezCounter (org.apache.tez.common.counters.TezCounter)3 Path (org.apache.hadoop.fs.Path)2 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)2 TezClient (org.apache.tez.client.TezClient)2 CounterGroup (org.apache.tez.common.counters.CounterGroup)2 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)2 TezException (org.apache.tez.dag.api.TezException)2 Test (org.junit.Test)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 IOException (java.io.IOException)1 AccessControlException (java.security.AccessControlException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 EnumSet (java.util.EnumSet)1 HashMap (java.util.HashMap)1