Search in sources :

Example 6 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project hive by apache.

the class DAGSummary method hiveInputRecordsFromTezCounters.

/**
 * Reads the OUTPUT_RECORDS counter that the input vertex reports for its
 * edge towards {@code vertexName}.
 *
 * @param vertexName      name of the consuming vertex
 * @param inputVertexName name of the vertex whose counters are queried
 * @return the counter value, or -1 when the counter does not exist so that
 *         the caller can fall back to its default way of determining input records
 */
private long hiveInputRecordsFromTezCounters(String vertexName, String inputVertexName) {
    // Only the counters of the input vertex are requested.
    final Set<StatusGetOpts> countersOnly = Collections.singleton(StatusGetOpts.GET_COUNTERS);
    final TezCounters upstreamCounters = vertexStatus(countersOnly, inputVertexName).getVertexCounters();
    // e.g. group name TaskCounter_Map_7_OUTPUT_Reducer_8, counter name OUTPUT_RECORDS
    final String edgeGroupName = formattedName("TaskCounter", inputVertexName, vertexName);
    // 'false' => look the counter up without creating it when it is missing.
    final TezCounter outputRecords = upstreamCounters.getGroup(edgeGroupName).findCounter("OUTPUT_RECORDS", false);
    return outputRecords == null ? -1 : outputRecords.getValue();
}
Also used : VertexStatus(org.apache.tez.dag.api.client.VertexStatus) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TezCounter(org.apache.tez.common.counters.TezCounter) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 7 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project hive by apache.

the class LLAPioSummary method print.

/**
 * Prints the LLAP IO summary: a header, then one summary line for every
 * non-reducer vertex (in sorted name order) whose counters are available,
 * followed by a closing separator.
 *
 * @param console sink that receives the summary lines
 */
@Override
public void print(SessionState.LogHelper console) {
    console.printInfo("");
    console.printInfo(LLAP_IO_SUMMARY_HEADER);
    final SortedSet<String> sortedVertexNames = new TreeSet<>(progressMap.keySet());
    final Set<StatusGetOpts> countersOnly = Collections.singleton(StatusGetOpts.GET_COUNTERS);
    final String llapCounterGroup = LlapIOCounters.class.getName();
    for (String vertex : sortedVertexNames) {
        // Reducers do not benefit from LLAP IO, so they are left out of the summary.
        if (!vertex.startsWith("Reducer")) {
            final TezCounters counters = vertexCounter(countersOnly, vertex);
            if (counters == null) {
                continue;
            }
            // The column header is emitted once, right before the first vertex row.
            if (!first) {
                console.printInfo(SEPARATOR);
                console.printInfo(LLAP_SUMMARY_HEADER);
                console.printInfo(SEPARATOR);
                first = true;
            }
            console.printInfo(vertexSummary(vertex, llapCounterGroup, counters));
        }
    }
    console.printInfo(SEPARATOR);
    console.printInfo("");
}
Also used : StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 8 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project hive by apache.

the class FSCountersSummary method print.

/**
 * Prints the file system counters summary: for every scheme reported by
 * {@code FileSystem.getAllStatistics()}, a header followed by one summary
 * line per vertex (in sorted name order) whose counters are available.
 *
 * @param console sink that receives the summary lines
 */
@Override
public void print(SessionState.LogHelper console) {
    console.printInfo("FileSystem Counters Summary");
    SortedSet<String> keys = new TreeSet<>(progressMap.keySet());
    Set<StatusGetOpts> statusOptions = Collections.singleton(StatusGetOpts.GET_COUNTERS);
    // This iterates the schemes known to FileSystem.getAllStatistics() in this process and
    // presumably relies on them covering the schemes accessed on the task side
    // as well. If not, we need a way to get all the schemes that are accessed by the tez task/llap.
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        // Upper-case with a fixed locale: the default-locale variant would turn e.g. "file"
        // into "FİLE" under a Turkish locale, and the scheme is handed to summary() below.
        final String scheme = statistics.getScheme().toUpperCase(java.util.Locale.ROOT);
        console.printInfo("");
        console.printInfo("Scheme: " + scheme);
        console.printInfo(SEPARATOR);
        console.printInfo(HEADER);
        console.printInfo(SEPARATOR);
        for (String vertexName : keys) {
            TezCounters vertexCounters = vertexCounters(statusOptions, vertexName);
            // Vertices without available counters are skipped.
            if (vertexCounters != null) {
                console.printInfo(summary(scheme, vertexName, vertexCounters));
            }
        }
        console.printInfo(SEPARATOR);
    }
}
Also used : StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) FileSystem(org.apache.hadoop.fs.FileSystem) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 9 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project hive by apache.

the class TezTask method execute.

/**
 * Runs this task's work as a Tez DAG.
 *
 * <p>Steps visible in this method: obtain (or create) the query {@code Context}, acquire a
 * Tez session through {@code WorkloadManagerFederation.getSession}, build the DAG from
 * {@code work}, submit it, monitor it with a {@code TezJobMonitor}, and finally fetch the
 * DAG counters. The session is handed back via {@code returnToSessionManager()} in a
 * finally block, and the work/context state is cleared before returning.
 *
 * <p>Exceptions thrown inside the main try block are logged and not rethrown.
 *
 * @param driverContext source of the {@code Context}; when {@code getCtx()} returns null a
 *        new {@code Context} is created here and cleared before this method returns
 * @return the code produced by {@code monitorExecution()}, possibly replaced by the result
 *         of {@code close(work, rc, dagClient)}; 1 if execution failed before the monitor
 *         produced a code
 */
@Override
public int execute(DriverContext driverContext) {
    // Pessimistic default: only monitorExecution() / close() below assign another value.
    int rc = 1;
    boolean cleanContext = false;
    Context ctx = null;
    // Holder for the current session; it is passed to submit() and re-read afterwards,
    // presumably because submit() may swap in a different session.
    Ref<TezSessionState> sessionRef = Ref.from(null);
    try {
        // Get or create Context object. If we create it we have to clean it later as well.
        ctx = driverContext.getCtx();
        if (ctx == null) {
            ctx = new Context(conf);
            cleanContext = true;
            // some DDL task that directly executes a TezTask does not setup Context and hence TriggerContext.
            // Setting queryId is messed up. Some DDL tasks have executionId instead of proper queryId.
            String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
            WmContext wmContext = new WmContext(System.currentTimeMillis(), queryId);
            ctx.setWmContext(wmContext);
        }
        // Need to remove this static hack. But this is the way currently to get a session.
        SessionState ss = SessionState.get();
        // Note: given that we return pool sessions to the pool in the finally block below, and that
        // we need to set the global to null to do that, this "reuse" may be pointless.
        TezSessionState session = sessionRef.value = ss.getTezSession();
        if (session != null && !session.isOpen()) {
            LOG.warn("The session: " + session + " has not been opened");
        }
        // We only need a username for UGI to use for groups; getGroups will fetch the groups
        // based on Hadoop configuration, as documented at
        // https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html
        String userName = ss.getUserName();
        List<String> groups = null;
        if (userName == null) {
            // No user: fall back to a placeholder name and leave groups as null.
            userName = "anonymous";
        } else {
            groups = UserGroupInformation.createRemoteUser(ss.getUserName()).getGroups();
        }
        // Inputs handed to getSession() below: user, groups and the "wmpool"/"wmapp" Hive variables.
        MappingInput mi = new MappingInput(userName, groups, ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
        WmContext wmContext = ctx.getWmContext();
        // jobConf will hold all the configuration for hadoop, tez, and hive
        JobConf jobConf = utils.createConfiguration(conf);
        // Get all user jars from work (e.g. input format stuff).
        String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf);
        // DAG scratch dir. We get a session from the pool so it may be different from Tez one.
        // TODO: we could perhaps reuse the same directory for HiveResources?
        Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), conf);
        CallerContext callerContext = CallerContext.create("HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr());
        // Session acquisition is timed with the perf logger.
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
        session = sessionRef.value = WorkloadManagerFederation.getSession(sessionRef.value, conf, mi, getWork().getLlapMode(), wmContext);
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
        try {
            ss.setTezSession(session);
            LOG.info("Subscribed to counters: {} for queryId: {}", wmContext.getSubscribedCounters(), wmContext.getQueryId());
            // Ensure the session is open and has the necessary local resources.
            // This would refresh any conf resources and also local resources.
            ensureSessionHasResources(session, allNonConfFiles);
            // This is a combination of the jar stuff from conf, and not from conf.
            List<LocalResource> allNonAppResources = session.getLocalizedResources();
            logResources(allNonAppResources);
            Map<String, LocalResource> allResources = DagUtils.createTezLrMap(session.getAppJarLr(), allNonAppResources);
            // next we translate the TezWork to a Tez DAG
            DAG dag = build(jobConf, work, scratchDir, ctx, allResources);
            dag.setCallerContext(callerContext);
            // Check isShutdown opportunistically; it's never unset.
            if (this.isShutdown) {
                throw new HiveException("Operation cancelled");
            }
            DAGClient dagClient = submit(jobConf, dag, sessionRef);
            // Re-read the session from the holder after submit().
            session = sessionRef.value;
            boolean wasShutdown = false;
            // Publish the client in the field under the lock, unless a shutdown was already requested.
            synchronized (dagClientLock) {
                assert this.dagClient == null;
                wasShutdown = this.isShutdown;
                if (!wasShutdown) {
                    this.dagClient = dagClient;
                }
            }
            if (wasShutdown) {
                // The client was not published above, so it is closed here before bailing out.
                closeDagClientOnCancellation(dagClient);
                throw new HiveException("Operation cancelled");
            }
            // finally monitor will print progress until the job is done
            TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx);
            rc = monitor.monitorExecution();
            if (rc != 0) {
                this.setException(new HiveException(monitor.getDiagnostics()));
            }
            // fetch the counters
            try {
                Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
                counters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters();
            } catch (Exception err) {
                // Don't fail execution due to counters - just don't print summary info
                LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete. " + err, err);
                counters = null;
            }
        } finally {
            // Note: due to TEZ-3846, the session may actually be invalid in case of some errors.
            // Currently, reopen on an attempted reuse will take care of that; we cannot tell
            // if the session is usable until we try.
            // We return this to the pool even if it's unusable; reopen is supposed to handle this.
            wmContext = ctx.getWmContext();
            try {
                if (sessionRef.value != null) {
                    sessionRef.value.returnToSessionManager();
                }
            } catch (Exception e) {
                LOG.error("Failed to return session: {} to pool", session, e);
                // Rethrown from this finally block; the outer catch below logs it.
                throw e;
            }
            // Print the session events summary in the configured format ("json" or "text");
            // "none" or any other value prints nothing.
            if (!conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("none") && wmContext != null) {
                if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("json")) {
                    wmContext.printJson(console);
                } else if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("text")) {
                    wmContext.print(console);
                }
            }
        }
        // Dump every counter group at INFO level when the exec summary or perf logging is enabled.
        if (LOG.isInfoEnabled() && counters != null && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_EXEC_SUMMARY) || Utilities.isPerfOrAboveLogging(conf))) {
            for (CounterGroup group : counters) {
                LOG.info(group.getDisplayName() + ":");
                for (TezCounter counter : group) {
                    LOG.info("   " + counter.getDisplayName() + ": " + counter.getValue());
                }
            }
        }
    } catch (Exception e) {
        LOG.error("Failed to execute tez graph.", e);
    // rc will be 1 at this point indicating failure.
    // NOTE(review): rc is not reset here, so if the exception was raised after
    // monitorExecution() assigned rc (e.g. by the session return above), rc keeps
    // the monitor's value rather than 1 - confirm this is intended.
    } finally {
        Utilities.clearWork(conf);
        // Clear gWorkMap
        for (BaseWork w : work.getAllWork()) {
            JobConf workCfg = workToConf.get(w);
            if (workCfg != null) {
                Utilities.clearWorkMapForConf(workCfg);
            }
        }
        // Only clear the Context when it was created by this method.
        if (cleanContext) {
            try {
                ctx.clear();
            } catch (Exception e) {
                /*best effort*/
                LOG.warn("Failed to clean up after tez job", e);
            }
        }
        // need to either move tmp files or remove them
        DAGClient dagClient = null;
        // Take the published client and reset the field under the same lock used to publish it.
        synchronized (dagClientLock) {
            dagClient = this.dagClient;
            this.dagClient = null;
        }
        // DagClient as such should have no bearing on jobClose.
        if (dagClient != null) {
            // rc will only be overwritten if close errors out
            rc = close(work, rc, dagClient);
        }
    }
    return rc;
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) CallerContext(org.apache.tez.client.CallerContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) TezCounter(org.apache.tez.common.counters.TezCounter) MappingInput(org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput) TezJobMonitor(org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor) JobConf(org.apache.hadoop.mapred.JobConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) CallerContext(org.apache.tez.client.CallerContext) Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) Path(org.apache.hadoop.fs.Path) CounterGroup(org.apache.tez.common.counters.CounterGroup) DAG(org.apache.tez.dag.api.DAG) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) DAGClient(org.apache.tez.dag.api.client.DAGClient)

Example 10 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project tez by apache.

the class TestTezJobs method testPerIOCounterAggregation.

/**
 * Runs a sort-merge join with per-IO counter generation enabled and checks that,
 * on the "joiner" vertex, the per-input counter values add up to the aggregated
 * TaskCounter values and that the per-output OUTPUT_RECORDS counter matches the
 * aggregated one.
 *
 * @throws Exception if setting up, running or inspecting the job fails
 */
@Test(timeout = 60000)
public void testPerIOCounterAggregation() throws Exception {
    String baseDir = "/tmp/perIOCounterAgg/";
    Path inPath1 = new Path(baseDir + "inPath1");
    Path inPath2 = new Path(baseDir + "inPath2");
    Path outPath = new Path(baseDir + "outPath");
    final Set<String> expectedResults = generateSortMergeJoinInput(inPath1, inPath2);
    Path stagingDirPath = new Path("/tmp/tez-staging-dir");
    remoteFs.mkdirs(stagingDirPath);
    TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
    // The feature under test: have tasks publish a separate counter group per input/output.
    conf.setBoolean(TezConfiguration.TEZ_TASK_GENERATE_COUNTERS_PER_IO, true);
    TezClient tezClient = TezClient.create(SortMergeJoinHelper.class.getSimpleName(), conf);
    tezClient.start();
    SortMergeJoinHelper sortMergeJoinHelper = new SortMergeJoinHelper(tezClient);
    sortMergeJoinHelper.setConf(conf);
    String[] args = new String[] { "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(), "-counter", inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
    assertEquals(0, sortMergeJoinHelper.run(conf, args, tezClient));
    verifySortMergeJoinInput(outPath, expectedResults);
    String joinerVertexName = "joiner";
    String input1Name = "input1";
    String input2Name = "input2";
    String joinOutputName = "joinOutput";
    // Fetch the joiner vertex status together with its counters.
    Set<StatusGetOpts> statusOpts = new HashSet<StatusGetOpts>();
    statusOpts.add(StatusGetOpts.GET_COUNTERS);
    VertexStatus joinerVertexStatus = sortMergeJoinHelper.dagClient.getVertexStatus(joinerVertexName, statusOpts);
    final TezCounters joinerCounters = joinerVertexStatus.getVertexCounters();
    final CounterGroup aggregatedGroup = joinerCounters.getGroup(TaskCounter.class.getCanonicalName());
    // Per-IO groups are looked up as TaskCounter_<vertex>_INPUT_<input name>.
    final CounterGroup input1Group = joinerCounters.getGroup(TaskCounter.class.getSimpleName() + "_" + joinerVertexName + "_INPUT_" + input1Name);
    final CounterGroup input2Group = joinerCounters.getGroup(TaskCounter.class.getSimpleName() + "_" + joinerVertexName + "_INPUT_" + input2Name);
    assertTrue("aggregated counter group cannot be empty", aggregatedGroup.size() > 0);
    assertTrue("per io group for input1 cannot be empty", input1Group.size() > 0);
    // Message previously named input1 although input2Group is the group being checked.
    assertTrue("per io group for input2 cannot be empty", input2Group.size() > 0);
    List<TaskCounter> countersToVerifyAgg = Arrays.asList(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ, TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN, TaskCounter.COMBINE_INPUT_RECORDS, TaskCounter.MERGED_MAP_OUTPUTS, TaskCounter.NUM_DISK_TO_DISK_MERGES, TaskCounter.NUM_FAILED_SHUFFLE_INPUTS, TaskCounter.NUM_MEM_TO_DISK_MERGES, TaskCounter.NUM_SHUFFLED_INPUTS, TaskCounter.NUM_SKIPPED_INPUTS, TaskCounter.REDUCE_INPUT_GROUPS, TaskCounter.REDUCE_INPUT_RECORDS, TaskCounter.SHUFFLE_BYTES, TaskCounter.SHUFFLE_BYTES_DECOMPRESSED, TaskCounter.SHUFFLE_BYTES_DISK_DIRECT, TaskCounter.SHUFFLE_BYTES_TO_DISK, TaskCounter.SHUFFLE_BYTES_TO_MEM, TaskCounter.SPILLED_RECORDS);
    int nonZeroCounters = 0;
    // verify that the sum of the counter values for edges add up to the aggregated counter value.
    for (TaskCounter c : countersToVerifyAgg) {
        // 'false' => look up only; a missing counter yields null and fails the assertNotNull below.
        TezCounter aggregatedCounter = aggregatedGroup.findCounter(c.name(), false);
        TezCounter input1Counter = input1Group.findCounter(c.name(), false);
        TezCounter input2Counter = input2Group.findCounter(c.name(), false);
        assertNotNull("aggregated counter cannot be null " + c.name(), aggregatedCounter);
        assertNotNull("input1 counter cannot be null " + c.name(), input1Counter);
        assertNotNull("input2 counter cannot be null " + c.name(), input2Counter);
        assertEquals("aggregated counter does not match sum of input counters " + c.name(), aggregatedCounter.getValue(), input1Counter.getValue() + input2Counter.getValue());
        if (aggregatedCounter.getValue() > 0) {
            nonZeroCounters++;
        }
    }
    // ensure that at least one of the counters tested above were non-zero.
    assertTrue("At least one of the counter should be non-zero. invalid test ", nonZeroCounters > 0);
    // Same check on the output side: the single output's OUTPUT_RECORDS must equal the aggregate.
    CounterGroup joinerOutputGroup = joinerCounters.getGroup(TaskCounter.class.getSimpleName() + "_" + joinerVertexName + "_OUTPUT_" + joinOutputName);
    String outputCounterName = TaskCounter.OUTPUT_RECORDS.name();
    TezCounter aggregateCounter = aggregatedGroup.findCounter(outputCounterName, false);
    TezCounter joinerOutputCounter = joinerOutputGroup.findCounter(outputCounterName, false);
    assertNotNull("aggregated counter cannot be null " + outputCounterName, aggregateCounter);
    assertNotNull("output counter cannot be null " + outputCounterName, joinerOutputCounter);
    assertTrue("counter value is zero. test is invalid", aggregateCounter.getValue() > 0);
    assertEquals("aggregated counter does not match sum of output counters " + outputCounterName, aggregateCounter.getValue(), joinerOutputCounter.getValue());
}
Also used : Path(org.apache.hadoop.fs.Path) VertexStatus(org.apache.tez.dag.api.client.VertexStatus) CounterGroup(org.apache.tez.common.counters.CounterGroup) TezCounter(org.apache.tez.common.counters.TezCounter) TezCounters(org.apache.tez.common.counters.TezCounters) TezClient(org.apache.tez.client.TezClient) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TaskCounter(org.apache.tez.common.counters.TaskCounter) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts): 13; TezCounters (org.apache.tez.common.counters.TezCounters): 8; TezCounter (org.apache.tez.common.counters.TezCounter): 6; DAGClient (org.apache.tez.dag.api.client.DAGClient): 6; Path (org.apache.hadoop.fs.Path): 5; DAG (org.apache.tez.dag.api.DAG): 5; DAGStatus (org.apache.tez.dag.api.client.DAGStatus): 5; IOException (java.io.IOException): 4; TezException (org.apache.tez.dag.api.TezException): 4; VertexStatus (org.apache.tez.dag.api.client.VertexStatus): 4; Context (org.apache.hadoop.hive.ql.Context): 3; CounterGroup (org.apache.tez.common.counters.CounterGroup): 3; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 3; Progress (org.apache.tez.dag.api.client.Progress): 3; Configuration (org.apache.hadoop.conf.Configuration): 2; FileSystem (org.apache.hadoop.fs.FileSystem): 2; MappingInput (org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput): 2; TezJobMonitor (org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor): 2; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2; BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 2