Search in sources :

Example 1 with JobStats

use of org.apache.pig.tools.pigstats.JobStats in project zeppelin by apache.

the class PigUtils method extractJobIdsFromSimplePigStats.

/**
 * Collects the job id of every job recorded in the given Pig statistics.
 *
 * <p>The job graph is obtained reflectively from the {@code jobPlan} field declared on
 * {@link PigStats}; {@code getJobId()} is then called on each entry of its job list.
 *
 * @param stat the statistics object whose job plan is inspected
 * @return the job ids, in the order produced by {@code getJobList()}
 * @throws RuntimeException if anything goes wrong while reading the field or collecting
 *     the ids; the original exception is preserved as the cause
 */
public static List<String> extractJobIdsFromSimplePigStats(SimplePigStats stat) {
    List<String> jobIds = new ArrayList<>();
    try {
        // Look the field up on PigStats (where it is declared) and lift the access check
        // before reading it from the instance.
        Field jobPlanField = PigStats.class.getDeclaredField("jobPlan");
        jobPlanField.setAccessible(true);
        PigStats.JobGraph jobPlan = (PigStats.JobGraph) jobPlanField.get(stat);
        for (JobStats js : jobPlan.getJobList()) {
            jobIds.add(js.getJobId());
        }
    } catch (Exception e) {
        // Log and rethrow unchecked so callers are not forced to handle reflection errors.
        LOGGER.error("Can not extract jobIds from SimplePigStats", e);
        throw new RuntimeException("Can not extract jobIds from SimplePigStats", e);
    }
    return jobIds;
}
Also used : Field(java.lang.reflect.Field) PigStats(org.apache.pig.tools.pigstats.PigStats) SimplePigStats(org.apache.pig.tools.pigstats.mapreduce.SimplePigStats) ArrayList(java.util.ArrayList) IOException(java.io.IOException) JobStats(org.apache.pig.tools.pigstats.JobStats) MRJobStats(org.apache.pig.tools.pigstats.mapreduce.MRJobStats)

Example 2 with JobStats

use of org.apache.pig.tools.pigstats.JobStats in project oozie by apache.

the class OoziePigStats method toJSON.

/**
 * The PigStats API is used to collect the statistics and the result is returned as a JSON String.
 *
 * <p>The resulting object contains the action type, the Pig summary values (bytes/records
 * written, duration, error code and message, features, versions, spill counts, return code,
 * script id), one entry per job keyed by its job id, and a {@code JOB_GRAPH} entry holding all
 * job ids as a comma separated string. Numeric summary values are stored as strings.
 *
 * @return a JSON string
 */
@SuppressWarnings("unchecked")
@Override
public String toJSON() {
    JSONObject pigStatsGroup = new JSONObject();
    pigStatsGroup.put("ACTION_TYPE", getCurrentActionType().toString());
    // pig summary related counters
    pigStatsGroup.put("BYTES_WRITTEN", Long.toString(pigStats.getBytesWritten()));
    pigStatsGroup.put("DURATION", Long.toString(pigStats.getDuration()));
    pigStatsGroup.put("ERROR_CODE", Long.toString(pigStats.getErrorCode()));
    pigStatsGroup.put("ERROR_MESSAGE", pigStats.getErrorMessage());
    pigStatsGroup.put("FEATURES", pigStats.getFeatures());
    pigStatsGroup.put("HADOOP_VERSION", pigStats.getHadoopVersion());
    pigStatsGroup.put("NUMBER_JOBS", Long.toString(pigStats.getNumberJobs()));
    pigStatsGroup.put("PIG_VERSION", pigStats.getPigVersion());
    pigStatsGroup.put("PROACTIVE_SPILL_COUNT_OBJECTS", Long.toString(pigStats.getProactiveSpillCountObjects()));
    pigStatsGroup.put("PROACTIVE_SPILL_COUNT_RECORDS", Long.toString(pigStats.getProactiveSpillCountRecords()));
    pigStatsGroup.put("RECORD_WRITTEN", Long.toString(pigStats.getRecordWritten()));
    pigStatsGroup.put("RETURN_CODE", Long.toString(pigStats.getReturnCode()));
    pigStatsGroup.put("SCRIPT_ID", pigStats.getScriptId());
    pigStatsGroup.put("SMM_SPILL_COUNT", Long.toString(pigStats.getSMMSpillCount()));
    PigStats.JobGraph jobGraph = pigStats.getJobGraph();
    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();
    String separator = ",";
    for (JobStats jobStats : jobGraph) {
        // Get all the HadoopIds and put them as comma separated string for JOB_GRAPH
        String hadoopId = jobStats.getJobId();
        // Only emit the separator once something has already been written.
        if (sb.length() > 0) {
            sb.append(separator);
        }
        sb.append(hadoopId);
        // Hadoop Counters for pig created MR job
        pigStatsGroup.put(hadoopId, toJSONFromJobStats(jobStats));
    }
    pigStatsGroup.put("JOB_GRAPH", sb.toString());
    return pigStatsGroup.toJSONString();
}
Also used : JSONObject(org.json.simple.JSONObject) PigStats(org.apache.pig.tools.pigstats.PigStats) JobStats(org.apache.pig.tools.pigstats.JobStats)

Example 3 with JobStats

use of org.apache.pig.tools.pigstats.JobStats in project vespa by vespa-engine.

the class VespaStorageTest method assertAllDocumentsOk.

/**
 * Executes the given Pig script as a batch and checks the Vespa counters of every job in the
 * job graph of the first executed job's statistics: Hadoop counters must be present, and the
 * counters must report 10 documents sent, 0 failed and 10 ok.
 *
 * @param script the Pig script passed to {@code setup}
 * @param conf the configuration passed to {@code setup}
 * @throws Exception if setting up or executing the script fails
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer pigServer = setup(script, conf);
    // Only the statistics of the first batch job are inspected.
    PigStats firstJobStatistics = pigServer.executeBatch().get(0).getStatistics();
    for (JobStats jobStats : firstJobStatistics.getJobGraph()) {
        MRJobStats mapReduceStats = (MRJobStats) jobStats;
        Counters rawCounters = mapReduceStats.getHadoopCounters();
        assertNotNull(rawCounters);

        VespaCounters vespaCounters = VespaCounters.get(rawCounters);
        assertEquals(10, vespaCounters.getDocumentsSent());
        assertEquals(0, vespaCounters.getDocumentsFailed());
        assertEquals(10, vespaCounters.getDocumentsOk());
    }
}
Also used : PigServer(org.apache.pig.PigServer) PigStats(org.apache.pig.tools.pigstats.PigStats) MRJobStats(org.apache.pig.tools.pigstats.mapreduce.MRJobStats) VespaCounters(com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters) Counters(org.apache.hadoop.mapred.Counters) ExecJob(org.apache.pig.backend.executionengine.ExecJob) JobStats(org.apache.pig.tools.pigstats.JobStats)

Example 4 with JobStats

use of org.apache.pig.tools.pigstats.JobStats in project shifu by ShifuML.

the class EvalModelProcessor method runDistMetaScore.

/**
 * Submits the {@code scripts/EvalScoreMetaSort.pig} script for the given meta score column and
 * builds a {@link ScoreStatus} from the Hadoop counters of the resulting Pig jobs.
 *
 * <p>Jobs are visited in job-graph order; the first job whose record counter is non-zero
 * supplies all values. As a side effect the {@code evalRecords} field is set to that count.
 *
 * @param evalConfig the evaluation configuration providing data set, name and delimiter
 * @param metaScore the meta score column name
 * @return the score status, or {@code null} if no job reports a non-zero record counter
 * @throws IOException declared by the file-system helpers used before and after the job
 * @throws ShifuException if submitting the Pig job fails with an {@link IOException}
 * @throws RuntimeException if submitting the Pig job fails with any other throwable
 */
@SuppressWarnings("deprecation")
private ScoreStatus runDistMetaScore(EvalConfig evalConfig, String metaScore) throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();

    // clean up output directories
    ShifuFileUtils.deleteFile(pathFinder.getEvalMetaScorePath(evalConfig, metaScore), sourceType);

    // prepare special parameters and execute pig
    Map<String, String> pigParams = new HashMap<String, String>();
    pigParams.put(Constants.SOURCE_TYPE, sourceType.toString());
    pigParams.put("pathEvalRawData", evalConfig.getDataSet().getDataPath());
    pigParams.put("pathSortScoreData", pathFinder.getEvalMetaScorePath(evalConfig, metaScore));
    pigParams.put("eval_set_name", evalConfig.getName());
    pigParams.put("delimiter", evalConfig.getDataSet().getDataDelimiter());
    pigParams.put("column_name", metaScore);

    // max min score folder
    Map<String, String> jobConf = new HashMap<String, String>();
    String maxMinScoreFolder = ShifuFileUtils.getFileSystemBySourceType(sourceType)
            .makeQualified(new Path("tmp" + File.separator + "maxmin_score_" + System.currentTimeMillis() + "_" + RANDOM.nextLong()))
            .toString();
    jobConf.put(Constants.SHIFU_EVAL_MAXMIN_SCORE_OUTPUT, maxMinScoreFolder);

    try {
        PigExecutor.getExecutor().submitJob(
                modelConfig,
                pathFinder.getScriptPath("scripts/EvalScoreMetaSort.pig"),
                pigParams,
                evalConfig.getDataSet().getSource(),
                jobConf,
                super.pathFinder);
    } catch (IOException ioFailure) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, ioFailure);
    } catch (Throwable otherFailure) {
        throw new RuntimeException(otherFailure);
    }

    for (JobStats jobStats : PigStats.get().getJobGraph()) {
        long recordCount = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_RECORDS);
        LOG.info("Total valid eval records is : {}", recordCount);
        // If no basic record counter, check next one
        if (recordCount == 0L) {
            continue;
        }
        this.evalRecords = recordCount;

        long posTags = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_POSTAGS);
        long negTags = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_NEGTAGS);

        // Weighted counters are divided by the weight scale constant to obtain the weighted tags.
        double weightScale = Constants.EVAL_COUNTER_WEIGHT_SCALE * 1.0d;
        double weightedPosTags = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_WPOSTAGS) / weightScale;
        double weightedNegTags = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_WNEGTAGS) / weightScale;

        // Defaults used when the model is not a regression model.
        double maxScore = Integer.MIN_VALUE;
        double minScore = Integer.MAX_VALUE;
        if (modelConfig.isRegression()) {
            double[] maxThenMin = locateMaxMinScoreFromFile(sourceType, maxMinScoreFolder);
            maxScore = maxThenMin[0];
            minScore = maxThenMin[1];
            LOG.info("Max score is {}, min score is {}", maxScore, minScore);
            ShifuFileUtils.deleteFile(maxMinScoreFolder, sourceType);
        }

        long badMetaScoreCount = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter("BAD_META_SCORE");
        // Get score status from Counter to avoid re-computing such metrics
        LOG.info("Eval records is {}; and bad meta score is {}.", recordCount, badMetaScoreCount);
        return new ScoreStatus(posTags, negTags, weightedPosTags, weightedNegTags, maxScore, minScore, recordCount);
    }

    // No job reported any record.
    return null;
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) IOException(java.io.IOException) JobStats(org.apache.pig.tools.pigstats.JobStats) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 5 with JobStats

use of org.apache.pig.tools.pigstats.JobStats in project zeppelin by apache.

the class PigUtils method extractFromSimplePigStats.

/**
 * Renders a human-readable "Script Statistics" report from the given Pig statistics.
 *
 * <p>The report contains a header line (Hadoop/Pig version, user id, start and end time,
 * features), the overall outcome, the successful and/or failed job tables depending on the
 * return code, the input and output statistics, the spill/written counters and the job DAG.
 * The {@code userId}, {@code startTime}, {@code endTime} and {@code jobPlan} values are read
 * reflectively from the fields declared on {@link PigStats}.
 *
 * @param stats the statistics to render
 * @return the report text; {@code null} when the return code is
 *     {@code PigRunner.ReturnCode.UNKNOWN} or the Pig context is {@code null}; or an error
 *     text including the stack trace when any exception occurs while building the report
 */
public static String extractFromSimplePigStats(SimplePigStats stats) {
    try {
        Field userIdField = PigStats.class.getDeclaredField("userId");
        userIdField.setAccessible(true);
        String userId = (String) (userIdField.get(stats));
        Field startTimeField = PigStats.class.getDeclaredField("startTime");
        startTimeField.setAccessible(true);
        long startTime = (Long) (startTimeField.get(stats));
        Field endTimeField = PigStats.class.getDeclaredField("endTime");
        endTimeField.setAccessible(true);
        long endTime = (Long) (endTimeField.get(stats));

        // Read the return code once; every branch below decides on the same value.
        final int returnCode = stats.getReturnCode();
        if (returnCode == PigRunner.ReturnCode.UNKNOWN) {
            LOGGER.warn("unknown return code, can't display the results");
            return null;
        }
        if (stats.getPigContext() == null) {
            LOGGER.warn("unknown exec type, don't display the results");
            return null;
        }

        // SimpleDateFormat is created per call, so the instance is never shared.
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
        StringBuilder sb = new StringBuilder();
        sb.append("\nHadoopVersion\tPigVersion\tUserId\tStartedAt\tFinishedAt\tFeatures\n");
        sb.append(stats.getHadoopVersion()).append("\t")
                .append(stats.getPigVersion()).append("\t")
                .append(userId).append("\t")
                .append(sdf.format(new Date(startTime))).append("\t")
                .append(sdf.format(new Date(endTime))).append("\t")
                .append(stats.getFeatures()).append("\n");
        sb.append("\n");

        final boolean succeeded = returnCode == PigRunner.ReturnCode.SUCCESS;
        final boolean partiallyFailed = returnCode == PigRunner.ReturnCode.PARTIAL_FAILURE;
        final boolean failed = returnCode == PigRunner.ReturnCode.FAILURE;

        if (succeeded) {
            sb.append("Success!\n");
        } else if (partiallyFailed) {
            sb.append("Some jobs have failed! Stop running all dependent jobs\n");
        } else {
            sb.append("Failed!\n");
        }
        sb.append("\n");

        Field jobPlanField = PigStats.class.getDeclaredField("jobPlan");
        jobPlanField.setAccessible(true);
        PigStats.JobGraph jobPlan = (PigStats.JobGraph) jobPlanField.get(stats);

        // A partial failure lists both the successful and the failed jobs.
        if (succeeded || partiallyFailed) {
            sb.append("Job Stats (time in seconds):\n");
            sb.append(MRJobStats.SUCCESS_HEADER).append("\n");
            List<JobStats> arr = jobPlan.getSuccessfulJobs();
            for (JobStats js : arr) {
                sb.append(js.getDisplayString());
            }
            sb.append("\n");
        }
        if (failed || partiallyFailed) {
            sb.append("Failed Jobs:\n");
            sb.append(MRJobStats.FAILURE_HEADER).append("\n");
            List<JobStats> arr = jobPlan.getFailedJobs();
            for (JobStats js : arr) {
                sb.append(js.getDisplayString());
            }
            sb.append("\n");
        }

        sb.append("Input(s):\n");
        for (InputStats is : stats.getInputStats()) {
            sb.append(is.getDisplayString());
        }
        sb.append("\n");
        sb.append("Output(s):\n");
        for (OutputStats ds : stats.getOutputStats()) {
            sb.append(ds.getDisplayString());
        }

        sb.append("\nCounters:\n");
        sb.append("Total records written : ").append(stats.getRecordWritten()).append("\n");
        sb.append("Total bytes written : ").append(stats.getBytesWritten()).append("\n");
        sb.append("Spillable Memory Manager spill count : ").append(stats.getSMMSpillCount()).append("\n");
        sb.append("Total bags proactively spilled: ").append(stats.getProactiveSpillCountObjects()).append("\n");
        sb.append("Total records proactively spilled: ").append(stats.getProactiveSpillCountRecords()).append("\n");
        sb.append("\nJob DAG:\n").append(jobPlan.toString());
        return "Script Statistics: \n" + sb.toString();
    } catch (Exception e) {
        // Best effort: report the failure as text instead of propagating it to the caller.
        LOGGER.error("Can not extract message from SimplePigStats", e);
        return "Can not extract message from SimplePigStats," + ExceptionUtils.getStackTrace(e);
    }
}
Also used : OutputStats(org.apache.pig.tools.pigstats.OutputStats) PigStats(org.apache.pig.tools.pigstats.PigStats) SimplePigStats(org.apache.pig.tools.pigstats.mapreduce.SimplePigStats) Date(java.util.Date) IOException(java.io.IOException) JobStats(org.apache.pig.tools.pigstats.JobStats) MRJobStats(org.apache.pig.tools.pigstats.mapreduce.MRJobStats) Field(java.lang.reflect.Field) InputStats(org.apache.pig.tools.pigstats.InputStats) SimpleDateFormat(java.text.SimpleDateFormat)

Aggregations

JobStats (org.apache.pig.tools.pigstats.JobStats): 10, IOException (java.io.IOException): 6, PigStats (org.apache.pig.tools.pigstats.PigStats): 5, SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType): 4, ShifuException (ml.shifu.shifu.exception.ShifuException): 4, HashMap (java.util.HashMap): 3, MRJobStats (org.apache.pig.tools.pigstats.mapreduce.MRJobStats): 3, Field (java.lang.reflect.Field): 2, Path (org.apache.hadoop.fs.Path): 2, SimplePigStats (org.apache.pig.tools.pigstats.mapreduce.SimplePigStats): 2, Event (com.twitter.ambrose.model.Event): 1, VespaCounters (com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters): 1, SimpleDateFormat (java.text.SimpleDateFormat): 1, ArrayList (java.util.ArrayList): 1, Date (java.util.Date): 1, RawSourceData (ml.shifu.shifu.container.obj.RawSourceData): 1, Counters (org.apache.hadoop.mapred.Counters): 1, PigServer (org.apache.pig.PigServer): 1, ExecJob (org.apache.pig.backend.executionengine.ExecJob): 1, InputStats (org.apache.pig.tools.pigstats.InputStats): 1