Use of org.apache.pig.tools.pigstats.JobStats in the Apache Zeppelin project:
class PigUtils, method extractJobIdsFromSimplePigStats.
/**
 * Extracts the Hadoop job ids from the given {@link SimplePigStats}.
 *
 * <p>PigStats does not expose its {@code jobPlan} through a public API, so the
 * private field is read reflectively; this may break across Pig versions.
 *
 * @param stat the statistics object produced by a Pig run
 * @return the job ids, in job-graph order
 * @throws RuntimeException if the private {@code jobPlan} field cannot be read
 */
public static List<String> extractJobIdsFromSimplePigStats(SimplePigStats stat) {
    List<String> jobIds = new ArrayList<>();
    try {
        Field jobPlanField = PigStats.class.getDeclaredField("jobPlan");
        jobPlanField.setAccessible(true);
        PigStats.JobGraph jobPlan = (PigStats.JobGraph) jobPlanField.get(stat);
        for (JobStats js : jobPlan.getJobList()) {
            jobIds.add(js.getJobId());
        }
        return jobIds;
    } catch (Exception e) {
        // Fixed typo in the message: "SimpelPigStats" -> "SimplePigStats".
        LOGGER.error("Can not extract jobIds from SimplePigStats", e);
        throw new RuntimeException("Can not extract jobIds from SimplePigStats", e);
    }
}
Use of org.apache.pig.tools.pigstats.JobStats in the Apache Oozie project:
class OoziePigStats, method toJSON.
/**
 * The PigStats API is used to collect the statistics and the result is returned as a JSON String.
 *
 * <p>The output contains the Pig-level summary counters, one entry per Hadoop job keyed by
 * its job id (with that job's Hadoop counters), and a {@code JOB_GRAPH} entry listing all
 * job ids as a comma-separated string.
 *
 * @return a JSON string
 */
@SuppressWarnings("unchecked")
@Override
public String toJSON() {
    JSONObject pigStatsGroup = new JSONObject();
    pigStatsGroup.put("ACTION_TYPE", getCurrentActionType().toString());
    // Pig summary related counters. Numeric values are serialized as strings.
    pigStatsGroup.put("BYTES_WRITTEN", Long.toString(pigStats.getBytesWritten()));
    pigStatsGroup.put("DURATION", Long.toString(pigStats.getDuration()));
    pigStatsGroup.put("ERROR_CODE", Long.toString(pigStats.getErrorCode()));
    pigStatsGroup.put("ERROR_MESSAGE", pigStats.getErrorMessage());
    pigStatsGroup.put("FEATURES", pigStats.getFeatures());
    pigStatsGroup.put("HADOOP_VERSION", pigStats.getHadoopVersion());
    pigStatsGroup.put("NUMBER_JOBS", Long.toString(pigStats.getNumberJobs()));
    pigStatsGroup.put("PIG_VERSION", pigStats.getPigVersion());
    pigStatsGroup.put("PROACTIVE_SPILL_COUNT_OBJECTS", Long.toString(pigStats.getProactiveSpillCountObjects()));
    pigStatsGroup.put("PROACTIVE_SPILL_COUNT_RECORDS", Long.toString(pigStats.getProactiveSpillCountRecords()));
    pigStatsGroup.put("RECORD_WRITTEN", Long.toString(pigStats.getRecordWritten()));
    pigStatsGroup.put("RETURN_CODE", Long.toString(pigStats.getReturnCode()));
    pigStatsGroup.put("SCRIPT_ID", pigStats.getScriptId());
    pigStatsGroup.put("SMM_SPILL_COUNT", Long.toString(pigStats.getSMMSpillCount()));
    PigStats.JobGraph jobGraph = pigStats.getJobGraph();
    // StringBuilder instead of StringBuffer: the accumulator is method-local and
    // never shared between threads, so the synchronized StringBuffer is unnecessary.
    StringBuilder sb = new StringBuilder();
    String separator = ",";
    for (JobStats jobStats : jobGraph) {
        // Get all the HadoopIds and put them as comma separated string for JOB_GRAPH
        String hadoopId = jobStats.getJobId();
        if (sb.length() > 0) {
            sb.append(separator);
        }
        sb.append(hadoopId);
        // Hadoop Counters for pig created MR job
        pigStatsGroup.put(hadoopId, toJSONFromJobStats(jobStats));
    }
    pigStatsGroup.put("JOB_GRAPH", sb.toString());
    return pigStatsGroup.toJSONString();
}
Use of org.apache.pig.tools.pigstats.JobStats in the Vespa project (vespa-engine):
class VespaStorageTest, method assertAllDocumentsOk.
/**
 * Runs the given Pig script as a batch and asserts, for every MR job in the job
 * graph, that the Vespa counters report 10 documents sent, 10 ok, and 0 failed.
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer pigServer = setup(script, conf);
    List<ExecJob> batchJobs = pigServer.executeBatch();
    PigStats statistics = batchJobs.get(0).getStatistics();
    for (JobStats jobStats : statistics.getJobGraph()) {
        Counters rawCounters = ((MRJobStats) jobStats).getHadoopCounters();
        assertNotNull(rawCounters);
        VespaCounters vespaCounters = VespaCounters.get(rawCounters);
        assertEquals(10, vespaCounters.getDocumentsSent());
        assertEquals(0, vespaCounters.getDocumentsFailed());
        assertEquals(10, vespaCounters.getDocumentsOk());
    }
}
Use of org.apache.pig.tools.pigstats.JobStats in the Shifu project (ShifuML):
class EvalModelProcessor, method runDistMetaScore.
/**
 * Runs the meta-score sorting Pig job for the given eval set and collects score
 * statistics from the Hadoop counters of the resulting MR job.
 *
 * <p>Iterates the Pig job graph and returns a {@link ScoreStatus} built from the
 * first job that reports a non-zero record counter; returns {@code null} when no
 * job carries the record counter.
 *
 * @param evalConfig the eval set configuration
 * @param metaScore  the meta score column name to sort by
 * @return score statistics from the job counters, or {@code null} if none found
 * @throws IOException on file-system failures while preparing or cleaning paths
 */
@SuppressWarnings("deprecation")
private ScoreStatus runDistMetaScore(EvalConfig evalConfig, String metaScore) throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();
    // Clean up output directories from any previous run.
    ShifuFileUtils.deleteFile(pathFinder.getEvalMetaScorePath(evalConfig, metaScore), sourceType);
    // Prepare special parameters and execute pig.
    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put(Constants.SOURCE_TYPE, sourceType.toString());
    paramsMap.put("pathEvalRawData", evalConfig.getDataSet().getDataPath());
    paramsMap.put("pathSortScoreData", pathFinder.getEvalMetaScorePath(evalConfig, metaScore));
    paramsMap.put("eval_set_name", evalConfig.getName());
    paramsMap.put("delimiter", evalConfig.getDataSet().getDataDelimiter());
    paramsMap.put("column_name", metaScore);
    String pigScript = "scripts/EvalScoreMetaSort.pig";
    Map<String, String> confMap = new HashMap<String, String>();
    // Unique temp folder for the max/min score side-output of this run.
    String maxMinScoreFolder = ShifuFileUtils.getFileSystemBySourceType(sourceType).makeQualified(new Path("tmp" + File.separator + "maxmin_score_" + System.currentTimeMillis() + "_" + RANDOM.nextLong())).toString();
    confMap.put(Constants.SHIFU_EVAL_MAXMIN_SCORE_OUTPUT, maxMinScoreFolder);
    try {
        PigExecutor.getExecutor().submitJob(modelConfig, pathFinder.getScriptPath(pigScript), paramsMap, evalConfig.getDataSet().getSource(), confMap, super.pathFinder);
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }
    for (JobStats jobStats : PigStats.get().getJobGraph()) {
        long evalRecords = getShifuCounter(jobStats, Constants.COUNTER_RECORDS);
        LOG.info("Total valid eval records is : {}", evalRecords);
        // If no basic record counter, check next job.
        if (evalRecords == 0L) {
            continue;
        }
        this.evalRecords = evalRecords;
        long pigPosTags = getShifuCounter(jobStats, Constants.COUNTER_POSTAGS);
        long pigNegTags = getShifuCounter(jobStats, Constants.COUNTER_NEGTAGS);
        // Weighted counters are stored scaled as longs; scale back to doubles.
        double pigPosWeightTags = getShifuCounter(jobStats, Constants.COUNTER_WPOSTAGS) / (Constants.EVAL_COUNTER_WEIGHT_SCALE * 1.0d);
        double pigNegWeightTags = getShifuCounter(jobStats, Constants.COUNTER_WNEGTAGS) / (Constants.EVAL_COUNTER_WEIGHT_SCALE * 1.0d);
        double maxScore = Integer.MIN_VALUE;
        double minScore = Integer.MAX_VALUE;
        if (modelConfig.isRegression()) {
            double[] maxMinScores = locateMaxMinScoreFromFile(sourceType, maxMinScoreFolder);
            maxScore = maxMinScores[0];
            minScore = maxMinScores[1];
            LOG.info("Max score is {}, min score is {}", maxScore, minScore);
            ShifuFileUtils.deleteFile(maxMinScoreFolder, sourceType);
        }
        long badMetaScores = getShifuCounter(jobStats, "BAD_META_SCORE");
        // Get score status from Counter to avoid re-computing such metrics.
        LOG.info("Eval records is {}; and bad meta score is {}.", evalRecords, badMetaScores);
        return new ScoreStatus(pigPosTags, pigNegTags, pigPosWeightTags, pigNegWeightTags, maxScore, minScore, evalRecords);
    }
    return null;
}

/**
 * Reads a single counter from the Shifu counter group of the given job's Hadoop counters.
 * Extracted to avoid repeating the {@code getHadoopCounters().getGroup(...).getCounter(...)}
 * chain for every counter.
 */
private static long getShifuCounter(JobStats jobStats, String counterName) {
    return jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(counterName);
}
Use of org.apache.pig.tools.pigstats.JobStats in the Apache Zeppelin project:
class PigUtils, method extractFromSimplePigStats.
/**
 * Renders a human-readable summary of a Pig run from the given {@link SimplePigStats}:
 * header line, success/failure status, per-job stats, inputs/outputs, counters, and
 * the job DAG.
 *
 * <p>The {@code userId}, {@code startTime}, {@code endTime} and {@code jobPlan} fields
 * of PigStats are private with no public accessors, so they are read reflectively;
 * this may break across Pig versions.
 *
 * @param stats the statistics object produced by a Pig run
 * @return the formatted statistics report, {@code null} when the return code or exec
 *         type is unknown, or an error message with a stack trace if extraction fails
 */
public static String extractFromSimplePigStats(SimplePigStats stats) {
    try {
        Field userIdField = PigStats.class.getDeclaredField("userId");
        userIdField.setAccessible(true);
        String userId = (String) (userIdField.get(stats));
        Field startTimeField = PigStats.class.getDeclaredField("startTime");
        startTimeField.setAccessible(true);
        long startTime = (Long) (startTimeField.get(stats));
        Field endTimeField = PigStats.class.getDeclaredField("endTime");
        endTimeField.setAccessible(true);
        long endTime = (Long) (endTimeField.get(stats));
        if (stats.getReturnCode() == PigRunner.ReturnCode.UNKNOWN) {
            LOGGER.warn("unknown return code, can't display the results");
            return null;
        }
        if (stats.getPigContext() == null) {
            LOGGER.warn("unknown exec type, don't display the results");
            return null;
        }
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
        StringBuilder sb = new StringBuilder();
        sb.append("\nHadoopVersion\tPigVersion\tUserId\tStartedAt\tFinishedAt\tFeatures\n");
        sb.append(stats.getHadoopVersion()).append("\t").append(stats.getPigVersion()).append("\t").append(userId).append("\t").append(sdf.format(new Date(startTime))).append("\t").append(sdf.format(new Date(endTime))).append("\t").append(stats.getFeatures()).append("\n");
        sb.append("\n");
        if (stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS) {
            sb.append("Success!\n");
        } else if (stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
            sb.append("Some jobs have failed! Stop running all dependent jobs\n");
        } else {
            sb.append("Failed!\n");
        }
        sb.append("\n");
        Field jobPlanField = PigStats.class.getDeclaredField("jobPlan");
        jobPlanField.setAccessible(true);
        PigStats.JobGraph jobPlan = (PigStats.JobGraph) jobPlanField.get(stats);
        if (stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS || stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
            sb.append("Job Stats (time in seconds):\n");
            sb.append(MRJobStats.SUCCESS_HEADER).append("\n");
            for (JobStats js : jobPlan.getSuccessfulJobs()) {
                sb.append(js.getDisplayString());
            }
            sb.append("\n");
        }
        if (stats.getReturnCode() == PigRunner.ReturnCode.FAILURE || stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
            sb.append("Failed Jobs:\n");
            sb.append(MRJobStats.FAILURE_HEADER).append("\n");
            for (JobStats js : jobPlan.getFailedJobs()) {
                sb.append(js.getDisplayString());
            }
            sb.append("\n");
        }
        sb.append("Input(s):\n");
        for (InputStats is : stats.getInputStats()) {
            sb.append(is.getDisplayString());
        }
        sb.append("\n");
        sb.append("Output(s):\n");
        for (OutputStats ds : stats.getOutputStats()) {
            sb.append(ds.getDisplayString());
        }
        // Use chained appends instead of string concatenation inside append(...),
        // which would build an intermediate String on every line.
        sb.append("\nCounters:\n");
        sb.append("Total records written : ").append(stats.getRecordWritten()).append("\n");
        sb.append("Total bytes written : ").append(stats.getBytesWritten()).append("\n");
        sb.append("Spillable Memory Manager spill count : ").append(stats.getSMMSpillCount()).append("\n");
        sb.append("Total bags proactively spilled: ").append(stats.getProactiveSpillCountObjects()).append("\n");
        sb.append("Total records proactively spilled: ").append(stats.getProactiveSpillCountRecords()).append("\n");
        sb.append("\nJob DAG:\n").append(jobPlan.toString());
        return "Script Statistics: \n" + sb.toString();
    } catch (Exception e) {
        LOGGER.error("Can not extract message from SimplePigStats", e);
        // Fixed typo in the returned message: "SimpelPigStats" -> "SimplePigStats".
        return "Can not extract message from SimplePigStats," + ExceptionUtils.getStackTrace(e);
    }
}
Aggregations