Use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
From class PerformanceEvaluator, method review(long).
/**
 * Opens the evaluation confusion matrix file and runs the review over its records.
 *
 * @param records number of records to review; zero is rejected
 * @throws IOException if the confusion matrix file cannot be opened or read
 * @throws ShifuException with {@code ERROR_EVALCONFMTR} when {@code records} is zero
 */
public void review(long records) throws IOException {
    // Guard: nothing to review.
    if (records == 0) {
        log.info("No result read, please check EvalConfusionMatrix file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALCONFMTR);
    }

    PathFinder finder = new PathFinder(modelConfig);
    String matrixPath = finder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource());
    log.info("Loading confusion matrix in {}", matrixPath);

    BufferedReader matrixReader = null;
    try {
        matrixReader = ShifuFileUtils.getReader(matrixPath, evalConfig.getDataSet().getSource());
        review(new CMOIterable(matrixReader), records);
    } finally {
        // Reader is released whether or not the review succeeds.
        IOUtils.closeQuietly(matrixReader);
    }
}
Use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
From class PerformanceEvaluator, method review() (no-argument overload).
/**
 * Reads the whole evaluation confusion matrix file into memory and runs the review on it.
 *
 * <p>Every line is split on {@code '|'} and its first nine fields are parsed as doubles, in this
 * order: tp, fp, fn, tn, weighted tp, weighted fp, weighted fn, weighted tn, score.
 *
 * @throws IOException if the confusion matrix file cannot be opened, read or closed
 * @throws ShifuException with {@code ERROR_EVALCONFMTR} when the file contains no lines
 */
public void review() throws IOException {
    PathFinder pathFinder = new PathFinder(modelConfig);
    log.info("Loading confusion matrix in {}", pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()));
    BufferedReader reader = ShifuFileUtils.getReader(pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()), evalConfig.getDataSet().getSource());
    List<ConfusionMatrixObject> matrixList = new ArrayList<ConfusionMatrixObject>();
    int cnt = 0;
    try {
        String line = null;
        while ((line = reader.readLine()) != null) {
            cnt++;
            String[] raw = line.split("\\|");
            ConfusionMatrixObject matrix = new ConfusionMatrixObject();
            matrix.setTp(Double.parseDouble(raw[0]));
            matrix.setFp(Double.parseDouble(raw[1]));
            matrix.setFn(Double.parseDouble(raw[2]));
            matrix.setTn(Double.parseDouble(raw[3]));
            matrix.setWeightedTp(Double.parseDouble(raw[4]));
            matrix.setWeightedFp(Double.parseDouble(raw[5]));
            matrix.setWeightedFn(Double.parseDouble(raw[6]));
            matrix.setWeightedTn(Double.parseDouble(raw[7]));
            matrix.setScore(Double.parseDouble(raw[8]));
            matrixList.add(matrix);
        }
    } finally {
        // Release the reader on every path, including a malformed line that fails to parse
        // and the empty-file error below; previously both paths leaked the reader.
        reader.close();
    }
    if (0 == cnt) {
        log.info("No result read, please check EvalConfusionMatrix file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALCONFMTR);
    }
    review(matrixList, cnt);
}
Use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
From class ConfusionMatrix, method computeConfusionMatrix.
/**
 * Loads the evaluation score data, builds one {@code ModelResultObject} per usable record and
 * writes the resulting confusion matrix to the eval matrix path.
 *
 * <p>Records are '|'-separated. A record is skipped when its target tag is blank or when its score
 * field is not a parsable number. When the score path is a single file (not a directory) the first
 * line is skipped as well.
 *
 * @throws IOException if the score data cannot be read or the matrix file cannot be written
 * @throws ShifuException with {@code ERROR_EVALSCORE} when no usable score record was loaded
 */
public void computeConfusionMatrix() throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();
    List<Scanner> scanners = ShifuFileUtils.getDataScanners(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
    List<ModelResultObject> moList = new ArrayList<ModelResultObject>();
    int cnt = 0;
    try {
        boolean isDir = ShifuFileUtils.isDir(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
        LOG.info("The size of scanner is {}", scanners.size());
        for (Scanner scanner : scanners) {
            while (scanner.hasNext()) {
                if ((++cnt) % 10000 == 0) {
                    LOG.info("Loaded {} records.", cnt);
                }
                String[] raw = scanner.nextLine().split("\\|");
                if ((!isDir) && cnt == 1) {
                    // first line of a single score file is skipped
                    continue;
                }
                String tag = CommonUtils.trimTag(raw[targetColumnIndex]);
                if (StringUtils.isBlank(tag)) {
                    // sample the warning so a file full of empty tags does not flood the log
                    if (Math.random() < 0.01) {
                        LOG.warn("Empty target value!!");
                    }
                    continue;
                }
                double weight = 1.0d;
                if (this.weightColumnIndex > 0) {
                    try {
                        // NOTE(review): reads fixed column 1 rather than weightColumnIndex;
                        // presumably the score file always stores weight there - confirm.
                        weight = Double.parseDouble(raw[1]);
                    } catch (NumberFormatException e) {
                        // unparsable weight: keep the default weight of 1.0
                    }
                }
                double score = 0;
                try {
                    score = Double.parseDouble(raw[scoreColumnIndex]);
                } catch (NumberFormatException e) {
                    // user set the score column wrong ?
                    if (Math.random() < 0.05) {
                        LOG.warn("The score column - {} is not integer. Is score column set correctly?", raw[scoreColumnIndex]);
                    }
                    continue;
                }
                moList.add(new ModelResultObject(score, tag, weight));
            }
            // release resource as soon as this scanner is drained
            scanner.close();
        }
    } finally {
        // If anything above throws, the current and all remaining scanners would otherwise
        // stay open. Closing an already-closed Scanner has no effect.
        for (Scanner scanner : scanners) {
            scanner.close();
        }
    }
    LOG.info("Totally loaded {} records.", cnt);
    if (cnt == 0 || moList.size() == 0) {
        LOG.error("No score read, the EvalScore did not genernate or is null file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALSCORE);
    }
    ConfusionMatrixCalculator calculator = new ConfusionMatrixCalculator(modelConfig.getPosTags(evalConfig), modelConfig.getNegTags(evalConfig), moList);
    BufferedWriter confMatWriter = ShifuFileUtils.getWriter(pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()), evalConfig.getDataSet().getSource());
    try {
        calculator.calculate(confMatWriter);
    } finally {
        // close the writer even when the calculation fails
        confMatWriter.close();
    }
}
Use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
From class EvalModelProcessor, method runDistMetaScore.
/**
 * Runs the {@code EvalScoreMetaSort.pig} script for one meta score column and builds a
 * {@code ScoreStatus} from the Hadoop counters of the first job that reports a non-zero record
 * count.
 *
 * @param evalConfig eval set whose data set is scored
 * @param metaScore name of the meta score column passed to the pig script
 * @return the score status taken from the job counters, or {@code null} when no job reports any
 *         eval record
 * @throws IOException declared by the signature; an {@code IOException} from job submission is
 *         rethrown as {@code ShifuException(ERROR_RUNNING_PIG_JOB)}
 */
@SuppressWarnings("deprecation")
private ScoreStatus runDistMetaScore(EvalConfig evalConfig, String metaScore) throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();

    // start from a clean output location
    ShifuFileUtils.deleteFile(pathFinder.getEvalMetaScorePath(evalConfig, metaScore), sourceType);

    // parameters handed to the pig script
    Map<String, String> pigParams = new HashMap<String, String>();
    pigParams.put(Constants.SOURCE_TYPE, sourceType.toString());
    pigParams.put("pathEvalRawData", evalConfig.getDataSet().getDataPath());
    pigParams.put("pathSortScoreData", pathFinder.getEvalMetaScorePath(evalConfig, metaScore));
    pigParams.put("eval_set_name", evalConfig.getName());
    pigParams.put("delimiter", evalConfig.getDataSet().getDataDelimiter());
    pigParams.put("column_name", metaScore);

    // uniquely named folder that receives the max/min score output
    Path maxMinRelativePath = new Path("tmp" + File.separator + "maxmin_score_" + System.currentTimeMillis() + "_" + RANDOM.nextLong());
    String maxMinScoreFolder = ShifuFileUtils.getFileSystemBySourceType(sourceType).makeQualified(maxMinRelativePath).toString();
    Map<String, String> jobConf = new HashMap<String, String>();
    jobConf.put(Constants.SHIFU_EVAL_MAXMIN_SCORE_OUTPUT, maxMinScoreFolder);

    try {
        PigExecutor.getExecutor().submitJob(modelConfig, pathFinder.getScriptPath("scripts/EvalScoreMetaSort.pig"), pigParams, evalConfig.getDataSet().getSource(), jobConf, super.pathFinder);
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }

    for (Iterator<JobStats> statsIter = PigStats.get().getJobGraph().iterator(); statsIter.hasNext();) {
        JobStats stats = statsIter.next();
        long validRecords = stats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_RECORDS);
        LOG.info("Total valid eval records is : {}", validRecords);
        if (validRecords == 0L) {
            // this job carries no record counter, try the next one
            continue;
        }
        this.evalRecords = validRecords;

        long posTags = stats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_POSTAGS);
        long negTags = stats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_NEGTAGS);
        // weighted counters are stored scaled; divide to get the real weighted totals back
        double weightedPosTags = stats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_WPOSTAGS) / (Constants.EVAL_COUNTER_WEIGHT_SCALE * 1.0d);
        double weightedNegTags = stats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_WNEGTAGS) / (Constants.EVAL_COUNTER_WEIGHT_SCALE * 1.0d);

        // sentinels, only replaced for regression models
        double maxScore = Integer.MIN_VALUE;
        double minScore = Integer.MAX_VALUE;
        if (modelConfig.isRegression()) {
            double[] maxAndMin = locateMaxMinScoreFromFile(sourceType, maxMinScoreFolder);
            maxScore = maxAndMin[0];
            minScore = maxAndMin[1];
            LOG.info("Max score is {}, min score is {}", maxScore, minScore);
            ShifuFileUtils.deleteFile(maxMinScoreFolder, sourceType);
        }

        long badMetaScores = stats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter("BAD_META_SCORE");
        // score status comes straight from the counters, so the metrics are not re-computed
        LOG.info("Eval records is {}; and bad meta score is {}.", validRecords, badMetaScores);
        return new ScoreStatus(posTags, negTags, weightedPosTags, weightedNegTags, maxScore, minScore, validRecords);
    }

    // no job reported any eval record
    return null;
}
Use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
From class EvalModelProcessor, method deleteEvalSet.
/**
 * Removes the named eval set from the model config and saves the config.
 *
 * <p>When no eval set with that name exists, an error is logged and nothing is changed.
 *
 * @param evalSetName name of the eval set to delete
 * @throws ShifuException with {@code ERROR_WRITE_MODELCONFIG} when saving the model config fails
 */
private void deleteEvalSet(String evalSetName) {
    EvalConfig target = modelConfig.getEvalConfigByName(evalSetName);
    if (target == null) {
        LOG.error("{} eval set doesn't exist.", evalSetName);
        return;
    }

    modelConfig.getEvals().remove(target);
    try {
        saveModelConfig();
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_WRITE_MODELCONFIG, e);
    }
    LOG.info("Done. Delete eval set - " + evalSetName);
}
Aggregations