Search in sources :

Example 6 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class PerformanceEvaluator method review.

/**
 * Streams the eval confusion matrix file and reviews it against a known record count.
 *
 * @param records number of records the confusion matrix is expected to contain
 * @throws IOException if the confusion matrix file cannot be opened or read
 * @throws ShifuException with {@code ERROR_EVALCONFMTR} when {@code records} is zero
 */
public void review(long records) throws IOException {
    // Nothing to review: fail fast before touching the file system.
    if (records == 0) {
        log.info("No result read, please check EvalConfusionMatrix file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALCONFMTR);
    }

    PathFinder pathFinder = new PathFinder(modelConfig);
    String matrixPath = pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource());
    log.info("Loading confusion matrix in {}", matrixPath);

    BufferedReader reader = null;
    try {
        reader = ShifuFileUtils.getReader(matrixPath, evalConfig.getDataSet().getSource());
        // Hand the open reader to the iterable-based overload; it is consumed inside this try.
        review(new CMOIterable(reader), records);
    } finally {
        // closeQuietly tolerates a null reader (getReader may have thrown).
        IOUtils.closeQuietly(reader);
    }
}
Also used : BufferedReader(java.io.BufferedReader) PathFinder(ml.shifu.shifu.fs.PathFinder) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 7 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class PerformanceEvaluator method review.

/**
 * Loads the whole eval confusion matrix file into memory and reviews it.
 *
 * Each line is split on {@code '|'} and its first nine fields are parsed as doubles, in the
 * order: tp, fp, fn, tn, weighted tp, weighted fp, weighted fn, weighted tn, score.
 *
 * @throws IOException if the confusion matrix file cannot be opened, read or closed
 * @throws ShifuException with {@code ERROR_EVALCONFMTR} when the file contains no lines
 */
public void review() throws IOException {
    PathFinder pathFinder = new PathFinder(modelConfig);
    log.info("Loading confusion matrix in {}", pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()));
    List<ConfusionMatrixObject> matrixList = new ArrayList<ConfusionMatrixObject>();
    int cnt = 0;
    BufferedReader reader = ShifuFileUtils.getReader(pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()), evalConfig.getDataSet().getSource());
    try {
        String line = null;
        while ((line = reader.readLine()) != null) {
            cnt++;
            String[] raw = line.split("\\|");
            ConfusionMatrixObject matrix = new ConfusionMatrixObject();
            matrix.setTp(Double.parseDouble(raw[0]));
            matrix.setFp(Double.parseDouble(raw[1]));
            matrix.setFn(Double.parseDouble(raw[2]));
            matrix.setTn(Double.parseDouble(raw[3]));
            matrix.setWeightedTp(Double.parseDouble(raw[4]));
            matrix.setWeightedFp(Double.parseDouble(raw[5]));
            matrix.setWeightedFn(Double.parseDouble(raw[6]));
            matrix.setWeightedTn(Double.parseDouble(raw[7]));
            matrix.setScore(Double.parseDouble(raw[8]));
            matrixList.add(matrix);
        }
    } finally {
        // Always release the reader: previously it leaked when a line failed to parse
        // or when the empty-file check below threw before close() was reached.
        reader.close();
    }
    if (0 == cnt) {
        log.info("No result read, please check EvalConfusionMatrix file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALCONFMTR);
    }
    review(matrixList, cnt);
}
Also used : BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) PathFinder(ml.shifu.shifu.fs.PathFinder) ConfusionMatrixObject(ml.shifu.shifu.container.ConfusionMatrixObject) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 8 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class ConfusionMatrix method computeConfusionMatrix.

/**
 * Reads the eval score output, builds one {@code ModelResultObject} per usable record and
 * writes the resulting confusion matrix to the eval matrix path.
 *
 * Records are '|'-delimited. A record is skipped when its target tag is blank or its score
 * column is not a parsable double. When the score path is a single file (not a directory),
 * the very first line read is skipped as well.
 *
 * @throws IOException if the score data cannot be read or the matrix file cannot be written
 * @throws ShifuException with {@code ERROR_EVALSCORE} when no line was read or no usable
 *         record was found
 */
public void computeConfusionMatrix() throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();
    List<Scanner> scanners = ShifuFileUtils.getDataScanners(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
    List<ModelResultObject> moList = new ArrayList<ModelResultObject>();
    int cnt = 0;
    try {
        boolean isDir = ShifuFileUtils.isDir(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
        LOG.info("The size of scanner is {}", scanners.size());
        for (Scanner scanner : scanners) {
            while (scanner.hasNext()) {
                if ((++cnt) % 10000 == 0) {
                    LOG.info("Loaded " + cnt + " records.");
                }
                String[] raw = scanner.nextLine().split("\\|");
                if ((!isDir) && cnt == 1) {
                    // first line since we add
                    // NOTE(review): presumably a header line written for single-file output - confirm.
                    continue;
                }
                String tag = CommonUtils.trimTag(raw[targetColumnIndex]);
                if (StringUtils.isBlank(tag)) {
                    // Sampled warning (about 1% of occurrences) to avoid flooding the log.
                    if (Math.random() < 0.01) {
                        LOG.warn("Empty target value!!");
                    }
                    continue;
                }
                double weight = 1.0d;
                if (this.weightColumnIndex > 0) {
                    try {
                        // NOTE(review): reads the fixed column 1 rather than weightColumnIndex;
                        // presumably the score file always stores the weight there - confirm.
                        weight = Double.parseDouble(raw[1]);
                    } catch (NumberFormatException e) {
                        // Deliberately ignored: an unparsable weight falls back to the default 1.0.
                    }
                }
                double score = 0;
                try {
                    score = Double.parseDouble(raw[scoreColumnIndex]);
                } catch (NumberFormatException e) {
                    // user set the score column wrong ?
                    if (Math.random() < 0.05) {
                        LOG.warn("The score column - {} is not integer. Is score column set correctly?", raw[scoreColumnIndex]);
                    }
                    continue;
                }
                moList.add(new ModelResultObject(score, tag, weight));
            }
            // release resource as soon as this scanner is exhausted
            scanner.close();
        }
    } finally {
        // Close every scanner even when reading fails part-way; previously an exception left
        // the current and all remaining scanners open. Scanner.close() on an already closed
        // scanner has no effect, so the double close on the success path is harmless.
        for (Scanner scanner : scanners) {
            if (scanner != null) {
                scanner.close();
            }
        }
    }
    LOG.info("Totally loaded " + cnt + " records.");
    if (cnt == 0 || moList.size() == 0) {
        LOG.error("No score read, the EvalScore did not genernate or is null file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALSCORE);
    }
    ConfusionMatrixCalculator calculator = new ConfusionMatrixCalculator(modelConfig.getPosTags(evalConfig), modelConfig.getNegTags(evalConfig), moList);
    BufferedWriter confMatWriter = ShifuFileUtils.getWriter(pathFinder.getEvalMatrixPath(evalConfig, sourceType), sourceType);
    try {
        calculator.calculate(confMatWriter);
    } finally {
        // Release the writer even if the calculation throws.
        confMatWriter.close();
    }
}
Also used : Scanner(java.util.Scanner) SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) ArrayList(java.util.ArrayList) BufferedWriter(java.io.BufferedWriter) ModelResultObject(ml.shifu.shifu.container.ModelResultObject) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 9 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class EvalModelProcessor method runDistMetaScore.

/**
 * Runs the "scripts/EvalScoreMetaSort.pig" script for one meta score column of an eval set and
 * builds a {@code ScoreStatus} from the Hadoop counters of the resulting jobs.
 *
 * Any existing output at the eval meta score path is deleted before the job is submitted.
 * After submission the Pig job graph is walked and the first job whose record counter is
 * non-zero is used; that record count is also stored in {@code this.evalRecords}.
 *
 * @param evalConfig eval set whose name and data set (source type, data path, delimiter) are
 *        passed to the Pig script
 * @param metaScore meta score column; passed to the script as "column_name" and used to
 *        resolve the output path
 * @return the score status of the first job reporting a non-zero record counter, or
 *         {@code null} when no job in the graph reports one
 * @throws IOException declared by the signature; presumably raised by the file-system helpers
 *         (an IOException from the Pig submission itself is rethrown as ShifuException)
 * @throws ShifuException with {@code ERROR_RUNNING_PIG_JOB} when submitting the Pig job throws
 *         an IOException
 * @throws RuntimeException wrapping any other Throwable thrown while submitting the Pig job
 */
@SuppressWarnings("deprecation")
private ScoreStatus runDistMetaScore(EvalConfig evalConfig, String metaScore) throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();
    // clean up output directories
    ShifuFileUtils.deleteFile(pathFinder.getEvalMetaScorePath(evalConfig, metaScore), sourceType);
    // prepare special parameters and execute pig
    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put(Constants.SOURCE_TYPE, sourceType.toString());
    paramsMap.put("pathEvalRawData", evalConfig.getDataSet().getDataPath());
    paramsMap.put("pathSortScoreData", pathFinder.getEvalMetaScorePath(evalConfig, metaScore));
    paramsMap.put("eval_set_name", evalConfig.getName());
    paramsMap.put("delimiter", evalConfig.getDataSet().getDataDelimiter());
    paramsMap.put("column_name", metaScore);
    String pigScript = "scripts/EvalScoreMetaSort.pig";
    Map<String, String> confMap = new HashMap<String, String>();
    // max min score folder
    // The name carries a timestamp and a random number - presumably to avoid collisions between runs.
    String maxMinScoreFolder = ShifuFileUtils.getFileSystemBySourceType(sourceType).makeQualified(new Path("tmp" + File.separator + "maxmin_score_" + System.currentTimeMillis() + "_" + RANDOM.nextLong())).toString();
    confMap.put(Constants.SHIFU_EVAL_MAXMIN_SCORE_OUTPUT, maxMinScoreFolder);
    try {
        PigExecutor.getExecutor().submitJob(modelConfig, pathFinder.getScriptPath(pigScript), paramsMap, evalConfig.getDataSet().getSource(), confMap, super.pathFinder);
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        // Anything else is surfaced unchecked with the original cause preserved.
        throw new RuntimeException(e);
    }
    Iterator<JobStats> iter = PigStats.get().getJobGraph().iterator();
    while (iter.hasNext()) {
        JobStats jobStats = iter.next();
        long evalRecords = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_RECORDS);
        LOG.info("Total valid eval records is : {}", evalRecords);
        // If no basic record counter, check next one
        if (evalRecords == 0L) {
            continue;
        }
        // Remember the record count on the processor itself (the local shadows the field).
        this.evalRecords = evalRecords;
        long pigPosTags = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_POSTAGS);
        long pigNegTags = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_NEGTAGS);
        // "* 1.0d" forces floating-point division of the long counter value.
        // NOTE(review): presumably weighted sums are stored in counters scaled by EVAL_COUNTER_WEIGHT_SCALE - confirm.
        double pigPosWeightTags = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_WPOSTAGS) / (Constants.EVAL_COUNTER_WEIGHT_SCALE * 1.0d);
        double pigNegWeightTags = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter(Constants.COUNTER_WNEGTAGS) / (Constants.EVAL_COUNTER_WEIGHT_SCALE * 1.0d);
        // Sentinel values; they are only replaced for regression models below.
        double maxScore = Integer.MIN_VALUE;
        double minScore = Integer.MAX_VALUE;
        if (modelConfig.isRegression()) {
            // Index 0 is read as the max score and index 1 as the min score.
            double[] maxMinScores = locateMaxMinScoreFromFile(sourceType, maxMinScoreFolder);
            maxScore = maxMinScores[0];
            minScore = maxMinScores[1];
            LOG.info("Max score is {}, min score is {}", maxScore, minScore);
            // The temporary max/min folder is removed only on this regression path.
            ShifuFileUtils.deleteFile(maxMinScoreFolder, sourceType);
        }
        long badMetaScores = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter("BAD_META_SCORE");
        // Get score status from Counter to avoid re-computing such metrics
        LOG.info("Eval records is {}; and bad meta score is {}.", evalRecords, badMetaScores);
        // Return on the first job with a non-zero record counter; later jobs are not inspected.
        return new ScoreStatus(pigPosTags, pigNegTags, pigPosWeightTags, pigNegWeightTags, maxScore, minScore, evalRecords);
    }
    // No job in the graph reported any eval records.
    return null;
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) IOException(java.io.IOException) JobStats(org.apache.pig.tools.pigstats.JobStats) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 10 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class EvalModelProcessor method deleteEvalSet.

/**
 * Removes the named eval set from the model configuration and persists the change.
 *
 * When no eval set with that name exists, an error is logged and nothing is modified.
 *
 * @param evalSetName name of the eval set to delete
 * @throws ShifuException with {@code ERROR_WRITE_MODELCONFIG} when saving the model
 *         configuration fails with an IOException
 */
private void deleteEvalSet(String evalSetName) {
    EvalConfig target = modelConfig.getEvalConfigByName(evalSetName);
    // Guard clause: unknown eval set, nothing to delete.
    if (null == target) {
        LOG.error("{} eval set doesn't exist.", evalSetName);
        return;
    }

    modelConfig.getEvals().remove(target);
    try {
        saveModelConfig();
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_WRITE_MODELCONFIG, e);
    }
    LOG.info("Done. Delete eval set - " + evalSetName);
}
Also used : EvalConfig(ml.shifu.shifu.container.obj.EvalConfig) IOException(java.io.IOException) ShifuException(ml.shifu.shifu.exception.ShifuException)

Aggregations

ShifuException (ml.shifu.shifu.exception.ShifuException): 39 · IOException (java.io.IOException): 22 · SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType): 12 · HashMap (java.util.HashMap): 8 · ArrayList (java.util.ArrayList): 5 · ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig): 5 · File (java.io.File): 4 · Scanner (java.util.Scanner): 4 · Path (org.apache.hadoop.fs.Path): 4 · SourceFile (ml.shifu.shifu.fs.SourceFile): 3 · JobStats (org.apache.pig.tools.pigstats.JobStats): 3 · BufferedReader (java.io.BufferedReader): 2 · ConfusionMatrixObject (ml.shifu.shifu.container.ConfusionMatrixObject): 2 · EvalConfig (ml.shifu.shifu.container.obj.EvalConfig): 2 · RawSourceData (ml.shifu.shifu.container.obj.RawSourceData): 2 · AbstractStatsExecutor (ml.shifu.shifu.core.processor.stats.AbstractStatsExecutor): 2 · AkkaStatsWorker (ml.shifu.shifu.core.processor.stats.AkkaStatsWorker): 2 · DIBStatsExecutor (ml.shifu.shifu.core.processor.stats.DIBStatsExecutor): 2 · MunroPatIStatsExecutor (ml.shifu.shifu.core.processor.stats.MunroPatIStatsExecutor): 2 · MunroPatStatsExecutor (ml.shifu.shifu.core.processor.stats.MunroPatStatsExecutor): 2