Search in sources :

Example 1 with ModelResultObject

use of ml.shifu.shifu.container.ModelResultObject in project shifu by ShifuML.

the class ConfusionMatrix method computeConfusionMatrix.

/**
 * Reads the evaluation score records, converts every usable row into a
 * {@link ModelResultObject} and writes the confusion matrix computed from
 * them to the eval matrix path.
 *
 * <p>Rows are split on {@code |}. A row is skipped when its target tag is
 * blank or when its score column cannot be parsed as a double. An
 * unparsable weight falls back to {@code 1.0}. A row with fewer columns
 * than the configured indices raises {@link ArrayIndexOutOfBoundsException}.
 *
 * <p>All scanners and the matrix writer are closed even when reading or
 * calculating fails.
 *
 * @throws IOException    if the score data cannot be read or the matrix cannot be written
 * @throws ShifuException with {@code ERROR_EVALSCORE} when no line was read
 *                        or no usable record was found
 */
public void computeConfusionMatrix() throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();
    List<Scanner> scanners = ShifuFileUtils.getDataScanners(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
    List<ModelResultObject> moList = new ArrayList<ModelResultObject>();
    int cnt = 0;
    try {
        boolean isDir = ShifuFileUtils.isDir(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
        LOG.info("The size of scanner is {}", scanners.size());
        for (Scanner scanner : scanners) {
            while (scanner.hasNext()) {
                if ((++cnt) % 10000 == 0) {
                    LOG.info("Loaded {} records.", cnt);
                }
                String[] raw = scanner.nextLine().split("\\|");
                if ((!isDir) && cnt == 1) {
                    // Skip the very first line of a single-file input;
                    // presumably this is a header row - TODO confirm.
                    continue;
                }
                String tag = CommonUtils.trimTag(raw[targetColumnIndex]);
                if (StringUtils.isBlank(tag)) {
                    // Sampled warning (about 1% of occurrences) to avoid flooding the log.
                    if (Math.random() < 0.01) {
                        LOG.warn("Empty target value!!");
                    }
                    continue;
                }
                double weight = 1.0d;
                if (this.weightColumnIndex > 0) {
                    try {
                        // NOTE(review): reads column 1 rather than weightColumnIndex;
                        // kept as-is, confirm against the eval score file layout.
                        weight = Double.parseDouble(raw[1]);
                    } catch (NumberFormatException ignored) {
                        // Deliberate best-effort: keep the default weight of 1.0.
                    }
                }
                double score = 0;
                try {
                    score = Double.parseDouble(raw[scoreColumnIndex]);
                } catch (NumberFormatException e) {
                    // user set the score column wrong ?
                    // Sampled warning (about 5% of occurrences).
                    if (Math.random() < 0.05) {
                        LOG.warn("The score column - {} is not integer. Is score column set correctly?", raw[scoreColumnIndex]);
                    }
                    continue;
                }
                moList.add(new ModelResultObject(score, tag, weight));
            }
            // release this scanner as soon as it is drained
            scanner.close();
        }
    } finally {
        // Closing an already closed Scanner has no effect, so this safely
        // covers scanners left open by an exception above.
        for (Scanner scanner : scanners) {
            scanner.close();
        }
    }
    LOG.info("Totally loaded {} records.", cnt);
    if (cnt == 0 || moList.size() == 0) {
        LOG.error("No score read, the EvalScore did not genernate or is null file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALSCORE);
    }
    ConfusionMatrixCalculator calculator = new ConfusionMatrixCalculator(modelConfig.getPosTags(evalConfig), modelConfig.getNegTags(evalConfig), moList);
    BufferedWriter confMatWriter = ShifuFileUtils.getWriter(pathFinder.getEvalMatrixPath(evalConfig, sourceType), sourceType);
    try {
        calculator.calculate(confMatWriter);
    } finally {
        // Always release the writer, even if the calculation throws.
        confMatWriter.close();
    }
}
Also used : Scanner(java.util.Scanner) SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) ArrayList(java.util.ArrayList) BufferedWriter(java.io.BufferedWriter) ModelResultObject(ml.shifu.shifu.container.ModelResultObject) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 2 with ModelResultObject

use of ml.shifu.shifu.container.ModelResultObject in project shifu by ShifuML.

the class ConfusionMatrixCalculator method calculate.

/**
 * Writes a running confusion matrix to {@code writer}: one initial row in
 * which every record counts as a negative prediction, then one row per
 * entry of {@code moList}, in list order.
 *
 * <p>The initial row has TP/FP at zero, FN/TN at the scaled totals of
 * positive/negative records, and a score of 1000. Each following row moves
 * the current record from FN to TP (tag in {@code posTags}) or from TN to
 * FP (any other tag), in both the plain and the weighted counts, and
 * carries that record's score.
 *
 * @param writer destination handed to {@code saveConfusionMaxtrixWithWriter} for every row
 */
public void calculate(BufferedWriter writer) {
    // Primitive accumulators: a boxed Double would be unboxed and re-boxed on every +=.
    double sumPos = 0.0, sumNeg = 0.0, sumWeightedPos = 0.0, sumWeightedNeg = 0.0;
    for (ModelResultObject mo : moList) {
        if (posTags.contains(mo.getTag())) {
            // Positive
            sumPos += posScaleFactor;
            sumWeightedPos += mo.getWeight() * posScaleFactor;
        } else {
            // Negative
            sumNeg += negScaleFactor;
            sumWeightedNeg += mo.getWeight() * negScaleFactor;
        }
    }
    // Starting point: nothing predicted positive yet.
    ConfusionMatrixObject prevCmo = new ConfusionMatrixObject();
    prevCmo.setTp(0.0);
    prevCmo.setFp(0.0);
    prevCmo.setFn(sumPos);
    prevCmo.setTn(sumNeg);
    prevCmo.setWeightedTp(0.0);
    prevCmo.setWeightedFp(0.0);
    prevCmo.setWeightedFn(sumWeightedPos);
    prevCmo.setWeightedTn(sumWeightedNeg);
    prevCmo.setScore(1000);
    saveConfusionMaxtrixWithWriter(writer, prevCmo);
    for (ModelResultObject mo : moList) {
        // Each row starts as a copy of the previous one and shifts one record.
        ConfusionMatrixObject cmo = new ConfusionMatrixObject(prevCmo);
        if (posTags.contains(mo.getTag())) {
            // Positive Instance
            double weighted = mo.getWeight() * posScaleFactor;
            cmo.setTp(cmo.getTp() + posScaleFactor);
            cmo.setFn(cmo.getFn() - posScaleFactor);
            cmo.setWeightedTp(cmo.getWeightedTp() + weighted);
            cmo.setWeightedFn(cmo.getWeightedFn() - weighted);
        } else {
            // Negative Instance
            double weighted = mo.getWeight() * negScaleFactor;
            cmo.setFp(cmo.getFp() + negScaleFactor);
            cmo.setTn(cmo.getTn() - negScaleFactor);
            cmo.setWeightedFp(cmo.getWeightedFp() + weighted);
            cmo.setWeightedTn(cmo.getWeightedTn() - weighted);
        }
        cmo.setScore(mo.getScore());
        saveConfusionMaxtrixWithWriter(writer, cmo);
        prevCmo = cmo;
    }
}
Also used : ModelResultObject(ml.shifu.shifu.container.ModelResultObject) ConfusionMatrixObject(ml.shifu.shifu.container.ConfusionMatrixObject)

Example 3 with ModelResultObject

use of ml.shifu.shifu.container.ModelResultObject in project shifu by ShifuML.

the class ConfusionMatrixCalculator method calculate.

/**
 * Builds the running confusion matrix for {@code moList} as a list: one
 * initial entry in which every record counts as a negative prediction,
 * followed by one entry per record, in list order.
 *
 * <p>The initial entry has TP/FP at zero, FN/TN at the scaled totals of
 * positive/negative records, and the score of the first record. Each
 * following entry moves the current record from FN to TP (tag in
 * {@code posTags}) or from TN to FP (any other tag), in both the plain and
 * the weighted counts, and carries that record's score.
 *
 * @return the confusion matrix entries; an empty list when {@code moList}
 *         is empty (previously this case threw IndexOutOfBoundsException)
 */
public List<ConfusionMatrixObject> calculate() {
    List<ConfusionMatrixObject> cmoList = new ArrayList<ConfusionMatrixObject>();
    if (moList.isEmpty()) {
        // No records: there is no first score to seed the initial entry with.
        return cmoList;
    }
    // Calculate the sum
    // Primitive accumulators: a boxed Double would be unboxed and re-boxed on every +=.
    double sumPos = 0.0, sumNeg = 0.0, sumWeightedPos = 0.0, sumWeightedNeg = 0.0;
    for (ModelResultObject mo : moList) {
        if (posTags.contains(mo.getTag())) {
            // Positive
            sumPos += posScaleFactor;
            sumWeightedPos += mo.getWeight() * posScaleFactor;
        } else {
            // Negative
            sumNeg += negScaleFactor;
            sumWeightedNeg += mo.getWeight() * negScaleFactor;
        }
    }
    // init ConfusionMatrix
    ConfusionMatrixObject initCmo = new ConfusionMatrixObject();
    initCmo.setTp(0.0);
    initCmo.setFp(0.0);
    initCmo.setFn(sumPos);
    initCmo.setTn(sumNeg);
    initCmo.setWeightedTp(0.0);
    initCmo.setWeightedFp(0.0);
    initCmo.setWeightedFn(sumWeightedPos);
    initCmo.setWeightedTn(sumWeightedNeg);
    initCmo.setScore(moList.get(0).getScore());
    cmoList.add(initCmo);
    // Calculate the rest
    ConfusionMatrixObject prevCmo = initCmo;
    for (ModelResultObject mo : moList) {
        // Each entry starts as a copy of the previous one and shifts one record.
        ConfusionMatrixObject cmo = new ConfusionMatrixObject(prevCmo);
        if (posTags.contains(mo.getTag())) {
            // Positive Instance
            double weighted = mo.getWeight() * posScaleFactor;
            cmo.setTp(cmo.getTp() + posScaleFactor);
            cmo.setFn(cmo.getFn() - posScaleFactor);
            cmo.setWeightedTp(cmo.getWeightedTp() + weighted);
            cmo.setWeightedFn(cmo.getWeightedFn() - weighted);
        } else {
            // Negative Instance
            double weighted = mo.getWeight() * negScaleFactor;
            cmo.setFp(cmo.getFp() + negScaleFactor);
            cmo.setTn(cmo.getTn() - negScaleFactor);
            cmo.setWeightedFp(cmo.getWeightedFp() + weighted);
            cmo.setWeightedTn(cmo.getWeightedTn() - weighted);
        }
        cmo.setScore(mo.getScore());
        cmoList.add(cmo);
        prevCmo = cmo;
    }
    return cmoList;
}
Also used : ModelResultObject(ml.shifu.shifu.container.ModelResultObject) ArrayList(java.util.ArrayList) ConfusionMatrixObject(ml.shifu.shifu.container.ConfusionMatrixObject)

Aggregations

ModelResultObject (ml.shifu.shifu.container.ModelResultObject): 3, ArrayList (java.util.ArrayList): 2, ConfusionMatrixObject (ml.shifu.shifu.container.ConfusionMatrixObject): 2, BufferedWriter (java.io.BufferedWriter): 1, Scanner (java.util.Scanner): 1, SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType): 1, ShifuException (ml.shifu.shifu.exception.ShifuException): 1