use of ml.shifu.shifu.container.ModelResultObject in project shifu by ShifuML.
the class ConfusionMatrix method computeConfusionMatrix.
/**
 * Reads the evaluation score records, collects them as {@link ModelResultObject}s and writes the
 * confusion matrix computed from them to the evaluation matrix path.
 *
 * <p>Every input line is split on {@code '|'}. A record is dropped when its target value is blank
 * or when its score column cannot be parsed as a double; both cases are only logged for a random
 * sample of the offending lines so that a badly configured run does not flood the log.
 *
 * <p>All scanners and the matrix writer are released even when reading or calculating fails.
 *
 * @throws IOException propagated from the underlying file access (opening the score data,
 *         opening or closing the matrix writer)
 * @throws ShifuException with {@code ShifuErrorCode.ERROR_EVALSCORE} when no usable record was
 *         loaded
 */
public void computeConfusionMatrix() throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();
    List<Scanner> scanners = ShifuFileUtils.getDataScanners(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
    List<ModelResultObject> moList = new ArrayList<ModelResultObject>();
    int cnt = 0;
    try {
        boolean isDir = ShifuFileUtils.isDir(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
        LOG.info("The size of scanner is {}", scanners.size());
        for (Scanner scanner : scanners) {
            while (scanner.hasNext()) {
                if ((++cnt) % 10000 == 0) {
                    LOG.info("Loaded {} records.", cnt);
                }
                String[] raw = scanner.nextLine().split("\\|");
                if ((!isDir) && cnt == 1) {
                    // When the score path is a single file, the very first line read is skipped
                    // (presumably a header line - confirm against the score writer).
                    continue;
                }
                String tag = CommonUtils.trimTag(raw[targetColumnIndex]);
                if (StringUtils.isBlank(tag)) {
                    // Sampled warning: only about 1% of the blank targets are reported.
                    if (Math.random() < 0.01) {
                        LOG.warn("Empty target value!!");
                    }
                    continue;
                }
                double weight = 1.0d;
                if (this.weightColumnIndex > 0) {
                    // NOTE(review): the weight is read from the fixed column 1, not from
                    // weightColumnIndex - confirm this matches the score file layout.
                    try {
                        weight = Double.parseDouble(raw[1]);
                    } catch (NumberFormatException ignored) {
                        // Deliberate best effort: an unparsable weight falls back to 1.0.
                    }
                }
                double score = 0;
                try {
                    score = Double.parseDouble(raw[scoreColumnIndex]);
                } catch (NumberFormatException e) {
                    // The configured score column is probably wrong; report a ~5% sample only.
                    if (Math.random() < 0.05) {
                        LOG.warn("The score column - {} is not integer. Is score column set correctly?", raw[scoreColumnIndex]);
                    }
                    continue;
                }
                moList.add(new ModelResultObject(score, tag, weight));
            }
            // Release each input as soon as it has been drained.
            scanner.close();
        }
    } finally {
        // Closing an already closed Scanner has no effect, so this safely covers the scanners
        // that were not reached (or not finished) because of an exception.
        for (Scanner scanner : scanners) {
            scanner.close();
        }
    }
    LOG.info("Totally loaded {} records.", cnt);
    if (cnt == 0 || moList.isEmpty()) {
        LOG.error("No score read, the EvalScore did not genernate or is null file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALSCORE);
    }
    ConfusionMatrixCalculator calculator = new ConfusionMatrixCalculator(modelConfig.getPosTags(evalConfig), modelConfig.getNegTags(evalConfig), moList);
    BufferedWriter confMatWriter = ShifuFileUtils.getWriter(pathFinder.getEvalMatrixPath(evalConfig, sourceType), sourceType);
    try {
        calculator.calculate(confMatWriter);
    } finally {
        // Close (and thereby flush) the writer even if the calculation fails.
        confMatWriter.close();
    }
}
use of ml.shifu.shifu.container.ModelResultObject in project shifu by ShifuML.
the class ConfusionMatrixCalculator method calculate.
/**
 * Streams the confusion matrix to {@code writer}, one entry per model result plus an initial
 * entry.
 *
 * <p>The initial entry treats every record as not yet predicted positive: all positives are
 * counted as false negatives and all other records as true negatives, with the score set to
 * 1000. Walking {@code moList} in its existing order, each record then moves from FN to TP (when
 * its tag is in {@code posTags}) or from TN to FP (otherwise), and the resulting cumulative
 * matrix is written with that record's score.
 *
 * <p>NOTE(review): the cumulative walk only yields a meaningful curve if {@code moList} is
 * already ordered by score - this method does not sort it; confirm the caller does.
 *
 * @param writer destination handed to {@code saveConfusionMaxtrixWithWriter} for every entry;
 *        it is not closed here
 */
public void calculate(BufferedWriter writer) {
    // Primitive accumulators: boxed Double would be unboxed and re-boxed on every record.
    double sumPos = 0.0;
    double sumNeg = 0.0;
    double sumWeightedPos = 0.0;
    double sumWeightedNeg = 0.0;
    for (ModelResultObject mo : moList) {
        if (posTags.contains(mo.getTag())) {
            // Positive
            sumPos += posScaleFactor;
            sumWeightedPos += mo.getWeight() * posScaleFactor;
        } else {
            // Negative: every tag that is not a positive tag is counted here
            sumNeg += negScaleFactor;
            sumWeightedNeg += mo.getWeight() * negScaleFactor;
        }
    }

    // Starting point: nothing predicted positive yet.
    ConfusionMatrixObject prevCmo = new ConfusionMatrixObject();
    prevCmo.setTp(0.0);
    prevCmo.setFp(0.0);
    prevCmo.setFn(sumPos);
    prevCmo.setTn(sumNeg);
    prevCmo.setWeightedTp(0.0);
    prevCmo.setWeightedFp(0.0);
    prevCmo.setWeightedFn(sumWeightedPos);
    prevCmo.setWeightedTn(sumWeightedNeg);
    prevCmo.setScore(1000);
    saveConfusionMaxtrixWithWriter(writer, prevCmo);

    for (ModelResultObject mo : moList) {
        // Each entry starts as a copy of the previous cumulative state.
        ConfusionMatrixObject cmo = new ConfusionMatrixObject(prevCmo);
        if (posTags.contains(mo.getTag())) {
            // Positive instance: FN -> TP
            cmo.setTp(cmo.getTp() + posScaleFactor);
            cmo.setFn(cmo.getFn() - posScaleFactor);
            cmo.setWeightedTp(cmo.getWeightedTp() + mo.getWeight() * posScaleFactor);
            cmo.setWeightedFn(cmo.getWeightedFn() - mo.getWeight() * posScaleFactor);
        } else {
            // Negative instance: TN -> FP
            cmo.setFp(cmo.getFp() + negScaleFactor);
            cmo.setTn(cmo.getTn() - negScaleFactor);
            cmo.setWeightedFp(cmo.getWeightedFp() + mo.getWeight() * negScaleFactor);
            cmo.setWeightedTn(cmo.getWeightedTn() - mo.getWeight() * negScaleFactor);
        }
        cmo.setScore(mo.getScore());
        saveConfusionMaxtrixWithWriter(writer, cmo);
        prevCmo = cmo;
    }
}
use of ml.shifu.shifu.container.ModelResultObject in project shifu by ShifuML.
the class ConfusionMatrixCalculator method calculate.
/**
 * Builds the confusion matrix in memory: an initial entry followed by one cumulative entry per
 * model result.
 *
 * <p>The initial entry counts all positives as false negatives and all other records as true
 * negatives, and carries the score of the first element of {@code moList}. Walking
 * {@code moList} in its existing order, each record then moves from FN to TP (when its tag is in
 * {@code posTags}) or from TN to FP (otherwise).
 *
 * <p>NOTE(review): the cumulative walk only yields a meaningful curve if {@code moList} is
 * already ordered by score - this method does not sort it; confirm the caller does.
 *
 * @return a new list holding {@code moList.size() + 1} entries, the initial one first
 * @throws IndexOutOfBoundsException if {@code moList} is empty, because the initial score is
 *         read from its first element (assuming {@code moList} is a {@code java.util.List})
 */
public List<ConfusionMatrixObject> calculate() {
    // The final size is known up front, so avoid intermediate re-allocations.
    List<ConfusionMatrixObject> cmoList = new ArrayList<ConfusionMatrixObject>(moList.size() + 1);

    // Calculate the sums. Primitive accumulators: boxed Double would be unboxed and re-boxed on
    // every record.
    double sumPos = 0.0;
    double sumNeg = 0.0;
    double sumWeightedPos = 0.0;
    double sumWeightedNeg = 0.0;
    for (ModelResultObject mo : moList) {
        if (posTags.contains(mo.getTag())) {
            // Positive
            sumPos += posScaleFactor;
            sumWeightedPos += mo.getWeight() * posScaleFactor;
        } else {
            // Negative: every tag that is not a positive tag is counted here
            sumNeg += negScaleFactor;
            sumWeightedNeg += mo.getWeight() * negScaleFactor;
        }
    }

    // Initial confusion matrix: nothing predicted positive yet.
    ConfusionMatrixObject initCmo = new ConfusionMatrixObject();
    initCmo.setTp(0.0);
    initCmo.setFp(0.0);
    initCmo.setFn(sumPos);
    initCmo.setTn(sumNeg);
    initCmo.setWeightedTp(0.0);
    initCmo.setWeightedFp(0.0);
    initCmo.setWeightedFn(sumWeightedPos);
    initCmo.setWeightedTn(sumWeightedNeg);
    initCmo.setScore(moList.get(0).getScore());
    cmoList.add(initCmo);

    // Calculate the rest, each entry starting as a copy of the previous cumulative state.
    ConfusionMatrixObject prevCmo = initCmo;
    for (ModelResultObject mo : moList) {
        ConfusionMatrixObject cmo = new ConfusionMatrixObject(prevCmo);
        if (posTags.contains(mo.getTag())) {
            // Positive instance: FN -> TP
            cmo.setTp(cmo.getTp() + posScaleFactor);
            cmo.setFn(cmo.getFn() - posScaleFactor);
            cmo.setWeightedTp(cmo.getWeightedTp() + mo.getWeight() * posScaleFactor);
            cmo.setWeightedFn(cmo.getWeightedFn() - mo.getWeight() * posScaleFactor);
        } else {
            // Negative instance: TN -> FP
            cmo.setFp(cmo.getFp() + negScaleFactor);
            cmo.setTn(cmo.getTn() - negScaleFactor);
            cmo.setWeightedFp(cmo.getWeightedFp() + mo.getWeight() * negScaleFactor);
            cmo.setWeightedTn(cmo.getWeightedTn() - mo.getWeight() * negScaleFactor);
        }
        cmo.setScore(mo.getScore());
        cmoList.add(cmo);
        prevCmo = cmo;
    }
    return cmoList;
}
Aggregations