Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.
Class LogisticRegressionTest, method test5.
/**
 * Manual smoke check: constructs {@code new LogisticRegression(3, 10, prior)},
 * fills a 10-element dense vector with {@link Math#random()} values and prints
 * whatever {@code predictClassProbs} returns for it.
 * Output differs between runs because the input is random.
 */
private static void test5() {
    final int dimension = 10;
    double[] prior = new double[] { 0.1, 0.2, 0.7 };
    // NOTE(review): the constructor arguments look like (numClasses, numFeatures, prior) - confirm.
    LogisticRegression logisticRegression = new LogisticRegression(3, dimension, prior);

    Vector randomInput = new DenseVector(dimension);
    int index = 0;
    while (index < dimension) {
        randomInput.set(index, Math.random());
        index++;
    }

    String rendered = Arrays.toString(logisticRegression.predictClassProbs(randomInput));
    System.out.println(rendered);
}
Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.
Class FusedKolmogorovFilterTest, method test1.
/**
 * Manual check of {@code FusedKolmogorovFilter} on a small hand-built fixture.
 * Prints, in order: the per-class inputs, the CDFs generated from them, and the
 * value returned by {@code maxDistance}.
 */
private static void test1() {
    // Only these positions of the 10-element vector are assigned; 5, 6, 7 and 9 are never set here.
    int[] assignedPositions = { 0, 1, 2, 3, 4, 8 };
    double[] assignedValues = { 0.1, 0.2, 0.15, 0.4, 0.7, 0.9 };
    Vector vector = new DenseVector(10);
    for (int k = 0; k < assignedPositions.length; k++) {
        vector.set(assignedPositions[k], assignedValues[k]);
    }

    // Labels default to 0; positions 1, 2, 3 and 9 are marked as class 1.
    int[] labels = new int[10];
    for (int position : new int[] { 1, 2, 3, 9 }) {
        labels[position] = 1;
    }

    FusedKolmogorovFilter filter = new FusedKolmogorovFilter();
    filter.setNumBins(10);

    List<List<Double>> perClassInputs = filter.generateInputsEachClass(vector, labels, 2);
    System.out.println(perClassInputs);

    List<EmpiricalCDF> cdfs = filter.generateCDFs(vector, perClassInputs);
    System.out.println(cdfs);

    System.out.println(filter.maxDistance(cdfs));
}
Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.
Class AppCombSUM, method generateReport.
/**
 * Builds a tab-separated report with one row per document and writes it to
 * {@code <output.folder>/model_predictions/<ensembleModelName>/predictions/<dataSetFolder>_reports/report.csv}.
 *
 * <p>Document ids are read from the first model's own {@code report.csv}. Each row holds the
 * ensemble prediction (square brackets stripped), the literal type {@code set}, the calibrated
 * confidence, a 1.0/0.0 exact-match flag, the ground-truth string, precision, recall, F1, and
 * then one confidence/rank pair per entry of {@code modelPaths}.
 *
 * <p>NOTE(review): the header emits one column pair per {@code modelNames} entry while the rows
 * emit one pair per {@code modelPaths} entry; this assumes both config lists have the same
 * length - confirm.
 *
 * @param map                ensemble prediction and raw confidence per document id
 * @param config             supplies {@code modelNames}, {@code modelPaths}, {@code output.folder}
 *                           and {@code ensembleModelName}
 * @param dataSetFolder      data set name used as the prefix of the {@code _reports} folder
 * @param labelTranslator    used to render the prediction and to parse the ground truth
 * @param groundTruth        ground-truth label string per document id
 * @param confideceRankLists per model (same order as {@code modelPaths}), confidence and rank
 *                           per document id
 * @param setCalibrator      calibrator applied to the one-element confidence vector
 * @throws Exception if reading the document ids or writing the report fails
 */
private static void generateReport(Map<String, Pair<MultiLabel, Double>> map, Config config, String dataSetFolder, LabelTranslator labelTranslator, Map<String, String> groundTruth, List<Map<String, Pair<Double, Integer>>> confideceRankLists, VectorCalibrator setCalibrator) throws Exception {
    List<String> modelNames = config.getStrings("modelNames");
    List<String> modelPaths = config.getStrings("modelPaths");
    List<String> docIds = ReportUtils.getDocIds(Paths.get(modelPaths.get(0), "predictions", dataSetFolder + "_reports", "report.csv").toString());
    // Read once instead of re-querying the config for every document row.
    int numModels = modelPaths.size();

    StringBuilder sb = new StringBuilder();
    sb.append("doc_id\tprediction\tprediction_type\tconfidence\ttruth\tground_truth\tprecision\trecall\tF1\t");
    for (String modelName : modelNames) {
        sb.append(modelName).append("_confidence").append('\t').append(modelName).append("_rank").append('\t');
    }
    sb.append('\n');

    for (String docId : docIds) {
        Pair<MultiLabel, Double> docInfo = map.get(docId);
        MultiLabel pre = docInfo.getFirst();
        // todo change 1 to feature number for reranker
        Vector confidenceVector = new DenseVector(1);
        confidenceVector.set(0, docInfo.getSecond());

        // Literal replacement; no regex is needed to strip the brackets.
        String predictionText = pre.toStringWithExtLabels(labelTranslator).replace("[", "").replace("]", "");
        sb.append(docId).append('\t').append(predictionText).append('\t').append("set").append('\t').append(setCalibrator.calibrate(confidenceVector)).append('\t');

        String truthText = groundTruth.get(docId);
        MultiLabel lab = new MultiLabel(truthText, labelTranslator);
        double precision = Precision.precision(lab, pre);
        double recall = Recall.recall(lab, pre);
        double f1 = FMeasure.f1(precision, recall);
        // 1.0 only when the predicted label set equals the ground-truth label set.
        double truth = pre.getMatchedLabels().equals(lab.getMatchedLabels()) ? 1 : 0;
        sb.append(truth).append('\t').append(truthText).append('\t').append(precision).append('\t').append(recall).append('\t').append(f1).append('\t');

        for (int j = 0; j < numModels; j++) {
            Pair<Double, Integer> confidenceAndRank = confideceRankLists.get(j).get(docId);
            sb.append(confidenceAndRank.getFirst()).append('\t').append(confidenceAndRank.getSecond()).append('\t');
        }
        sb.append('\n');
    }

    FileUtils.writeStringToFile(Paths.get(config.getString("output.folder"), "model_predictions", config.getString("ensembleModelName"), "predictions", dataSetFolder + "_reports", "report.csv").toFile(), sb.toString());
}
Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.
Class Calibration, method loadFeatures.
/**
 * Reads one feature vector per line of {@code file}.
 *
 * <p>For each line, the text after the first {@code "("} (up to the next {@code "("}, if any)
 * is taken, every {@code ")"} is removed, and the remainder is split on commas; each piece is
 * trimmed and parsed as a double.
 *
 * @param file path of the text file to read
 * @return one dense vector per line, in file order
 * @throws IllegalArgumentException if a line contains no text after an opening parenthesis
 * @throws NumberFormatException    if a comma-separated piece is not a valid double
 * @throws Exception                if the file cannot be read
 */
private static Vector[] loadFeatures(String file) throws Exception {
    List<String> lines = FileUtils.readLines(new File(file));
    // Compiled once; String.split would recompile this quoted pattern for every line.
    Pattern openParen = Pattern.compile(Pattern.quote("("));
    Vector[] scores = new Vector[lines.size()];
    for (int i = 0; i < lines.size(); i++) {
        String line = lines.get(i);
        String[] parts = openParen.split(line);
        if (parts.length < 2) {
            // Fail with the offending location instead of a bare array-index error.
            throw new IllegalArgumentException("Line " + (i + 1) + " of " + file + " has no \"(...)\" feature list: " + line);
        }
        String[] features = parts[1].replace(")", "").split(",");
        Vector vector = new DenseVector(features.length);
        for (int j = 0; j < features.length; j++) {
            vector.set(j, Double.parseDouble(features[j].trim()));
        }
        scores[i] = vector;
    }
    return scores;
}
Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.
Class BRProbFeatureExtractor, method extractFeatures.
/**
 * Produces a single-element feature vector for a prediction candidate.
 *
 * <p>The feature is the product, over every index {@code l} of the candidate's
 * {@code labelProbs}, of {@code labelProbs[l]} when the candidate's multi-label matches class
 * {@code l}, and of {@code 1 - labelProbs[l]} otherwise. With no label probabilities the
 * feature is 1.
 *
 * @param predictionCandidate candidate whose {@code multiLabel} and {@code labelProbs} are read
 * @return a dense vector of size 1 holding the product
 */
@Override
public Vector extractFeatures(PredictionCandidate predictionCandidate) {
    final MultiLabel candidateLabels = predictionCandidate.multiLabel;
    final double[] labelProbs = predictionCandidate.labelProbs;

    double product = 1;
    int label = 0;
    while (label < labelProbs.length) {
        double p = labelProbs[label];
        product *= candidateLabels.matchClass(label) ? p : 1 - p;
        label++;
    }

    Vector feature = new DenseVector(1);
    feature.set(0, product);
    return feature;
}
Aggregations