Search in sources :

Example 36 with DenseVector

use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

the class LogisticRegressionTest method test5.

/**
 * Manual smoke check for {@code LogisticRegression#predictClassProbs}.
 *
 * <p>Creates a model with constructor arguments {@code (3, 10, prior)}, fills a
 * 10-element dense vector with values from {@link Math#random()}, and prints the
 * predicted class probabilities to standard output. Nothing is asserted; the
 * output is meant to be inspected by eye.
 */
private static void test5() {
    double[] prior = new double[] { 0.1, 0.2, 0.7 };
    LogisticRegression model = new LogisticRegression(3, 10, prior);
    Vector features = new DenseVector(10);
    // Populate every coordinate with an independent random value.
    int index = 0;
    while (index < 10) {
        features.set(index, Math.random());
        index++;
    }
    String rendered = Arrays.toString(model.predictClassProbs(features));
    System.out.println(rendered);
}
Also used : DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) DenseVector(org.apache.mahout.math.DenseVector)

Example 37 with DenseVector

use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

the class FusedKolmogorovFilterTest method test1.

/**
 * Manual smoke check for {@code FusedKolmogorovFilter}.
 *
 * <p>Builds a 10-element dense vector (indices 0-4 and 8 are set explicitly) and a
 * matching label array, then prints, in order: the per-class inputs, the empirical
 * CDFs derived from them, and the filter's maximum distance between those CDFs.
 * Nothing is asserted; the output is meant to be inspected by eye.
 */
private static void test1() {
    Vector featureValues = new DenseVector(10);
    // Indices 0..4 receive these values in order.
    double[] leading = { 0.1, 0.2, 0.15, 0.4, 0.7 };
    for (int i = 0; i < leading.length; i++) {
        featureValues.set(i, leading[i]);
    }
    featureValues.set(8, 0.9);

    // Label 1 at positions 1, 2, 3 and 9; label 0 everywhere else.
    int[] labels = { 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 };

    FusedKolmogorovFilter kolmogorovFilter = new FusedKolmogorovFilter();
    kolmogorovFilter.setNumBins(10);

    List<List<Double>> perClassInputs = kolmogorovFilter.generateInputsEachClass(featureValues, labels, 2);
    System.out.println(perClassInputs);

    List<EmpiricalCDF> cdfs = kolmogorovFilter.generateCDFs(featureValues, perClassInputs);
    System.out.println(cdfs);

    System.out.println(kolmogorovFilter.maxDistance(cdfs));
}
Also used : List(java.util.List) DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) DenseVector(org.apache.mahout.math.DenseVector) EmpiricalCDF(edu.neu.ccs.pyramid.util.EmpiricalCDF)

Example 38 with DenseVector

use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

the class AppCombSUM method generateReport.

/**
 * Writes a tab-separated report of the ensemble predictions for one data set.
 *
 * <p>The document ids are read from the first model's {@code report.csv}. For each
 * document one row is emitted containing: the id, the predicted label set (square
 * brackets stripped), the literal prediction type {@code "set"}, the calibrated
 * confidence, a 1.0/0.0 flag telling whether the prediction's matched labels equal
 * the ground truth's, the raw ground-truth string, precision, recall, F1, and then
 * one confidence/rank pair per entry of {@code modelPaths}. The report is written to
 * {@code <output.folder>/model_predictions/<ensembleModelName>/predictions/<dataSetFolder>_reports/report.csv}.
 *
 * <p>Note: the header emits one column pair per entry of {@code modelNames}, while
 * the rows emit one pair per entry of {@code modelPaths}; the two config lists are
 * expected to have the same length (not checked here).
 *
 * @param map                 document id to (predicted label set, confidence score)
 * @param config              supplies {@code modelNames}, {@code modelPaths},
 *                            {@code output.folder} and {@code ensembleModelName}
 * @param dataSetFolder       data set name used to build the report directory name
 * @param labelTranslator     used to render predictions and parse ground truth
 * @param groundTruth         document id to ground-truth label string
 * @param confideceRankLists  per model (indexed like {@code modelPaths}), document id
 *                            to (confidence, rank)
 * @param setCalibrator       calibrator applied to the single-element confidence vector
 * @throws Exception if reading the doc ids or writing the report fails, or if a
 *                   document id is missing from one of the maps
 */
private static void generateReport(Map<String, Pair<MultiLabel, Double>> map, Config config, String dataSetFolder, LabelTranslator labelTranslator, Map<String, String> groundTruth, List<Map<String, Pair<Double, Integer>>> confideceRankLists, VectorCalibrator setCalibrator) throws Exception {
    List<String> modelNames = config.getStrings("modelNames");
    List<String> modelPaths = config.getStrings("modelPaths");
    List<String> docIds = ReportUtils.getDocIds(Paths.get(modelPaths.get(0), "predictions", dataSetFolder + "_reports", "report.csv").toString());
    // Hoisted: the number of per-model columns does not change between documents.
    int numModels = modelPaths.size();

    StringBuilder sb = new StringBuilder();
    // Fixed header columns; every column (including the last) is followed by a tab.
    sb.append(String.join("\t", "doc_id", "prediction", "prediction_type", "confidence", "truth", "ground_truth", "precision", "recall", "F1")).append("\t");
    for (String modelName : modelNames) {
        sb.append(modelName).append("_confidence").append("\t").append(modelName).append("_rank").append("\t");
    }
    sb.append("\n");

    for (String docId : docIds) {
        // Single lookup per document; reused for both the prediction and its score.
        Pair<MultiLabel, Double> docInfo = map.get(docId);
        MultiLabel pre = docInfo.getFirst();
        // todo change 1 to feature number for reranker
        Vector confidenceVector = new DenseVector(1);
        confidenceVector.set(0, docInfo.getSecond());
        // Literal replace: the brackets are plain characters, no regex needed.
        String predictionText = pre.toStringWithExtLabels(labelTranslator).replace("[", "").replace("]", "");
        sb.append(docId).append("\t").append(predictionText).append("\t").append("set").append("\t").append(setCalibrator.calibrate(confidenceVector)).append("\t");

        String truthText = groundTruth.get(docId);
        MultiLabel lab = new MultiLabel(truthText, labelTranslator);
        double precision = Precision.precision(lab, pre);
        double recall = Recall.recall(lab, pre);
        double f1 = FMeasure.f1(precision, recall);
        // Kept as a double so the column is rendered as "1.0" / "0.0" as before.
        double truth = pre.getMatchedLabels().equals(lab.getMatchedLabels()) ? 1 : 0;
        sb.append(truth).append("\t").append(truthText).append("\t").append(precision).append("\t").append(recall).append("\t").append(f1).append("\t");

        for (int j = 0; j < numModels; j++) {
            Pair<Double, Integer> confidenceAndRank = confideceRankLists.get(j).get(docId);
            sb.append(confidenceAndRank.getFirst()).append("\t").append(confidenceAndRank.getSecond()).append("\t");
        }
        sb.append("\n");
    }
    FileUtils.writeStringToFile(Paths.get(config.getString("output.folder"), "model_predictions", config.getString("ensembleModelName"), "predictions", dataSetFolder + "_reports", "report.csv").toFile(), sb.toString());
}
Also used : DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) DenseVector(org.apache.mahout.math.DenseVector)

Example 39 with DenseVector

use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

the class Calibration method loadFeatures.

/**
 * Reads one feature vector per line from a text file.
 *
 * <p>For each line, the text following the first {@code '('} (up to the next
 * {@code '('}, if any) is taken, every {@code ')'} is removed, and the remainder is
 * split on commas; each piece is trimmed and parsed as a double.
 *
 * @param file path of the file to read
 * @return one dense vector per line, in file order
 * @throws Exception if the file cannot be read, a line has no text after a
 *                   {@code '('}, or a value is not a valid double
 */
private static Vector[] loadFeatures(String file) throws Exception {
    List<String> lines = FileUtils.readLines(new File(file));
    Vector[] scores = new Vector[lines.size()];
    int row = 0;
    for (String line : lines) {
        // Second piece of the split = everything after the first opening parenthesis.
        String afterParen = line.split(Pattern.quote("("))[1];
        String[] tokens = afterParen.replace(")", "").split(",");
        Vector rowVector = new DenseVector(tokens.length);
        int column = 0;
        for (String token : tokens) {
            rowVector.set(column, Double.parseDouble(token.trim()));
            column++;
        }
        scores[row] = rowVector;
        row++;
    }
    return scores;
}
Also used : File(java.io.File) DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) DenseVector(org.apache.mahout.math.DenseVector)

Example 40 with DenseVector

use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

the class BRProbFeatureExtractor method extractFeatures.

/**
 * Produces a single feature: the product, over all labels, of the label's
 * probability when the candidate prediction matches that label and of one minus
 * that probability otherwise.
 *
 * @param predictionCandidate supplies the candidate label set ({@code multiLabel})
 *                            and the per-label probabilities ({@code labelProbs})
 * @return a dense vector of length 1 holding the product
 */
@Override
public Vector extractFeatures(PredictionCandidate predictionCandidate) {
    MultiLabel candidate = predictionCandidate.multiLabel;
    double[] labelProbs = predictionCandidate.labelProbs;
    double product = 1;
    int label = 0;
    for (double prob : labelProbs) {
        // Use p for labels in the candidate set, (1 - p) for labels outside it.
        double factor = candidate.matchClass(label) ? prob : 1 - prob;
        product *= factor;
        label++;
    }
    Vector feature = new DenseVector(1);
    feature.set(0, product);
    return feature;
}
Also used : MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) DenseVector(org.apache.mahout.math.DenseVector)

Aggregations

DenseVector (org.apache.mahout.math.DenseVector): 79, Vector (org.apache.mahout.math.Vector): 73, MultiLabel (edu.neu.ccs.pyramid.dataset.MultiLabel): 9, RandomAccessSparseVector (org.apache.mahout.math.RandomAccessSparseVector): 8, MultiLabelClfDataSet (edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet): 7, SequentialAccessSparseVector (org.apache.mahout.math.SequentialAccessSparseVector): 6, Pair (edu.neu.ccs.pyramid.util.Pair): 4, List (java.util.List): 3, IntStream (java.util.stream.IntStream): 3, EnumeratedIntegerDistribution (org.apache.commons.math3.distribution.EnumeratedIntegerDistribution): 3, LogisticRegression (edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression): 2, DataSet (edu.neu.ccs.pyramid.dataset.DataSet): 2, EmpiricalCDF (edu.neu.ccs.pyramid.util.EmpiricalCDF): 2, IntegerDistribution (org.apache.commons.math3.distribution.IntegerDistribution): 2, MultivariateNormalDistribution (org.apache.commons.math3.distribution.MultivariateNormalDistribution): 2, Classifier (edu.neu.ccs.pyramid.classification.Classifier): 1, Weights (edu.neu.ccs.pyramid.classification.logistic_regression.Weights): 1, RegDataSet (edu.neu.ccs.pyramid.dataset.RegDataSet): 1, ConstantRegressor (edu.neu.ccs.pyramid.regression.ConstantRegressor): 1, BernoulliDistribution (edu.neu.ccs.pyramid.util.BernoulliDistribution): 1