Use of edu.neu.ccs.pyramid.util.BernoulliDistribution in the project pyramid by cheng-li.
From class BM, method computeLogClusterConditionalForEmpty.
/**
 * Accumulates, across every dimension, the log-probability that the given
 * cluster's Bernoulli distribution assigns to the outcome 0.
 *
 * @param clusterIndex index of the cluster whose per-dimension distributions are read
 * @return the sum of {@code logProbability(0)} over all dimensions
 */
private double computeLogClusterConditionalForEmpty(int clusterIndex) {
    double total = 0.0;
    int feature = 0;
    while (feature < dimension) {
        total += distributions[clusterIndex][feature].logProbability(0);
        feature++;
    }
    return total;
}
Use of edu.neu.ccs.pyramid.util.BernoulliDistribution in the project pyramid by cheng-li.
From class ClusterLabels, method getCluster.
/**
 * Builds word frequencies for the most probable features of one cluster.
 *
 * <p>Each feature name is paired with the cluster's Bernoulli parameter for that
 * feature. The pairs are ordered by descending probability, entries whose
 * probability is not strictly positive are dropped, and at most 20 are kept.
 * Every kept probability is scaled by {@code 200 / sum}, where {@code sum} is
 * the total of the kept probabilities, and truncated to an {@code int}.
 *
 * @param bm the mixture model supplying distributions, feature names and dimension
 * @param k  index of the cluster to summarize
 * @return one {@code WordFrequency} per kept feature, highest probability first
 * @throws Exception declared by the signature; nothing in this body throws a checked exception
 */
private static List<WordFrequency> getCluster(BM bm, int k) throws Exception {
    BernoulliDistribution[][] allDistributions = bm.getDistributions();

    List<Pair<String, Double>> featureProbabilities = new ArrayList<>();
    for (int feature = 0; feature < bm.getDimension(); feature++) {
        Pair<String, Double> entry =
                new Pair<>(bm.getNames().get(feature), allDistributions[k][feature].getP());
        featureProbabilities.add(entry);
    }

    // Materialize the top entries once so the total and the scaling see the same list.
    Comparator<Pair<String, Double>> byProbability = Comparator.comparing(Pair::getSecond);
    List<Pair<String, Double>> top = featureProbabilities.stream()
            .sorted(byProbability.reversed())
            .filter(entry -> entry.getSecond() > 0)
            .limit(20)
            .collect(Collectors.toList());

    double total = top.stream().mapToDouble(Pair::getSecond).sum();

    List<WordFrequency> frequencies = new ArrayList<>();
    for (Pair<String, Double> entry : top) {
        int scaled = (int) (entry.getSecond() * 200 / total);
        frequencies.add(new WordFrequency(entry.getFirst(), scaled));
    }
    return frequencies;
}
Use of edu.neu.ccs.pyramid.util.BernoulliDistribution in the project pyramid by cheng-li.
From class RegressionSynthesizer, method linear.
/**
 * Synthesizes a dense regression data set whose label is a linear function
 * of the features.
 *
 * <p>The set has 50 data points and 16000 features. Every feature value is
 * {@code -1} or {@code 1}, chosen by a sample from a Bernoulli(0.5)
 * distribution (sample {@code 0} maps to {@code -1}). Only the first four
 * weights are non-zero (0.001 each); each label is the dot product of the
 * weight vector with the data point's row.
 *
 * @return the populated data set
 */
public static RegDataSet linear() {
    final int numData = 50;
    final int numFeatures = 16000;
    final int numActiveWeights = 4;

    RegDataSet dataSet = RegDataSetBuilder.getBuilder()
            .numDataPoints(numData)
            .numFeatures(numFeatures)
            .dense(true)
            .missingValue(false)
            .build();

    Vector weights = new DenseVector(numFeatures);
    for (int j = 0; j < numActiveWeights; j++) {
        weights.set(j, 0.001);
    }

    // The distribution is loop-invariant; build it once instead of once per cell.
    // NOTE(review): assumes repeated sample() calls on one instance are independent draws - confirm.
    BernoulliDistribution coin = new BernoulliDistribution(0.5);

    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeatures; j++) {
            if (coin.sample() == 0) {
                dataSet.setFeatureValue(i, j, -1);
            } else {
                dataSet.setFeatureValue(i, j, 1);
            }
        }
        // The label is computed only after the whole row has been filled in.
        double label = weights.dot(dataSet.getRow(i));
        dataSet.setLabel(i, label);
    }
    return dataSet;
}
Use of edu.neu.ccs.pyramid.util.BernoulliDistribution in the project pyramid by cheng-li.
From class BMTrainer, method updateCluster.
/**
 * Re-estimates the parameters of one cluster from the current {@code gammas}.
 *
 * <p>The effective total is the sum of {@code gammas[i][k]} over all data
 * points. For each feature, the new Bernoulli parameter is the weighted sum
 * for that feature divided by the effective total; a value of 1 or more is
 * replaced by 0.9999. The mixture coefficient becomes the effective total
 * divided by the number of data points, and its logarithm is stored alongside.
 *
 * @param k cluster index
 */
private void updateCluster(int k) {
    final int numPoints = dataSet.getNumDataPoints();
    final double effectiveTotal = IntStream.range(0, numPoints)
            .parallel()
            .mapToDouble(point -> gammas[point][k])
            .sum();

    // Each feature writes to its own slot of bm.distributions[k], so iterations are independent.
    IntStream.range(0, dataSet.getNumFeatures()).parallel().forEach(feature -> {
        double mean = weightedSum(k, feature) / effectiveTotal;
        double parameter = mean >= 1 ? 0.9999 : mean;
        bm.distributions[k][feature] = new BernoulliDistribution(parameter);
    });

    double coefficient = effectiveTotal / numPoints;
    bm.mixtureCoefficients[k] = coefficient;
    bm.logMixtureCoefficients[k] = Math.log(coefficient);
}
Use of edu.neu.ccs.pyramid.util.BernoulliDistribution in the project pyramid by cheng-li.
From class BM, method clusterConditionalLogProb.
/**
 * Computes the log-probability of a vector under one cluster.
 *
 * <p>Starts from the stored per-cluster value in
 * {@code logClusterConditioinalForEmpty} and, for each non-zero entry of the
 * vector, removes that feature's {@code logProbability(0)} and adds its
 * {@code logProbability(1)}. Only the index of each non-zero entry is used;
 * its value is not read.
 *
 * @param vector       the data point; only its non-zero entries are visited
 * @param clusterIndex index of the cluster to evaluate against
 * @return the resulting log-probability
 */
public double clusterConditionalLogProb(Vector vector, int clusterIndex) {
    double result = logClusterConditioinalForEmpty[clusterIndex];
    for (Vector.Element entry : vector.nonZeroes()) {
        BernoulliDistribution feature = distributions[clusterIndex][entry.index()];
        // Keep subtract-then-add as two steps so floating-point rounding is unchanged.
        result = result - feature.logProbability(0);
        result = result + feature.logProbability(1);
    }
    return result;
}
Aggregations