Use of com.kennycason.kumo.WordFrequency in the project pyramid by cheng-li.
From the class ClusterLabels, method getCluster.
/**
 * Builds the word frequencies for one mixture component of the model.
 *
 * <p>Every dimension name is paired with the probability {@code getP()} of its
 * Bernoulli distribution in component {@code k}. The pairs are ordered by
 * descending probability, entries whose probability is not strictly positive
 * are dropped, and at most 20 entries are kept. Each kept probability is then
 * rescaled to {@code (int) (p * 200 / sum)}, where {@code sum} is the total
 * probability of the kept entries.
 *
 * @param bm the model providing the distributions, names and dimension
 * @param k  index of the component, used as the first index into the distributions
 * @return the rescaled frequencies, most probable first; empty when no
 *         dimension has a strictly positive probability in component {@code k}
 * @throws Exception declared for callers; nothing in this body throws a checked exception itself
 */
private static List<WordFrequency> getCluster(BM bm, int k) throws Exception {
    BernoulliDistribution[][] distributions = bm.getDistributions();
    List<Pair<String, Double>> pairs = new ArrayList<>();
    for (int d = 0; d < bm.getDimension(); d++) {
        Pair<String, Double> pair = new Pair<>(bm.getNames().get(d), distributions[k][d].getP());
        pairs.add(pair);
    }

    // Select the top entries once; both the normalising sum and the result are derived from it.
    Comparator<Pair<String, Double>> byProbability = Comparator.comparing(Pair::getSecond);
    List<Pair<String, Double>> top = pairs.stream()
            .sorted(byProbability.reversed())
            .filter(pair -> pair.getSecond() > 0)
            .limit(20)
            .collect(Collectors.toList());

    // When top is empty the loop below never runs, so the zero sum is never used as a divisor.
    double sum = top.stream().mapToDouble(Pair::getSecond).sum();

    List<WordFrequency> frequencies = new ArrayList<>();
    for (Pair<String, Double> pair : top) {
        // The cast truncates towards zero, so very small shares can become frequency 0.
        frequencies.add(new WordFrequency(pair.getFirst(), (int) (pair.getSecond() * 200 / sum)));
    }
    return frequencies;
}
Use of com.kennycason.kumo.WordFrequency in the project pyramid by cheng-li.
From the class ClusterLabels, method plot.
/**
 * Writes one word-cloud PNG per mixture component of a previously saved model.
 *
 * <p>The model is deserialized from the file {@code model} inside the directory
 * named by the {@code output.dir} config entry. Images go to the {@code clusters}
 * sub-directory, which is created if necessary and then emptied with
 * {@code FileUtils.cleanDirectory}. Components are visited in the order returned
 * by {@code ArgSort.argSortDescending} on the mixture coefficients, and each image
 * is named {@code <position>_<coefficient>.png}.
 *
 * <p>A component for which {@code getCluster} returns no words is skipped, so
 * positions in the file names may have gaps.
 *
 * @param config configuration supplying the {@code output.dir} entry
 * @throws Exception if deserialization, directory cleanup or image writing fails
 */
public static void plot(Config config) throws Exception {
    BM bm = (BM) Serialization.deserialize(new File(config.getString("output.dir"), "model"));
    double[] coefficients = bm.getMixtureCoefficients();
    int[] sortedComponents = ArgSort.argSortDescending(bm.getMixtureCoefficients());
    File clusterFolder = Paths.get(config.getString("output.dir"), "clusters").toFile();
    clusterFolder.mkdirs();
    FileUtils.cleanDirectory(clusterFolder);
    for (int i = 0; i < sortedComponents.length; i++) {
        int k = sortedComponents[i];
        List<WordFrequency> frequencies = getCluster(bm, k);
        if (frequencies.isEmpty()) {
            // No word has a positive probability in this component: max() below would be
            // an empty optional and getAsDouble() would abort the whole run.
            continue;
        }
        double max = frequencies.stream().mapToDouble(WordFrequency::getFrequency).max().getAsDouble();
        double sum = frequencies.stream().mapToDouble(WordFrequency::getFrequency).sum();
        // ratio == sum / max, so the upper font size below equals 500 * max / sum:
        // the larger the share of the most frequent word, the larger the biggest font.
        double ratio = sum / max;
        final Dimension dimension = new Dimension(600, 600);
        final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.RECTANGLE);
        wordCloud.setPadding(0);
        wordCloud.setAngleGenerator(new AngleGenerator(0));
        wordCloud.setBackground(new RectangleBackground(dimension));
        wordCloud.setColorPalette(buildRandomColorPalette(20));
        wordCloud.setBackgroundColor(Color.WHITE);
        wordCloud.setFontScalar(new LinearFontScalar(20, (int) (500 / ratio)));
        wordCloud.setWordStartStrategy(new CenterWordStart());
        wordCloud.build(frequencies);
        // The file name carries the position in the sorted order and the component's coefficient.
        File out = new File(clusterFolder, "" + i + "_" + coefficients[k] + ".png");
        wordCloud.writeToFile(out.getAbsolutePath());
    }
}
Aggregations