use of java.text.NumberFormat in project CoreNLP by stanfordnlp.
the class SisterAnnotationStats method printStats.
/**
 * Prints the collected sister-annotation statistics to standard output.
 *
 * <p>For every label in {@code nodeRules} this reports the label's total
 * count ("support"), the KL divergence of each left and right sister
 * context against the label's overall counter, and those contexts ranked by
 * {@code KL * support}. After all labels it prints the global ranking and
 * Java source declaring one {@code sisterSplitN} array per entry of
 * {@code CUTOFFS}, followed by a combined {@code sisterSplit} selector
 * expression.
 */
public void printStats() {
  NumberFormat nf = NumberFormat.getNumberInstance();
  nf.setMaximumFractionDigits(2);

  // One buffer of generated Java source per cutoff (the selSplit arrays).
  StringBuilder[] javaSB = new StringBuilder[CUTOFFS.length];
  for (int i = 0; i < CUTOFFS.length; i++) {
    javaSB[i] = new StringBuilder(" private static String[] sisterSplit" + (i + 1) + " = new String[] {");
  }

  // topScores contains all enriched categories, to be sorted later
  ArrayList topScores = new ArrayList();
  for (Object o : nodeRules.keySet()) {
    String label = (String) o;
    ClassicCounter cntr = (ClassicCounter) nodeRules.get(label);
    double support = cntr.totalCount();
    System.out.println("Node " + label + " support is " + support);

    // Left contexts first, then right contexts, matching the report order.
    ArrayList answers = new ArrayList();
    answers.addAll(scoreSisters(label, cntr, (HashMap) leftRules.get(label), "=l=", nf));
    answers.addAll(scoreSisters(label, cntr, (HashMap) rightRules.get(label), "=r=", nf));
    // Copy into the global list before the per-label sort reorders answers.
    topScores.addAll(answers);

    System.out.println("----");
    System.out.println("Sorted descending support * KL");
    sortByScoreDescending(answers);
    for (Object answer : answers) {
      Pair p = (Pair) answer;
      double psd = ((Double) p.second()).doubleValue();
      System.out.println(p.first() + ": " + nf.format(psd));
    }
    System.out.println();
  }

  sortByScoreDescending(topScores);
  System.out.println("All enriched categories, sorted by score");
  for (Object topScore : topScores) {
    Pair p = (Pair) topScore;
    double psd = ((Double) p.second()).doubleValue();
    System.out.println(p.first() + ": " + nf.format(psd));
  }
  System.out.println();
  System.out.println(" // Automatically generated by SisterAnnotationStats -- preferably don't edit");

  // Walk the ranking from the best score down. Each label is written to the
  // highest-indexed cutoff bucket whose threshold it still meets; k only
  // ever moves down because the scores are in descending order.
  int k = CUTOFFS.length - 1;
  for (Object topScore : topScores) {
    Pair p = (Pair) topScore;
    double psd = ((Double) p.second()).doubleValue();
    while (k > 0 && psd < CUTOFFS[k]) {
      k--;
    }
    if (psd < CUTOFFS[k]) {
      // Below the lowest cutoff: nothing further down the ranking qualifies.
      break;
    }
    javaSB[k].append("\"").append(p.first()).append("\",");
  }

  for (StringBuilder sb : javaSB) {
    // Drop only the trailing separator, so the last entry keeps its closing
    // quote and an empty bucket still yields a well-formed "{};".
    int len = sb.length();
    if (len > 0 && sb.charAt(len - 1) == ',') {
      sb.setLength(len - 1);
    }
    sb.append("};");
    System.out.println(sb);
  }

  System.out.print(" public static String[] sisterSplit = ");
  for (int i = CUTOFFS.length; i > 0; i--) {
    if (i == 1) {
      System.out.print("sisterSplit1");
    } else {
      System.out.print("selectiveSisterSplit" + i + " ? sisterSplit" + i + " : (");
    }
  }
  // need to print extra one to close other things open
  // NOTE(review): this emits CUTOFFS.length + 1 closing parentheses while the
  // loop above opens only CUTOFFS.length - 1; left as-is, confirm the intended count.
  for (int i = CUTOFFS.length; i >= 0; i--) {
    System.out.print(")");
  }
  System.out.println(";");
}

/**
 * Scores every sister context recorded on one side of {@code label},
 * printing one line per context.
 *
 * @param label the unannotated node label
 * @param labelCounts the counter for {@code label} over all contexts
 * @param sisterRules map from sister label to the counter seen in that context
 * @param sideMarker {@code "=l="} or {@code "=r="}, inserted between label and sister
 * @param nf formatter used for the printed divergence
 * @return pairs of (annotated label, KL * context support), in map iteration order
 */
private static ArrayList scoreSisters(String label, ClassicCounter labelCounts, HashMap sisterRules, String sideMarker, NumberFormat nf) {
  ArrayList scored = new ArrayList();
  for (Object key : sisterRules.keySet()) {
    String sis = (String) key;
    ClassicCounter sisterCounts = (ClassicCounter) sisterRules.get(sis);
    double sisterSupport = sisterCounts.totalCount();
    // The score uses the full distribution. Holding out the test-context
    // data instead does not work: it can lead to zero-probability data
    // points and hence infinite divergence.
    double kl = Counters.klDivergence(sisterCounts, labelCounts);
    String annotatedLabel = label + sideMarker + sis;
    System.out.println("KL(" + annotatedLabel + "||" + label + ") = " + nf.format(kl) + "\t" + "support(" + sis + ") = " + sisterSupport);
    scored.add(new Pair(annotatedLabel, Double.valueOf(kl * sisterSupport)));
  }
  return scored;
}

/** Sorts (label, score) pairs in place, highest score first. */
private static void sortByScoreDescending(ArrayList scored) {
  Collections.sort(scored, (o1, o2) -> {
    Double s1 = (Double) ((Pair) o1).second();
    Double s2 = (Double) ((Pair) o2).second();
    return s2.compareTo(s1);
  });
}
use of java.text.NumberFormat in project CoreNLP by stanfordnlp.
the class MLEDependencyGrammar method toString.
/**
 * Returns a short description of this grammar: the simple class name
 * followed by the number of tag bins and word tokens.
 *
 * @return a string of the form
 *         {@code Name[tagbins=N,wordTokens=M; head -> arg\n]}
 */
@Override
public String toString() {
  String cl = getClass().getName();
  StringBuilder sb = new StringBuilder();
  // Strip the package prefix so only the class name itself is shown.
  sb.append(cl.substring(cl.lastIndexOf('.') + 1)).append("[tagbins=");
  sb.append(numTagBins).append(",wordTokens=").append(numWordTokens).append("; head -> arg\n");
  // The per-dependency count listing that used to follow the header is
  // disabled; only the header line and the closing bracket are emitted.
  sb.append("]");
  return sb.toString();
}
use of java.text.NumberFormat in project CoreNLP by stanfordnlp.
the class CollinsDepEval method display.
/**
 * Writes the final per-relation precision, recall and F1 table to
 * {@code pw}, one line per relation, in ascending order of F1.
 *
 * <p>Relations whose F1 is undefined (no guessed or no gold instances) sort
 * first and are printed with {@code N/A} in the affected columns.
 *
 * @param verbose not consulted by this implementation
 * @param pw the writer that receives the table
 */
@Override
public void display(boolean verbose, PrintWriter pw) {
  final NumberFormat nf = new DecimalFormat("0.00");
  final Set<CollinsRelation> cats = Generics.newHashSet();
  cats.addAll(precisions.keySet());
  cats.addAll(recalls.keySet());

  // Sorted by F1 so that values() below iterates from worst to best.
  Map<Double, CollinsRelation> f1Map = new TreeMap<>();
  for (CollinsRelation cat : cats) {
    double pnum2 = pnums2.getCount(cat);
    double rnum2 = rnums2.getCount(cat);
    double prec = precisions2.getCount(cat) / pnum2;
    double rec = recalls2.getCount(cat) / rnum2;
    double f1 = 2.0 / (1.0 / prec + 1.0 / rec);
    // Division by a zero count yields NaN or infinity; file those under a
    // sentinel below every real score.
    double key = (Double.isNaN(f1) || Double.isInfinite(f1)) ? -1.0 : f1;
    // Map keys must be unique. Step to the next representable double on a
    // tie: this is deterministic, never overwrites an earlier relation, and
    // perturbs the order far less than a random offset would.
    while (f1Map.containsKey(key)) {
      key = Math.nextUp(key);
    }
    f1Map.put(key, cat);
  }

  pw.println(" Abstract Collins Dependencies -- final statistics");
  pw.println("================================================================================");
  for (CollinsRelation cat : f1Map.values()) {
    double pnum2 = pnums2.getCount(cat);
    double rnum2 = rnums2.getCount(cat);
    double prec = precisions2.getCount(cat) / pnum2;
    double rec = recalls2.getCount(cat) / rnum2;
    // Recomputed from the counts so the printed F1 is the exact value, not
    // the possibly nudged sort key.
    double f1 = 2.0 / (1.0 / prec + 1.0 / rec);
    pw.println(cat + "\tLP: " + ((pnum2 == 0.0) ? " N/A" : nf.format(prec)) + "\tguessed: " + (int) pnum2 + "\tLR: " + ((rnum2 == 0.0) ? " N/A" : nf.format(rec)) + "\tgold: " + (int) rnum2 + "\tF1: " + ((pnum2 == 0.0 || rnum2 == 0.0) ? " N/A" : nf.format(f1)));
  }
  pw.println("================================================================================");
}
use of java.text.NumberFormat in project CoreNLP by stanfordnlp.
the class Treebank method textualSummary.
/**
 * Return various statistics about the treebank (number of sentences,
 * words, tag set, etc.), together with warnings about structural oddities
 * (multiple root categories, non-unary roots, bare leaves, null or empty
 * labels, nodes mixing leaf and non-leaf children, '@' in category names).
 *
 * <p>Side effect: a node whose label is {@code null} is given an empty
 * {@code StringLabel}, and a label whose value is {@code null} has its
 * value set to the empty string.
 *
 * @param tlp The TreebankLanguagePack used to recognise punctuation tags;
 *            may be {@code null}, in which case punctuation is not tallied
 * @return A big string for human consumption describing the treebank
 * @throws IllegalStateException if a node is neither a leaf, a preterminal
 *         nor phrasal
 */
public String textualSummary(TreebankLanguagePack tlp) {
  int numTrees = 0;
  int numTreesLE40 = 0;
  int numNonUnaryRoots = 0;
  Tree nonUnaryEg = null;
  ClassicCounter<Tree> nonUnaries = new ClassicCounter<>();
  ClassicCounter<String> roots = new ClassicCounter<>();
  ClassicCounter<String> starts = new ClassicCounter<>();
  ClassicCounter<String> puncts = new ClassicCounter<>();
  int numUnenclosedLeaves = 0;
  int numLeaves = 0;
  int numNonPhrasal = 0;
  int numPreTerminalWithMultipleChildren = 0;
  int numWords = 0;
  int numTags = 0;
  int shortestSentence = Integer.MAX_VALUE;
  int longestSentence = 0;
  int numNullLabel = 0;
  Set<String> words = Generics.newHashSet();
  ClassicCounter<String> tags = new ClassicCounter<>();
  ClassicCounter<String> cats = new ClassicCounter<>();
  Tree leafEg = null;
  Tree preTerminalMultipleChildrenEg = null;
  Tree nullLabelEg = null;
  Tree rootRewritesAsTaggedWordEg = null;

  // Pass 1: gather counts and one example tree per kind of anomaly.
  for (Tree t : this) {
    roots.incrementCount(t.value());
    numTrees++;
    int leng = t.yield().size();
    if (leng <= 40) {
      numTreesLE40++;
    }
    if (leng < shortestSentence) {
      shortestSentence = leng;
    }
    if (leng > longestSentence) {
      longestSentence = leng;
    }

    // Classify the shape of the root rewrite.
    if (t.numChildren() > 1) {
      if (numNonUnaryRoots == 0) {
        nonUnaryEg = t;
      }
      // Only the first 100 offending rewrites are recorded.
      if (numNonUnaryRoots < 100) {
        nonUnaries.incrementCount(t.localTree());
      }
      numNonUnaryRoots++;
    } else if (t.isLeaf()) {
      numUnenclosedLeaves++;
    } else {
      Tree t2 = t.firstChild();
      if (t2.isLeaf()) {
        numLeaves++;
        leafEg = t;
      } else if (t2.isPreTerminal()) {
        if (numNonPhrasal == 0) {
          rootRewritesAsTaggedWordEg = t;
        }
        numNonPhrasal++;
      }
      starts.incrementCount(t2.value());
    }

    // Per-node statistics over every subtree of t.
    for (Tree subtree : t) {
      Label lab = subtree.label();
      if (lab == null || lab.value() == null || "".equals(lab.value())) {
        if (numNullLabel == 0) {
          nullLabelEg = subtree;
        }
        numNullLabel++;
        // Normalise null labels/values to "" before value() is used below.
        if (lab == null) {
          subtree.setLabel(new StringLabel(""));
        } else if (lab.value() == null) {
          subtree.label().setValue("");
        }
      }
      if (subtree.isLeaf()) {
        numWords++;
        words.add(subtree.value());
      } else if (subtree.isPreTerminal()) {
        numTags++;
        tags.incrementCount(subtree.value());
        if (tlp != null && tlp.isPunctuationTag(subtree.value())) {
          puncts.incrementCount(subtree.firstChild().value());
        }
      } else if (subtree.isPhrasal()) {
        boolean hasLeafChild = false;
        for (Tree kt : subtree.children()) {
          if (kt.isLeaf()) {
            hasLeafChild = true;
            break;
          }
        }
        if (hasLeafChild) {
          numPreTerminalWithMultipleChildren++;
          if (preTerminalMultipleChildrenEg == null) {
            preTerminalMultipleChildrenEg = subtree;
          }
        }
        cats.incrementCount(subtree.value());
      } else {
        throw new IllegalStateException("Treebank: Bad tree in treebank!: " + subtree);
      }
    }
  }

  // Pass 2: render the report.
  StringWriter sw = new StringWriter(2000);
  PrintWriter pw = new PrintWriter(sw);
  NumberFormat nf = NumberFormat.getNumberInstance();
  nf.setMaximumFractionDigits(0);
  pw.println("Treebank has " + numTrees + " trees (" + numTreesLE40 + " of length <= 40) and " + numWords + " words (tokens)");
  if (numTrees > 0) {
    if (numTags != numWords) {
      pw.println(" Warning! numTags differs and is " + numTags);
    }
    if (roots.size() == 1) {
      String root = roots.keySet().iterator().next();
      pw.println(" The root category is: " + root);
    } else {
      pw.println(" Warning! " + roots.size() + " different roots in treebank: " + Counters.toString(roots, nf));
    }
    if (numNonUnaryRoots > 0) {
      pw.print(" Warning! " + numNonUnaryRoots + " trees without unary initial rewrite. ");
      if (numNonUnaryRoots > 100) {
        pw.print("First 100 ");
      }
      pw.println("Rewrites: " + Counters.toString(nonUnaries, nf));
      pw.println(" Example: " + nonUnaryEg);
    }
    if (numUnenclosedLeaves > 0 || numLeaves > 0 || numNonPhrasal > 0) {
      pw.println(" Warning! Non-phrasal trees: " + numUnenclosedLeaves + " bare leaves; " + numLeaves + " root rewrites as leaf; and " + numNonPhrasal + " root rewrites as tagged word");
      if (numLeaves > 0) {
        pw.println(" Example bad root rewrites as leaf: " + leafEg);
      }
      if (numNonPhrasal > 0) {
        pw.println(" Example bad root rewrites as tagged word: " + rootRewritesAsTaggedWordEg);
      }
    }
    if (numNullLabel > 0) {
      pw.println(" Warning! " + numNullLabel + " tree nodes with null or empty string labels, e.g.:");
      pw.println(" " + nullLabelEg);
    }
    if (numPreTerminalWithMultipleChildren > 0) {
      pw.println(" Warning! " + numPreTerminalWithMultipleChildren + " preterminal nodes with multiple children.");
      pw.println(" Example: " + preTerminalMultipleChildrenEg);
    }
    // Average truncated to two decimals. The scaling is done in long
    // arithmetic because numWords * 100 overflows int beyond ~21.4M tokens.
    pw.println(" Sentences range from " + shortestSentence + " to " + longestSentence + " words, with an average length of " + (((numWords * 100L) / numTrees) / 100.0) + " words.");
    pw.println(" " + cats.size() + " phrasal category types, " + tags.size() + " tag types, and " + words.size() + " word types");
    String[] empties = { "*", "0", "*T*", "*RNR*", "*U*", "*?*", "*EXP*", "*ICH*", "*NOT*", "*PPA*", "*OP*", "*pro*", "*PRO*" };
    // What a dopey choice using 0 as an empty element name!!
    // The problem with the below is that words aren't turned into a basic
    // category, but empties commonly are indexed.... Would need to look
    // for them with a suffix of -[0-9]+
    Set<String> knownEmpties = Generics.newHashSet(Arrays.asList(empties));
    Set<String> emptiesIntersection = Sets.intersection(words, knownEmpties);
    if (!emptiesIntersection.isEmpty()) {
      pw.println(" Caution! " + emptiesIntersection.size() + " word types are known empty elements: " + emptiesIntersection);
    }
    Set<String> joint = Sets.intersection(cats.keySet(), tags.keySet());
    if (!joint.isEmpty()) {
      pw.println(" Warning! " + joint.size() + " items are tags and categories: " + joint);
    }
    // Report only the first offending name of each kind.
    for (String cat : cats.keySet()) {
      if (cat != null && cat.contains("@")) {
        pw.println(" Warning!! Stanford Parser does not work with categories containing '@' like: " + cat);
        break;
      }
    }
    for (String cat : tags.keySet()) {
      if (cat != null && cat.contains("@")) {
        pw.println(" Warning!! Stanford Parser does not work with tags containing '@' like: " + cat);
        break;
      }
    }
    pw.println(" Cats: " + Counters.toString(cats, nf));
    pw.println(" Tags: " + Counters.toString(tags, nf));
    pw.println(" " + starts.size() + " start categories: " + Counters.toString(starts, nf));
    if (!puncts.isEmpty()) {
      pw.println(" Puncts: " + Counters.toString(puncts, nf));
    }
  }
  return sw.toString();
}
use of java.text.NumberFormat in project japid42 by branaway.
the class WebUtils method formatCurrency.
// public static String formatCurrency(Number number, String currencyCode) {
// Currency currency = Currency.getInstance(currencyCode);
// NumberFormat numberFormat = NumberFormat.getCurrencyInstance(new Locale(Lang.get()));
// numberFormat.setCurrency(currency);
// numberFormat.setMaximumFractionDigits(currency.getDefaultFractionDigits());
// String s = numberFormat.format(number);
// s = s.replace(currencyCode, I18N.getCurrencySymbol(currencyCode));
// return s;
// }
/**
 * Formats {@code number} as a currency amount using the conventions and
 * currency of {@code locale}.
 *
 * <p>When the locale identifies a country with a currency, any occurrence
 * of the ISO currency code in the formatted text is replaced by the
 * locale-specific currency symbol. When it does not (no country, an
 * unsupported country code, or a territory without a currency), the
 * locale's default currency format is used unchanged instead of failing.
 *
 * @param number the amount to format
 * @param locale the locale that selects both the number format and the currency
 * @return the formatted amount
 */
public static String formatCurrency(Number number, Locale locale) {
    NumberFormat numberFormat = NumberFormat.getCurrencyInstance(locale);

    Currency currency;
    try {
        // May return null for territories that have no currency.
        currency = Currency.getInstance(locale);
    } catch (IllegalArgumentException noSupportedCountry) {
        // Thrown when the locale's country is not a supported ISO 3166 code,
        // e.g. a language-only locale such as Locale.ENGLISH.
        currency = null;
    }
    if (currency == null) {
        return numberFormat.format(number);
    }

    numberFormat.setCurrency(currency);
    numberFormat.setMaximumFractionDigits(currency.getDefaultFractionDigits());
    String s = numberFormat.format(number);
    return s.replace(currency.getCurrencyCode(), currency.getSymbol(locale));
}
Aggregations