Search in sources :

Example 41 with NumberFormat

use of java.text.NumberFormat in project CoreNLP by stanfordnlp.

the class SisterAnnotationStats method printStats.

/**
 * Prints sister-annotation statistics and generated Java source to standard output.
 *
 * <p>For every label in {@code nodeRules} this prints the label's total count
 * ("support"), then, for each left sister in {@code leftRules} and each right
 * sister in {@code rightRules}, the value of {@code Counters.klDivergence}
 * between the sister-conditioned counter and the label's own counter together
 * with the sister's support.  The sisters of each label are then listed in
 * descending order of {@code KL * support}.
 *
 * <p>After all labels are processed, every annotated label is listed in
 * descending score order, and one {@code sisterSplitN} string-array declaration
 * per entry of {@code CUTOFFS} is printed, followed by a nested conditional
 * expression that selects among those arrays.
 *
 * <p>NOTE(review): the bucketing below only makes sense if {@code CUTOFFS} is
 * sorted in ascending order — confirm against its declaration.
 */
public void printStats() {
    NumberFormat nf = NumberFormat.getNumberInstance();
    nf.setMaximumFractionDigits(2);
    // One generated array declaration per cutoff; entries are appended further down.
    StringBuilder[] javaSB = new StringBuilder[CUTOFFS.length];
    for (int i = 0; i < CUTOFFS.length; i++) {
        javaSB[i] = new StringBuilder("  private static String[] sisterSplit" + (i + 1) + " = new String[] {");
    }
    // topScores collects every annotated label across all nodes, to be sorted later.
    ArrayList topScores = new ArrayList();
    for (Object o : nodeRules.keySet()) {
        ArrayList answers = new ArrayList();
        String label = (String) o;
        ClassicCounter cntr = (ClassicCounter) nodeRules.get(label);
        double support = cntr.totalCount();
        System.out.println("Node " + label + " support is " + support);
        // The score uses the full distribution.  A held-out variant (subtracting the
        // sister's counts from the node's counts) was tried and abandoned because it
        // can produce zero-probability data points and hence infinite divergence.
        scoreSisters(label, cntr, (HashMap) leftRules.get(label), "=l=", nf, answers, topScores);
        scoreSisters(label, cntr, (HashMap) rightRules.get(label), "=r=", nf, answers, topScores);
        System.out.println("----");
        System.out.println("Sorted descending support * KL");
        Collections.sort(answers, (o1, o2) -> ((Double) ((Pair) o2).second()).compareTo((Double) ((Pair) o1).second()));
        for (Object answer : answers) {
            Pair p = (Pair) answer;
            double psd = ((Double) p.second()).doubleValue();
            System.out.println(p.first() + ": " + nf.format(psd));
        }
        System.out.println();
    }
    Collections.sort(topScores, (o1, o2) -> ((Double) ((Pair) o2).second()).compareTo((Double) ((Pair) o1).second()));
    // The header used to be built into a local string that was never printed.
    System.out.println("All enriched categories, sorted by score");
    for (Object topScore : topScores) {
        Pair p = (Pair) topScore;
        double psd = ((Double) p.second()).doubleValue();
        System.out.println(p.first() + ": " + nf.format(psd));
    }
    System.out.println();
    System.out.println("  // Automatically generated by SisterAnnotationStats -- preferably don't edit");
    // Walk the scores from best to worst, stepping down to a lower cutoff bucket
    // whenever the current score no longer reaches the current cutoff.
    int k = CUTOFFS.length - 1;
    for (Object topScore : topScores) {
        Pair p = (Pair) topScore;
        double psd = ((Double) p.second()).doubleValue();
        while (k > 0 && psd < CUTOFFS[k]) {
            k--;
        }
        if (psd < CUTOFFS[k]) {
            // Below the lowest cutoff; everything after this is lower still.
            break;
        }
        javaSB[k].append("\"").append(p.first());
        javaSB[k].append("\",");
    }
    for (int i = 0; i < CUTOFFS.length; i++) {
        int len = javaSB[i].length();
        // Drop only the trailing comma.  Replacing the last two characters would also
        // remove the closing quote of the last entry, or the opening brace of an
        // array that received no entries.
        if (len > 0 && javaSB[i].charAt(len - 1) == ',') {
            javaSB[i].setLength(len - 1);
        }
        javaSB[i].append("};");
        System.out.println(javaSB[i]);
    }
    System.out.print("  public static String[] sisterSplit = ");
    for (int i = CUTOFFS.length; i > 0; i--) {
        if (i == 1) {
            System.out.print("sisterSplit1");
        } else {
            System.out.print("selectiveSisterSplit" + i + " ? sisterSplit" + i + " : (");
        }
    }
    // Exactly one "(" was opened for every cutoff index above 1, so close that many.
    for (int i = CUTOFFS.length; i > 1; i--) {
        System.out.print(")");
    }
    System.out.println(";");
}

/**
 * Prints and records the score of every sister listed in {@code sisterRules}.
 *
 * @param label       the node label being analysed
 * @param cntr        counter for {@code label} taken from {@code nodeRules}
 * @param sisterRules map from sister name to the counter for {@code label} given that sister
 * @param marker      infix placed between label and sister ({@code "=l="} or {@code "=r="})
 * @param nf          formatter used for the printed divergence
 * @param answers     per-label result list; receives one {@code Pair} per sister
 * @param topScores   global result list; receives the same {@code Pair}
 */
private void scoreSisters(String label, ClassicCounter cntr, HashMap sisterRules, String marker, NumberFormat nf, ArrayList answers, ArrayList topScores) {
    for (Object key : sisterRules.keySet()) {
        String sis = (String) key;
        ClassicCounter cntr2 = (ClassicCounter) sisterRules.get(sis);
        double support2 = cntr2.totalCount();
        double kl = Counters.klDivergence(cntr2, cntr);
        String annotatedLabel = label + marker + sis;
        System.out.println("KL(" + annotatedLabel + "||" + label + ") = " + nf.format(kl) + "\t" + "support(" + sis + ") = " + support2);
        Pair scored = new Pair(annotatedLabel, Double.valueOf(kl * support2));
        answers.add(scored);
        topScores.add(scored);
    }
}
Also used : ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) NumberFormat(java.text.NumberFormat) Pair(edu.stanford.nlp.util.Pair)

Example 42 with NumberFormat

use of java.text.NumberFormat in project CoreNLP by stanfordnlp.

the class MLEDependencyGrammar method toString.

/**
 * Returns a short description of this grammar: the simple class name followed
 * by the number of tag bins and word tokens.
 *
 * <p>The per-dependency dump is disabled (see the commented-out block), so no
 * number formatter is created here.
 *
 * @return a string of the form {@code Name[tagbins=N,wordTokens=M; head -> arg\n]}
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    String cl = getClass().getName();
    // Strip the package prefix by hand; nested classes keep their "Outer$Inner" form.
    sb.append(cl.substring(cl.lastIndexOf('.') + 1)).append("[tagbins=");
    sb.append(numTagBins).append(",wordTokens=").append(numWordTokens).append("; head -> arg\n");
    // Disabled dependency dump.  If re-enabled, re-create the formatter first:
    //    NumberFormat nf = NumberFormat.getNumberInstance();
    //    nf.setMaximumFractionDigits(2);
    //    for (Iterator dI = coreDependencies.keySet().iterator(); dI.hasNext();) {
    //      IntDependency d = (IntDependency) dI.next();
    //      double count = coreDependencies.getCount(d);
    //      sb.append(d + " count " + nf.format(count));
    //      if (dI.hasNext()) {
    //        sb.append(",");
    //      }
    //      sb.append("\n");
    //    }
    sb.append("]");
    return sb.toString();
}
Also used : NumberFormat(java.text.NumberFormat)

Example 43 with NumberFormat

use of java.text.NumberFormat in project CoreNLP by stanfordnlp.

the class CollinsDepEval method display.

/**
 * Prints a per-relation table of labeled precision (LP), labeled recall (LR)
 * and F1 to {@code pw}, in ascending order of F1.
 *
 * <p>Relations whose F1 is not a number (for example because nothing was
 * guessed or nothing was in the gold standard) sort first and are shown as
 * {@code N/A}.  Relations with identical F1 are kept together; earlier
 * versions broke ties by adding random noise to the sort key, which made the
 * output order non-deterministic.
 *
 * @param verbose not used by this implementation
 * @param pw      destination for the report
 */
@Override
public void display(boolean verbose, PrintWriter pw) {
    final NumberFormat nf = new DecimalFormat("0.00");
    final Set<CollinsRelation> cats = Generics.newHashSet();
    cats.addAll(precisions.keySet());
    cats.addAll(recalls.keySet());
    // F1 -> all relations with exactly that F1.  Grouping ties in a list keeps every
    // relation without perturbing the key.
    Map<Double, java.util.List<CollinsRelation>> f1Map = new TreeMap<>();
    for (CollinsRelation cat : cats) {
        double prec = precisions2.getCount(cat) / pnums2.getCount(cat);
        double rec = recalls2.getCount(cat) / rnums2.getCount(cat);
        double f1 = 2.0 / (1.0 / prec + 1.0 / rec);
        if (Double.isNaN(f1)) {
            // Undefined scores sort ahead of every real score.
            f1 = -1.0;
        }
        f1Map.computeIfAbsent(f1, key -> new java.util.ArrayList<>()).add(cat);
    }
    pw.println(" Abstract Collins Dependencies -- final statistics");
    pw.println("================================================================================");
    for (java.util.List<CollinsRelation> tied : f1Map.values()) {
        for (CollinsRelation cat : tied) {
            double pnum2 = pnums2.getCount(cat);
            double rnum2 = rnums2.getCount(cat);
            double prec = precisions2.getCount(cat) / pnum2;
            double rec = recalls2.getCount(cat) / rnum2;
            double f1 = 2.0 / (1.0 / prec + 1.0 / rec);
            pw.println(cat + "\tLP: " + ((pnum2 == 0.0) ? " N/A" : nf.format(prec)) + "\tguessed: " + (int) pnum2 + "\tLR: " + ((rnum2 == 0.0) ? " N/A" : nf.format(rec)) + "\tgold:  " + (int) rnum2 + "\tF1: " + ((pnum2 == 0.0 || rnum2 == 0.0) ? " N/A" : nf.format(f1)));
        }
    }
    pw.println("================================================================================");
}
Also used : Random(java.util.Random) DecimalFormat(java.text.DecimalFormat) CollinsRelation(edu.stanford.nlp.trees.CollinsRelation) TreeMap(java.util.TreeMap) NumberFormat(java.text.NumberFormat)

Example 44 with NumberFormat

use of java.text.NumberFormat in project CoreNLP by stanfordnlp.

the class Treebank method textualSummary.

/**
 * Return various statistics about the treebank (number of sentences,
 * words, tag set, etc.).
 *
 * <p>Side effect: while scanning, any tree node whose label is {@code null}
 * is given an empty {@code StringLabel}, and any label whose value is
 * {@code null} has its value set to the empty string.
 *
 * @param tlp The TreebankLanguagePack used to determine punctuation and an
 *            appropriate character encoding; may be {@code null}, in which
 *            case no punctuation statistics are collected
 * @return A big string for human consumption describing the treebank
 * @throws IllegalStateException if a subtree is neither a leaf, a
 *         preterminal, nor phrasal
 */
public String textualSummary(TreebankLanguagePack tlp) {
    int numTrees = 0;
    int numTreesLE40 = 0;
    int numNonUnaryRoots = 0;
    Tree nonUnaryEg = null;
    ClassicCounter<Tree> nonUnaries = new ClassicCounter<>();
    ClassicCounter<String> roots = new ClassicCounter<>();
    ClassicCounter<String> starts = new ClassicCounter<>();
    ClassicCounter<String> puncts = new ClassicCounter<>();
    int numUnenclosedLeaves = 0;
    int numLeaves = 0;
    int numNonPhrasal = 0;
    int numPreTerminalWithMultipleChildren = 0;
    int numWords = 0;
    int numTags = 0;
    int shortestSentence = Integer.MAX_VALUE;
    int longestSentence = 0;
    int numNullLabel = 0;
    Set<String> words = Generics.newHashSet();
    ClassicCounter<String> tags = new ClassicCounter<>();
    ClassicCounter<String> cats = new ClassicCounter<>();
    Tree leafEg = null;
    Tree preTerminalMultipleChildrenEg = null;
    Tree nullLabelEg = null;
    Tree rootRewritesAsTaggedWordEg = null;
    for (Tree t : this) {
        // Root-level statistics.
        roots.incrementCount(t.value());
        numTrees++;
        int leng = t.yield().size();
        if (leng <= 40) {
            numTreesLE40++;
        }
        if (leng < shortestSentence) {
            shortestSentence = leng;
        }
        if (leng > longestSentence) {
            longestSentence = leng;
        }
        if (t.numChildren() > 1) {
            if (numNonUnaryRoots == 0) {
                nonUnaryEg = t;
            }
            // Only the first 100 non-unary root rewrites are recorded.
            if (numNonUnaryRoots < 100) {
                nonUnaries.incrementCount(t.localTree());
            }
            numNonUnaryRoots++;
        } else if (t.isLeaf()) {
            numUnenclosedLeaves++;
        } else {
            Tree t2 = t.firstChild();
            if (t2.isLeaf()) {
                numLeaves++;
                leafEg = t;
            } else if (t2.isPreTerminal()) {
                if (numNonPhrasal == 0) {
                    rootRewritesAsTaggedWordEg = t;
                }
                numNonPhrasal++;
            }
            starts.incrementCount(t2.value());
        }
        // Per-node statistics over every subtree of t.
        for (Tree subtree : t) {
            Label lab = subtree.label();
            if (lab == null || lab.value() == null || "".equals(lab.value())) {
                if (numNullLabel == 0) {
                    nullLabelEg = subtree;
                }
                numNullLabel++;
                // Repair missing labels/values so the value() calls below see "".
                if (lab == null) {
                    subtree.setLabel(new StringLabel(""));
                } else if (lab.value() == null) {
                    subtree.label().setValue("");
                }
            }
            if (subtree.isLeaf()) {
                numWords++;
                words.add(subtree.value());
            } else if (subtree.isPreTerminal()) {
                numTags++;
                tags.incrementCount(subtree.value());
                if (tlp != null && tlp.isPunctuationTag(subtree.value())) {
                    puncts.incrementCount(subtree.firstChild().value());
                }
            } else if (subtree.isPhrasal()) {
                boolean hasLeafChild = false;
                for (Tree kt : subtree.children()) {
                    if (kt.isLeaf()) {
                        hasLeafChild = true;
                        break;
                    }
                }
                if (hasLeafChild) {
                    numPreTerminalWithMultipleChildren++;
                    if (preTerminalMultipleChildrenEg == null) {
                        preTerminalMultipleChildrenEg = subtree;
                    }
                }
                cats.incrementCount(subtree.value());
            } else {
                throw new IllegalStateException("Treebank: Bad tree in treebank!: " + subtree);
            }
        }
    }
    StringWriter sw = new StringWriter(2000);
    PrintWriter pw = new PrintWriter(sw);
    NumberFormat nf = NumberFormat.getNumberInstance();
    nf.setMaximumFractionDigits(0);
    pw.println("Treebank has " + numTrees + " trees (" + numTreesLE40 + " of length <= 40) and " + numWords + " words (tokens)");
    if (numTrees > 0) {
        if (numTags != numWords) {
            pw.println("  Warning! numTags differs and is " + numTags);
        }
        if (roots.size() == 1) {
            String root = roots.keySet().iterator().next();
            pw.println("  The root category is: " + root);
        } else {
            pw.println("  Warning! " + roots.size() + " different roots in treebank: " + Counters.toString(roots, nf));
        }
        if (numNonUnaryRoots > 0) {
            pw.print("  Warning! " + numNonUnaryRoots + " trees without unary initial rewrite.  ");
            if (numNonUnaryRoots > 100) {
                pw.print("First 100 ");
            }
            pw.println("Rewrites: " + Counters.toString(nonUnaries, nf));
            pw.println("    Example: " + nonUnaryEg);
        }
        if (numUnenclosedLeaves > 0 || numLeaves > 0 || numNonPhrasal > 0) {
            pw.println("  Warning! Non-phrasal trees: " + numUnenclosedLeaves + " bare leaves; " + numLeaves + " root rewrites as leaf; and " + numNonPhrasal + " root rewrites as tagged word");
            if (numLeaves > 0) {
                pw.println("  Example bad root rewrites as leaf: " + leafEg);
            }
            if (numNonPhrasal > 0) {
                pw.println("  Example bad root rewrites as tagged word: " + rootRewritesAsTaggedWordEg);
            }
        }
        if (numNullLabel > 0) {
            pw.println("  Warning!  " + numNullLabel + " tree nodes with null or empty string labels, e.g.:");
            pw.println("    " + nullLabelEg);
        }
        if (numPreTerminalWithMultipleChildren > 0) {
            pw.println("  Warning! " + numPreTerminalWithMultipleChildren + " preterminal nodes with multiple children.");
            pw.println("    Example: " + preTerminalMultipleChildrenEg);
        }
        // Average truncated to two decimals.  The multiplication is done in long
        // arithmetic because numWords * 100 overflows int above ~21.4M tokens.
        double averageLength = ((numWords * 100L) / numTrees) / 100.0;
        pw.println("  Sentences range from " + shortestSentence + " to " + longestSentence + " words, with an average length of " + averageLength + " words.");
        pw.println("  " + cats.size() + " phrasal category types, " + tags.size() + " tag types, and " + words.size() + " word types");
        String[] empties = { "*", "0", "*T*", "*RNR*", "*U*", "*?*", "*EXP*", "*ICH*", "*NOT*", "*PPA*", "*OP*", "*pro*", "*PRO*" };
        // What a dopey choice using 0 as an empty element name!!
        // The problem with the below is that words aren't turned into a basic
        // category, but empties commonly are indexed....  Would need to look
        // for them with a suffix of -[0-9]+
        Set<String> knownEmpties = Generics.newHashSet(Arrays.asList(empties));
        Set<String> emptiesIntersection = Sets.intersection(words, knownEmpties);
        if (!emptiesIntersection.isEmpty()) {
            pw.println("  Caution! " + emptiesIntersection.size() + " word types are known empty elements: " + emptiesIntersection);
        }
        Set<String> joint = Sets.intersection(cats.keySet(), tags.keySet());
        if (!joint.isEmpty()) {
            pw.println("  Warning! " + joint.size() + " items are tags and categories: " + joint);
        }
        // Report only the first offending category and the first offending tag.
        for (String cat : cats.keySet()) {
            if (cat != null && cat.contains("@")) {
                pw.println("  Warning!!  Stanford Parser does not work with categories containing '@' like: " + cat);
                break;
            }
        }
        for (String cat : tags.keySet()) {
            if (cat != null && cat.contains("@")) {
                pw.println("  Warning!!  Stanford Parser does not work with tags containing '@' like: " + cat);
                break;
            }
        }
        pw.println("    Cats: " + Counters.toString(cats, nf));
        pw.println("    Tags: " + Counters.toString(tags, nf));
        pw.println("    " + starts.size() + " start categories: " + Counters.toString(starts, nf));
        if (!puncts.isEmpty()) {
            pw.println("    Puncts: " + Counters.toString(puncts, nf));
        }
    }
    pw.flush();
    return sw.toString();
}
Also used : StringLabel(edu.stanford.nlp.ling.StringLabel) Label(edu.stanford.nlp.ling.Label) StringLabel(edu.stanford.nlp.ling.StringLabel) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) NumberFormat(java.text.NumberFormat)

Example 45 with NumberFormat

use of java.text.NumberFormat in project japid42 by branaway.

the class WebUtils method formatCurrency.

//    public static String formatCurrency(Number number, String currencyCode) {
//        Currency currency = Currency.getInstance(currencyCode);
//        NumberFormat numberFormat = NumberFormat.getCurrencyInstance(new Locale(Lang.get()));
//        numberFormat.setCurrency(currency);
//        numberFormat.setMaximumFractionDigits(currency.getDefaultFractionDigits());
//        String s = numberFormat.format(number);
//        s = s.replace(currencyCode, I18N.getCurrencySymbol(currencyCode));
//        return s;
//    }
/**
 * Formats {@code number} as a currency amount using the conventions and the
 * currency of {@code locale}.
 *
 * <p>When the locale identifies a currency, the formatter is pinned to that
 * currency and its default number of fraction digits, and any occurrence of
 * the ISO currency code in the result is replaced by the locale's symbol.
 * When no currency can be derived from the locale (it has no supported
 * country, or the territory has no currency), the locale's default currency
 * format is used unchanged instead of failing.
 *
 * @param number the amount to format
 * @param locale the locale whose format and currency are used
 * @return the formatted amount
 * @throws NullPointerException if {@code locale} is {@code null}
 */
public static String formatCurrency(Number number, Locale locale) {
    NumberFormat numberFormat = NumberFormat.getCurrencyInstance(locale);
    Currency currency;
    try {
        // Returns null for territories without a currency (e.g. Antarctica).
        currency = Currency.getInstance(locale);
    } catch (IllegalArgumentException ignored) {
        // Language-only locales such as Locale.ENGLISH carry no supported country code.
        currency = null;
    }
    if (currency == null) {
        return numberFormat.format(number);
    }
    numberFormat.setCurrency(currency);
    numberFormat.setMaximumFractionDigits(currency.getDefaultFractionDigits());
    String formatted = numberFormat.format(number);
    return formatted.replace(currency.getCurrencyCode(), currency.getSymbol(locale));
}
Also used : Currency(java.util.Currency) NumberFormat(java.text.NumberFormat)

Aggregations

NumberFormat (java.text.NumberFormat)471 DecimalFormat (java.text.DecimalFormat)92 ArrayList (java.util.ArrayList)24 HashMap (java.util.HashMap)24 BigDecimal (java.math.BigDecimal)23 Locale (java.util.Locale)22 Map (java.util.Map)18 Test (org.junit.Test)17 ParseException (java.text.ParseException)16 DecimalFormatSymbols (java.text.DecimalFormatSymbols)14 JFreeChart (org.jfree.chart.JFreeChart)13 IOException (java.io.IOException)12 ParsePosition (java.text.ParsePosition)12 XYSeries (org.jfree.data.xy.XYSeries)11 XYSeriesCollection (org.jfree.data.xy.XYSeriesCollection)11 Intent (android.content.Intent)10 PrintWriter (java.io.PrintWriter)9 View (android.view.View)8 TextView (android.widget.TextView)8 Currency (java.util.Currency)8