Search in sources :

Example 96 with DecimalFormat

use of java.text.DecimalFormat in project CoreNLP by stanfordnlp.

the class SieveCoreferenceSystem method runAndScoreCoref.

/**
 * Runs the given coreference system over every document produced by the mention extractor,
 * optionally writing CoNLL output files and scoring the result.
 *
 * <p>When {@code Constants.PRINT_CONLL_OUTPUT} or {@code corefSystem.replicateCoNLL} is set, gold,
 * predicted and coref-predicted output files are created. Their names are built from the
 * {@code Constants.CONLL_OUTPUT_PROP} property (default {@code "conlloutput"}) and {@code timeStamp}.
 * When {@code Constants.SKIP_COREF} is set, only mention detection output is produced for each document.
 *
 * <p>If the {@code Constants.SCORE_FILE_PROP} property is present, the final score is written to that
 * file using the {@code "#.##"} decimal format.
 *
 * @param corefSystem The coreference system to run and score.
 * @param mentionExtractor The source of documents; it is reset before iteration starts.
 * @param props Properties consulted for output file names and the optional score file.
 * @param timeStamp A string embedded in the names of the generated CoNLL output files.
 * @return The final score: the value from {@code getFinalConllScore} when
 *         {@code corefSystem.optimizeConllScore} is set and the CoNLL summary was computed,
 *         the value from {@code corefSystem.getFinalScore} when it is not set and scoring is enabled,
 *         and 0 otherwise.
 * @throws Exception If any of the underlying I/O, coreference or scoring steps fails.
 */
public static double runAndScoreCoref(SieveCoreferenceSystem corefSystem, MentionExtractor mentionExtractor, Properties props, String timeStamp) throws Exception {
    // prepare conll output
    PrintWriter writerGold = null;
    PrintWriter writerPredicted = null;
    PrintWriter writerPredictedCoref = null;
    String conllOutputMentionGoldFile = null;
    String conllOutputMentionPredictedFile = null;
    String conllOutputMentionCorefPredictedFile = null;
    String conllMentionEvalFile = null;
    String conllMentionEvalErrFile = null;
    String conllMentionCorefEvalFile = null;
    String conllMentionCorefEvalErrFile = null;
    try {
        if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
            String conllOutput = props.getProperty(Constants.CONLL_OUTPUT_PROP, "conlloutput");
            conllOutputMentionGoldFile = conllOutput + "-" + timeStamp + ".gold.txt";
            conllOutputMentionPredictedFile = conllOutput + "-" + timeStamp + ".predicted.txt";
            conllOutputMentionCorefPredictedFile = conllOutput + "-" + timeStamp + ".coref.predicted.txt";
            conllMentionEvalFile = conllOutput + "-" + timeStamp + ".eval.txt";
            conllMentionEvalErrFile = conllOutput + "-" + timeStamp + ".eval.err.txt";
            conllMentionCorefEvalFile = conllOutput + "-" + timeStamp + ".coref.eval.txt";
            conllMentionCorefEvalErrFile = conllOutput + "-" + timeStamp + ".coref.eval.err.txt";
            logger.info("CONLL MENTION GOLD FILE: " + conllOutputMentionGoldFile);
            logger.info("CONLL MENTION PREDICTED FILE: " + conllOutputMentionPredictedFile);
            logger.info("CONLL MENTION EVAL FILE: " + conllMentionEvalFile);
            if (!Constants.SKIP_COREF) {
                logger.info("CONLL MENTION PREDICTED WITH COREF FILE: " + conllOutputMentionCorefPredictedFile);
                logger.info("CONLL MENTION WITH COREF EVAL FILE: " + conllMentionCorefEvalFile);
            }
            writerGold = new PrintWriter(new FileOutputStream(conllOutputMentionGoldFile));
            writerPredicted = new PrintWriter(new FileOutputStream(conllOutputMentionPredictedFile));
            writerPredictedCoref = new PrintWriter(new FileOutputStream(conllOutputMentionCorefPredictedFile));
        }
        mentionExtractor.resetDocs();
        if (corefSystem.doScore()) {
            corefSystem.initScorers();
        }
        while (true) {
            Document document = mentionExtractor.nextDoc();
            if (document == null) {
                break;
            }
            if (!props.containsKey(Constants.MUC_PROP)) {
                printRawDoc(document, true);
                printRawDoc(document, false);
            }
            printDiscourseStructure(document);
            if (corefSystem.doScore()) {
                document.extractGoldCorefClusters();
            }
            if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
                // Not doing coref - print conll output here
                printConllOutput(document, writerGold, true);
                printConllOutput(document, writerPredicted, false);
            }
            // run mention detection only
            if (Constants.SKIP_COREF) {
                continue;
            }
            // Do Coreference Resolution
            corefSystem.coref(document);
            if (corefSystem.doScore()) {
                //Identifying possible coreferring mentions in the corpus along with any recall/precision errors with gold corpus
                corefSystem.printTopK(logger, document, corefSystem.semantics);
                logger.fine("pairwise score for this doc: ");
                corefSystem.scoreSingleDoc.get(corefSystem.sieves.length - 1).printF1(logger);
                logger.fine("accumulated score: ");
                corefSystem.printF1(true);
                logger.fine("\n");
            }
            if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
                printConllOutput(document, writerPredictedCoref, false, true);
            }
        }
    } finally {
        // Close the CoNLL writers on every path, including exceptions, so that file handles
        // are not leaked. They must also be closed before the output files are evaluated below.
        if (writerGold != null) {
            writerGold.close();
        }
        if (writerPredicted != null) {
            writerPredicted.close();
        }
        if (writerPredictedCoref != null) {
            writerPredictedCoref.close();
        }
    }
    double finalScore = 0;
    if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
        //if(props.containsKey(Constants.CONLL_SCORER)) {
        if (corefSystem.conllMentionEvalScript != null) {
            //        runConllEval(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionPredictedFile, conllMentionEvalFile, conllMentionEvalErrFile);
            String summary = getConllEvalSummary(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionPredictedFile);
            logger.info("\nCONLL EVAL SUMMARY (Before COREF)");
            printScoreSummary(summary, logger, false);
            if (!Constants.SKIP_COREF) {
                //          runConllEval(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionCorefPredictedFile, conllMentionCorefEvalFile, conllMentionCorefEvalErrFile);
                summary = getConllEvalSummary(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionCorefPredictedFile);
                logger.info("\nCONLL EVAL SUMMARY (After COREF)");
                printScoreSummary(summary, logger, true);
                printFinalConllScore(summary);
                if (corefSystem.optimizeConllScore) {
                    finalScore = getFinalConllScore(summary, corefSystem.optimizeMetricType, corefSystem.optimizeSubScoreType.toString());
                }
            }
        }
    }
    if (!corefSystem.optimizeConllScore && corefSystem.doScore()) {
        finalScore = corefSystem.getFinalScore(corefSystem.optimizeMetricType, corefSystem.optimizeSubScoreType);
    }
    String scoresFile = props.getProperty(Constants.SCORE_FILE_PROP);
    if (scoresFile != null) {
        // try-with-resources closes the score file even if formatting or writing fails.
        try (PrintWriter pw = IOUtils.getPrintWriter(scoresFile)) {
            pw.println((new DecimalFormat("#.##")).format(finalScore));
        }
    }
    if (corefSystem.optimizeSieves) {
        logger.info("Final reported score for sieve optimization " + corefSystem.optimizeScoreType + " : " + finalScore);
    }
    return finalScore;
}
Also used : FileOutputStream(java.io.FileOutputStream) DecimalFormat(java.text.DecimalFormat) PrintWriter(java.io.PrintWriter)

Example 97 with DecimalFormat

use of java.text.DecimalFormat in project CoreNLP by stanfordnlp.

the class CRFClassifier method printLabelValue.

/**
 * Prints to standard output, for each position in the document, the text of the item at that
 * position followed by tables of summed feature weights.
 *
 * <p>Each table row corresponds to a feature known to {@code featureIndex}; the row header is the
 * feature name suffixed with {@code [-offset]}, and the columns are the entries of
 * {@code classIndex}. Rows accumulate across offsets for a position, so each table printed for a
 * later offset also contains the rows collected at earlier offsets.
 *
 * <p>If {@code flags.useReverse} is set, the document is reversed in place before printing and
 * reversed again afterwards.
 *
 * @param document The document whose feature weights are printed.
 */
public void printLabelValue(List<IN> document) {
    if (flags.useReverse) {
        Collections.reverse(document);
    }
    NumberFormat formatter = new DecimalFormat();
    // Column headers are the class names in classIndex order.
    int numClasses = classIndex.size();
    String[] columnHeaders = new String[numClasses];
    for (int c = 0; c < numClasses; c++) {
        columnHeaders[c] = classIndex.get(c);
    }
    for (int pos = 0; pos < document.size(); pos++) {
        System.out.println("--== " + document.get(pos).get(CoreAnnotations.TextAnnotation.class) + " ==--");
        List<String[]> tableRows = new ArrayList<>();
        List<String> rowHeaders = new ArrayList<>();
        for (int offset = 0; offset < labelIndices.size(); offset++) {
            // Skip offsets that would look past the end of the document.
            if (pos + offset >= document.size()) {
                continue;
            }
            CRFDatum<List<String>, CRFLabel> datum = makeDatum(document, pos + offset, featureFactories);
            List<List<String>> cliques = datum.asFeatures();
            int numCliques = cliques.size();
            for (int k = offset; k < numCliques; k++) {
                for (String feature : cliques.get(k)) {
                    int featurePos = featureIndex.indexOf(feature);
                    if (featurePos < 0) {
                        // Feature is not in the index; it contributes no row.
                        continue;
                    }
                    rowHeaders.add(feature + '[' + (-offset) + ']');
                    // Sum the weights of every label in clique k into the slot chosen by
                    // the label component at position (length - 1 - offset).
                    double[] sums = new double[labelIndices.get(0).size()];
                    for (CRFLabel label : labelIndices.get(k)) {
                        int[] components = label.getLabel();
                        double weight = weights[featurePos][labelIndices.get(k).indexOf(label)];
                        sums[components[components.length - 1 - offset]] += weight;
                    }
                    String[] row = new String[sums.length];
                    for (int c = 0; c < sums.length; c++) {
                        row[c] = formatter.format(sums[c]);
                    }
                    tableRows.add(row);
                }
            }
            System.out.println(StringUtils.makeTextTable(tableRows.toArray(new String[tableRows.size()][0]), rowHeaders.toArray(new String[rowHeaders.size()]), columnHeaders, 0, 1, true));
            System.out.println();
        }
    }
    if (flags.useReverse) {
        Collections.reverse(document);
    }
}
Also used : DecimalFormat(java.text.DecimalFormat) NumberFormat(java.text.NumberFormat)

Example 98 with DecimalFormat

use of java.text.DecimalFormat in project CoreNLP by stanfordnlp.

the class CreateClauseDataset method processDirectory.

/**
   * Process all the trees in the given directory. For example, the WSJ section of the Penn Treebank.
   * Files are found recursively and must have the "mrg" extension. A tree that fails to process
   * has its stack trace printed and is skipped; processing continues with the next tree.
   *
   * @param name The name of the directory we are processing.
   * @param directory The directory we are processing.
   * @return A dataset of subject/object pairs in the trees in the directory.
   *         This is a list of sentences, such that each sentence has a collection of pairs of spans.
   *         Each pair of spans is a subject/object span pair that constitutes a valid extraction.
   * @throws IOException If a tree file cannot be opened, read, or closed.
   */
private static List<Pair<CoreMap, Collection<Pair<Span, Span>>>> processDirectory(String name, File directory) throws IOException {
    forceTrack("Processing " + name);
    // Prepare the files to iterate over
    Iterable<File> files = IOUtils.iterFilesRecursive(directory, "mrg");
    Tree tree;
    int numTreesProcessed = 0;
    List<Pair<CoreMap, Collection<Pair<Span, Span>>>> trainingData = new ArrayList<>(1024);
    // Created once rather than on every progress message
    DecimalFormat progressFormat = new DecimalFormat("00000");
    // Iterate over the files
    for (File file : files) {
        //      log(file);
        // try-with-resources closes each file's reader; previously one handle leaked per file
        try (TreeReader reader = new PennTreeReader(IOUtils.readerFromFile(file))) {
            while ((tree = reader.readTree()) != null) {
                try {
                    // Prepare the tree
                    tree.indexSpans();
                    tree.setSpans();
                    // Get relevant information from sentence
                    List<CoreLabel> tokens = tree.getLeaves().stream().map(leaf -> (CoreLabel) leaf.label()).collect(Collectors.toList());
                    SemanticGraph graph = parse(tree);
                    Map<Integer, Span> targets = findTraceTargets(tree);
                    Map<Integer, Integer> sources = findTraceSources(tree);
                    // Create a sentence object
                    CoreMap sentence = new ArrayCoreMap(4) {

                        {
                            set(CoreAnnotations.TokensAnnotation.class, tokens);
                            set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph);
                            set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, graph);
                            set(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class, graph);
                        }
                    };
                    natlog.doOneSentence(null, sentence);
                    // Generate training data
                    Collection<Pair<Span, Span>> trainingDataFromSentence = subjectObjectPairs(graph, tokens, targets, sources);
                    trainingData.add(Pair.makePair(sentence, trainingDataFromSentence));
                    // Debug print
                    numTreesProcessed += 1;
                    if (numTreesProcessed % 100 == 0) {
                        log("[" + progressFormat.format(numTreesProcessed) + "] " + countDatums(trainingData) + " known extractions");
                    }
                } catch (Throwable t) {
                    // Best-effort: a single bad tree must not abort the whole directory
                    t.printStackTrace();
                }
            }
        }
    }
    // End
    log("" + numTreesProcessed + " trees processed yielding " + countDatums(trainingData) + " known extractions");
    endTrack("Processing " + name);
    return trainingData;
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) java.util(java.util) edu.stanford.nlp.util(edu.stanford.nlp.util) Tree(edu.stanford.nlp.trees.Tree) SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) UniversalEnglishGrammaticalStructureFactory(edu.stanford.nlp.trees.UniversalEnglishGrammaticalStructureFactory) Matcher(java.util.regex.Matcher) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) IndexedWord(edu.stanford.nlp.ling.IndexedWord) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) IOUtils(edu.stanford.nlp.io.IOUtils) Redwood(edu.stanford.nlp.util.logging.Redwood) PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) DecimalFormat(java.text.DecimalFormat) Util(edu.stanford.nlp.util.logging.Redwood.Util) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) Span(edu.stanford.nlp.ie.machinereading.structure.Span) Annotation(edu.stanford.nlp.pipeline.Annotation) HasIndex(edu.stanford.nlp.ling.HasIndex) TSVSentenceProcessor(edu.stanford.nlp.process.TSVSentenceProcessor) Pattern(java.util.regex.Pattern) InputStream(java.io.InputStream) DecimalFormat(java.text.DecimalFormat) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeReader(edu.stanford.nlp.trees.TreeReader) PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) Span(edu.stanford.nlp.ie.machinereading.structure.Span) CoreLabel(edu.stanford.nlp.ling.CoreLabel) PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) 
File(java.io.File)

Example 99 with DecimalFormat

use of java.text.DecimalFormat in project CoreNLP by stanfordnlp.

the class StochasticDiffFunctionTester method arrayToFile.

/**
 * Writes the elements of an array to a file on a single line, each formatted with the
 * pattern {@code "0.000E0"} and followed by two spaces.
 *
 * <p>If the file cannot be opened, the error is logged and the JVM exits with status 1.
 *
 * @param thisArray The values to write.
 * @param fileName The path of the file to create or overwrite.
 */
public void arrayToFile(double[] thisArray, String fileName) {
    NumberFormat nf = new DecimalFormat("0.000E0");
    // try-with-resources closes the writer even if formatting or writing throws
    try (PrintWriter file = new PrintWriter(new FileOutputStream(fileName), true)) {
        for (double element : thisArray) {
            file.print(nf.format(element) + "  ");
        }
    } catch (IOException e) {
        log.info("Caught IOException outputing List to file: " + e.getMessage());
        System.exit(1);
    }
}
Also used : DecimalFormat(java.text.DecimalFormat) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter) NumberFormat(java.text.NumberFormat)

Example 100 with DecimalFormat

use of java.text.DecimalFormat in project CoreNLP by stanfordnlp.

the class Util method dumpAccuracy.

/**
   * A helper function for dumping the accuracy of the trained classifier.
   * Logs the dataset size, the number of datums labelled CLAUSE_SPLIT and CLAUSE_INTERM,
   * and the precision, recall and F1 for each of those two labels, formatted as percentages.
   * F1 is reported as 0 when precision + recall is 0, rather than as NaN.
   *
   * @param classifier The classifier to evaluate.
   * @param dataset The dataset to evaluate the classifier on.
   */
public static void dumpAccuracy(Classifier<ClauseSplitter.ClauseClassifierLabel, String> classifier, GeneralDataset<ClauseSplitter.ClauseClassifierLabel, String> dataset) {
    DecimalFormat df = new DecimalFormat("0.00%");
    log("size:         " + dataset.size());
    // count() avoids materialising a list only to read its size
    log("split count:  " + StreamSupport.stream(dataset.spliterator(), false).filter(x -> x.label() == ClauseSplitter.ClauseClassifierLabel.CLAUSE_SPLIT).count());
    log("interm count: " + StreamSupport.stream(dataset.spliterator(), false).filter(x -> x.label() == ClauseSplitter.ClauseClassifierLabel.CLAUSE_INTERM).count());
    Pair<Double, Double> pr = classifier.evaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.CLAUSE_SPLIT);
    double splitSum = pr.first + pr.second;
    log("p  (split):   " + df.format(pr.first));
    log("r  (split):   " + df.format(pr.second));
    // Guard the 0/0 case, which would otherwise print NaN
    log("f1 (split):   " + df.format(splitSum == 0.0 ? 0.0 : 2 * pr.first * pr.second / splitSum));
    pr = classifier.evaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.CLAUSE_INTERM);
    double intermSum = pr.first + pr.second;
    log("p  (interm):  " + df.format(pr.first));
    log("r  (interm):  " + df.format(pr.second));
    log("f1 (interm):  " + df.format(intermSum == 0.0 ? 0.0 : 2 * pr.first * pr.second / intermSum));
}
Also used : DecimalFormat(java.text.DecimalFormat)

Aggregations

DecimalFormat (java.text.DecimalFormat)719 DecimalFormatSymbols (java.text.DecimalFormatSymbols)89 NumberFormat (java.text.NumberFormat)88 BigDecimal (java.math.BigDecimal)45 IOException (java.io.IOException)42 ArrayList (java.util.ArrayList)40 IFormatterTextCallBack (org.xclcharts.common.IFormatterTextCallBack)33 ParseException (java.text.ParseException)30 IFormatterDoubleCallBack (org.xclcharts.common.IFormatterDoubleCallBack)29 Support_DecimalFormat (tests.support.Support_DecimalFormat)28 File (java.io.File)24 Date (java.util.Date)24 SimpleDateFormat (java.text.SimpleDateFormat)22 HashMap (java.util.HashMap)21 Locale (java.util.Locale)21 PrintWriter (java.io.PrintWriter)20 UsageVO (com.cloud.usage.UsageVO)19 JFreeChart (org.jfree.chart.JFreeChart)18 List (java.util.List)17 TextView (android.widget.TextView)15