Example usage of java.text.DecimalFormat in the stanfordnlp/CoreNLP project: class SieveCoreferenceSystem, method runAndScoreCoref.
/**
 * Runs the full coreference pipeline over every document produced by the given
 * {@link MentionExtractor}, optionally writing CoNLL-format output files and
 * invoking the external CoNLL scorer, and returns the final evaluation score.
 *
 * @param corefSystem      the sieve-based coreference system to run and score
 * @param mentionExtractor source of documents; iterated until {@code nextDoc()} returns null
 * @param props            run configuration (CoNLL output path, MUC property, score file, ...)
 * @param timeStamp        suffix used to make the CoNLL output file names unique per run
 * @return the final score: the CoNLL metric when {@code optimizeConllScore} is set,
 *         otherwise the internal pairwise score when scoring is enabled; 0 if no
 *         scoring was performed
 * @throws Exception if document extraction, coreference resolution, or scoring fails
 */
public static double runAndScoreCoref(SieveCoreferenceSystem corefSystem, MentionExtractor mentionExtractor, Properties props, String timeStamp) throws Exception {
  // Prepare CoNLL output: three writers (gold mentions, predicted mentions,
  // predicted mentions with coref chains) plus derived eval-file names.
  PrintWriter writerGold = null;
  PrintWriter writerPredicted = null;
  PrintWriter writerPredictedCoref = null;
  String conllOutputMentionGoldFile = null;
  String conllOutputMentionPredictedFile = null;
  String conllOutputMentionCorefPredictedFile = null;
  // NOTE(review): the four *EvalFile names below are only consumed by the
  // commented-out runConllEval calls; kept for parity with that code path.
  String conllMentionEvalFile = null;
  String conllMentionEvalErrFile = null;
  String conllMentionCorefEvalFile = null;
  String conllMentionCorefEvalErrFile = null;
  if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
    String conllOutput = props.getProperty(Constants.CONLL_OUTPUT_PROP, "conlloutput");
    conllOutputMentionGoldFile = conllOutput + "-" + timeStamp + ".gold.txt";
    conllOutputMentionPredictedFile = conllOutput + "-" + timeStamp + ".predicted.txt";
    conllOutputMentionCorefPredictedFile = conllOutput + "-" + timeStamp + ".coref.predicted.txt";
    conllMentionEvalFile = conllOutput + "-" + timeStamp + ".eval.txt";
    conllMentionEvalErrFile = conllOutput + "-" + timeStamp + ".eval.err.txt";
    conllMentionCorefEvalFile = conllOutput + "-" + timeStamp + ".coref.eval.txt";
    conllMentionCorefEvalErrFile = conllOutput + "-" + timeStamp + ".coref.eval.err.txt";
    logger.info("CONLL MENTION GOLD FILE: " + conllOutputMentionGoldFile);
    logger.info("CONLL MENTION PREDICTED FILE: " + conllOutputMentionPredictedFile);
    logger.info("CONLL MENTION EVAL FILE: " + conllMentionEvalFile);
    if (!Constants.SKIP_COREF) {
      logger.info("CONLL MENTION PREDICTED WITH COREF FILE: " + conllOutputMentionCorefPredictedFile);
      logger.info("CONLL MENTION WITH COREF EVAL FILE: " + conllMentionCorefEvalFile);
    }
    writerGold = new PrintWriter(new FileOutputStream(conllOutputMentionGoldFile));
    writerPredicted = new PrintWriter(new FileOutputStream(conllOutputMentionPredictedFile));
    writerPredictedCoref = new PrintWriter(new FileOutputStream(conllOutputMentionCorefPredictedFile));
  }
  try {
    mentionExtractor.resetDocs();
    if (corefSystem.doScore()) {
      corefSystem.initScorers();
    }
    // Process every document produced by the extractor.
    while (true) {
      Document document = mentionExtractor.nextDoc();
      if (document == null) {
        break;
      }
      if (!props.containsKey(Constants.MUC_PROP)) {
        printRawDoc(document, true);
        printRawDoc(document, false);
      }
      printDiscourseStructure(document);
      if (corefSystem.doScore()) {
        document.extractGoldCorefClusters();
      }
      if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
        // Not doing coref yet - print pre-coref conll output here
        printConllOutput(document, writerGold, true);
        printConllOutput(document, writerPredicted, false);
      }
      // run mention detection only
      if (Constants.SKIP_COREF) {
        continue;
      }
      // Do Coreference Resolution
      corefSystem.coref(document);
      if (corefSystem.doScore()) {
        // Identifying possible coreferring mentions in the corpus along with
        // any recall/precision errors with gold corpus
        corefSystem.printTopK(logger, document, corefSystem.semantics);
        logger.fine("pairwise score for this doc: ");
        corefSystem.scoreSingleDoc.get(corefSystem.sieves.length - 1).printF1(logger);
        logger.fine("accumulated score: ");
        corefSystem.printF1(true);
        logger.fine("\n");
      }
      if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
        printConllOutput(document, writerPredictedCoref, false, true);
      }
    }
  } finally {
    // BUGFIX: previously the writers were only closed on the success path, so
    // any exception thrown while processing documents leaked all three file
    // handles (and left the CoNLL output files truncated without a close/flush).
    // PrintWriter.close() does not throw, so plain sequential closes are safe.
    if (writerGold != null) { writerGold.close(); }
    if (writerPredicted != null) { writerPredicted.close(); }
    if (writerPredictedCoref != null) { writerPredictedCoref.close(); }
  }
  double finalScore = 0;
  if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
    // Writers were closed in the finally block above, so the output files are
    // complete and may be handed to the external CoNLL scorer.
    //if(props.containsKey(Constants.CONLL_SCORER)) {
    if (corefSystem.conllMentionEvalScript != null) {
      // runConllEval(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionPredictedFile, conllMentionEvalFile, conllMentionEvalErrFile);
      String summary = getConllEvalSummary(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionPredictedFile);
      logger.info("\nCONLL EVAL SUMMARY (Before COREF)");
      printScoreSummary(summary, logger, false);
      if (!Constants.SKIP_COREF) {
        // runConllEval(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionCorefPredictedFile, conllMentionCorefEvalFile, conllMentionCorefEvalErrFile);
        summary = getConllEvalSummary(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionCorefPredictedFile);
        logger.info("\nCONLL EVAL SUMMARY (After COREF)");
        printScoreSummary(summary, logger, true);
        printFinalConllScore(summary);
        if (corefSystem.optimizeConllScore) {
          finalScore = getFinalConllScore(summary, corefSystem.optimizeMetricType, corefSystem.optimizeSubScoreType.toString());
        }
      }
    }
  }
  // Fall back to the internal pairwise score when not optimizing the CoNLL metric.
  if (!corefSystem.optimizeConllScore && corefSystem.doScore()) {
    finalScore = corefSystem.getFinalScore(corefSystem.optimizeMetricType, corefSystem.optimizeSubScoreType);
  }
  // Optionally persist the final score (two decimal places) for external tooling.
  String scoresFile = props.getProperty(Constants.SCORE_FILE_PROP);
  if (scoresFile != null) {
    PrintWriter pw = IOUtils.getPrintWriter(scoresFile);
    pw.println((new DecimalFormat("#.##")).format(finalScore));
    pw.close();
  }
  if (corefSystem.optimizeSieves) {
    logger.info("Final reported score for sieve optimization " + corefSystem.optimizeScoreType + " : " + finalScore);
  }
  return finalScore;
}
Example usage of java.text.DecimalFormat in the stanfordnlp/CoreNLP project: class CRFClassifier, method printLabelValue.
/**
 * Debugging dump: for each token in the document, prints to System.out a text
 * table of the weight mass each training-time feature contributes to each
 * output label, according to the CRF's learned weights.
 *
 * NOTE(review): when flags.useReverse is set, the document list is reversed in
 * place on entry and reversed back before returning, so this method mutates
 * its argument transiently — not safe for concurrent readers of the list.
 *
 * @param document the labeled token sequence to inspect
 */
public void printLabelValue(List<IN> document) {
// Match the ordering used at training time; undone at the end of the method.
if (flags.useReverse) {
Collections.reverse(document);
}
// Default (locale-sensitive) number format for the printed weight values.
NumberFormat nf = new DecimalFormat();
// Column headers: one column per output class, in classIndex order.
List<String> classes = new ArrayList<>();
for (int i = 0; i < classIndex.size(); i++) {
classes.add(classIndex.get(i));
}
String[] columnHeaders = classes.toArray(new String[classes.size()]);
// log.info("docSize:"+docSize);
for (int j = 0; j < document.size(); j++) {
System.out.println("--== " + document.get(j).get(CoreAnnotations.TextAnnotation.class) + " ==--");
List<String[]> lines = new ArrayList<>();
List<String> rowHeaders = new ArrayList<>();
List<String> line = new ArrayList<>();
// p is the offset of position j within a clique anchored at j + p.
for (int p = 0; p < labelIndices.size(); p++) {
if (j + p >= document.size()) {
continue;
}
CRFDatum<List<String>, CRFLabel> d = makeDatum(document, j + p, featureFactories);
List<List<String>> features = d.asFeatures();
// Cliques of size <= p do not reach back to position j; start at k = p.
for (int k = p, fSize = features.size(); k < fSize; k++) {
Collection<String> cliqueFeatures = features.get(k);
for (String feature : cliqueFeatures) {
int index = featureIndex.indexOf(feature);
// Only features seen at training time (index >= 0) have weights to report.
if (index >= 0) {
// line.add(feature+"["+(-p)+"]");
rowHeaders.add(feature + '[' + (-p) + ']');
// One accumulator per output class; labelIndices.get(0) holds the
// single-node labels, so its size is the number of classes.
double[] values = new double[labelIndices.get(0).size()];
for (CRFLabel label : labelIndices.get(k)) {
int[] l = label.getLabel();
double v = weights[index][labelIndices.get(k).indexOf(label)];
// Attribute this clique-label weight to the class it assigns at position j.
values[l[l.length - 1 - p]] += v;
}
for (double value : values) {
line.add(nf.format(value));
}
lines.add(line.toArray(new String[line.size()]));
line = new ArrayList<>();
}
}
}
// lines.add(Collections.<String>emptyList());
// NOTE(review): this table is printed once per clique offset p (the braces
// place it inside the p-loop), so it repeats with growing rows for each
// token — confirm whether printing per-token (after the p-loop) was intended.
System.out.println(StringUtils.makeTextTable(lines.toArray(new String[lines.size()][0]), rowHeaders.toArray(new String[rowHeaders.size()]), columnHeaders, 0, 1, true));
System.out.println();
}
// log.info(edu.stanford.nlp.util.StringUtils.join(lines,"\n"));
}
// Restore the caller's original ordering (see the reversal at the top).
if (flags.useReverse) {
Collections.reverse(document);
}
}
Example usage of java.text.DecimalFormat in the stanfordnlp/CoreNLP project: class CreateClauseDataset, method processDirectory.
/**
* Process all the trees in the given directory. For example, the WSJ section of the Penn Treebank.
*
* @param name The name of the directory we are processing.
* @param directory The directory we are processing.
* @return A dataset of subject/object pairs in the trees in the directory.
* This is a list of sentences, such that each sentence has a collection of pairs of spans.
* Each pair of spans is a subject/object span pair that constitutes a valid extraction.
* @throws IOException
*/
/**
 * Process all the trees in the given directory. For example, the WSJ section of the Penn Treebank.
 *
 * @param name The name of the directory we are processing (used only for log tracking).
 * @param directory The directory we are processing; searched recursively for "*.mrg" files.
 * @return A dataset of subject/object pairs in the trees in the directory.
 * This is a list of sentences, such that each sentence has a collection of pairs of spans.
 * Each pair of spans is a subject/object span pair that constitutes a valid extraction.
 * @throws IOException if a treebank file cannot be opened or read
 */
private static List<Pair<CoreMap, Collection<Pair<Span, Span>>>> processDirectory(String name, File directory) throws IOException {
forceTrack("Processing " + name);
// Prepare the files to iterate over
Iterable<File> files = IOUtils.iterFilesRecursive(directory, "mrg");
Tree tree;
int numTreesProcessed = 0;
List<Pair<CoreMap, Collection<Pair<Span, Span>>>> trainingData = new ArrayList<>(1024);
// Iterate over the files
for (File file : files) {
// log(file);
TreeReader reader = new PennTreeReader(IOUtils.readerFromFile(file));
while ((tree = reader.readTree()) != null) {
try {
// Prepare the tree: index and set character/token spans before extraction.
tree.indexSpans();
tree.setSpans();
// Get relevant information from sentence
List<CoreLabel> tokens = tree.getLeaves().stream().map(leaf -> (CoreLabel) leaf.label()).collect(Collectors.toList());
SemanticGraph graph = parse(tree);
Map<Integer, Span> targets = findTraceTargets(tree);
Map<Integer, Integer> sources = findTraceSources(tree);
// Create a sentence object; the same graph is registered under all three
// dependency annotation keys so downstream consumers find it either way.
CoreMap sentence = new ArrayCoreMap(4) {
{
set(CoreAnnotations.TokensAnnotation.class, tokens);
set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph);
set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, graph);
set(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class, graph);
}
};
natlog.doOneSentence(null, sentence);
// Generate training data
Collection<Pair<Span, Span>> trainingDataFromSentence = subjectObjectPairs(graph, tokens, targets, sources);
trainingData.add(Pair.makePair(sentence, trainingDataFromSentence));
// Debug print: progress line every 100 trees, zero-padded to align columns.
numTreesProcessed += 1;
if (numTreesProcessed % 100 == 0) {
log("[" + new DecimalFormat("00000").format(numTreesProcessed) + "] " + countDatums(trainingData) + " known extractions");
}
} catch (Throwable t) {
// Deliberately best-effort: a malformed tree (or any per-tree failure)
// is logged and skipped so one bad parse doesn't abort the whole corpus.
// NOTE(review): catching Throwable also swallows Errors (e.g. OOM) —
// consider narrowing to Exception.
t.printStackTrace();
}
}
}
// End
log("" + numTreesProcessed + " trees processed yielding " + countDatums(trainingData) + " known extractions");
endTrack("Processing " + name);
return trainingData;
}
Example usage of java.text.DecimalFormat in the stanfordnlp/CoreNLP project: class StochasticDiffFunctionTester, method arrayToFile.
/**
 * Writes the given array to a file as space-separated values in scientific
 * notation ("0.000E0"), overwriting any existing file.
 *
 * Preserves the original policy of terminating the JVM (exit code 1) when the
 * output file cannot be opened.
 *
 * @param thisArray the values to write
 * @param fileName  path of the output file
 */
public void arrayToFile(double[] thisArray, String fileName) {
    NumberFormat nf = new DecimalFormat("0.000E0");
    // BUGFIX: use try-with-resources so the writer is closed even if an
    // exception escapes mid-write; previously the PrintWriter was never
    // closed on any failure between open and the final close().
    try (PrintWriter file = new PrintWriter(new FileOutputStream(fileName), true)) {
        for (double element : thisArray) {
            file.print(nf.format(element) + " ");
        }
    } catch (IOException e) {
        log.info("Caught IOException outputing List to file: " + e.getMessage());
        System.exit(1);
    }
}
Example usage of java.text.DecimalFormat in the stanfordnlp/CoreNLP project: class Util, method dumpAccuracy.
/**
* A helper function for dumping the accuracy of the trained classifier.
*
* @param classifier The classifier to evaluate.
* @param dataset The dataset to evaluate the classifier on.
*/
/**
 * A helper function for dumping the accuracy of the trained classifier.
 *
 * Logs the dataset size, the per-label counts for CLAUSE_SPLIT and
 * CLAUSE_INTERM, and precision / recall / F1 for each of those labels,
 * formatted as percentages with two decimal places.
 *
 * @param classifier The classifier to evaluate.
 * @param dataset The dataset to evaluate the classifier on.
 */
public static void dumpAccuracy(Classifier<ClauseSplitter.ClauseClassifierLabel, String> classifier, GeneralDataset<ClauseSplitter.ClauseClassifierLabel, String> dataset) {
    DecimalFormat df = new DecimalFormat("0.00%");
    log("size: " + dataset.size());
    // PERF: count() instead of collect(toList()).size() — the original
    // materialized a throwaway list of every matching datum just to read
    // its size; the logged output is identical.
    log("split count: " + StreamSupport.stream(dataset.spliterator(), false).filter(x -> x.label() == ClauseSplitter.ClauseClassifierLabel.CLAUSE_SPLIT).count());
    log("interm count: " + StreamSupport.stream(dataset.spliterator(), false).filter(x -> x.label() == ClauseSplitter.ClauseClassifierLabel.CLAUSE_INTERM).count());
    Pair<Double, Double> pr = classifier.evaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.CLAUSE_SPLIT);
    log("p (split): " + df.format(pr.first));
    log("r (split): " + df.format(pr.second));
    // NOTE(review): when p + r == 0 this formats NaN; harmless for a log line.
    log("f1 (split): " + df.format(2 * pr.first * pr.second / (pr.first + pr.second)));
    pr = classifier.evaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.CLAUSE_INTERM);
    log("p (interm): " + df.format(pr.first));
    log("r (interm): " + df.format(pr.second));
    log("f1 (interm): " + df.format(2 * pr.first * pr.second / (pr.first + pr.second)));
}
End of aggregated java.text.DecimalFormat usage examples.