Usage example of java.io.PrintWriter from the stanfordnlp/CoreNLP project.
Snippet: the writeModel method of the PairwiseModel class.
/**
 * Serializes this model into the given directory: a textual "config" file,
 * a human-readable "weights" dump, and the binary "model.ser" weights.
 *
 * @param outputPath directory to write the model files into; created
 *                   (including missing parents) if it does not exist
 * @throws Exception if any of the output files cannot be written
 */
public void writeModel(String outputPath) throws Exception {
File outDir = new File(outputPath);
if (!outDir.exists()) {
// mkdirs() also creates missing parent directories; the original mkdir()
// silently fails (returns false, unchecked) when the parent is absent.
outDir.mkdirs();
}
// Join paths through File so every output lands inside outDir.
// The original concatenated outputPath + "config" with no separator, which
// writes outside the directory unless outputPath already ends with "/".
try (PrintWriter writer = new PrintWriter(new File(outDir, "config"), "UTF-8")) {
writer.print(str);
}
try (PrintWriter writer = new PrintWriter(new File(outDir, "weights"), "UTF-8")) {
classifier.printWeightVector(writer);
}
classifier.writeWeights(new File(outDir, "model.ser").getPath());
}
Usage example of java.io.PrintWriter from the stanfordnlp/CoreNLP project.
Snippet: the test method of the PairwiseModelTrainer class.
/**
 * Scores the serialized test set with the given model, writing per-example
 * predictions to a text file and the collected scores to a .ser file.
 *
 * @param model             the trained pairwise model to evaluate
 * @param predictionsName   base name for the prediction output files, resolved
 *                          against the model's default output path
 * @param anaphoricityModel if true, build anaphoricity examples instead of
 *                          pairwise coreference examples
 * @throws Exception if reading the test data or writing the output fails
 */
public static void test(PairwiseModel model, String predictionsName, boolean anaphoricityModel) throws Exception {
Redwood.log("scoref-train", "Reading compression...");
Compressor<String> compressor = IOUtils.readObjectFromFile(StatisticalCorefTrainer.compressorFile);
Redwood.log("scoref-train", "Reading test data...");
List<DocumentExamples> testDocuments = IOUtils.readObjectFromFile(StatisticalCorefTrainer.extractedFeaturesFile);
Redwood.log("scoref-train", "Building test set...");
List<Pair<Example, Map<Integer, CompressedFeatureVector>>> allExamples = anaphoricityModel ? getAnaphoricityExamples(testDocuments) : getExamples(testDocuments);
Redwood.log("scoref-train", "Testing...");
Map<Integer, Counter<Pair<Integer, Integer>>> scores = new HashMap<>();
// try-with-resources guarantees the writer is closed even if scoring throws;
// the original only closed writers on the happy path and leaked on exception.
try (PrintWriter writer = new PrintWriter(model.getDefaultOutputPath() + predictionsName)) {
writeScores(allExamples, compressor, model, writer, scores);
}
if (model instanceof MaxMarginMentionRanker) {
// Max-margin rankers additionally get anaphoricity scores written to a
// separate "_anaphoricity" file. The feature file is re-read here,
// presumably because building the first example set consumed/modified
// the document list — TODO confirm.
try (PrintWriter anaphoricityWriter = new PrintWriter(model.getDefaultOutputPath() + predictionsName + "_anaphoricity")) {
testDocuments = IOUtils.readObjectFromFile(StatisticalCorefTrainer.extractedFeaturesFile);
allExamples = getAnaphoricityExamples(testDocuments);
writeScores(allExamples, compressor, model, anaphoricityWriter, scores);
}
}
IOUtils.writeObjectToFile(scores, model.getDefaultOutputPath() + predictionsName + ".ser");
}
Usage example of java.io.PrintWriter from the stanfordnlp/CoreNLP project.
Snippet: the runAndScoreCoref method of the SieveCoreferenceSystem class.
/**
 * Runs the sieve coreference system over every document supplied by the
 * mention extractor, optionally emits CoNLL-format output files and invokes
 * the external CoNLL scorer, and returns a final summary score.
 *
 * @param corefSystem      the configured sieve coreference system to run
 * @param mentionExtractor supplies documents one at a time via nextDoc()
 * @param props            runtime configuration (output paths, scorer flags)
 * @param timeStamp        suffix appended to CoNLL output file names so that
 *                         repeated runs do not overwrite each other
 * @return the final score: the CoNLL-scorer score when optimizeConllScore is
 *         set, otherwise the system's internal score when scoring is enabled,
 *         otherwise 0
 * @throws Exception if document processing, file I/O, or scoring fails
 */
public static double runAndScoreCoref(SieveCoreferenceSystem corefSystem, MentionExtractor mentionExtractor, Properties props, String timeStamp) throws Exception {
// prepare conll output
// These stay null unless CoNLL output is enabled; every later use is guarded
// by the same (PRINT_CONLL_OUTPUT || replicateCoNLL) condition.
PrintWriter writerGold = null;
PrintWriter writerPredicted = null;
PrintWriter writerPredictedCoref = null;
String conllOutputMentionGoldFile = null;
String conllOutputMentionPredictedFile = null;
String conllOutputMentionCorefPredictedFile = null;
// NOTE(review): the four *Eval*File names below are only referenced by the
// commented-out runConllEval(...) calls further down — assigned, never read.
String conllMentionEvalFile = null;
String conllMentionEvalErrFile = null;
String conllMentionCorefEvalFile = null;
String conllMentionCorefEvalErrFile = null;
if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
// Derive all output file names from the configured prefix plus timestamp.
String conllOutput = props.getProperty(Constants.CONLL_OUTPUT_PROP, "conlloutput");
conllOutputMentionGoldFile = conllOutput + "-" + timeStamp + ".gold.txt";
conllOutputMentionPredictedFile = conllOutput + "-" + timeStamp + ".predicted.txt";
conllOutputMentionCorefPredictedFile = conllOutput + "-" + timeStamp + ".coref.predicted.txt";
conllMentionEvalFile = conllOutput + "-" + timeStamp + ".eval.txt";
conllMentionEvalErrFile = conllOutput + "-" + timeStamp + ".eval.err.txt";
conllMentionCorefEvalFile = conllOutput + "-" + timeStamp + ".coref.eval.txt";
conllMentionCorefEvalErrFile = conllOutput + "-" + timeStamp + ".coref.eval.err.txt";
logger.info("CONLL MENTION GOLD FILE: " + conllOutputMentionGoldFile);
logger.info("CONLL MENTION PREDICTED FILE: " + conllOutputMentionPredictedFile);
logger.info("CONLL MENTION EVAL FILE: " + conllMentionEvalFile);
if (!Constants.SKIP_COREF) {
logger.info("CONLL MENTION PREDICTED WITH COREF FILE: " + conllOutputMentionCorefPredictedFile);
logger.info("CONLL MENTION WITH COREF EVAL FILE: " + conllMentionCorefEvalFile);
}
// NOTE(review): these writers are only closed on the normal path below; an
// exception thrown while processing documents leaks all three.
writerGold = new PrintWriter(new FileOutputStream(conllOutputMentionGoldFile));
writerPredicted = new PrintWriter(new FileOutputStream(conllOutputMentionPredictedFile));
writerPredictedCoref = new PrintWriter(new FileOutputStream(conllOutputMentionCorefPredictedFile));
}
mentionExtractor.resetDocs();
if (corefSystem.doScore()) {
corefSystem.initScorers();
}
// Main loop: pull documents until the extractor is exhausted (null = done).
while (true) {
Document document = mentionExtractor.nextDoc();
if (document == null)
break;
// presumably MUC_PROP marks MUC-formatted input whose raw text should not
// be echoed — TODO confirm against callers.
if (!props.containsKey(Constants.MUC_PROP)) {
printRawDoc(document, true);
printRawDoc(document, false);
}
printDiscourseStructure(document);
if (corefSystem.doScore()) {
document.extractGoldCorefClusters();
}
if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
// Not doing coref - print conll output here
printConllOutput(document, writerGold, true);
printConllOutput(document, writerPredicted, false);
}
// run mention detection only
if (Constants.SKIP_COREF) {
continue;
}
// Do Coreference Resolution
corefSystem.coref(document);
if (corefSystem.doScore()) {
//Identifying possible coreferring mentions in the corpus along with any recall/precision errors with gold corpus
corefSystem.printTopK(logger, document, corefSystem.semantics);
logger.fine("pairwise score for this doc: ");
// F1 of the last sieve, i.e. the cumulative result of the full pipeline.
corefSystem.scoreSingleDoc.get(corefSystem.sieves.length - 1).printF1(logger);
logger.fine("accumulated score: ");
corefSystem.printF1(true);
logger.fine("\n");
}
if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
printConllOutput(document, writerPredictedCoref, false, true);
}
}
double finalScore = 0;
if (Constants.PRINT_CONLL_OUTPUT || corefSystem.replicateCoNLL) {
// Flush/close the CoNLL files before handing them to the external scorer.
writerGold.close();
writerPredicted.close();
writerPredictedCoref.close();
//if(props.containsKey(Constants.CONLL_SCORER)) {
if (corefSystem.conllMentionEvalScript != null) {
// runConllEval(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionPredictedFile, conllMentionEvalFile, conllMentionEvalErrFile);
String summary = getConllEvalSummary(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionPredictedFile);
logger.info("\nCONLL EVAL SUMMARY (Before COREF)");
printScoreSummary(summary, logger, false);
if (!Constants.SKIP_COREF) {
// runConllEval(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionCorefPredictedFile, conllMentionCorefEvalFile, conllMentionCorefEvalErrFile);
summary = getConllEvalSummary(corefSystem.conllMentionEvalScript, conllOutputMentionGoldFile, conllOutputMentionCorefPredictedFile);
logger.info("\nCONLL EVAL SUMMARY (After COREF)");
printScoreSummary(summary, logger, true);
printFinalConllScore(summary);
if (corefSystem.optimizeConllScore) {
finalScore = getFinalConllScore(summary, corefSystem.optimizeMetricType, corefSystem.optimizeSubScoreType.toString());
}
}
}
}
// Fall back to the system's internal score when not optimizing for CoNLL.
if (!corefSystem.optimizeConllScore && corefSystem.doScore()) {
finalScore = corefSystem.getFinalScore(corefSystem.optimizeMetricType, corefSystem.optimizeSubScoreType);
}
String scoresFile = props.getProperty(Constants.SCORE_FILE_PROP);
if (scoresFile != null) {
// Persist the final score (two decimal places) for external tooling.
PrintWriter pw = IOUtils.getPrintWriter(scoresFile);
pw.println((new DecimalFormat("#.##")).format(finalScore));
pw.close();
}
if (corefSystem.optimizeSieves) {
logger.info("Final reported score for sieve optimization " + corefSystem.optimizeScoreType + " : " + finalScore);
}
return finalScore;
}
Usage example of java.io.PrintWriter from the stanfordnlp/CoreNLP project.
Snippet: the printFeatures method of the CRFFeatureExporter class.
/**
 * Output features that have already been converted into features
 * (using documentToDataAndLabels) in format suitable for CRFSuite.
 * Format is with one line per token using the following format
 * label feat1 feat2 ...
 * (where each space is actually a tab)
 * Each document is separated by an empty line.
 *
 * @param exportFile file to export the features to
 * @param docsData array of document features
 * @param labels correct labels indexed by document, and position within document
 * @throws RuntimeException wrapping any IOException from opening the file
 */
public void printFeatures(String exportFile, int[][][][] docsData, int[][] labels) {
// try-with-resources closes the writer even when an exception escapes the
// loops; the original leaked the PrintWriter on any failure before close().
try (PrintWriter pw = IOUtils.getPrintWriter(exportFile)) {
for (int i = 0; i < docsData.length; i++) {
for (int j = 0; j < docsData[i].length; j++) {
// One output line per token: label, then delimiter-separated features.
StringBuilder sb = new StringBuilder();
int label = labels[i][j];
sb.append(classifier.classIndex.get(label));
for (int k = 0; k < docsData[i][j].length; k++) {
for (int m = 0; m < docsData[i][j][k].length; m++) {
String feat = classifier.featureIndex.get(docsData[i][j][k][m]);
feat = ubPrefixFeatureString(feat);
sb.append(delimiter).append(feat);
}
}
pw.println(sb.toString());
}
// Blank line marks the end of a document.
pw.println();
}
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
Usage example of java.io.PrintWriter from the stanfordnlp/CoreNLP project.
Snippet: the overloaded printFeatures method of the CRFFeatureExporter class.
/**
 * Output features from a collection of documents to a file
 * Format is with one line per token using the following format
 * word label feat1 feat2 ...
 * (where each space is actually a tab)
 * Each document is separated by an empty line
 * This format is suitable for modified crfsgd.
 *
 * @param exportFile file to export the features to
 * @param documents input collection of documents
 * @throws RuntimeException wrapping any IOException from opening the file
 */
public void printFeatures(String exportFile, Collection<List<IN>> documents) {
// try-with-resources guarantees the writer is closed even if feature
// extraction throws; the original leaked it on any exception before close().
try (PrintWriter pw = IOUtils.getPrintWriter(exportFile)) {
for (List<IN> doc : documents) {
// getFeatureString renders one document (tokens + features) as a block.
pw.println(getFeatureString(doc));
}
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
Aggregations