Use of java.util.logging.SimpleFormatter in project pyramid by cheng-li.
Class BRPrediction, method reportValid.
/**
 * Runs {@code report(...)} on the validation and calibration data sets.
 *
 * <p>When the "output.log" setting is non-empty, log records are appended to that file using a
 * {@link SimpleFormatter} and are no longer forwarded to parent handlers. The reports are only
 * requested when the "validate" setting is true.
 *
 * @param config settings; this method reads "output.log", "validate", "input.validData" and
 *     "input.calibrationData"
 * @throws Exception if the log file cannot be opened or {@code report(...)} fails
 */
public static void reportValid(Config config) throws Exception {
    Logger logger = Logger.getAnonymousLogger();
    String logFile = config.getString("output.log");
    FileHandler fileHandler = null;
    try {
        if (!logFile.isEmpty()) {
            // A bare file name has no parent; getParentFile() returns null in that case.
            File logDir = new File(logFile).getParentFile();
            if (logDir != null) {
                logDir.mkdirs();
            }
            // todo should append?
            fileHandler = new FileHandler(logFile, true);
            fileHandler.setFormatter(new SimpleFormatter());
            logger.addHandler(fileHandler);
            logger.setUseParentHandlers(false);
        }
        if (config.getBoolean("validate")) {
            report(config, config.getString("input.validData"), logger);
            report(config, config.getString("input.calibrationData"), logger);
        }
    } finally {
        // Release the log file even when a report fails part-way through.
        if (fileHandler != null) {
            fileHandler.close();
        }
    }
}
Use of java.util.logging.SimpleFormatter in project pyramid by cheng-li.
Class BRPrediction, method reportTest.
/**
 * Runs {@code report(...)} on the test data set.
 *
 * <p>When the "output.log" setting is non-empty, log records are appended to that file using a
 * {@link SimpleFormatter} and are no longer forwarded to parent handlers. The report is only
 * requested when the "test" setting is true.
 *
 * @param config settings; this method reads "output.log", "test" and "input.testData"
 * @throws Exception if the log file cannot be opened or {@code report(...)} fails
 */
public static void reportTest(Config config) throws Exception {
    Logger logger = Logger.getAnonymousLogger();
    String logFile = config.getString("output.log");
    FileHandler fileHandler = null;
    try {
        if (!logFile.isEmpty()) {
            // A bare file name has no parent; getParentFile() returns null in that case.
            File logDir = new File(logFile).getParentFile();
            if (logDir != null) {
                logDir.mkdirs();
            }
            // todo should append?
            fileHandler = new FileHandler(logFile, true);
            fileHandler.setFormatter(new SimpleFormatter());
            logger.addHandler(fileHandler);
            logger.setUseParentHandlers(false);
        }
        if (config.getBoolean("test")) {
            report(config, config.getString("input.testData"), logger);
        }
    } finally {
        // Release the log file even when the report fails part-way through.
        if (fileHandler != null) {
            fileHandler.close();
        }
    }
}
Use of java.util.logging.SimpleFormatter in project pyramid by cheng-li.
Class AppBRLR, method main.
/**
 * Entry point: logs the configuration, then runs the pipeline stages in a fixed order
 * (App1, BRLREN, BRCalibration, validation report, threshold tuning, test report, test
 * performance), each with its own derived configuration.
 *
 * @param args exactly one element: the path of the properties file
 * @throws IllegalArgumentException if {@code args} does not contain exactly one element
 * @throws Exception if the log file cannot be opened or any stage fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    Logger logger = Logger.getAnonymousLogger();
    String logFile = config.getString("output.log");
    FileHandler fileHandler = null;
    try {
        if (!logFile.isEmpty()) {
            // A bare file name has no parent; getParentFile() returns null in that case.
            File logDir = new File(logFile).getParentFile();
            if (logDir != null) {
                logDir.mkdirs();
            }
            // todo should append?
            fileHandler = new FileHandler(logFile, true);
            fileHandler.setFormatter(new SimpleFormatter());
            logger.addHandler(fileHandler);
            logger.setUseParentHandlers(false);
        }
        logger.info(config.toString());
    } finally {
        // The handler is closed before the stages run.
        // NOTE(review): presumably because the stages open their own handlers on the log
        // file (BRPrediction.reportValid/reportTest do) - confirm for the other stages.
        if (fileHandler != null) {
            fileHandler.close();
        }
    }

    File output = new File(config.getString("output.folder"));
    output.mkdirs();

    // Derive every stage configuration up front, before any stage runs.
    Config app1Config = createApp1Config(config);
    Config brConfig = createBRLRENConfig(config);
    Config calConfig = createBRCalibrationConfig(config);
    Config predictConfig = createBRPredictionConfig(config);
    Config autoConfig = createBRAutomationConfig(config);

    App1.main(app1Config);
    BRLREN.main(brConfig);
    BRCalibration.main(calConfig);
    BRPrediction.reportValid(predictConfig);
    BRAutomation.tuneThreshold(autoConfig);
    BRPrediction.reportTest(predictConfig);
    BRAutomation.showTestPerformance(autoConfig);
}
Use of java.util.logging.SimpleFormatter in project pyramid by cheng-li.
Class AppCTFT, method main.
/**
 * Entry point: finds a confidence threshold on the validation report via
 * {@code CTFT.findThreshold}, clips it to the configured bounds, writes both values to the
 * output folder and logs the result of applying each threshold to the test report.
 *
 * <p>Settings read: "output.log", "output.folder", "validReportPath", "testReportPath",
 * "CTFT.targetF1", "CTFT.upperBound", "CTFT.lowerBound" and "CTFT.name".
 *
 * @param args exactly one element: the path of the properties file
 * @throws IllegalArgumentException if {@code args} does not contain exactly one element
 * @throws Exception if the log file cannot be opened or any step fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    Logger logger = Logger.getAnonymousLogger();
    String logFile = config.getString("output.log");
    FileHandler fileHandler = null;
    try {
        if (!logFile.isEmpty()) {
            // A bare file name has no parent; getParentFile() returns null in that case.
            File logDir = new File(logFile).getParentFile();
            if (logDir != null) {
                logDir.mkdirs();
            }
            // todo should append?
            fileHandler = new FileHandler(logFile, true);
            fileHandler.setFormatter(new SimpleFormatter());
            logger.addHandler(fileHandler);
            logger.setUseParentHandlers(false);
        }
        logger.info(config.toString());
        File output = new File(config.getString("output.folder"));
        output.mkdirs();

        logger.info("start tuning CTFT ");
        String validReportPath = config.getString("validReportPath");
        String testReportPath = config.getString("testReportPath");
        Stream<Pair<Double, Double>> validStream = ReportUtils.getConfidenceF1(validReportPath).stream();
        // Kept as a list because it is streamed twice below (unclipped and clipped).
        List<Pair<Double, Double>> testList = ReportUtils.getConfidenceF1(testReportPath);
        CTFT.Summary summaryValid = CTFT.findThreshold(validStream, config.getDouble("CTFT.targetF1"));
        double ctft = summaryValid.getConfidenceThreshold();

        // Clip to the configured bounds: upper bound first, then lower bound.
        double upperBound = config.getDouble("CTFT.upperBound");
        double lowerBound = config.getDouble("CTFT.lowerBound");
        double ctft_clipped = ctft;
        if (ctft_clipped > upperBound) {
            ctft_clipped = upperBound;
        }
        if (ctft_clipped < lowerBound) {
            ctft_clipped = lowerBound;
        }

        FileUtils.writeStringToFile(Paths.get(config.getString("output.folder"), config.getString("CTFT.name") + "_unclipped").toFile(), "" + ctft);
        FileUtils.writeStringToFile(Paths.get(config.getString("output.folder"), config.getString("CTFT.name") + "_clipped").toFile(), "" + ctft_clipped);

        CTFT.Summary summaryTest = CTFT.applyThreshold(testList.stream(), ctft);
        CTFT.Summary summaryTest_clipped = CTFT.applyThreshold(testList.stream(), ctft_clipped);
        logger.info("tuning CTFT is done");

        logger.info("*****************");
        logger.info("autocoding performance with unclipped CTFT " + summaryTest.getConfidenceThreshold());
        logger.info("autocoding percentage = " + summaryTest.getAutoCodingPercentage());
        logger.info("autocoding accuracy = " + summaryTest.getAutoCodingAccuracy());
        logger.info("autocoding F1 = " + summaryTest.getAutoCodingF1());
        logger.info("number of autocoded documents = " + summaryTest.getNumAutoCoded());
        logger.info("number of correct autocoded documents = " + summaryTest.getNumCorrectAutoCoded());

        logger.info("*****************");
        logger.info("autocoding performance with clipped CTFT " + summaryTest_clipped.getConfidenceThreshold());
        logger.info("autocoding percentage = " + summaryTest_clipped.getAutoCodingPercentage());
        logger.info("autocoding accuracy = " + summaryTest_clipped.getAutoCodingAccuracy());
        logger.info("autocoding F1 = " + summaryTest_clipped.getAutoCodingF1());
        logger.info("number of autocoded documents = " + summaryTest_clipped.getNumAutoCoded());
        logger.info("number of correct autocoded documents = " + summaryTest_clipped.getNumCorrectAutoCoded());
    } finally {
        // Release the log file even when tuning fails part-way through.
        if (fileHandler != null) {
            fileHandler.close();
        }
    }
}
Use of java.util.logging.SimpleFormatter in project pyramid by cheng-li.
Class AppEnsemble, method main.
/**
 * Entry point: loads the per-model report CSVs for the test and validation folders, generates
 * ensemble reports for both, logs classification measures on the test set and, when the
 * "tuneThreshold" setting is true, tunes a confidence threshold on the ensemble validation
 * report and logs the result of applying it (unclipped and clipped) to the ensemble test report.
 *
 * <p>The data-set location is derived from the first entry of "modelPaths": everything before
 * the text "model_predictions", followed by "data_sets/".
 *
 * @param args exactly one element: the path of the properties file
 * @throws IllegalArgumentException if {@code args} does not contain exactly one element
 * @throws Exception if the log file cannot be opened or any step fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    Logger logger = Logger.getAnonymousLogger();
    String logFile = config.getString("output.log");
    FileHandler fileHandler = null;
    try {
        if (!logFile.isEmpty()) {
            // A bare file name has no parent; getParentFile() returns null in that case.
            File logDir = new File(logFile).getParentFile();
            if (logDir != null) {
                logDir.mkdirs();
            }
            // todo should append?
            fileHandler = new FileHandler(logFile, true);
            fileHandler.setFormatter(new SimpleFormatter());
            logger.addHandler(fileHandler);
            logger.setUseParentHandlers(false);
        }
        logger.info(config.toString());
        File output = new File(config.getString("output.folder"));
        output.mkdirs();

        List<String> modelPaths = config.getStrings("modelPaths");
        List<String> modelNames = config.getStrings("modelNames");
        String ensembleName = config.getString("ensembleModelName");
        String testFolder = config.getString("testFolder");
        String validFolder = config.getString("validFolder");
        double targetValue = config.getDouble("threshold.targetValue");

        logger.info("start loading all reports and getting ground truth");
        List<Map<String, DocumentReport>> testlistMaps = new ArrayList<>();
        List<Map<String, DocumentReport>> validlistMaps = new ArrayList<>();
        String dataSetPath = modelPaths.get(0).split("model_predictions")[0] + "data_sets/";
        String testSetPath = dataSetPath + testFolder;
        String validSetPath = dataSetPath + validFolder;
        MultiLabelClfDataSet testSetModel0 = TRECFormat.loadMultiLabelClfDataSet(testSetPath, DataSetType.ML_CLF_SPARSE, true);
        MultiLabelClfDataSet validSetModel0 = TRECFormat.loadMultiLabelClfDataSet(validSetPath, DataSetType.ML_CLF_SPARSE, true);
        Map<String, String> groundTruthTest = ReportUtils.getIDGroundTruth(testSetModel0);
        Map<String, String> groundTruthValid = ReportUtils.getIDGroundTruth(validSetModel0);
        // modelPaths and modelNames are read in parallel, index by index.
        for (int i = 0; i < modelPaths.size(); i++) {
            Map<String, DocumentReport> testmap = loadReportCSV(Paths.get(modelPaths.get(i), "predictions", testFolder + "_reports", "report.csv").toString(), modelNames.get(i));
            testlistMaps.add(testmap);
            Map<String, DocumentReport> validmap = loadReportCSV(Paths.get(modelPaths.get(i), "predictions", validFolder + "_reports", "report.csv").toString(), modelNames.get(i));
            validlistMaps.add(validmap);
        }
        logger.info("finish loading all reports and getting ground truth");

        logger.info("start generating ensemble test report");
        LabelTranslator newLabelTranslatorTest = getLabelTranslatorEnsemble(config, testFolder);
        // Document ids are taken from the first model's report.
        List<String> testDocIds = ReportUtils.getDocIds(Paths.get(modelPaths.get(0), "predictions", testFolder + "_reports", "report.csv").toString());
        generateReport(config, groundTruthTest, testlistMaps, ensembleName, testFolder, testDocIds, newLabelTranslatorTest);
        logger.info("ensemble test report generated");

        logger.info("start generating ensemble validation report");
        LabelTranslator newLabelTranslatorValid = getLabelTranslatorEnsemble(config, validFolder);
        List<String> validDocIds = ReportUtils.getDocIds(Paths.get(modelPaths.get(0), "predictions", validFolder + "_reports", "report.csv").toString());
        generateReport(config, groundTruthValid, validlistMaps, ensembleName, validFolder, validDocIds, newLabelTranslatorValid);
        logger.info("ensemble validation report generated");

        logger.info("classification performance on dataset " + testFolder);
        MlMeasureInfo measureInfo_test = getmlMeasureInfo(config, testSetModel0, testFolder, newLabelTranslatorTest);
        MLMeasures mlMeasures = new MLMeasures(measureInfo_test.numClasses, measureInfo_test.multiLabels, measureInfo_test.predictions);
        logger.info(mlMeasures.toString());

        if (config.getBoolean("tuneThreshold")) {
            logger.info("start tuning confidence threshold");
            String targetMetric = config.getString("threshold.targetMetric");
            String outputFolder = config.getString("output.folder");
            // Ensemble reports written under <output.folder>/model_predictions/<ensembleName>.
            String validReportCsv = Paths.get(outputFolder, "model_predictions", ensembleName, "predictions", validFolder + "_reports", "report.csv").toString();
            String testReportCsv = Paths.get(outputFolder, "model_predictions", ensembleName, "predictions", testFolder + "_reports", "report.csv").toString();

            // Stays at 1.1 when the target metric is neither "accuracy" nor "f1".
            double threshold = 1.1;
            if (targetMetric.equals("accuracy")) {
                Stream<Pair<Double, Double>> streamValid = ReportUtils.getConfidenceCorrectness(validReportCsv).stream();
                CTAT.Summary validSummary = CTAT.findThreshold(streamValid, targetValue);
                threshold = validSummary.getConfidenceThreshold();
            }
            if (targetMetric.equals("f1")) {
                Stream<Pair<Double, Double>> streamValid = ReportUtils.getConfidenceF1(validReportCsv).stream();
                CTFT.Summary summary_valid = CTFT.findThreshold(streamValid, targetValue);
                threshold = summary_valid.getConfidenceThreshold();
            }

            FileUtils.writeStringToFile(Paths.get(outputFolder, "model_predictions", ensembleName, "models", "threshold", config.getString("threshold.name")).toFile(), "" + threshold);
            double confidenceThresholdClipped = CTAT.clip(threshold, config.getDouble("threshold.lowerBound"), config.getDouble("threshold.upperBound"));
            FileUtils.writeStringToFile(Paths.get(outputFolder, "model_predictions", ensembleName, "models", "threshold", config.getString("threshold.name") + "_clipped").toFile(), "" + confidenceThresholdClipped);
            logger.info("tuning threshold is done");

            if (targetMetric.equals("accuracy")) {
                // Kept as a list because it is streamed twice (unclipped and clipped).
                List<Pair<Double, Double>> testPairs = ReportUtils.getConfidenceCorrectness(testReportCsv);
                CTAT.Summary testSummary_unclipped = CTAT.applyThreshold(testPairs.stream(), threshold);
                CTAT.Summary testSummary_clipped = CTAT.applyThreshold(testPairs.stream(), confidenceThresholdClipped);
                logger.info("*****************");
                logger.info("autocoding performance with unclipped CTAT " + testSummary_unclipped.getConfidenceThreshold());
                logger.info("autocoding percentage = " + testSummary_unclipped.getAutoCodingPercentage());
                logger.info("autocoding accuracy = " + testSummary_unclipped.getAutoCodingAccuracy());
                logger.info("number of autocoded documents = " + testSummary_unclipped.getNumAutoCoded());
                logger.info("number of correct autocoded documents = " + testSummary_unclipped.getNumCorrectAutoCoded());
                logger.info("*****************");
                logger.info("autocoding performance with clipped CTAT " + testSummary_clipped.getConfidenceThreshold());
                logger.info("autocoding percentage = " + testSummary_clipped.getAutoCodingPercentage());
                logger.info("autocoding accuracy = " + testSummary_clipped.getAutoCodingAccuracy());
                logger.info("number of autocoded documents = " + testSummary_clipped.getNumAutoCoded());
                logger.info("number of correct autocoded documents = " + testSummary_clipped.getNumCorrectAutoCoded());
            }
            if (targetMetric.equals("f1")) {
                // Kept as a list because it is streamed twice (unclipped and clipped).
                List<Pair<Double, Double>> testPairs = ReportUtils.getConfidenceF1(testReportCsv);
                CTFT.Summary summary_test = CTFT.applyThreshold(testPairs.stream(), threshold);
                CTFT.Summary summary_test_clipped = CTFT.applyThreshold(testPairs.stream(), confidenceThresholdClipped);
                logger.info("*****************");
                logger.info("autocoding performance with unclipped CTFT " + summary_test.getConfidenceThreshold());
                logger.info("autocoding percentage = " + summary_test.getAutoCodingPercentage());
                logger.info("autocoding accuracy = " + summary_test.getAutoCodingAccuracy());
                logger.info("autocoding F1 = " + summary_test.getAutoCodingF1());
                logger.info("number of autocoded documents = " + summary_test.getNumAutoCoded());
                logger.info("number of correct autocoded documents = " + summary_test.getNumCorrectAutoCoded());
                logger.info("*****************");
                logger.info("autocoding performance with clipped CTFT " + summary_test_clipped.getConfidenceThreshold());
                logger.info("autocoding percentage = " + summary_test_clipped.getAutoCodingPercentage());
                logger.info("autocoding accuracy = " + summary_test_clipped.getAutoCodingAccuracy());
                logger.info("autocoding F1 = " + summary_test_clipped.getAutoCodingF1());
                logger.info("number of autocoded documents = " + summary_test_clipped.getNumAutoCoded());
                logger.info("number of correct autocoded documents = " + summary_test_clipped.getNumCorrectAutoCoded());
            }
        }
    } finally {
        // Release the log file even when a step fails part-way through.
        if (fileHandler != null) {
            fileHandler.close();
        }
    }
}
Aggregations