Use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
Class LearningCurveMultiDataset, method getLearningCurve.
/**
 * Use fixedNumIterations = -1 to use the automatic convergence criterion.
 * <p>
 * NB: assumes column-format data.
 */
public static void getLearningCurve(Vector<Data> trainDataSet, Vector<Data> testDataSet,
        int fixedNumIterations) throws Exception {
    double bestF1Level1 = -1;
    int bestRoundLevel1 = 0;
    // Get the directory name (<configname>.model is appended in LbjTagger/Parameters.java:139)
    String modelPath = ParametersForLbjCode.currentParameters.pathToModelFile;
    String modelPathDir = modelPath.substring(0, modelPath.lastIndexOf("/"));
    if (IOUtils.exists(modelPathDir)) {
        if (!IOUtils.isDirectory(modelPathDir)) {
            String msg = "ERROR: " + NAME + ".getLearningCurve(): model directory '"
                    + modelPathDir + "' already exists as a (non-directory) file.";
            logger.error(msg);
            throw new IOException(msg);
        } else {
            logger.warn(NAME + ".getLearningCurve(): writing to existing model path '"
                    + modelPathDir + "'...");
        }
    } else {
        IOUtils.mkdir(modelPathDir);
    }
    NETaggerLevel1.Parameters paramLevel1 = new NETaggerLevel1.Parameters();
    paramLevel1.baseLTU = new SparseAveragedPerceptron(
            ParametersForLbjCode.currentParameters.learningRatePredictionsLevel1, 0,
            ParametersForLbjCode.currentParameters.thicknessPredictionsLevel1);
    logger.info("Level 1 classifier learning rate = "
            + ParametersForLbjCode.currentParameters.learningRatePredictionsLevel1
            + ", thickness = "
            + ParametersForLbjCode.currentParameters.thicknessPredictionsLevel1);
    NETaggerLevel1 tagger1 =
            new NETaggerLevel1(paramLevel1, modelPath + ".level1", modelPath + ".level1.lex");
    tagger1.forget();
    for (int dataId = 0; dataId < trainDataSet.size(); dataId++) {
        Data trainData = trainDataSet.elementAt(dataId);
        if (ParametersForLbjCode.currentParameters.featuresToUse.containsKey("PredictionsLevel1")) {
            PredictionsAndEntitiesConfidenceScores.getAndMarkEntities(trainData,
                    NEWord.LabelToLookAt.GoldLabel);
            TwoLayerPredictionAggregationFeatures.setLevel1AggregationFeatures(trainData, true);
        }
    }
    // Pre-extract the Level 1 train and test data, deleting any stale prefetch files first.
    String path = ParametersForLbjCode.currentParameters.pathToModelFile;
    String trainPathL1 = path + ".level1.prefetchedTrainData";
    File deleteme = new File(trainPathL1);
    if (deleteme.exists())
        deleteme.delete();
    String testPathL1 = path + ".level1.prefetchedTestData";
    deleteme = new File(testPathL1);
    if (deleteme.exists())
        deleteme.delete();
    logger.info("Pre-extracting the training data for Level 1 classifier, saving to " + trainPathL1);
    BatchTrainer bt1train = prefetchAndGetBatchTrainer(tagger1, trainDataSet, trainPathL1);
    logger.info("Pre-extracting the testing data for Level 1 classifier, saving to " + testPathL1);
    BatchTrainer bt1test = prefetchAndGetBatchTrainer(tagger1, testDataSet, testPathL1);
    Parser testParser1 = bt1test.getParser();
    for (int i = 0; (fixedNumIterations == -1 && i < 200 && i - bestRoundLevel1 < 10)
            || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) {
        bt1train.train(1);
        testParser1.reset();
        TestDiscrete simpleTest = new TestDiscrete();
        simpleTest.addNull("O");
        TestDiscrete.testDiscrete(simpleTest, tagger1, null, testParser1, true, 0);
        double f1Level1 = simpleTest.getOverallStats()[2];
        if (f1Level1 > bestF1Level1) {
            bestF1Level1 = f1Level1;
            bestRoundLevel1 = i;
            tagger1.save();
        }
        logger.info(i + " rounds. Best so far for Level1 : (" + bestRoundLevel1 + ")=" + bestF1Level1);
    }
    logger.info("Level 1; best round : " + bestRoundLevel1 + "\tbest F1 : " + bestF1Level1);
    // Delete any stale Level 2 prefetch data before regenerating it.
    String trainPathL2 = path + ".level2.prefetchedTrainData";
    deleteme = new File(trainPathL2);
    if (deleteme.exists())
        deleteme.delete();
    String testPathL2 = path + ".level2.prefetchedTestData";
    deleteme = new File(testPathL2);
    if (deleteme.exists())
        deleteme.delete();
    NETaggerLevel2.Parameters paramLevel2 = new NETaggerLevel2.Parameters();
    paramLevel2.baseLTU = new SparseAveragedPerceptron(
            ParametersForLbjCode.currentParameters.learningRatePredictionsLevel2, 0,
            ParametersForLbjCode.currentParameters.thicknessPredictionsLevel2);
    NETaggerLevel2 tagger2 = new NETaggerLevel2(paramLevel2,
            ParametersForLbjCode.currentParameters.pathToModelFile + ".level2",
            ParametersForLbjCode.currentParameters.pathToModelFile + ".level2.lex");
    tagger2.forget();
    // Previously checked if PatternFeatures was in featuresToUse.
    if (ParametersForLbjCode.currentParameters.featuresToUse.containsKey("PredictionsLevel1")) {
        logger.info("Level 2 classifier learning rate = "
                + ParametersForLbjCode.currentParameters.learningRatePredictionsLevel2
                + ", thickness = "
                + ParametersForLbjCode.currentParameters.thicknessPredictionsLevel2);
        double bestF1Level2 = -1;
        int bestRoundLevel2 = 0;
        logger.info("Pre-extracting the training data for Level 2 classifier, saving to " + trainPathL2);
        BatchTrainer bt2train = prefetchAndGetBatchTrainer(tagger2, trainDataSet, trainPathL2);
        logger.info("Pre-extracting the testing data for Level 2 classifier, saving to " + testPathL2);
        BatchTrainer bt2test = prefetchAndGetBatchTrainer(tagger2, testDataSet, testPathL2);
        Parser testParser2 = bt2test.getParser();
        for (int i = 0; (fixedNumIterations == -1 && i < 200 && i - bestRoundLevel2 < 10)
                || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) {
            logger.info("Learning level 2 classifier; round " + i);
            bt2train.train(1);
            logger.info("Testing level 2 classifier on prefetched data; round " + i);
            testParser2.reset();
            TestDiscrete simpleTest = new TestDiscrete();
            simpleTest.addNull("O");
            TestDiscrete.testDiscrete(simpleTest, tagger2, null, testParser2, true, 0);
            double f1Level2 = simpleTest.getOverallStats()[2];
            if (f1Level2 > bestF1Level2) {
                bestF1Level2 = f1Level2;
                bestRoundLevel2 = i;
                tagger2.save();
            }
            logger.info(i + " rounds. Best so far for Level2 : (" + bestRoundLevel2 + ") " + bestF1Level2);
        }
        // Clean up the Level 2 prefetch data.
        deleteme = new File(trainPathL2);
        if (deleteme.exists())
            deleteme.delete();
        deleteme = new File(testPathL2);
        if (deleteme.exists())
            deleteme.delete();
        logger.info("Level1: bestround=" + bestRoundLevel1 + "\t F1=" + bestF1Level1
                + "\t Level2: bestround=" + bestRoundLevel2 + "\t F1=" + bestF1Level2);
    }
    /*
     * When fixedNumIterations is set, this overwrites the saved models so that the final
     * iteration (the fixedNumIterations-th) is the one kept. Note that both layers are saved
     * for this iteration, so if the best performance for one of the layers came before the
     * final iteration, that layer's performance will decrease slightly.
     */
    if (fixedNumIterations > -1) {
        tagger1.save();
        tagger2.save();
    }
}
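For reference, a minimal driver sketch. This is hypothetical: loadTrainData() and loadTestData() are placeholders for whichever readers build the Data vectors in the NER pipeline, and are not part of the original class.

// Hypothetical driver; the two load helpers are assumed, not real API.
Vector<Data> trainData = loadTrainData();
Vector<Data> testData = loadTestData();
// -1 selects the automatic convergence criterion: at most 200 rounds,
// stopping once 10 rounds pass without a new best test-set F1.
LearningCurveMultiDataset.getLearningCurve(trainData, testData, -1);
// A positive value trains for exactly that many rounds and, per the comment
// at the end of the method, re-saves both layers at the final round.
LearningCurveMultiDataset.getLearningCurve(trainData, testData, 30);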
Use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
Class BIOTester, method TrainModel.
public static void TrainModel(String corpus) throws InvalidPortException, InvalidEndpointException,
        DatastoreException, IOException, JWNLException {
    if (corpus.equals("ACE")) {
        Parser train_parser_nam = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "NAM", false);
        Parser train_parser_nom = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "NOM", false);
        Parser train_parser_pro = new BIOReader(getPath("all", "ACE", 0), "ACE05-TRAIN", "PRO", false);
        train_nam_classifier(train_parser_nam, "models/ACE_NAM");
        train_nom_classifier(train_parser_nom, "models/ACE_NOM");
        train_pro_classifier(train_parser_pro, "models/ACE_PRO");
    } else if (corpus.equals("ERE")) {
        Parser train_parser_nam = new BIOReader(getPath("all", "ERE", 0), "ACE05-TRAIN", "NAM", false);
        Parser train_parser_nom = new BIOReader(getPath("all", "ERE", 0), "ACE05-TRAIN", "NOM", false);
        Parser train_parser_pro = new BIOReader(getPath("all", "ERE", 0), "ACE05-TRAIN", "PRO", false);
        train_nam_classifier(train_parser_nam, "models/ERE_NAM");
        train_nom_classifier(train_parser_nom, "models/ERE_NOM");
        train_pro_classifier(train_parser_pro, "models/ERE_PRO");
    }
}
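A usage note: the method recognizes only the literal corpus keys "ACE" and "ERE" (any other argument is silently ignored), and the trained models land under the hard-coded models/ directory, e.g.:

// Trains all three mention-head classifiers for ACE, writing
// models/ACE_NAM, models/ACE_NOM and models/ACE_PRO.
BIOTester.TrainModel("ACE");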
Use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
Class ExtentTester, method testSimpleExtent.
public static void testSimpleExtent() throws InvalidPortException, InvalidEndpointException,
        IOException, JWNLException, DatastoreException {
    int true_labeled = 0;
    int true_predicted = 0;
    int true_correct = 0;
    int false_labeled = 0;
    int false_predicted = 0;
    int false_correct = 0;
    // Five-fold evaluation: train on each partition's train split, score on its
    // eval split, and pool the counts across folds.
    for (int i = 0; i < 5; i++) {
        ExtentReader train_parser = new ExtentReader("data/partition_with_dev/train/" + i);
        extent_classifier classifier = train_extent_classifier(train_parser);
        extentLabel output = new extentLabel();
        Parser test_parser = new ExtentReader("data/partition_with_dev/eval/" + i);
        for (Object example = test_parser.next(); example != null; example = test_parser.next()) {
            String pTag = classifier.discreteValue(example);
            String gTag = output.discreteValue(example);
            if (pTag.equals("true")) {
                true_predicted++;
            } else {
                false_predicted++;
            }
            if (gTag.equals("true")) {
                true_labeled++;
            } else {
                false_labeled++;
            }
            if (pTag.equals(gTag)) {
                if (pTag.equals("true")) {
                    true_correct++;
                } else {
                    false_correct++;
                }
            }
        }
    }
    System.out.println("Total Labeled True: " + true_labeled);
    System.out.println("Total Predicted True: " + true_predicted);
    System.out.println("Total Correct True: " + true_correct);
    double p = (double) true_correct / (double) true_predicted;
    double r = (double) true_correct / (double) true_labeled;
    double f = 2 * p * r / (p + r);
    System.out.println("True Precision: " + p);
    System.out.println("True Recall: " + r);
    System.out.println("True F1: " + f);
    System.out.println("Total Labeled False: " + false_labeled);
    System.out.println("Total Predicted False: " + false_predicted);
    System.out.println("Total Correct False: " + false_correct);
    p = (double) false_correct / (double) false_predicted;
    r = (double) false_correct / (double) false_labeled;
    f = 2 * p * r / (p + r);
    System.out.println("False Precision: " + p);
    System.out.println("False Recall: " + r);
    System.out.println("False F1: " + f);
}
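The precision/recall arithmetic above is written out twice, once per class; a small helper along these lines (hypothetical, not part of the original class) would compute the same numbers from the three counts:

// Hypothetical helper mirroring the arithmetic above:
// precision = correct / predicted, recall = correct / labeled, F1 = their harmonic mean.
private static void printScores(String label, int correct, int predicted, int labeled) {
    double p = (double) correct / (double) predicted;
    double r = (double) correct / (double) labeled;
    double f = 2 * p * r / (p + r);
    System.out.println(label + " Precision: " + p);
    System.out.println(label + " Recall: " + r);
    System.out.println(label + " F1: " + f);
}

It would be called as printScores("True", true_correct, true_predicted, true_labeled), and likewise for the false counts.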
Use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
Class Main, method train.
public void train() {
    if (!IOUtils.exists(modelsDir))
        IOUtils.mkdir(modelsDir);
    Learner classifier = new PrepSRLClassifier(modelName + ".lc", modelName + ".lex");
    Parser trainDataReader = new PrepSRLDataReader(dataDir, "train");
    BatchTrainer trainer = new BatchTrainer(classifier, trainDataReader, 1000);
    trainer.train(20);
    classifier.save();
    trainDataReader.close();
}
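A design note, hedged: in LBJava's BatchTrainer the third constructor argument (1000 here) appears to be the progress-reporting interval, i.e. a status line is logged every 1000 training examples; train(20) makes 20 passes over the data, after which classifier.save() writes the weights and lexicon to the modelName + ".lc" and ".lex" paths given to the classifier's constructor.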
Use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.
Class Main, method test.
public void test() {
    ConstrainedPrepSRLClassifier classifier = new ConstrainedPrepSRLClassifier();
    Parser testDataReader = new PrepSRLDataReader(dataDir, "test");
    TestDiscrete tester = new TestDiscrete();
    TestDiscrete.testDiscrete(tester, classifier, new PrepSRLClassifier.Label(), testDataReader,
            true, 100);
    testDataReader.close();
}
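Here PrepSRLClassifier.Label acts as the oracle supplying gold labels to testDiscrete; the final two arguments appear to be LBJava's output toggle and reporting interval (print the score table, with a status update every 100 examples).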