use of edu.illinois.cs.cogcomp.comma.utils.EvaluateDiscrete in project cogcomp-nlp by CogComp.
the class ClassifierComparison method reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting.
/**
 * Prints evidence for the hypothesis that the structured classifier underperforms because of
 * overfitting: a higher score on the training data combined with a lower score on the test data
 * is indicative of overfitting.
 *
 * <p>The structured and the local classifier are each evaluated twice, through
 * {@code structuredCVal} and {@code localCVal}: once testing on the training portion and once on
 * the test portion. Element {@code [2]} of each {@code getOverallStats()} result is printed
 * (presumably the overall F1 -- confirm against {@code EvaluateDiscrete}).
 *
 * @param parser source of the labeled examples, handed to both evaluation routines
 * @param useGoldFeatures forwarded to {@code structuredCVal}, and used as both the train-on-gold
 *        and the test-on-gold flag of {@code localCVal}
 * @throws Exception propagated from the evaluation routines
 */
public static void reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting(Parser parser, boolean useGoldFeatures) throws Exception {
    List<Classifier> lbjExtractors = new ArrayList<>();
    lbjExtractors.add(new LocalCommaClassifier().getExtractor());
    Classifier lbjLabeler = new LocalCommaClassifier().getLabeler();
    StructuredCommaClassifier structured = new StructuredCommaClassifier(lbjExtractors, lbjLabeler);

    // Hyper-parameters of the local (perceptron based) classifier.
    int learningRounds = 250;
    double learningRate = 0.003;
    double threshold = 0;
    double thickness = 3.5;

    // The last argument of both routines selects evaluation on the train (true) or test (false) portion.
    EvaluateDiscrete structuredPerformanceOnTrainSet = structuredCVal(structured, parser, useGoldFeatures, true);
    EvaluateDiscrete structuredPerformanceOnTestSet = structuredCVal(structured, parser, useGoldFeatures, false);
    EvaluateDiscrete localPerformanceOnTrainSet = localCVal(useGoldFeatures, useGoldFeatures, parser, learningRounds, learningRate, threshold, thickness, true);
    EvaluateDiscrete localPerformanceOnTestSet = localCVal(useGoldFeatures, useGoldFeatures, parser, learningRounds, learningRate, threshold, thickness, false);

    System.out.println("Structured performance on train set " + structuredPerformanceOnTrainSet.getOverallStats()[2]);
    System.out.println("Structured performance on test set " + structuredPerformanceOnTestSet.getOverallStats()[2]);
    System.out.println("Local performance on train set " + localPerformanceOnTrainSet.getOverallStats()[2]);
    // Label previously read "Localperformance"; made consistent with the three lines above.
    System.out.println("Local performance on test set " + localPerformanceOnTestSet.getOverallStats()[2]);
}
use of edu.illinois.cs.cogcomp.comma.utils.EvaluateDiscrete in project cogcomp-nlp by CogComp.
the class ClassifierComparison method main.
/**
 * Runs the full comparison of comma classifiers on the comma-labeled data file and prints the
 * results to standard output: the local classifier under three gold/auto feature settings, the
 * structured classifier with gold and auto features, the Bayraktar baseline with gold and auto
 * features, the overfitting check and finally the constrained classifiers.
 *
 * @param args ignored
 * @throws Exception propagated from corpus reading or any of the evaluations
 */
public static void main(String[] args) throws Exception {
    PrettyCorpusReader corpusReader =
            new PrettyCorpusReader(CommaProperties.getInstance().getCommaLabeledDataFile());
    CommaParser parser = new CommaParser(corpusReader.getSentences(), Ordering.ORDERED, true);

    // Local classifier: (train features, test features) = gold/gold, gold/auto, auto/auto.
    System.out.println("GOLD GOLD");
    localCVal(true, true, parser, 250, 0.003, 0, 2.0, false);

    System.out.println("GOLD AUTO");
    localCVal(true, false, parser, 200, 0.003, 0, 2.0, false);

    System.out.println("AUTO AUTO");
    localCVal(false, false, parser, 250, 0.003, 0, 3.5, false);

    // Feature extractor and labeler shared by both structured classifiers.
    List<Classifier> extractors = new ArrayList<>();
    extractors.add(new LocalCommaClassifier().getExtractor());
    Classifier labeler = new LocalCommaClassifier().getLabeler();

    System.out.println("STRUCTURED GOLD");
    StructuredCommaClassifier structuredWithGold = new StructuredCommaClassifier(extractors, labeler);
    structuredCVal(structuredWithGold, parser, true, false);

    System.out.println("STRUCTURED AUTO");
    StructuredCommaClassifier structuredWithAuto = new StructuredCommaClassifier(extractors, labeler);
    structuredCVal(structuredWithAuto, parser, false, false);

    System.out.println("BAYRAKTAR GOLD");
    getBayraktarBaselinePerformance(parser, true).printPerformance(System.out);

    System.out.println("BAYRAKTAR AUTO");
    getBayraktarBaselinePerformance(parser, false).printPerformance(System.out);

    reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting(parser, false);
    printConstrainedClassifierPerformance(parser);
}
use of edu.illinois.cs.cogcomp.comma.utils.EvaluateDiscrete in project cogcomp-nlp by CogComp.
the class StructuredCommaClassifier method test.
/**
 * Runs the model on a test set and records every predicted label against its gold label.
 *
 * <p>When {@code predictionFileName} is non-null, the predictions are also written to that file:
 * one {@code commaPosition<TAB>label} line per comma, with a blank line after each instance.
 *
 * @param sentences the test set
 * @param predictionFileName location to which to save the predictions of the model. If it is
 *        null, predictions are not saved
 * @return an EvaluateDiscrete object which can provide the performance statistics
 * @throws Exception if reading the problem, inference, or writing the prediction file fails
 */
public EvaluateDiscrete test(List<CommaSRLSentence> sentences, String predictionFileName) throws Exception {
    // Freeze the lexicon: features unseen during training must not be added at test time.
    lm.setAllowNewFeatures(false);
    SLProblem sp = CommaIOManager.readProblem(sentences, lm, lbjExtractors, lbjLabeler);
    EvaluateDiscrete evaluator = new EvaluateDiscrete();

    // try-with-resources skips close() for a null resource, so the "no output file" case needs no
    // special handling, and the writer is closed even when inference or writing throws.
    try (BufferedWriter writer = predictionFileName == null
            ? null
            : new BufferedWriter(new FileWriter(predictionFileName))) {
        for (int i = 0; i < sp.instanceList.size(); i++) {
            CommaLabelSequence gold = (CommaLabelSequence) sp.goldStructureList.get(i);
            CommaLabelSequence prediction =
                    (CommaLabelSequence) infSolver.getBestStructure(wv, sp.instanceList.get(i));

            for (int j = 0; j < prediction.labels.size(); j++) {
                evaluator.reportPrediction(prediction.labels.get(j), gold.labels.get(j));
            }

            if (writer != null) {
                // A discarded lookup, sortedCommas.get(i).getSentence().getAnnotatedText(), was
                // removed here: it indexed the commas of this instance with the instance index i
                // and could throw IndexOutOfBoundsException.
                CommaSequence instance = (CommaSequence) sp.instanceList.get(i);
                for (int j = 0; j < prediction.labels.size(); j++) {
                    int commaPosition = instance.sortedCommas.get(j).commaPosition;
                    // Predicted labels are stored as numeric ids; map them back to label strings.
                    String predictedLabel = lm.getLabelString(Integer.parseInt(prediction.labels.get(j)));
                    writer.write(commaPosition + "\t" + predictedLabel + "\n");
                }
                writer.write("\n");
            }
        }
    }
    return evaluator;
}
use of edu.illinois.cs.cogcomp.comma.utils.EvaluateDiscrete in project cogcomp-nlp by CogComp.
the class ClassifierComparison method printConstrainedClassifierPerformance.
/**
 * Evaluates the constrained comma classifiers with 5-fold sequential cross validation and prints
 * one line per classifier: its name followed by element {@code [2]} of
 * {@code getOverallStats()} (presumably the overall F1 -- confirm against
 * {@code EvaluateDiscrete}).
 *
 * <p>In every fold a fresh {@link LocalCommaClassifier} is trained for 250 rounds on the training
 * portion and saved. The unconstrained learner and each constrained classifier are then evaluated
 * on the held-out portion. The unconstrained learner's accumulated score is printed first as the
 * baseline; previously it was computed but never reported.
 *
 * @param parser source of the labeled examples; it is reset before being split into folds
 */
public static void printConstrainedClassifierPerformance(Parser parser) {
    List<Pair<Classifier, EvaluateDiscrete>> classifiers = new ArrayList<>();
    LocalCommaClassifier learner = new LocalCommaClassifier();
    EvaluateDiscrete unconstrainedPerformance = new EvaluateDiscrete();
    learner.setLTU(new SparseAveragedPerceptron(0.003, 0, 3.5));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new SubstitutePairConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new LocativePairConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new ListCommasConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new OxfordCommaConstrainedCommaClassifier(), new EvaluateDiscrete()));
    int k = 5;
    parser.reset();
    FoldParser foldParser = new FoldParser(parser, k, SplitPolicy.sequential, 0, false);
    for (int i = 0; i < k; foldParser.setPivot(++i)) {
        // Train on everything except the pivot fold.
        foldParser.setFromPivot(false);
        foldParser.reset();
        learner.forget();
        BatchTrainer bt = new BatchTrainer(learner, foldParser);
        Lexicon lexicon = bt.preExtract(null);
        learner.setLexicon(lexicon);
        bt.train(250);
        // NOTE(review): presumably the constrained classifiers load the saved model -- confirm.
        learner.save();

        // Evaluate on the pivot fold only.
        foldParser.setFromPivot(true);
        foldParser.reset();
        unconstrainedPerformance.reportAll(EvaluateDiscrete.evaluateDiscrete(learner, learner.getLabeler(), foldParser));
        for (Pair<Classifier, EvaluateDiscrete> pair : classifiers) {
            foldParser.reset();
            pair.getSecond().reportAll(EvaluateDiscrete.evaluateDiscrete(pair.getFirst(), learner.getLabeler(), foldParser));
        }
    }
    // Report the unconstrained baseline so the constrained scores have a point of comparison.
    System.out.println(learner.name + " (unconstrained) " + unconstrainedPerformance.getOverallStats()[2]);
    for (Pair<Classifier, EvaluateDiscrete> pair : classifiers) {
        System.out.println(pair.getFirst().name + " " + pair.getSecond().getOverallStats()[2]);
    }
}
use of edu.illinois.cs.cogcomp.comma.utils.EvaluateDiscrete in project cogcomp-nlp by CogComp.
the class ClassifierComparison method localCVal.
/**
 * Runs 5-fold sequential cross validation of a {@link LocalCommaClassifier} and prints the
 * accumulated performance to standard output.
 *
 * <p>Note that the gold/auto feature choice is switched through the static
 * {@code Comma.useGoldFeatures}, which is left at {@code testOnGold} when the method returns.
 *
 * @param trainOnGold value passed to {@code Comma.useGoldFeatures} before training
 * @param testOnGold value passed to {@code Comma.useGoldFeatures} before evaluation
 * @param parser source of the labeled examples; it is reset before being split into folds
 * @param learningRounds number of training rounds per fold
 * @param learningRate first argument of the {@code SparseAveragedPerceptron} constructor
 * @param threshold second argument of the {@code SparseAveragedPerceptron} constructor
 * @param thickness third argument of the {@code SparseAveragedPerceptron} constructor
 * @param testOnTrain if true, each fold is evaluated on its training portion instead of the
 *        held-out portion
 * @return the performance accumulated over all folds
 */
public static EvaluateDiscrete localCVal(boolean trainOnGold, boolean testOnGold, Parser parser, int learningRounds, double learningRate, double threshold, double thickness, boolean testOnTrain) {
    final int foldCount = 5;

    LocalCommaClassifier classifier = new LocalCommaClassifier();
    classifier.setLTU(new SparseAveragedPerceptron(learningRate, threshold, thickness));

    parser.reset();
    FoldParser folds = new FoldParser(parser, foldCount, SplitPolicy.sequential, 0, false);
    EvaluateDiscrete overall = new EvaluateDiscrete();

    int fold = 0;
    while (fold < foldCount) {
        // Training phase: read everything except the pivot fold.
        folds.setFromPivot(false);
        folds.reset();
        classifier.forget();
        BatchTrainer trainer = new BatchTrainer(classifier, folds);
        Comma.useGoldFeatures(trainOnGold);
        classifier.setLexicon(trainer.preExtract(null));
        trainer.train(learningRounds);

        // Evaluation phase: switch to the pivot fold unless testing on the training data.
        if (!testOnTrain) {
            folds.setFromPivot(true);
        }
        folds.reset();
        Comma.useGoldFeatures(testOnGold);
        overall.reportAll(EvaluateDiscrete.evaluateDiscrete(classifier, classifier.getLabeler(), folds));

        // Advance the pivot; as in a for-loop update, this also runs after the last fold.
        fold++;
        folds.setPivot(fold);
    }

    overall.printPerformance(System.out);
    return overall;
}
Aggregations