use of edu.illinois.cs.cogcomp.comma.lbj.LocalCommaClassifier in project cogcomp-nlp by CogComp.
the class ClassifierComparison method main.
/**
 * Runs every classifier comparison in sequence and prints the results to standard output.
 *
 * <p>The order is: three local cross-validation runs (gold/gold, gold/auto, auto/auto),
 * two structured cross-validation runs (gold, auto), the two Bayraktar baseline
 * evaluations (gold, auto), the overfitting diagnostic, and finally the
 * constrained-classifier report. All runs share one parser built from the comma-labeled
 * data file named by {@code CommaProperties}.
 *
 * @param args not used
 * @throws Exception propagated from corpus reading or from any of the evaluation helpers
 */
public static void main(String[] args) throws Exception {
    // One parser over the labeled corpus is reused by every experiment below.
    CommaParser commaParser = new CommaParser(
            new PrettyCorpusReader(CommaProperties.getInstance().getCommaLabeledDataFile()).getSentences(),
            Ordering.ORDERED,
            true);

    // Local classifier: (trainOnGold, testOnGold) combinations.
    System.out.println("GOLD GOLD");
    localCVal(true, true, commaParser, 250, 0.003, 0, 2.0, false);

    System.out.println("GOLD AUTO");
    localCVal(true, false, commaParser, 200, 0.003, 0, 2.0, false);

    System.out.println("AUTO AUTO");
    localCVal(false, false, commaParser, 250, 0.003, 0, 3.5, false);

    // The structured classifiers share the same extractor list and labeler.
    List<Classifier> extractors = new ArrayList<>();
    extractors.add(new LocalCommaClassifier().getExtractor());
    Classifier labeler = new LocalCommaClassifier().getLabeler();

    System.out.println("STRUCTURED GOLD");
    structuredCVal(new StructuredCommaClassifier(extractors, labeler), commaParser, true, false);

    System.out.println("STRUCTURED AUTO");
    structuredCVal(new StructuredCommaClassifier(extractors, labeler), commaParser, false, false);

    // Bayraktar baseline with gold and with automatic features.
    System.out.println("BAYRAKTAR GOLD");
    getBayraktarBaselinePerformance(commaParser, true).printPerformance(System.out);

    System.out.println("BAYRAKTAR AUTO");
    getBayraktarBaselinePerformance(commaParser, false).printPerformance(System.out);

    reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting(commaParser, false);
    printConstrainedClassifierPerformance(commaParser);
}
use of edu.illinois.cs.cogcomp.comma.lbj.LocalCommaClassifier in project cogcomp-nlp by CogComp.
the class ClassifierComparison method reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting.
/**
 * Prints train-set and test-set scores for the structured and the local comma classifier
 * side by side.
 *
 * <p>Structured's higher performance on the train set and lower performance on the test
 * set is indicative of overfitting.
 *
 * <p>The local classifier is cross-validated with 250 learning rounds, learning rate
 * 0.003, threshold 0 and thickness 3.5; {@code useGoldFeatures} is used for both its
 * training and its testing feature setting. The value printed for each run is element
 * {@code [2]} of {@code EvaluateDiscrete.getOverallStats()}.
 *
 * @param parser source of examples handed to both cross-validation helpers
 * @param useGoldFeatures feature setting forwarded to {@code structuredCVal} and
 *        {@code localCVal}
 * @throws Exception propagated from the cross-validation helpers
 */
public static void reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting(Parser parser, boolean useGoldFeatures) throws Exception {
    List<Classifier> lbjExtractors = new ArrayList<>();
    lbjExtractors.add(new LocalCommaClassifier().getExtractor());
    Classifier lbjLabeler = new LocalCommaClassifier().getLabeler();
    StructuredCommaClassifier structured = new StructuredCommaClassifier(lbjExtractors, lbjLabeler);

    // Hyper-parameters for the local (perceptron) classifier.
    int learningRounds = 250;
    double learningRate = 0.003;
    double threshold = 0;
    double thickness = 3.5;

    // The last argument of each helper selects evaluation on the training portion (true)
    // or on the held-out portion (false).
    EvaluateDiscrete structuredPerformanceOnTrainSet = structuredCVal(structured, parser, useGoldFeatures, true);
    EvaluateDiscrete structuredPerformanceOnTestSet = structuredCVal(structured, parser, useGoldFeatures, false);
    EvaluateDiscrete localPerformanceOnTrainSet = localCVal(useGoldFeatures, useGoldFeatures, parser, learningRounds, learningRate, threshold, thickness, true);
    EvaluateDiscrete localPerformanceOnTestSet = localCVal(useGoldFeatures, useGoldFeatures, parser, learningRounds, learningRate, threshold, thickness, false);

    System.out.println("Structured performance on train set " + structuredPerformanceOnTrainSet.getOverallStats()[2]);
    System.out.println("Structured performance on test set " + structuredPerformanceOnTestSet.getOverallStats()[2]);
    System.out.println("Local performance on train set " + localPerformanceOnTrainSet.getOverallStats()[2]);
    // Label previously read "Localperformance"; spacing now matches the other three lines.
    System.out.println("Local performance on test set " + localPerformanceOnTestSet.getOverallStats()[2]);
}
use of edu.illinois.cs.cogcomp.comma.lbj.LocalCommaClassifier in project cogcomp-nlp by CogComp.
the class CommaLabeler method initialize.
/**
 * Creates the {@code LocalCommaClassifier} and loads its lexicon and model files from the
 * {@code comma-srl-models} directory fetched through the datastore.
 *
 * <p>Any failure while constructing the classifier, fetching the directory or reading the
 * files is rethrown as an {@link IllegalStateException} carrying the original cause. It is
 * not merely printed, because continuing would leave the classifier missing or without a
 * loaded model.
 *
 * @param resourceManager not read by this method; the datastore is configured from
 *        {@code ResourceConfigurator}'s default config
 * @throws IllegalStateException if the classifier or its model files cannot be loaded
 */
@Override
public void initialize(ResourceManager resourceManager) {
    try {
        classifier = new LocalCommaClassifier();
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File f = ds.getDirectory("org.cogcomp.comma-srl", "comma-srl-models", 2.2, false);
        String folder = f.toString() + File.separator + "comma-srl-models" + File.separator;
        classifier.readLexicon(folder + "LocalCommaClassifier.lex");
        classifier.readModel(folder + "LocalCommaClassifier.lc");
    } catch (Exception e) {
        // Fail loudly and keep the cause: the sanity checks below cannot pass otherwise.
        throw new IllegalStateException(
                "Unable to load LocalCommaClassifier lexicon/model from the datastore", e);
    }
    // Sanity checks on the loaded lexicons (only active when assertions are enabled).
    assert classifier.getPrunedLexiconSize() > 1000;
    assert classifier.getLabelLexicon().size() > 5;
}
use of edu.illinois.cs.cogcomp.comma.lbj.LocalCommaClassifier in project cogcomp-nlp by CogComp.
the class ClassifierComparison method printConstrainedClassifierPerformance.
/**
 * Runs 5-fold cross-validation of a {@code LocalCommaClassifier} and, on each fold's
 * held-out portion, evaluates the trained learner itself plus four constrained
 * classifiers (substitute-pair, locative-pair, list-commas, Oxford-comma).
 *
 * <p>Per fold the learner is reset, trained for 250 rounds on a freshly pre-extracted
 * lexicon and saved before any evaluation. Results are accumulated over all folds. The
 * unconstrained learner's score is printed first as a baseline, followed by one line per
 * constrained classifier; every value is element {@code [2]} of
 * {@code EvaluateDiscrete.getOverallStats()}.
 *
 * @param parser source of examples; it is reset and wrapped in a sequential
 *        {@code FoldParser}
 */
public static void printConstrainedClassifierPerformance(Parser parser) {
    List<Pair<Classifier, EvaluateDiscrete>> classifiers = new ArrayList<>();
    LocalCommaClassifier learner = new LocalCommaClassifier();
    EvaluateDiscrete unconstrainedPerformance = new EvaluateDiscrete();
    learner.setLTU(new SparseAveragedPerceptron(0.003, 0, 3.5));

    // Each constrained classifier is paired with its own accumulator.
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new SubstitutePairConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new LocativePairConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new ListCommasConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new OxfordCommaConstrainedCommaClassifier(), new EvaluateDiscrete()));

    int k = 5;
    parser.reset();
    FoldParser foldParser = new FoldParser(parser, k, SplitPolicy.sequential, 0, false);
    for (int i = 0; i < k; i++) {
        // Training phase for the current pivot.
        foldParser.setFromPivot(false);
        foldParser.reset();
        learner.forget();
        BatchTrainer bt = new BatchTrainer(learner, foldParser);
        Lexicon lexicon = bt.preExtract(null);
        learner.setLexicon(lexicon);
        bt.train(250);
        // NOTE(review): presumably the constrained classifiers pick up the saved model - confirm.
        learner.save();

        // Evaluation phase on the other side of the pivot.
        foldParser.setFromPivot(true);
        foldParser.reset();
        unconstrainedPerformance.reportAll(EvaluateDiscrete.evaluateDiscrete(learner, learner.getLabeler(), foldParser));
        for (Pair<Classifier, EvaluateDiscrete> pair : classifiers) {
            foldParser.reset();
            pair.getSecond().reportAll(EvaluateDiscrete.evaluateDiscrete(pair.getFirst(), learner.getLabeler(), foldParser));
        }

        // Advance to the next fold (the original did this in the for-loop update clause).
        foldParser.setPivot(i + 1);
    }

    // Baseline first, so the constrained scores below have something to be compared to.
    System.out.println("Unconstrained " + unconstrainedPerformance.getOverallStats()[2]);
    for (Pair<Classifier, EvaluateDiscrete> pair : classifiers) {
        System.out.println(pair.getFirst().name + " " + pair.getSecond().getOverallStats()[2]);
    }
}
use of edu.illinois.cs.cogcomp.comma.lbj.LocalCommaClassifier in project cogcomp-nlp by CogComp.
the class ClassifierComparison method localCVal.
/**
 * Cross-validates a {@code LocalCommaClassifier} over 5 sequential folds and prints the
 * accumulated performance to standard output.
 *
 * <p>For each fold the learner is reset, a lexicon is pre-extracted, the learner is
 * trained, and it is then evaluated against its own labeler. {@code Comma.useGoldFeatures}
 * is switched to {@code trainOnGold} before lexicon extraction and training, and to
 * {@code testOnGold} before evaluation; it is left at {@code testOnGold} on return.
 *
 * @param trainOnGold value passed to {@code Comma.useGoldFeatures} for the training phase
 * @param testOnGold value passed to {@code Comma.useGoldFeatures} for the evaluation phase
 * @param parser source of examples; it is reset and wrapped in a {@code FoldParser}
 * @param learningRounds number of rounds given to {@code BatchTrainer.train}
 * @param learningRate first argument of the {@code SparseAveragedPerceptron} constructor
 * @param threshold second argument of the {@code SparseAveragedPerceptron} constructor
 * @param thickness third argument of the {@code SparseAveragedPerceptron} constructor
 * @param testOnTrain if {@code true}, the fold parser is not flipped to the other side of
 *        the pivot, so evaluation reads the same portion that was used for training
 * @return the performance accumulated over all folds
 */
public static EvaluateDiscrete localCVal(boolean trainOnGold, boolean testOnGold, Parser parser, int learningRounds, double learningRate, double threshold, double thickness, boolean testOnTrain) {
    final int numFolds = 5;

    LocalCommaClassifier classifier = new LocalCommaClassifier();
    classifier.setLTU(new SparseAveragedPerceptron(learningRate, threshold, thickness));

    parser.reset();
    final FoldParser folds = new FoldParser(parser, numFolds, SplitPolicy.sequential, 0, false);
    EvaluateDiscrete overall = new EvaluateDiscrete();

    for (int fold = 0; fold < numFolds; fold++) {
        // --- training phase ---
        folds.setFromPivot(false);
        folds.reset();
        classifier.forget();
        BatchTrainer trainer = new BatchTrainer(classifier, folds);
        Comma.useGoldFeatures(trainOnGold);
        classifier.setLexicon(trainer.preExtract(null));
        trainer.train(learningRounds);

        // --- evaluation phase ---
        if (!testOnTrain) {
            folds.setFromPivot(true);
        }
        folds.reset();
        Comma.useGoldFeatures(testOnGold);
        overall.reportAll(EvaluateDiscrete.evaluateDiscrete(classifier, classifier.getLabeler(), folds));

        // Move the pivot to the next fold before the loop condition is re-checked.
        folds.setPivot(fold + 1);
    }

    overall.printPerformance(System.out);
    return overall;
}
Aggregations