use of edu.illinois.cs.cogcomp.lbjava.classify.Classifier in project cogcomp-nlp by CogComp.
the class ClassifierComparison method main.
/**
 * Runs the full comma-classifier comparison and prints every result to standard output.
 *
 * <p>In order: three local cross-validation runs (labelled GOLD GOLD, GOLD AUTO and AUTO AUTO),
 * two structured cross-validation runs, the two Bayraktar baseline evaluations, the
 * train-vs-test overfitting comparison, and finally the constrained-classifier report.
 *
 * @param args command-line arguments; not read
 * @throws Exception propagated from any of the experiment helpers
 */
public static void main(String[] args) throws Exception {
    // Hyper-parameters shared by all three local cross-validation runs.
    final double learningRate = 0.003;
    final double threshold = 0;

    PrettyCorpusReader corpusReader =
            new PrettyCorpusReader(CommaProperties.getInstance().getCommaLabeledDataFile());
    CommaParser parser = new CommaParser(corpusReader.getSentences(), Ordering.ORDERED, true);

    // --- Local classifier ---
    System.out.println("GOLD GOLD");
    localCVal(true, true, parser, 250, learningRate, threshold, 2.0, false);

    System.out.println("GOLD AUTO");
    localCVal(true, false, parser, 200, learningRate, threshold, 2.0, false);

    System.out.println("AUTO AUTO");
    localCVal(false, false, parser, 250, learningRate, threshold, 3.5, false);

    // --- Structured classifier: both runs share the same extractor list and labeler ---
    List<Classifier> extractors = new ArrayList<>();
    extractors.add(new LocalCommaClassifier().getExtractor());
    Classifier labeler = new LocalCommaClassifier().getLabeler();

    System.out.println("STRUCTURED GOLD");
    structuredCVal(new StructuredCommaClassifier(extractors, labeler), parser, true, false);

    System.out.println("STRUCTURED AUTO");
    structuredCVal(new StructuredCommaClassifier(extractors, labeler), parser, false, false);

    // --- Bayraktar baseline ---
    System.out.println("BAYRAKTAR GOLD");
    getBayraktarBaselinePerformance(parser, true).printPerformance(System.out);

    System.out.println("BAYRAKTAR AUTO");
    getBayraktarBaselinePerformance(parser, false).printPerformance(System.out);

    // --- Follow-up analyses ---
    reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting(parser, false);
    printConstrainedClassifierPerformance(parser);
}
use of edu.illinois.cs.cogcomp.lbjava.classify.Classifier in project cogcomp-nlp by CogComp.
the class ClassifierComparison method reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting.
/**
 * Prints train-set and test-set cross-validation scores for the structured and the local comma
 * classifier so the two can be compared side by side.
 *
 * <p>Structured's higher performance on the train set and lower performance on test set is
 * indicative of overfitting.
 *
 * <p>The same {@code StructuredCommaClassifier} instance is passed to both structured runs. The
 * local runs pass {@code useGoldFeatures} for both of {@code localCVal}'s leading boolean
 * arguments. Each printed number is element {@code [2]} of {@code getOverallStats()}
 * (presumably the overall F1 -- confirm against {@code EvaluateDiscrete}).
 *
 * @param parser source of the examples handed to the cross-validation helpers
 * @param useGoldFeatures forwarded unchanged to {@code structuredCVal} and {@code localCVal}
 * @throws Exception propagated from the cross-validation helpers
 */
public static void reasonForBelievingThatStructuredIsPerformingWorseDueToOverfitting(Parser parser, boolean useGoldFeatures) throws Exception {
    List<Classifier> lbjExtractors = new ArrayList<>();
    lbjExtractors.add(new LocalCommaClassifier().getExtractor());
    Classifier lbjLabeler = new LocalCommaClassifier().getLabeler();
    StructuredCommaClassifier structured = new StructuredCommaClassifier(lbjExtractors, lbjLabeler);

    // Hyper-parameters for the local classifier runs.
    final int learningRounds = 250;
    final double learningRate = 0.003;
    final double threshold = 0;
    final double thickness = 3.5;

    // The last argument of each *CVal call selects the train set (true) or the test set (false),
    // as reflected by the variable names and the printed labels below.
    EvaluateDiscrete structuredPerformanceOnTrainSet = structuredCVal(structured, parser, useGoldFeatures, true);
    EvaluateDiscrete structuredPerformanceOnTestSet = structuredCVal(structured, parser, useGoldFeatures, false);
    EvaluateDiscrete localPerformanceOnTrainSet = localCVal(useGoldFeatures, useGoldFeatures, parser, learningRounds, learningRate, threshold, thickness, true);
    EvaluateDiscrete localPerformanceOnTestSet = localCVal(useGoldFeatures, useGoldFeatures, parser, learningRounds, learningRate, threshold, thickness, false);

    System.out.println("Structured performance on train set " + structuredPerformanceOnTrainSet.getOverallStats()[2]);
    System.out.println("Structured performance on test set " + structuredPerformanceOnTestSet.getOverallStats()[2]);
    System.out.println("Local performance on train set " + localPerformanceOnTrainSet.getOverallStats()[2]);
    // Label previously read "Localperformance" (missing space), unlike the three lines above.
    System.out.println("Local performance on test set " + localPerformanceOnTestSet.getOverallStats()[2]);
}
use of edu.illinois.cs.cogcomp.lbjava.classify.Classifier in project cogcomp-nlp by CogComp.
the class BIOTester method main.
/**
 * The command line program simply instantiates an object of this class and
 * calls its {@link #test()} method, printing the resulting performance to standard output.
 *
 * <p>Exactly four arguments are required: the classifier name, the labeler name, the parser
 * name and the test file. With any other argument count a usage message is written to
 * standard error and the JVM exits with status 1.
 *
 * @param args {@code <classifier> <labeler> <parser> <test file>}
 **/
public static void main(String[] args) {
    // Validate the argument count explicitly rather than provoking and catching an exception.
    if (args == null || args.length != 4) {
        // The previous message repeated the package prefix twice in the class name.
        System.err.println("usage: java edu.illinois.cs.cogcomp.lbjava.nlp.seg.BIOTester <classifier> <labeler> <parser> <test file>");
        System.exit(1);
        return;
    }

    String classifierName = args[0];
    String labelerName = args[1];
    String parserName = args[2];
    String inputFile = args[3];

    Classifier classifier = ClassUtils.getClassifier(classifierName);
    Classifier labeler = ClassUtils.getClassifier(labelerName);
    // The parser is looked up by name and built through its single-String constructor.
    Parser parser = ClassUtils.getParser(parserName, new Class[] { String.class }, new String[] { inputFile });
    new BIOTester(classifier, labeler, parser).test().printPerformance(System.out);
}
use of edu.illinois.cs.cogcomp.lbjava.classify.Classifier in project cogcomp-nlp by CogComp.
the class ClassifierComparison method printConstrainedClassifierPerformance.
/**
 * Runs a 5-fold sequential cross validation of the local comma classifier and prints, for the
 * unconstrained learner and for each constrained classifier, element {@code [2]} of the
 * accumulated {@code getOverallStats()} (presumably the overall F1 -- confirm against
 * {@code EvaluateDiscrete}).
 *
 * <p>For every fold the learner is reset, trained for 250 rounds on the fold parser with
 * {@code setFromPivot(false)}, and saved. The learner and each constrained classifier are then
 * evaluated on the fold parser with {@code setFromPivot(true)}, using the learner's labeler as
 * the oracle.
 *
 * @param parser source of examples; it is reset before being wrapped in a {@code FoldParser}
 */
public static void printConstrainedClassifierPerformance(Parser parser) {
    List<Pair<Classifier, EvaluateDiscrete>> classifiers = new ArrayList<>();
    LocalCommaClassifier learner = new LocalCommaClassifier();
    EvaluateDiscrete unconstrainedPerformance = new EvaluateDiscrete();
    learner.setLTU(new SparseAveragedPerceptron(0.003, 0, 3.5));

    // Each constrained classifier is paired with its own accumulator across folds.
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new SubstitutePairConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new LocativePairConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new ListCommasConstrainedCommaClassifier(), new EvaluateDiscrete()));
    classifiers.add(new Pair<Classifier, EvaluateDiscrete>(new OxfordCommaConstrainedCommaClassifier(), new EvaluateDiscrete()));

    int k = 5;
    parser.reset();
    FoldParser foldParser = new FoldParser(parser, k, SplitPolicy.sequential, 0, false);

    // The loop update advances the fold parser's pivot to the next fold.
    for (int i = 0; i < k; foldParser.setPivot(++i)) {
        // Training phase for this fold.
        foldParser.setFromPivot(false);
        foldParser.reset();
        learner.forget();
        BatchTrainer bt = new BatchTrainer(learner, foldParser);
        Lexicon lexicon = bt.preExtract(null);
        learner.setLexicon(lexicon);
        bt.train(250);
        // NOTE(review): presumably the constrained classifiers read the saved model -- confirm.
        learner.save();

        // Evaluation phase for this fold.
        foldParser.setFromPivot(true);
        foldParser.reset();
        unconstrainedPerformance.reportAll(EvaluateDiscrete.evaluateDiscrete(learner, learner.getLabeler(), foldParser));
        for (Pair<Classifier, EvaluateDiscrete> pair : classifiers) {
            foldParser.reset();
            pair.getSecond().reportAll(EvaluateDiscrete.evaluateDiscrete(pair.getFirst(), learner.getLabeler(), foldParser));
        }
    }

    // The unconstrained baseline used to be accumulated but never reported; print it first so
    // the constrained scores below have something to be compared against.
    System.out.println("Unconstrained " + unconstrainedPerformance.getOverallStats()[2]);
    for (Pair<Classifier, EvaluateDiscrete> pair : classifiers) {
        System.out.println(pair.getFirst().name + " " + pair.getSecond().getOverallStats()[2]);
    }
}
use of edu.illinois.cs.cogcomp.lbjava.classify.Classifier in project cogcomp-nlp by CogComp.
the class SegmentTagPlain method main.
/**
 * Tags the words of a plain-text file with a BIO word classifier and prints the text with each
 * predicted segment wrapped as {@code [TYPE word word ] }, one sentence per output line.
 *
 * <p>Two or three arguments are accepted: the word classifier name, the input file, and an
 * optional parser name. With any other argument count a usage message is written to standard
 * error and the JVM exits with status 1. When no parser name is given, a
 * {@code PlainToTokenParser} over the sentence- and word-split input is used; otherwise the
 * named parser is built around the same word splitter.
 *
 * @param args {@code <word classifier> <input file> [<parser>]}
 */
public static void main(String[] args) {
    // Validate the argument count explicitly rather than provoking and catching an exception.
    if (args == null || args.length < 2 || args.length > 3) {
        // The previous message repeated the package prefix twice in the class name.
        System.err.println("usage: java edu.illinois.cs.cogcomp.lbjava.nlp.seg.SegmentTagPlain <word classifier> " + "<input file> \\\n" + " [<parser>]");
        System.exit(1);
        return;
    }

    String taggerName = args[0];
    String inputFile = args[1];
    String parserName = args.length > 2 ? args[2] : null;

    Classifier tagger = ClassUtils.getClassifier(taggerName);
    Parser parser;
    if (parserName == null) {
        parser = new PlainToTokenParser(new WordSplitter(new SentenceSplitter(inputFile)));
    } else {
        parser = ClassUtils.getParser(parserName, new Class[] { Parser.class }, new Parser[] { new WordSplitter(new SentenceSplitter(inputFile)) });
    }

    // Label predicted for the preceding word of the same sentence ("" at a sentence start).
    String previous = "";
    for (Word w = (Word) parser.next(); w != null; w = (Word) parser.next()) {
        String prediction = tagger.discreteValue(w);
        boolean lastInSentence = w.next == null;

        // Open a segment on every B- label, and on an I- label that does not continue the
        // previous word's segment type.
        boolean opensSegment = prediction.startsWith("B-")
                || (prediction.startsWith("I-") && !previous.endsWith(prediction.substring(2)));
        if (opensSegment) {
            System.out.print("[" + prediction.substring(2) + " ");
        }

        System.out.print(w.form + " ");

        if (!prediction.equals("O")) {
            // Close the segment at the end of the sentence, or when the next word is outside
            // any segment, begins a new one, or carries a different segment type.
            boolean closesSegment = lastInSentence;
            if (!closesSegment) {
                // Classify the following word once instead of once per sub-condition.
                String nextPrediction = tagger.discreteValue(w.next);
                closesSegment = nextPrediction.equals("O")
                        || nextPrediction.startsWith("B-")
                        || !nextPrediction.endsWith(prediction.substring(2));
            }
            if (closesSegment) {
                System.out.print("] ");
            }
        }

        if (lastInSentence) {
            System.out.println();
        }

        // A segment is always closed at the end of a sentence, so the next sentence must not
        // treat a leading I- label as a continuation; otherwise it would get "]" without "[".
        previous = lastInSentence ? "" : prediction;
    }
}
Aggregations