use of edu.illinois.cs.cogcomp.sl.core.SLProblem in project cogcomp-nlp by CogComp.
the class CommaIOManager method readProblem.
/**
 * Builds an {@link SLProblem} from the given sentences. Each comma sequence returned by
 * {@code getCommaSequences} for a sentence is added as an example, paired with a
 * {@link CommaLabelSequence} constructed from that sequence, the lexicon and the labeler.
 *
 * @param sentences the sentences to convert into examples
 * @param lexicon the lexicon handed to sequence extraction and label-sequence construction;
 *        if it allows new features, {@code unknownFeature} is added to it first
 * @param lbjExtractors the extractors handed to {@code getCommaSequences}
 * @param lbjLabeler the labeler handed to each {@link CommaLabelSequence}
 * @return the populated problem
 */
public static SLProblem readProblem(List<CommaSRLSentence> sentences, Lexiconer lexicon, List<Classifier> lbjExtractors, Classifier lbjLabeler) {
    // Register the unknown-feature entry up front, but only while the lexicon is still open.
    if (lexicon.isAllowNewFeatures()) {
        lexicon.addFeature(unknownFeature);
    }
    // Kept from the original as a disabled step:
    // lexicon.addLabel("occupy-zero-label-for-some-reason");

    SLProblem problem = new SLProblem();
    for (CommaSRLSentence currentSentence : sentences) {
        for (CommaSequence sequence : getCommaSequences(currentSentence, lexicon, lbjExtractors)) {
            problem.addExample(sequence, new CommaLabelSequence(sequence, lexicon, lbjLabeler));
        }
    }
    return problem;
}
use of edu.illinois.cs.cogcomp.sl.core.SLProblem in project cogcomp-nlp by CogComp.
the class StructuredCommaClassifier method test.
/**
 * Runs inference on every instance built from the given sentences, reports each predicted
 * label against the gold label, and optionally writes the predictions to a file.
 *
 * <p>When predictions are saved, each instance produces one line per label in the form
 * {@code commaPosition<TAB>label}, followed by an empty line.
 *
 * @param sentences the test set
 * @param predictionFileName location to which to save the predictions of the model. If it is
 *        null, predictions are not saved
 * @return an EvaluateDiscrete object which can provide the performance statistics
 * @throws Exception if the prediction file cannot be opened or written, or if an exception
 *         is propagated from problem construction or inference
 */
public EvaluateDiscrete test(List<CommaSRLSentence> sentences, String predictionFileName) throws Exception {
    // Freeze the lexicon so that reading the test set cannot introduce new features.
    lm.setAllowNewFeatures(false);
    SLProblem sp = CommaIOManager.readProblem(sentences, lm, lbjExtractors, lbjLabeler);
    EvaluateDiscrete evaluator = new EvaluateDiscrete();

    BufferedWriter writer = null;
    if (predictionFileName != null) {
        writer = new BufferedWriter(new FileWriter(predictionFileName));
    }
    try {
        for (int i = 0; i < sp.instanceList.size(); i++) {
            CommaLabelSequence gold = (CommaLabelSequence) sp.goldStructureList.get(i);
            CommaLabelSequence prediction = (CommaLabelSequence) infSolver.getBestStructure(wv, sp.instanceList.get(i));
            for (int j = 0; j < prediction.labels.size(); j++) {
                String predictedTag = prediction.labels.get(j);
                String goldTag = gold.labels.get(j);
                evaluator.reportPrediction(predictedTag, goldTag);
            }
            if (writer != null) {
                CommaSequence instance = (CommaSequence) sp.instanceList.get(i);
                // The original also evaluated sortedCommas.get(i) here and discarded the
                // result; i is an instance index, not a comma index, so that lookup could
                // only fail with IndexOutOfBoundsException. It has been removed.
                for (int j = 0; j < prediction.labels.size(); j++) {
                    int commaPosition = instance.sortedCommas.get(j).commaPosition;
                    // Predicted labels are stored as numeric ids in string form; map back to names.
                    String predictedLabel = lm.getLabelString(Integer.parseInt(prediction.labels.get(j)));
                    writer.write(commaPosition + "\t" + predictedLabel + "\n");
                }
                writer.write("\n");
            }
        }
    } finally {
        // Close (and flush) the prediction file even when inference or writing fails.
        if (writer != null) {
            writer.close();
        }
    }
    return evaluator;
}
use of edu.illinois.cs.cogcomp.sl.core.SLProblem in project cogcomp-nlp by CogComp.
the class StructuredCommaClassifier method train.
/**
 * Trains the model on the given sentences and stores the learnt weight vector in {@code wv}.
 *
 * @param sentences the training set
 * @param modelPath the location to save the learnt model; when it is null the model is not
 *        saved
 * @throws Exception
 */
public void train(List<CommaSRLSentence> sentences, String modelPath) throws Exception {
    // New features may be added to the lexicon while the training data is read.
    lm.setAllowNewFeatures(true);
    SLProblem trainingProblem = CommaIOManager.readProblem(sentences, lm, lbjExtractors, lbjLabeler);

    // Sizes are read only after the problem is built, once the lexicon has been populated.
    int featureCount = lm.getNumOfFeature();
    int labelCount = lm.getNumOfLabels();

    int emissionSize = featureCount * labelCount; // numWordsInVocab*numLabels for emission features
    int priorSize = labelCount; // numLabels for prior on labels
    int transitionSize = labelCount * labelCount; // numLabels*numLabels for transition features
    para.TOTAL_NUMBER_FEATURE = emissionSize + priorSize + transitionSize;

    wv = LearnerFactory.getLearner(infSolver, featureGenerator, para).train(trainingProblem);

    // Saving the model is optional.
    if (modelPath == null) {
        return;
    }
    saveModel(modelPath);
}
Aggregations