Use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken in project cogcomp-nlp by CogComp.
From the class POSTrain, method trainModels.
/**
 * Trains every component of the tagger from one labeled corpus.
 *
 * <p>Training happens in two phases. First the baseline target and the Mikheev table
 * see each example exactly once. Then the known-word and unknown-word taggers are
 * trained for {@code iter} rounds, each round making a full pass over the data for
 * the known-word tagger followed by a full pass for the unknown-word tagger.
 *
 * @param trainingData The labeled training data
 */
public void trainModels(String trainingData) {
    // Two independent readers over the same training data: one feeds the count-based
    // components and the known-word tagger, the other feeds the unknown-word tagger.
    Parser knownWordParser = new POSBracketToToken(trainingData);
    Parser unknownWordParser = new POSLabeledUnknownWordParser(trainingData);

    // NOTE(review): the static isTraining flags set in this method are never set back
    // to false here -- confirm whether callers rely on that.
    MikheevTable.isTraining = true;
    BaselineTarget.isTraining = true;

    // Phase 1: baseline and mikheev only count, they do not learn, so a single pass
    // over the data is sufficient.
    for (Object example = knownWordParser.next(); example != null; example = knownWordParser.next()) {
        baselineTarget.learn(example);
        mikheevTable.learn(example);
    }
    baselineTarget.doneLearning();
    mikheevTable.doneLearning();

    // Rewind so the known-word tagger starts from the first example again.
    knownWordParser.reset();

    POSTaggerUnknown.isTraining = true;
    POSTaggerKnown.isTraining = true;

    // Phase 2: iterated training of the two taggers.
    for (int round = 0; round < iter; round++) {
        System.out.println("Training round " + round);

        for (Object example = knownWordParser.next(); example != null; example = knownWordParser.next()) {
            taggerKnown.learn(example);
        }
        System.out.println("\tFinished training " + rm.getString("knownName"));

        for (Object example = unknownWordParser.next(); example != null; example = unknownWordParser.next()) {
            taggerUnknown.learn(example);
        }
        System.out.println("\tFinished training " + rm.getString("unknownName"));

        // Rewind both readers for the next round, then close out this round.
        knownWordParser.reset();
        unknownWordParser.reset();
        taggerKnown.doneWithRound();
        taggerUnknown.doneWithRound();
    }

    taggerUnknown.doneLearning();
    taggerKnown.doneLearning();
}
Use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken in project cogcomp-nlp by CogComp.
From the class TestPOSModels, method testAccuracy.
/**
 * Tags every token read from {@code labeledTestFile} and compares the predicted
 * part-of-speech tag with the token's gold label, printing the overall accuracy to
 * standard output when the data is exhausted.
 *
 * <p>If no tokens are read at all, a message saying so is printed instead of an
 * accuracy figure, since the ratio 0/0 is undefined.
 */
public void testAccuracy() {
    // NOTE(review): this instance is never read in this method. It is kept only in
    // case constructing WordForm has side effects -- confirm, and remove it if not.
    WordForm __wordForm = new WordForm();
    Parser labeledParser = new POSBracketToToken(labeledTestFile);
    int numSeen = 0;
    int numEqual = 0;

    Token labeledWord;
    while ((labeledWord = (Token) labeledParser.next()) != null) {
        // Read the gold label before asking the tagger for its prediction.
        String labeledTag = labeledWord.label;
        String testTag = tagger.discreteValue(labeledWord);
        if (labeledTag.equals(testTag)) {
            numEqual++;
        }
        numSeen++;
    }

    if (numSeen == 0) {
        // Without this guard the floating-point division 0/0 below yields NaN and the
        // report would read "NaN%".
        System.out.println("Total accuracy over 0 items: n/a (no labeled tokens read from "
                + labeledTestFile + ")");
        return;
    }

    // 100.0 is a double, so the whole expression is evaluated in floating point.
    double accuracy = 100.0 * numEqual / numSeen;
    System.out.println("Total accuracy over " + numSeen + " items: " + String.format("%.2f", accuracy) + "%");
}
Use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken in project cogcomp-nlp by CogComp.
From the class TestPOS, method main.
/**
 * Runs the part-of-speech tagger over the configured test data and hands the
 * comparison against the gold labels to {@code TestDiscrete.testDiscrete}.
 *
 * <p>The test file is the value of the {@code testData} key in the default
 * configuration returned by {@code POSConfigurator}.
 *
 * @param args The command line parameters; not read by this method.
 */
public static void main(String[] args) {
    // The test file comes from configuration rather than from the command line.
    String testingFile = new POSConfigurator().getDefaultConfig().getString("testData");

    // Constructed in the same order in which they are passed below.
    TestDiscrete tester = new TestDiscrete();
    POSTagger tagger = new POSTagger();
    POSLabel goldLabeler = new POSLabel();
    POSBracketToToken testParser = new POSBracketToToken(testingFile);

    // The trailing (true, 0) arguments are passed through unchanged; see
    // TestDiscrete.testDiscrete for their meaning.
    TestDiscrete.testDiscrete(tester, tagger, goldLabeler, testParser, true, 0);
}
Aggregations