Use of edu.stanford.nlp.io.PrintFile in project CoreNLP by stanfordnlp.
The class TestClassifier, method test().
/**
 * Test on a file already containing correct tags, when initializing from trees.
 * TODO: Add the ability to have a second transformer to transform output back; possibly combine this method
 * with the method below.
 */
private void test() throws IOException {
  numSentences = 0;
  confusionMatrix = new ConfusionMatrix<>();

  // Optional diagnostic output files
  PrintFile pf = null;
  PrintFile pf1 = null;
  PrintFile pf3 = null;
  if (writeWords) pf = new PrintFile(saveRoot + ".words");
  if (writeUnknDict) pf1 = new PrintFile(saveRoot + ".un.dict");
  if (writeTopWords) pf3 = new PrintFile(saveRoot + ".words.top");

  boolean verboseResults = config.getVerboseResults();

  if (config.getNThreads() != 1) {
    // Multithreaded tagging: feed sentences in, draining finished results as we go
    MulticoreWrapper<List<TaggedWord>, TestSentence> wrapper = new MulticoreWrapper<>(config.getNThreads(), new TestSentenceProcessor(maxentTagger));
    for (List<TaggedWord> taggedSentence : fileRecord.reader()) {
      wrapper.put(taggedSentence);
      while (wrapper.peek()) {
        processResults(wrapper.poll(), pf, pf1, pf3, verboseResults);
      }
    }
    wrapper.join();
    while (wrapper.peek()) {
      processResults(wrapper.poll(), pf, pf1, pf3, verboseResults);
    }
  } else {
    // Single-threaded tagging
    for (List<TaggedWord> taggedSentence : fileRecord.reader()) {
      TestSentence testS = new TestSentence(maxentTagger);
      testS.setCorrectTags(taggedSentence);
      testS.tagSentence(taggedSentence, false);
      processResults(testS, pf, pf1, pf3, verboseResults);
    }
  }

  if (pf != null) pf.close();
  if (pf1 != null) pf1.close();
  if (pf3 != null) pf3.close();
}
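For reference, the multithreaded branch above follows MulticoreWrapper's standard feed-and-drain pattern: put() submits work, peek()/poll() drain finished results as they become available, and a final join() plus one last drain collects the stragglers. Below is a minimal, self-contained sketch of that same pattern; the UpperCaser processor and WrapperDemo class are invented for illustration and are not part of CoreNLP.

import edu.stanford.nlp.util.concurrent.MulticoreWrapper;
import edu.stanford.nlp.util.concurrent.ThreadsafeProcessor;

// Hypothetical stateless processor: uppercases each input string.
class UpperCaser implements ThreadsafeProcessor<String, String> {
  @Override
  public String process(String in) {
    return in.toUpperCase();
  }

  @Override
  public ThreadsafeProcessor<String, String> newInstance() {
    return this; // stateless, so one instance can serve all threads
  }
}

public class WrapperDemo {
  public static void main(String[] args) {
    MulticoreWrapper<String, String> wrapper = new MulticoreWrapper<>(4, new UpperCaser());
    for (String s : new String[] { "time", "flies", "like", "an", "arrow" }) {
      wrapper.put(s);            // submit work
      while (wrapper.peek()) {   // drain any results that have finished
        System.out.println(wrapper.poll());
      }
    }
    wrapper.join();              // block until all submitted work is done
    while (wrapper.peek()) {     // drain whatever finished during join()
      System.out.println(wrapper.poll());
    }
  }
}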
Use of edu.stanford.nlp.io.PrintFile in project CoreNLP by stanfordnlp.
The class MaxentTagger, method runTraining().
/**
 * Trains a tagger model.
 *
 * @param config Properties giving parameters for the training run
 */
private static void runTraining(TaggerConfig config) throws IOException {
  Date now = new Date();
  log.info("## tagger training invoked at " + now + " with arguments:");
  config.dump();
  Timing tim = new Timing();

  // Save the training properties next to the model in a ".props" sidecar file
  PrintFile props = new PrintFile(config.getModel() + ".props");
  props.println("## tagger training invoked at " + now + " with arguments:");
  config.dump(props);
  props.close();

  trainAndSaveModel(config);
  tim.done("Training POS tagger");
}
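The ".props" sidecar above is plain PrintFile usage: open, println, close. PrintFile appears to be a PrintStream opened on a named file, so the same write can be done with try-with-resources, which closes the file even if a write throws. A minimal sketch, assuming PrintFile's String constructor throws IOException as the enclosing method's signature suggests; the class and file name are invented for illustration.

import java.io.IOException;
import java.util.Date;
import edu.stanford.nlp.io.PrintFile;

public class PropsSidecarDemo {
  public static void main(String[] args) throws IOException {
    // "my-model.tagger.props" is an illustrative file name
    try (PrintFile props = new PrintFile("my-model.tagger.props")) {
      props.println("## tagger training invoked at " + new Date() + " with arguments:");
    } // closed (and flushed) here, even on exception
  }
}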
Use of edu.stanford.nlp.io.PrintFile in project CoreNLP by stanfordnlp.
The class MaxentTagger, method readModelAndInit().
/** This reads the complete tagger from a single model file, and inits
 * the tagger using a combination of the properties passed in and
 * parameters from the file.
 * <p>
 * <i>Note for the future: This assumes that the TaggerConfig in the file
 * has already been read and used. It might be better to refactor
 * things so that is all done inside this method, but for the moment
 * it seemed better to leave working code alone [cdm 2008].</i>
 *
 * @param config The tagger config
 * @param rf DataInputStream to read from. It's the caller's job to open and close this stream.
 * @param printLoading Whether to print a message saying what model file is being loaded and how long loading took when finished.
 * @throws RuntimeIOException if there are I/O or serialization errors
 */
protected void readModelAndInit(Properties config, DataInputStream rf, boolean printLoading) {
  try {
    Timing t = new Timing();
    String source = null;
    if (printLoading) {
      if (config != null) {
        // TODO: "model"
        source = config.getProperty("model");
      }
      if (source == null) {
        source = "data stream";
      }
    }
    TaggerConfig taggerConfig = TaggerConfig.readConfig(rf);
    if (config != null) {
      taggerConfig.setProperties(config);
    }
    // then init tagger
    init(taggerConfig);

    xSize = rf.readInt();
    ySize = rf.readInt();
    // dict = new Dictionary(); // this method is called in constructor, and it's initialized as empty already
    dict.read(rf);
    if (VERBOSE) {
      log.info("Tagger dictionary read.");
    }

    tags.read(rf);
    readExtractors(rf);
    dict.setAmbClasses(ambClasses, veryCommonWordThresh, tags);

    int[] numFA = new int[extractors.size() + extractorsRare.size()];
    int sizeAssoc = rf.readInt();
    fAssociations = Generics.newArrayList();
    for (int i = 0; i < extractors.size() + extractorsRare.size(); ++i) {
      fAssociations.add(Generics.<String, int[]>newHashMap());
    }
    if (VERBOSE) {
      log.info(String.format("Reading %d feature keys...%n", sizeAssoc));
    }
    PrintFile pfVP = null;
    if (VERBOSE) {
      pfVP = new PrintFile("pairs.txt"); // debug output; nothing is written to it in the loop below
    }
    for (int i = 0; i < sizeAssoc; i++) {
      int numF = rf.readInt();
      FeatureKey fK = new FeatureKey();
      fK.read(rf);
      numFA[fK.num]++;
      // TODO: rewrite the writing / reading code to store
      // fAssociations in a cleaner manner? Only do this when
      // rebuilding all the tagger models anyway. When we do that, we
      // can get rid of FeatureKey
      Map<String, int[]> fValueAssociations = fAssociations.get(fK.num);
      int[] fTagAssociations = fValueAssociations.get(fK.val);
      if (fTagAssociations == null) {
        fTagAssociations = new int[ySize];
        for (int j = 0; j < ySize; ++j) {
          fTagAssociations[j] = -1;
        }
        fValueAssociations.put(fK.val, fTagAssociations);
      }
      fTagAssociations[tags.getIndex(fK.tag)] = numF;
    }
    if (VERBOSE) {
      IOUtils.closeIgnoringExceptions(pfVP);
      for (int k = 0; k < numFA.length; k++) {
        log.info("Number of features of kind " + k + ' ' + numFA[k]);
      }
    }

    prob = new LambdaSolveTagger(rf);
    if (VERBOSE) {
      log.info("prob read");
    }
    if (printLoading) {
      t.done(log, "Loading POS tagger from " + source);
    }
  } catch (IOException | ClassNotFoundException e) {
    throw new RuntimeIOException("Error while loading a tagger model (probably missing model file)", e);
  }
}
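The loop above populates fAssociations as one map per extractor: each map takes an extracted string value to an int[ySize] of feature numbers, with -1 marking tags that have no feature for that (extractor, value) pair. A small self-contained sketch of that layout follows, with invented numbers; FAssociationsDemo is not a CoreNLP class.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FAssociationsDemo {
  public static void main(String[] args) {
    int ySize = 3; // illustrative tag-set size
    List<Map<String, int[]>> fAssociations = new ArrayList<>();
    fAssociations.add(new HashMap<>()); // slot for extractor 0

    // Simulate reading one FeatureKey: extractor 0, value "ing", tag index 1 -> feature #42
    int[] fTagAssociations = new int[ySize];
    Arrays.fill(fTagAssociations, -1); // -1 means "no feature for this tag"
    fTagAssociations[1] = 42;
    fAssociations.get(0).put("ing", fTagAssociations);

    // Lookup at tagging time: which feature fires for value "ing" with tag index 1?
    System.out.println(fAssociations.get(0).get("ing")[1]); // prints 42
  }
}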
Use of edu.stanford.nlp.io.PrintFile in project CoreNLP by stanfordnlp.
The class Problem, method print().
public void print(String filename) {
  try {
    PrintFile pf = new PrintFile(filename);
    pf.println(" Problem printing ");
    data.print(pf);
    pf.println(" Function printing ");
    for (int i = 0; i < fSize; i++) {
      functions.get(i).print(pf);
    }
    pf.close(); // close the file so buffered output is flushed
  } catch (Exception e) {
    System.out.println("Exception in Problem.print()");
  }
}
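Because PrintFile closes like any PrintStream, the method above can also be written with try-with-resources, so the file is closed even if one of the print calls throws mid-way. A sketch under that assumption, keeping the fields data, functions, and fSize from the snippet:

public void print(String filename) {
  // assumes PrintFile is AutoCloseable via PrintStream
  try (PrintFile pf = new PrintFile(filename)) {
    pf.println(" Problem printing ");
    data.print(pf);
    pf.println(" Function printing ");
    for (int i = 0; i < fSize; i++) {
      functions.get(i).print(pf);
    }
  } catch (Exception e) {
    System.out.println("Exception in Problem.print()");
  }
}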