Search in sources:

Example 1 with PrintFile

Use of edu.stanford.nlp.io.PrintFile in the CoreNLP project by stanfordnlp.

From the class TestClassifier, method test.

/**
   * Tests the tagger on a file that already contains correct tags (e.g. when init'ing from trees).
   * Writes optional per-word, unknown-word-dictionary, and top-word reports, and fills in
   * {@code numSentences} and {@code confusionMatrix} as side effects.
   * TODO: Add the ability to have a second transformer to transform output back; possibly combine this method
   * with method below
   *
   * @throws IOException if any of the report files cannot be opened or written
   */
private void test() throws IOException {
    numSentences = 0;
    confusionMatrix = new ConfusionMatrix<>();
    PrintFile pf = null;
    PrintFile pf1 = null;
    PrintFile pf3 = null;
    try {
        // Optional report files, each gated by its own config flag.
        if (writeWords) {
            pf = new PrintFile(saveRoot + ".words");
        }
        if (writeUnknDict) {
            pf1 = new PrintFile(saveRoot + ".un.dict");
        }
        if (writeTopWords) {
            pf3 = new PrintFile(saveRoot + ".words.top");
        }
        boolean verboseResults = config.getVerboseResults();
        if (config.getNThreads() != 1) {
            // Multithreaded path: feed sentences into the wrapper and drain finished
            // results as they become available, then drain the remainder after join().
            MulticoreWrapper<List<TaggedWord>, TestSentence> wrapper = new MulticoreWrapper<>(config.getNThreads(), new TestSentenceProcessor(maxentTagger));
            for (List<TaggedWord> taggedSentence : fileRecord.reader()) {
                wrapper.put(taggedSentence);
                while (wrapper.peek()) {
                    processResults(wrapper.poll(), pf, pf1, pf3, verboseResults);
                }
            }
            wrapper.join();
            while (wrapper.peek()) {
                processResults(wrapper.poll(), pf, pf1, pf3, verboseResults);
            }
        } else {
            // Single-threaded path: tag each sentence in place and score it immediately.
            for (List<TaggedWord> taggedSentence : fileRecord.reader()) {
                TestSentence testS = new TestSentence(maxentTagger);
                testS.setCorrectTags(taggedSentence);
                testS.tagSentence(taggedSentence, false);
                processResults(testS, pf, pf1, pf3, verboseResults);
            }
        }
    } finally {
        // Fix: the original closed these only on the happy path, leaking the file
        // handles (and possibly losing buffered output) if tagging threw.
        if (pf != null) {
            pf.close();
        }
        if (pf1 != null) {
            pf1.close();
        }
        if (pf3 != null) {
            pf3.close();
        }
    }
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) PrintFile(edu.stanford.nlp.io.PrintFile) List(java.util.List)

Example 2 with PrintFile

Use of edu.stanford.nlp.io.PrintFile in the CoreNLP project by stanfordnlp.

From the class MaxentTagger, method runTraining.

/**
   * Trains a tagger model.  Logs the invocation, writes the effective training
   * properties to a {@code <model>.props} sidecar file, then trains and saves
   * the model via {@code trainAndSaveModel}.
   *
   * @param config Properties giving parameters for the training run
   * @throws IOException if the props file cannot be written or training I/O fails
   */
private static void runTraining(TaggerConfig config) throws IOException {
    Date now = new Date();
    log.info("## tagger training invoked at " + now + " with arguments:");
    config.dump();
    Timing tim = new Timing();
    // Fix: the original named this local "log", shadowing the class logger used
    // just above; renamed so the two cannot be confused.
    PrintFile propsFile = new PrintFile(config.getModel() + ".props");
    try {
        propsFile.println("## tagger training invoked at " + now + " with arguments:");
        config.dump(propsFile);
    } finally {
        // Fix: close even if config.dump(...) throws; the original leaked the handle.
        propsFile.close();
    }
    trainAndSaveModel(config);
    tim.done("Training POS tagger");
}
Also used : PrintFile(edu.stanford.nlp.io.PrintFile) Timing(edu.stanford.nlp.util.Timing)

Example 3 with PrintFile

Use of edu.stanford.nlp.io.PrintFile in the CoreNLP project by stanfordnlp.

From the class MaxentTagger, method readModelAndInit.

/** This reads the complete tagger from a single model file, and inits
   *  the tagger using a combination of the properties passed in and
   *  parameters from the file.
   *  <p>
   *  <i>Note for the future: This assumes that the TaggerConfig in the file
   *  has already been read and used.  It might be better to refactor
   *  things so that is all done inside this method, but for the moment
   *  it seemed better to leave working code alone [cdm 2008].</i>
   *
   *  @param config The tagger config
   *  @param rf DataInputStream to read from.  It's the caller's job to open and close this stream.
   *  @param printLoading Whether to print a message saying what model file is being loaded and how long it took when finished.
   *  @throws RuntimeIOException if I/O errors or serialization errors
   */
protected void readModelAndInit(Properties config, DataInputStream rf, boolean printLoading) {
    try {
        Timing t = new Timing();
        // Human-readable description of where the model came from; used only in the
        // timing message at the end of this method.
        String source = null;
        if (printLoading) {
            if (config != null) {
                // TODO: "model"
                source = config.getProperty("model");
            }
            if (source == null) {
                source = "data stream";
            }
        }
        // Config embedded in the model file is read first, then overridden by any
        // properties the caller passed in.
        TaggerConfig taggerConfig = TaggerConfig.readConfig(rf);
        if (config != null) {
            taggerConfig.setProperties(config);
        }
        // then init tagger
        init(taggerConfig);
        // Serialized fields are read in a fixed order; do not reorder these reads.
        xSize = rf.readInt();
        ySize = rf.readInt();
        // dict = new Dictionary();  // this method is called in constructor, and it's initialized as empty already
        dict.read(rf);
        if (VERBOSE) {
            log.info("Tagger dictionary read.");
        }
        tags.read(rf);
        readExtractors(rf);
        dict.setAmbClasses(ambClasses, veryCommonWordThresh, tags);
        // numFA counts features per extractor kind, reported below in VERBOSE mode.
        int[] numFA = new int[extractors.size() + extractorsRare.size()];
        int sizeAssoc = rf.readInt();
        // One association map per extractor (common + rare), indexed by fK.num.
        fAssociations = Generics.newArrayList();
        for (int i = 0; i < extractors.size() + extractorsRare.size(); ++i) {
            fAssociations.add(Generics.<String, int[]>newHashMap());
        }
        if (VERBOSE)
            log.info("Reading %d feature keys...%n", sizeAssoc);
        // NOTE(review): pfVP is opened in VERBOSE mode but nothing is ever printed
        // to it in this method -- TODO confirm whether the "pairs.txt" output was
        // removed intentionally, in which case this file can be dropped.
        PrintFile pfVP = null;
        if (VERBOSE) {
            pfVP = new PrintFile("pairs.txt");
        }
        for (int i = 0; i < sizeAssoc; i++) {
            int numF = rf.readInt();
            FeatureKey fK = new FeatureKey();
            fK.read(rf);
            numFA[fK.num]++;
            // TODO: rewrite the writing / reading code to store
            // fAssociations in a cleaner manner?  Only do this when
            // rebuilding all the tagger models anyway.  When we do that, we
            // can get rid of FeatureKey
            Map<String, int[]> fValueAssociations = fAssociations.get(fK.num);
            int[] fTagAssociations = fValueAssociations.get(fK.val);
            if (fTagAssociations == null) {
                // First time we see this (extractor, value) pair: fill the per-tag
                // array with the -1 sentinel before recording known feature numbers.
                fTagAssociations = new int[ySize];
                for (int j = 0; j < ySize; ++j) {
                    fTagAssociations[j] = -1;
                }
                fValueAssociations.put(fK.val, fTagAssociations);
            }
            fTagAssociations[tags.getIndex(fK.tag)] = numF;
        }
        if (VERBOSE) {
            IOUtils.closeIgnoringExceptions(pfVP);
        }
        if (VERBOSE) {
            for (int k = 0; k < numFA.length; k++) {
                log.info("Number of features of kind " + k + ' ' + numFA[k]);
            }
        }
        // Remaining bytes of the stream are the lambda parameters of the model.
        prob = new LambdaSolveTagger(rf);
        if (VERBOSE) {
            log.info("prob read ");
        }
        if (printLoading) {
            t.done(log, "Loading POS tagger from " + source);
        }
    } catch (IOException | ClassNotFoundException e) {
        throw new RuntimeIOException("Error while loading a tagger model (probably missing model file)", e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) PrintFile(edu.stanford.nlp.io.PrintFile) Timing(edu.stanford.nlp.util.Timing)

Example 4 with PrintFile

Use of edu.stanford.nlp.io.PrintFile in the CoreNLP project by stanfordnlp.

From the class Problem, method print.

/**
 * Dumps this problem's data and all of its functions to the given file,
 * for debugging.  Any failure is caught and reported to stdout rather
 * than propagated (preserving the original best-effort behavior).
 *
 * @param filename path of the file to write the dump to
 */
public void print(String filename) {
    try {
        PrintFile pf = new PrintFile(filename);
        try {
            pf.println(" Problem printing ");
            data.print(pf);
            pf.println(" Function printing ");
            for (int i = 0; i < fSize; i++) {
                functions.get(i).print(pf);
            }
        } finally {
            // Fix: the original never closed pf, leaking the handle and
            // potentially losing buffered output.
            pf.close();
        }
    } catch (Exception e) {
        // Fix: the original message dropped all diagnostic detail; keep the
        // catch-and-report behavior but say what went wrong.
        System.out.println("Exception in Problem.print(): " + e);
    }
}
Also used : PrintFile(edu.stanford.nlp.io.PrintFile)

Aggregations

PrintFile (edu.stanford.nlp.io.PrintFile)4 Timing (edu.stanford.nlp.util.Timing)2 RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)1 TaggedWord (edu.stanford.nlp.ling.TaggedWord)1 MulticoreWrapper (edu.stanford.nlp.util.concurrent.MulticoreWrapper)1 List (java.util.List)1