Example 51 with RuntimeIOException

Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

From the class CorpusDictionary, method readDict.

private static Set<String> readDict(String filename, boolean normalize) {
    Set<String> word = Generics.newHashSet();
    logger.info("Loading " + (normalize ? "normalized" : "unnormalized") + " dictionary from " + filename);
    try (InputStream is = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename)) {
        BufferedReader wordDetectorReader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
        int i = 0;
        for (String wordDetectorLine; (wordDetectorLine = wordDetectorReader.readLine()) != null; ) {
            i++;
            // String[] fields = wordDetectorLine.split("\t");
            // logger.debug("DEBUG: "+filename+" "+wordDetectorLine);
            int origLeng = wordDetectorLine.length();
            wordDetectorLine = wordDetectorLine.trim();
            int newLeng = wordDetectorLine.length();
            if (newLeng != origLeng) {
                EncodingPrintWriter.err.println("Line " + i + " of " + filename + " has leading/trailing whitespace: |" + wordDetectorLine + "|", "UTF-8");
            }
            if (newLeng == 0) {
                EncodingPrintWriter.err.println("Line " + i + " of " + filename + " is empty", "UTF-8");
            } else {
                if (normalize) {
                    wordDetectorLine = ChineseUtils.normalize(wordDetectorLine, ChineseUtils.ASCII, ChineseUtils.ASCII, ChineseUtils.NORMALIZE);
                }
                word.add(wordDetectorLine);
            }
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    return word;
}
Also used: RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)
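
The checked IOException is rethrown here as the unchecked RuntimeIOException, so readDict needs no throws clause and callers decide whether to handle a missing dictionary. A minimal sketch of that caller-side pattern; the loadDictionary helper and the file name are hypothetical stand-ins, not CoreNLP API:

import edu.stanford.nlp.io.RuntimeIOException;

public class DictionaryLoadDemo {
    public static void main(String[] args) {
        try {
            loadDictionary("dict/words.txt");
        } catch (RuntimeIOException e) {
            // The original IOException is carried along as the cause.
            System.err.println("Dictionary could not be read: " + e.getCause());
        }
    }

    // Hypothetical helper standing in for whatever CoreNLP code ends up calling readDict.
    private static void loadDictionary(String filename) {
        throw new RuntimeIOException(new java.io.IOException("missing file: " + filename));
    }
}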

Example 52 with RuntimeIOException

Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

From the class CorpusChar, method readDict.

private static Map<String, Set<String>> readDict(String filename) {
    Map<String, Set<String>> char_dict;
    try {
        BufferedReader detectorReader = IOUtils.readerFromString(filename, "UTF-8");
        char_dict = Generics.newHashMap();
        // logger.debug("DEBUG: in CorpusChar readDict");
        for (String detectorLine; (detectorLine = detectorReader.readLine()) != null; ) {
            String[] fields = detectorLine.split("\t");
            String tag = fields[0];
            Set<String> chars = char_dict.get(tag);
            if (chars == null) {
                chars = Generics.newHashSet();
                char_dict.put(tag, chars);
            }
            // logger.debug("DEBUG: CorpusChar: "+filename+" "+fields[1]);
            chars.add(fields[1]);
        }
        detectorReader.close();
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    logger.info("Loading character dictionary file from " + filename + " [done].");
    return char_dict;
}
Also used: RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)
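
Each dictionary line is expected to hold a tag and a character separated by a tab. A minimal, self-contained sketch of the same grouping logic on a few invented sample lines (not real CoreNLP dictionary content):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class CharDictDemo {
    public static void main(String[] args) throws IOException {
        // Invented sample lines in the same "tag<TAB>character" layout the reader expects.
        String sample = "NR\t中\nNR\t国\nVV\t是\n";
        Map<String, Set<String>> charDict = new HashMap<>();
        BufferedReader reader = new BufferedReader(new StringReader(sample));
        for (String line; (line = reader.readLine()) != null; ) {
            String[] fields = line.split("\t");
            // Group the character (field 1) under its tag (field 0).
            charDict.computeIfAbsent(fields[0], k -> new HashSet<>()).add(fields[1]);
        }
        System.out.println(charDict);  // e.g. {NR=[中, 国], VV=[是]} (map order may vary)
    }
}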

Example 53 with RuntimeIOException

Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

From the class MaxentTagger, method readModelAndInit.

/**
 * This reads the complete tagger from a single model file, and inits
 *  the tagger using a combination of the properties passed in and
 *  parameters from the file.
 *  <br>
 *  <i>Note for the future: This assumes that the TaggerConfig in the file
 *  has already been read and used.  It might be better to refactor
 *  things so that is all done inside this method, but for the moment
 *  it seemed better to leave working code alone [cdm 2008].</i>
 *
 *  @param config The tagger config
 *  @param rf DataInputStream to read from.  It's the caller's job to open and close this stream.
 *  @param printLoading Whether to print a message saying what model file is being loaded and how long it took when finished.
 *  @throws RuntimeIOException if there are I/O errors or serialization errors
 */
protected void readModelAndInit(Properties config, DataInputStream rf, boolean printLoading) {
    try {
        Timing t = new Timing();
        String source = null;
        if (printLoading) {
            if (config != null) {
                // TODO: "model"
                source = config.getProperty("model");
            }
            if (source == null) {
                source = "data stream";
            }
        }
        TaggerConfig taggerConfig = TaggerConfig.readConfig(rf);
        if (config != null) {
            taggerConfig.setProperties(config);
        }
        // then init tagger
        init(taggerConfig);
        xSize = rf.readInt();
        ySize = rf.readInt();
        // dict = new Dictionary();  // this method is called in constructor, and it's initialized as empty already
        dict.read(rf);
        if (VERBOSE) {
            log.info("Tagger dictionary read.");
        }
        tags.read(rf);
        readExtractors(rf);
        dict.setAmbClasses(ambClasses, veryCommonWordThresh, tags);
        int[] numFA = new int[extractors.size() + extractorsRare.size()];
        int sizeAssoc = rf.readInt();
        fAssociations = Generics.newArrayList();
        for (int i = 0; i < extractors.size() + extractorsRare.size(); ++i) {
            fAssociations.add(Generics.newHashMap());
        }
        if (VERBOSE)
            log.infof("Reading %d feature keys...%n", sizeAssoc);
        PrintFile pfVP = null;
        if (VERBOSE) {
            pfVP = new PrintFile("pairs.txt");
        }
        // reused in for loop but not stored. just a temp variable
        FeatureKey fK = new FeatureKey();
        for (int i = 0; i < sizeAssoc; i++) {
            int numF = rf.readInt();
            fK.read(rf);
            numFA[fK.num]++;
            if (VERBOSE) {
                String eName = (fK.num < extractors.size() ? extractors.get(fK.num) : extractorsRare.get(fK.num - extractors.size())).toString();
                Map<String, int[]> valFeats = fAssociations.get(fK.num);
                pfVP.print(eName);
                pfVP.print(' ');
                pfVP.print(fK);
                pfVP.print(' ');
                if (valFeats != null) {
                    pfVP.print(valFeats.keySet());
                }
                pfVP.println();
            }
            // TODO: rewrite the writing / reading code to store fAssociations in a cleaner manner?
            // Only do this when rebuilding all the tagger models anyway.  When we do that, we can get rid of FeatureKey
            Map<String, int[]> fValueAssociations = fAssociations.get(fK.num);
            int[] fTagAssociations = fValueAssociations.get(fK.val);
            if (fTagAssociations == null) {
                fTagAssociations = new int[ySize];
                for (int j = 0; j < ySize; ++j) {
                    fTagAssociations[j] = -1;
                }
                fValueAssociations.put(fK.val, fTagAssociations);
            }
            fTagAssociations[tags.getIndex(fK.tag)] = numF;
        }
        if (VERBOSE) {
            IOUtils.closeIgnoringExceptions(pfVP);
        }
        if (VERBOSE) {
            for (int k = 0; k < numFA.length; k++) {
                log.info("Number of features of kind " + k + ' ' + (k < extractors.size() ? extractors.get(k) : extractorsRare.get(k - extractors.size())) + ": " + numFA[k]);
            }
        }
        prob = new LambdaSolveTagger(rf);
        if (VERBOSE) {
            log.info("prob read ");
        }
        if (printLoading) {
            t.done(log, "Loading POS tagger from " + source);
        }
    } catch (IOException | ClassNotFoundException e) {
        throw new RuntimeIOException("Error while loading a tagger model (probably missing model file)", e);
    }
}
Also used: RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) PrintFile(edu.stanford.nlp.io.PrintFile) Timing(edu.stanford.nlp.util.Timing)
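
readModelAndInit is protected; in practice it is reached through the public MaxentTagger constructors, which open the model file and delegate to it. A short sketch of that entry point, catching the RuntimeIOException thrown when the model cannot be loaded (the model path shown is the commonly distributed English one and may differ between CoreNLP releases):

import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

public class TaggerLoadDemo {
    public static void main(String[] args) {
        try {
            // The constructor reads the model file and ends up in readModelAndInit.
            MaxentTagger tagger = new MaxentTagger(
                    "edu/stanford/nlp/models/pos-tagger/english-left3words-distsim.tagger");
            System.out.println(tagger.tagString("The tagger model loaded correctly ."));
        } catch (RuntimeIOException e) {
            System.err.println("Could not load POS tagger model: " + e.getMessage());
        }
    }
}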

Example 54 with RuntimeIOException

Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

From the class TagCount, method readTagCount.

/**
 * A TagCount object's fields are read from the file. They are read from
 *  the current position and the file is not closed afterwards.
 */
public static TagCount readTagCount(DataInputStream rf, Interner<String> interner) {
    try {
        TagCount tc = new TagCount();
        int numTags = rf.readInt();
        tc.map = Generics.newHashMap(numTags);
        for (int i = 0; i < numTags; i++) {
            String tag = rf.readUTF();
            int count = rf.readInt();
            if (tag.equals(NULL_SYMBOL))
                tag = null;
            else
                tag = interner.intern(tag);
            tc.map.put(tag, count);
        }
        tc.getTagsCache = tc.map.keySet().toArray(new String[tc.map.keySet().size()]);
        tc.sumCache = tc.calculateSumCache();
        return tc;
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}
Also used: RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) IOException(java.io.IOException)
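
The reads above imply the on-disk layout: an int giving the number of tags, then a UTF string and an int count per tag. A hypothetical writer that emits exactly that layout for illustration; it is not TagCount's own serialization code, and NULL_SYMBOL here is just a stand-in for the real sentinel:

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Map;

public class TagCountWriterSketch {
    // Stand-in for whatever sentinel TagCount uses to represent a null tag.
    private static final String NULL_SYMBOL = "<<NULL>>";

    static void writeTagCounts(Map<String, Integer> counts, String filename) throws IOException {
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(filename))) {
            out.writeInt(counts.size());                 // number of tags
            for (Map.Entry<String, Integer> e : counts.entrySet()) {
                String tag = (e.getKey() == null) ? NULL_SYMBOL : e.getKey();
                out.writeUTF(tag);                       // tag string
                out.writeInt(e.getValue());              // count for that tag
            }
        }
    }
}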

Example 55 with RuntimeIOException

Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

From the class TTags, method save.

protected void save(String filename, Map<String, Set<String>> tagTokens) {
    try {
        DataOutputStream out = IOUtils.getDataOutputStream(filename);
        save(out, tagTokens);
        out.close();
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}
Also used: RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) DataOutputStream(java.io.DataOutputStream) IOException(java.io.IOException)
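
Because out.close() sits after save(out, tagTokens) rather than in a finally block, an exception from the write would leak the stream. A small sketch of the same wrap-and-rethrow pattern using try-with-resources; the saveCounts helper and its payload are invented for illustration, this is not the CoreNLP source:

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import java.io.DataOutputStream;
import java.io.IOException;

public class SafeSaveDemo {
    // Same wrap-and-rethrow pattern as TTags.save, but the stream is closed even if the write fails.
    static void saveCounts(String filename, int[] counts) {
        try (DataOutputStream out = IOUtils.getDataOutputStream(filename)) {
            out.writeInt(counts.length);
            for (int c : counts) {
                out.writeInt(c);
            }
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    public static void main(String[] args) {
        saveCounts("counts.bin", new int[] {1, 2, 3});
    }
}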

Aggregations

RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException): 114
IOException (java.io.IOException): 61
BufferedReader (java.io.BufferedReader): 22
CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 12
CoreLabel (edu.stanford.nlp.ling.CoreLabel): 11
File (java.io.File): 9
ArrayList (java.util.ArrayList): 7
Tree (edu.stanford.nlp.trees.Tree): 6
CoreMap (edu.stanford.nlp.util.CoreMap): 5
BufferedWriter (java.io.BufferedWriter): 5
Properties (java.util.Properties): 5
Timing (edu.stanford.nlp.util.Timing): 4
FileNotFoundException (java.io.FileNotFoundException): 4
FileOutputStream (java.io.FileOutputStream): 4
ObjectOutputStream (java.io.ObjectOutputStream): 4
PrintWriter (java.io.PrintWriter): 4
CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 3
Annotation (edu.stanford.nlp.pipeline.Annotation): 3
SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 3
ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 3