Search in sources :

Example 1 with TrieMap

use of edu.stanford.nlp.ling.tokensregex.matcher.TrieMap in project CoreNLP by stanfordnlp.

In class TokensRegexNERAnnotator, method readEntries.

/**
 * Reads each mapping file in turn and gathers the entries they define into one list.
 *
 * <p>Mapping files are processed in the order given (they are not sorted). A single
 * entry list and a single {@code TrieMap} of already-seen regexes are shared across
 * all files and handed to the per-file {@code readEntries} overload.
 *
 * @param annotatorName name of the annotator; used in the summary log message and
 *     forwarded to the per-file reader
 * @param noDefaultOverwriteLabels forwarded unchanged to the per-file reader
 * @param ignoreCaseList list indexed by mapping position; element {@code i} is
 *     forwarded when reading {@code mappings[i]}
 * @param entryToMappingFileNumber forwarded to the per-file reader together with the
 *     index of the current mapping file (presumably filled in there - confirm against
 *     the overload)
 * @param verbose forwarded unchanged to the per-file reader
 * @param headerFields forwarded unchanged to the per-file reader
 * @param annotationFieldnames forwarded unchanged to the per-file reader
 * @param mappings mapping file locations, each opened via
 *     {@code IOUtils.readerFromString}
 * @return the list of entries accumulated over all mapping files
 * @throws RuntimeIOException if an {@code IOException} is raised while opening or
 *     reading one of the mapping files
 */
private static List<Entry> readEntries(String annotatorName, Set<String> noDefaultOverwriteLabels, List<Boolean> ignoreCaseList, Map<Entry, Integer> entryToMappingFileNumber, boolean verbose, String[] headerFields, String[] annotationFieldnames, String... mappings) {
    // In contrast to RegexNERClassifier, the entries are not sorted here.
    // Priorities and matches are left to TokensRegex NER to resolve, typically once all
    // matches have been made: for some TokensRegex expressions the number of matched
    // tokens is only known after matching is done.
    final List<Entry> collected = new ArrayList<>();
    final TrieMap<String, Entry> regexesSoFar = new TrieMap<>();
    final int fileCount = mappings.length;

    int fileNumber = 0;
    while (fileNumber < fileCount) {
        final String source = mappings[fileNumber];
        BufferedReader reader = null;
        try {
            reader = IOUtils.readerFromString(source);
            readEntries(annotatorName, headerFields, annotationFieldnames, collected, regexesSoFar, source, reader, noDefaultOverwriteLabels, ignoreCaseList.get(fileNumber), fileNumber, entryToMappingFileNumber, verbose);
        } catch (IOException ioe) {
            throw new RuntimeIOException("Couldn't read TokensRegexNER from " + source, ioe);
        } finally {
            // Close failures are deliberately ignored; the reader may still be null here.
            IOUtils.closeIgnoringExceptions(reader);
        }
        fileNumber++;
    }

    // The summary line is skipped only when exactly one mapping file was given.
    if (fileCount == 1) {
        return collected;
    }
    final String summary = "TokensRegexNERAnnotator " + annotatorName + ": Read " + collected.size() + " unique entries from " + fileCount + " files";
    logger.log(summary);
    return collected;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) TrieMap(edu.stanford.nlp.ling.tokensregex.matcher.TrieMap) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException)

Aggregations

RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)1 TrieMap (edu.stanford.nlp.ling.tokensregex.matcher.TrieMap)1 BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1