Use of edu.stanford.nlp.ling.tokensregex.matcher.TrieMap in project CoreNLP by stanfordnlp.
The method readEntries of the class TokensRegexNERAnnotator.
/**
 * Builds a single list of Entries by reading each of the given mapping files in order.
 *
 * <p>Each mapping is opened with {@code IOUtils.readerFromString} and handed to the
 * per-file {@code readEntries} overload together with the shared result list and the
 * shared {@code TrieMap} of regexes; the reader is always closed afterwards, and any
 * exception raised while closing is ignored.
 *
 * @param annotatorName name of the annotator, used in the log message and passed to the per-file reader
 * @param noDefaultOverwriteLabels passed through unchanged to the per-file reader
 * @param ignoreCaseList one flag per mapping file; element {@code i} is passed along with mapping file {@code i}
 * @param entryToMappingFileNumber passed through to the per-file reader
 *        (presumably filled in there with the index of the file each entry came from - confirm against the overload)
 * @param verbose passed through unchanged to the per-file reader
 * @param headerFields passed through unchanged to the per-file reader
 * @param annotationFieldnames passed through unchanged to the per-file reader
 * @param mappings List of mapping files
 * @return list of Entries accumulated over all mapping files
 * @throws RuntimeIOException if an IOException occurs while opening or reading a mapping file
 */
private static List<Entry> readEntries(String annotatorName, Set<String> noDefaultOverwriteLabels, List<Boolean> ignoreCaseList, Map<Entry, Integer> entryToMappingFileNumber, boolean verbose, String[] headerFields, String[] annotationFieldnames, String... mappings) {
  // In contrast to RegexNERClassifier, no sorting of the entries happens here.
  // Priorities and matches are left for TokensRegex NER to resolve
  // (usually once all matches have been made, because for some TokenRegex expressions
  // the number of matched tokens is only known after matching has finished).
  final TrieMap<String, Entry> regexesSoFar = new TrieMap<>();
  final List<Entry> collected = new ArrayList<>();

  for (int fileIdx = 0; fileIdx < mappings.length; fileIdx++) {
    final String path = mappings[fileIdx];
    BufferedReader reader = null;
    try {
      reader = IOUtils.readerFromString(path);
      // Looked up only after the reader is open, so a bad index still goes through the finally block.
      final Boolean ignoreCaseForFile = ignoreCaseList.get(fileIdx);
      readEntries(annotatorName, headerFields, annotationFieldnames, collected, regexesSoFar, path, reader,
          noDefaultOverwriteLabels, ignoreCaseForFile, fileIdx, entryToMappingFileNumber, verbose);
    } catch (IOException ioe) {
      throw new RuntimeIOException("Couldn't read TokensRegexNER from " + path, ioe);
    } finally {
      IOUtils.closeIgnoringExceptions(reader);
    }
  }

  // The summary line is only emitted when the number of mapping files is not exactly one.
  if (mappings.length == 1) {
    return collected;
  }
  logger.log("TokensRegexNERAnnotator " + annotatorName + ": Read " + collected.size() + " unique entries from " + mappings.length + " files");
  return collected;
}
Aggregations