Use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.
The class Dictionaries, method loadSignatures.
private static void loadSignatures(String file, Map<String, Counter<String>> sigs) {
BufferedReader reader = null;
try {
reader = IOUtils.readerFromString(file);
while (reader.ready()) {
// each line is tab-separated: a key followed by alternating (signature, count) pairs
String[] split = reader.readLine().split("\t");
Counter<String> cntr = new ClassicCounter<>();
sigs.put(split[0], cntr);
for (int i = 1; i < split.length; i = i + 2) {
cntr.setCount(split[i], Double.parseDouble(split[i + 1]));
}
}
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
IOUtils.closeIgnoringExceptions(reader);
}
}
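For context, a minimal self-contained sketch of parsing one such tab-separated line the way loadSignatures does; the word and counts here are hypothetical, not taken from any CoreNLP data file:

import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import java.util.HashMap;
import java.util.Map;

public class SignatureLineDemo {
  public static void main(String[] args) {
    // hypothetical line: key, then alternating signature/count pairs
    String line = "running\tVBG\t12.0\tNN\t3.0";
    String[] split = line.split("\t");
    Map<String, Counter<String>> sigs = new HashMap<>();
    Counter<String> cntr = new ClassicCounter<>();
    sigs.put(split[0], cntr);
    for (int i = 1; i < split.length; i += 2) {
      cntr.setCount(split[i], Double.parseDouble(split[i + 1]));
    }
    System.out.println(sigs.get("running").getCount("VBG")); // prints 12.0
  }
}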
Use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.
The class BasicRelationFeatureFactory, method addFeatures.
/**
* Creates all features for the datum corresponding to this relation mention.
* Note: this assumes binary relations where both arguments are EntityMention.
* @param features Stores all features
* @param rel The relation mention
* @param types The list of feature classes to use
* @param logger Optional logger for feature debugging; may be null
* @return true if features were extracted, false if the relation fails the sanity checks
*/
public boolean addFeatures(Counter<String> features, RelationMention rel, List<String> types, Logger logger) {
// sanity checks: must have two arguments, and each must be an entity mention
if (rel.getArgs().size() != 2)
return false;
if (!(rel.getArg(0) instanceof EntityMention))
return false;
if (!(rel.getArg(1) instanceof EntityMention))
return false;
EntityMention arg0 = (EntityMention) rel.getArg(0);
EntityMention arg1 = (EntityMention) rel.getArg(1);
Tree tree = rel.getSentence().get(TreeAnnotation.class);
if (tree == null) {
throw new RuntimeException("ERROR: Relation extraction requires full syntactic analysis!");
}
List<Tree> leaves = tree.getLeaves();
List<CoreLabel> tokens = rel.getSentence().get(TokensAnnotation.class);
// this assumes that both args are in the same sentence as the relation object
// let's check for this to be safe
CoreMap relSentence = rel.getSentence();
CoreMap arg0Sentence = arg0.getSentence();
CoreMap arg1Sentence = arg1.getSentence();
if (arg0Sentence != relSentence) {
log.info("WARNING: Found relation with arg0 in a different sentence: " + rel);
log.info("Relation sentence: " + relSentence.get(TextAnnotation.class));
log.info("Arg0 sentence: " + arg0Sentence.get(TextAnnotation.class));
return false;
}
if (arg1Sentence != relSentence) {
log.info("WARNING: Found relation with arg1 in a different sentence: " + rel);
log.info("Relation sentence: " + relSentence.get(TextAnnotation.class));
log.info("Arg1 sentence: " + arg1Sentence.get(TextAnnotation.class));
return false;
}
// Checklist keeps track of which features have been handled by an if clause
// Should be empty after all the clauses have been gone through.
List<String> checklist = new ArrayList<>(types);
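// usingFeature checks a feature class off the checklist and reports whether it is active; see the sketch after this method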
// arg_type: concatenation of the entity types of the two args (note the 1-based naming: arg0 appears as arg1 in feature names)
if (usingFeature(types, checklist, "arg_type")) {
features.setCount("arg1type=" + arg0.getType() + "_and_arg2type=" + arg1.getType(), 1.0);
}
// arg_subtype: similar, for entity subtypes
if (usingFeature(types, checklist, "arg_subtype")) {
features.setCount("arg1subtype=" + arg0.getSubType() + "_and_arg2subtype=" + arg1.getSubType(), 1.0);
}
// arg_order: which arg comes first in the sentence
if (usingFeature(types, checklist, "arg_order")) {
if (arg0.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition())
features.setCount("arg1BeforeArg2", 1.0);
}
// same_head: whether the two args share the same syntactic head token
if (usingFeature(types, checklist, "same_head")) {
if (arg0.getSyntacticHeadTokenPosition() == arg1.getSyntacticHeadTokenPosition())
features.setCount("arguments_have_same_head", 1.0);
}
// full_tree_path: path in the parse tree between the two argument heads, e.g., NNP -> PP -> NN <- NNP
if (usingFeature(types, checklist, "full_tree_path")) {
//log.info("SENTENCE: " + sentToString(arg0.getSentence()));
if (arg0.getSyntacticHeadTokenPosition() < leaves.size() && arg1.getSyntacticHeadTokenPosition() < leaves.size()) {
Tree arg0preterm = leaves.get(arg0.getSyntacticHeadTokenPosition()).parent(tree);
Tree arg1preterm = leaves.get(arg1.getSyntacticHeadTokenPosition()).parent(tree);
Tree join = tree.joinNode(arg0preterm, arg1preterm);
StringBuilder pathStringBuilder = new StringBuilder();
List<Tree> pathUp = join.dominationPath(arg0preterm);
Collections.reverse(pathUp);
for (Tree node : pathUp) {
if (node != join) {
pathStringBuilder.append(node.label().value() + " <- ");
}
}
for (Tree node : join.dominationPath(arg1preterm)) {
pathStringBuilder.append(((node == join) ? "" : " -> ") + node.label().value());
}
String pathString = pathStringBuilder.toString();
if (logger != null && !rel.getType().equals(RelationMention.UNRELATED))
logger.info("full_tree_path: " + pathString);
features.setCount("treepath:" + pathString, 1.0);
} else {
log.info("WARNING: found weird argument offsets. Most likely because arguments appear in different sentences than the relation:");
log.info("ARG0: " + arg0);
log.info("ARG0 HEAD: " + arg0.getSyntacticHeadTokenPosition());
log.info("ARG0 SENTENCE: " + sentToString(arg0.getSentence()));
log.info("ARG1: " + arg1);
log.info("ARG1 HEAD: " + arg1.getSyntacticHeadTokenPosition());
log.info("ARG1 SENTENCE: " + sentToString(arg1.getSentence()));
log.info("RELATION TREE: " + tree);
}
}
// path_length: length of the path between the two argument heads in the phrase structure parse tree, integer-valued feature
int pathLength = tree.pathNodeToNode(tree.getLeaves().get(arg0.getSyntacticHeadTokenPosition()), tree.getLeaves().get(arg1.getSyntacticHeadTokenPosition())).size();
if (usingFeature(types, checklist, "path_length")) {
features.setCount("path_length", pathLength);
}
// path_length_binary: Length of the path in the phrase structure parse tree, binary features
if (usingFeature(types, checklist, "path_length_binary")) {
features.setCount("path_length_" + pathLength, 1.0);
}
/* entity_order
* This tells you for each of the two args
* whether there are other entities before or after that arg.
* In particular, it can tell whether an arg is the first entity of its type in the sentence
* (which can be useful for example for telling the gameWinner and gameLoser in NFL).
* TODO: restrict this feature so that it only looks for
* entities of the same type?
* */
if (usingFeature(types, checklist, "entity_order")) {
for (int i = 0; i < rel.getArgs().size(); i++) {
// We already checked the class of the args at the beginning of the method
EntityMention arg = (EntityMention) rel.getArgs().get(i);
if (rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class) != null) {
// may be null due to annotation error
for (EntityMention otherArg : rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class)) {
String feature;
if (otherArg.getSyntacticHeadTokenPosition() > arg.getSyntacticHeadTokenPosition()) {
feature = "arg" + i + "_before_" + otherArg.getType();
features.setCount(feature, 1.0);
}
if (otherArg.getSyntacticHeadTokenPosition() < arg.getSyntacticHeadTokenPosition()) {
feature = "arg" + i + "_after_" + otherArg.getType();
features.setCount(feature, 1.0);
}
}
}
}
}
// surface_distance: Number of tokens in the sentence between the two words, integer-valued feature
int surfaceDistance = Math.abs(arg0.getSyntacticHeadTokenPosition() - arg1.getSyntacticHeadTokenPosition());
if (usingFeature(types, checklist, "surface_distance")) {
features.setCount("surface_distance", surfaceDistance);
}
// surface_distance_binary: Number of tokens in the sentence between the two words, binary features
if (usingFeature(types, checklist, "surface_distance_binary")) {
features.setCount("surface_distance_" + surfaceDistance, 1.0);
}
// surface_distance_bins: number of tokens between the two args, binned to several intervals
if (usingFeature(types, checklist, "surface_distance_bins")) {
if (surfaceDistance < 4) {
features.setCount("surface_distance_bin" + surfaceDistance, 1.0);
} else if (surfaceDistance < 6) {
features.setCount("surface_distance_bin_lt6", 1.0);
} else if (surfaceDistance < 10) {
features.setCount("surface_distance_bin_lt10", 1.0);
} else {
features.setCount("surface_distance_bin_ge10", 1.0);
}
}
// separate_surface_windows: windows of 1,2,3 tokens before and after args, for each arg separately
// Separate features are generated for windows to the left and to the right of the args.
// Features are concatenations of words in the window (or NULL for sentence boundary).
//
// conjunction_surface_windows: concatenation of the windows of the two args
//
// separate_surface_windows_POS: windows of POS tags of size 1,2,3 for each arg
//
// conjunction_surface_windows_POS: concatenation of windows of the args
List<EntityMention> args = new ArrayList<>();
args.add(arg0);
args.add(arg1);
for (int windowSize = 1; windowSize <= 3; windowSize++) {
String[] leftWindow, rightWindow, leftWindowPOS, rightWindowPOS;
leftWindow = new String[2];
rightWindow = new String[2];
leftWindowPOS = new String[2];
rightWindowPOS = new String[2];
for (int argn = 0; argn <= 1; argn++) {
int ind = args.get(argn).getSyntacticHeadTokenPosition();
for (int winnum = 1; winnum <= windowSize; winnum++) {
int windex = ind - winnum;
if (windex > 0) {
leftWindow[argn] = leaves.get(windex).label().value() + "_" + leftWindow[argn];
leftWindowPOS[argn] = leaves.get(windex).parent(tree).label().value() + "_" + leftWindowPOS[argn];
} else {
leftWindow[argn] = "NULL_" + leftWindow[argn];
leftWindowPOS[argn] = "NULL_" + leftWindowPOS[argn];
}
windex = ind + winnum;
if (windex < leaves.size()) {
rightWindow[argn] = rightWindow[argn] + "_" + leaves.get(windex).label().value();
rightWindowPOS[argn] = rightWindowPOS[argn] + "_" + leaves.get(windex).parent(tree).label().value();
} else {
rightWindow[argn] = rightWindow[argn] + "_NULL";
rightWindowPOS[argn] = rightWindowPOS[argn] + "_NULL";
}
}
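// note: the first flag below emits the left-window word and POS features; the second emits the right-window word and POS features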
if (usingFeature(types, checklist, "separate_surface_windows")) {
features.setCount("left_window_" + windowSize + "_arg_" + argn + ": " + leftWindow[argn], 1.0);
features.setCount("left_window_" + windowSize + "_POS_arg_" + argn + ": " + leftWindowPOS[argn], 1.0);
}
if (usingFeature(types, checklist, "separate_surface_windows_POS")) {
features.setCount("right_window_" + windowSize + "_arg_" + argn + ": " + rightWindow[argn], 1.0);
features.setCount("right_window_" + windowSize + "_POS_arg_" + argn + ": " + rightWindowPOS[argn], 1.0);
}
}
if (usingFeature(types, checklist, "conjunction_surface_windows")) {
features.setCount("left_windows_" + windowSize + ": " + leftWindow[0] + "__" + leftWindow[1], 1.0);
features.setCount("right_windows_" + windowSize + ": " + rightWindow[0] + "__" + rightWindow[1], 1.0);
}
if (usingFeature(types, checklist, "conjunction_surface_windows_POS")) {
features.setCount("left_windows_" + windowSize + "_POS: " + leftWindowPOS[0] + "__" + leftWindowPOS[1], 1.0);
features.setCount("right_windows_" + windowSize + "_POS: " + rightWindowPOS[0] + "__" + rightWindowPOS[1], 1.0);
}
}
// arg_words: The actual arg tokens as separate features, and concatenated
String word0 = leaves.get(arg0.getSyntacticHeadTokenPosition()).label().value();
String word1 = leaves.get(arg1.getSyntacticHeadTokenPosition()).label().value();
if (usingFeature(types, checklist, "arg_words")) {
if (doNotLexicalizeFirstArg == false)
features.setCount("word_arg0: " + word0, 1.0);
features.setCount("word_arg1: " + word1, 1.0);
if (doNotLexicalizeFirstArg == false)
features.setCount("words: " + word0 + "__" + word1, 1.0);
}
// arg_POS: POS tags of the args, as separate features and concatenated
String pos0 = leaves.get(arg0.getSyntacticHeadTokenPosition()).parent(tree).label().value();
String pos1 = leaves.get(arg1.getSyntacticHeadTokenPosition()).parent(tree).label().value();
if (usingFeature(types, checklist, "arg_POS")) {
features.setCount("POS_arg0: " + pos0, 1.0);
features.setCount("POS_arg1: " + pos1, 1.0);
features.setCount("POSs: " + pos0 + "__" + pos1, 1.0);
}
// adjacent_words: words immediately to the left and right of the args
if (usingFeature(types, checklist, "adjacent_words")) {
for (int i = 0; i < rel.getArgs().size(); i++) {
Span s = ((EntityMention) rel.getArg(i)).getHead();
if (s.start() > 0) {
String v = tokens.get(s.start() - 1).word();
features.setCount("leftarg" + i + "-" + v, 1.0);
}
if (s.end() < tokens.size()) {
String v = tokens.get(s.end()).word();
features.setCount("rightarg" + i + "-" + v, 1.0);
}
}
}
// e.g. "entity_between_args: Loc" means there is at least one entity of type Loc between the two args
if (usingFeature(types, checklist, "entities_between_args")) {
CoreMap sent = rel.getSentence();
if (sent == null)
throw new RuntimeException("NULL sentence for relation " + rel);
List<EntityMention> relArgs = sent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
if (relArgs != null) {
// may be null due to annotation errors!
for (EntityMention arg : relArgs) {
if ((arg.getSyntacticHeadTokenPosition() > arg0.getSyntacticHeadTokenPosition() && arg.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition()) || (arg.getSyntacticHeadTokenPosition() > arg1.getSyntacticHeadTokenPosition() && arg.getSyntacticHeadTokenPosition() < arg0.getSyntacticHeadTokenPosition())) {
features.setCount("entity_between_args: " + arg.getType(), 1.0);
}
}
}
}
// entity_counts: For each type, the total number of entities of that type in the sentence (integer-valued feature)
// entity_counts_binary: Counts of entity types as binary features.
Counter<String> typeCounts = new ClassicCounter<>();
if (rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class) != null) {
// may be null due to annotation errors!
for (EntityMention arg : rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class)) typeCounts.incrementCount(arg.getType());
for (String type : typeCounts.keySet()) {
if (usingFeature(types, checklist, "entity_counts"))
features.setCount("entity_counts_" + type, typeCounts.getCount(type));
if (usingFeature(types, checklist, "entity_counts_binary"))
features.setCount("entity_counts_" + type + ": " + typeCounts.getCount(type), 1.0);
}
}
// surface_path: concatenation of tokens between the two args
// surface_path_POS: concatenation of POS tags between the args
// surface_path_selective: concatenation of tokens between the args which are nouns or verbs
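// e.g., for a hypothetical sentence "Obama visited sunny Paris" with argument heads "Obama" and "Paris", these would yield "visited_sunny_", "VBD_JJ_", and "visited_" respectively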
StringBuilder sb = new StringBuilder();
StringBuilder sbPOS = new StringBuilder();
StringBuilder sbSelective = new StringBuilder();
for (int i = Math.min(arg0.getSyntacticHeadTokenPosition(), arg1.getSyntacticHeadTokenPosition()) + 1; i < Math.max(arg0.getSyntacticHeadTokenPosition(), arg1.getSyntacticHeadTokenPosition()); i++) {
String word = leaves.get(i).label().value();
sb.append(word + "_");
String pos = leaves.get(i).parent(tree).label().value();
sbPOS.append(pos + "_");
if (pos.equals("NN") || pos.equals("NNS") || pos.equals("NNP") || pos.equals("NNPS") || pos.equals("VB") || pos.equals("VBN") || pos.equals("VBD") || pos.equals("VBG") || pos.equals("VBP") || pos.equals("VBZ")) {
sbSelective.append(word + "_");
}
}
if (usingFeature(types, checklist, "surface_path")) {
features.setCount("surface_path: " + sb, 1.0);
}
if (usingFeature(types, checklist, "surface_path_POS")) {
features.setCount("surface_path_POS: " + sbPOS, 1.0);
}
if (usingFeature(types, checklist, "surface_path_selective")) {
features.setCount("surface_path_selective: " + sbSelective, 1.0);
}
// swStart/swEnd delimit the token span between the two argument extents; initialized in the if/else below
int swStart, swEnd;
if (arg0.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition()) {
swStart = arg0.getExtentTokenEnd();
swEnd = arg1.getExtentTokenStart();
} else {
swStart = arg1.getExtentTokenEnd();
swEnd = arg0.getExtentTokenStart();
}
// span_words_unigrams: words that appear in between the two arguments
if (usingFeature(types, checklist, "span_words_unigrams")) {
for (int i = swStart; i < swEnd; i++) {
features.setCount("span_word:" + tokens.get(i).word(), 1.0);
}
}
// span_words_bigrams: bigrams of words that appear in between the two arguments
if (usingFeature(types, checklist, "span_words_bigrams")) {
for (int i = swStart; i < swEnd - 1; i++) {
features.setCount("span_bigram:" + tokens.get(i).word() + "-" + tokens.get(i + 1).word(), 1.0);
}
}
if (usingFeature(types, checklist, "span_words_trigger")) {
for (int i = swStart; i < swEnd; i++) {
String trigger = tokens.get(i).get(TriggerAnnotation.class);
if (trigger != null && trigger.startsWith("B-"))
features.incrementCount("span_words_trigger=" + trigger.substring(2));
}
}
if (usingFeature(types, checklist, "arg2_number")) {
if (arg1.getType().equals("NUMBER")) {
try {
int value = Integer.parseInt(arg1.getValue());
if (2 <= value && value <= 100)
features.setCount("arg2_number", 1.0);
if (2 <= value && value <= 19)
features.setCount("arg2_number_2", 1.0);
if (20 <= value && value <= 59)
features.setCount("arg2_number_20", 1.0);
if (60 <= value && value <= 100)
features.setCount("arg2_number_60", 1.0);
if (value >= 100)
features.setCount("arg2_number_100", 1.0);
} catch (NumberFormatException e) {
// the value was not an integer; emit no numeric features for this arg
}
}
}
if (usingFeature(types, checklist, "arg2_date")) {
if (arg1.getType().equals("DATE")) {
try {
int value = Integer.parseInt(arg1.getValue());
if (0 <= value && value <= 2010)
features.setCount("arg2_date", 1.0);
if (0 <= value && value <= 999)
features.setCount("arg2_date_0", 1.0);
if (1000 <= value && value <= 1599)
features.setCount("arg2_date_1000", 1.0);
if (1600 <= value && value <= 1799)
features.setCount("arg2_date_1600", 1.0);
if (1800 <= value && value <= 1899)
features.setCount("arg2_date_1800", 1.0);
if (1900 <= value && value <= 1999)
features.setCount("arg2_date_1900", 1.0);
if (value >= 2000)
features.setCount("arg2_date_2000", 1.0);
} catch (NumberFormatException e) {
// the value was not an integer; emit no date features for this arg
}
}
}
if (usingFeature(types, checklist, "arg_gender")) {
boolean arg0Male = false, arg0Female = false;
boolean arg1Male = false, arg1Female = false;
System.out.println("Adding gender annotations!");
int index = arg0.getExtentTokenStart();
String gender = tokens.get(index).get(GenderAnnotation.class);
System.out.println(tokens.get(index).word() + " -- " + gender);
if (gender.equals("MALE"))
arg0Male = true;
else if (gender.equals("FEMALE"))
arg0Female = true;
index = arg1.getExtentTokenStart();
gender = tokens.get(index).get(GenderAnnotation.class);
if (gender.equals("MALE"))
arg1Male = true;
else if (gender.equals("FEMALE"))
arg1Female = true;
if (arg0Male)
features.setCount("arg1_male", 1.0);
if (arg0Female)
features.setCount("arg1_female", 1.0);
if (arg1Male)
features.setCount("arg2_male", 1.0);
if (arg1Female)
features.setCount("arg2_female", 1.0);
if ((arg0Male && arg1Male) || (arg0Female && arg1Female))
features.setCount("arg_same_gender", 1.0);
if ((arg0Male && arg1Female) || (arg0Female && arg1Male))
features.setCount("arg_different_gender", 1.0);
}
List<String> tempDepFeatures = new ArrayList<>(dependencyFeatures);
if (tempDepFeatures.removeAll(types) || types.contains("all")) {
// dependencyFeatures contains at least one of the features listed in types
addDependencyPathFeatures(features, rel, arg0, arg1, types, checklist, logger);
}
if (!checklist.isEmpty() && !checklist.contains("all"))
throw new AssertionError("RelationFeatureFactory: features not handled: " + checklist);
// sort the feature names for deterministic inspection; the sorted list is not used further in this method
List<String> featureList = new ArrayList<>(features.keySet());
Collections.sort(featureList);
return true;
}
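The usingFeature helper called throughout is not shown above. A plausible sketch of its contract, inferred from the call sites (the exact CoreNLP implementation may differ in details): it checks the feature class off the checklist and reports whether it is enabled, with "all" activating everything.

protected static boolean usingFeature(List<String> types, List<String> checklist, String featureName) {
  // check the feature class off the list so leftover entries reveal unhandled names
  checklist.remove(featureName);
  // "all" turns every feature class on
  return types.contains(featureName) || types.contains("all");
}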
Use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.
The class BasicEntityExtractor, method runTestSet.
// TODO not called any more, but possibly useful as a reference
/**
* This should be called after the classifier has been trained and
* parseAndTrain has been called to accumulate the test set.
*
* It tallies per-label true positive, false positive, and false negative
* counts, from which precision, recall, and F1 can be derived.
*/
public void runTestSet(List<List<CoreLabel>> testSet) {
Counter<String> tp = new ClassicCounter<>();
Counter<String> fp = new ClassicCounter<>();
Counter<String> fn = new ClassicCounter<>();
Counter<String> actual = new ClassicCounter<>();
for (List<CoreLabel> labels : testSet) {
List<CoreLabel> unannotatedLabels = new ArrayList<>();
// create a new label without answer annotation
for (CoreLabel label : labels) {
CoreLabel newLabel = new CoreLabel();
newLabel.set(annotationForWord, label.get(annotationForWord));
newLabel.set(PartOfSpeechAnnotation.class, label.get(PartOfSpeechAnnotation.class));
unannotatedLabels.add(newLabel);
}
List<CoreLabel> annotatedLabels = this.classifier.classify(unannotatedLabels);
int ind = 0;
for (CoreLabel expectedLabel : labels) {
CoreLabel annotatedLabel = annotatedLabels.get(ind);
String answer = annotatedLabel.get(AnswerAnnotation.class);
String expectedAnswer = expectedLabel.get(AnswerAnnotation.class);
actual.incrementCount(expectedAnswer);
// match only non background symbols
if (!SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(expectedAnswer) && expectedAnswer.equals(answer)) {
// true positives
tp.incrementCount(answer);
System.out.println("True Positive:" + annotatedLabel);
} else if (!SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(answer)) {
// false positives
fp.incrementCount(answer);
System.out.println("False Positive:" + annotatedLabel);
} else if (!SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(expectedAnswer)) {
// false negatives
fn.incrementCount(expectedAnswer);
System.out.println("False Negative:" + expectedLabel);
}
// else true negatives
ind++;
}
}
// exclude the background symbol from the gold-label counts
actual.remove(SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
}
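For reference, a minimal sketch, not part of the original class, of deriving per-label precision, recall, and F1 from the counters accumulated above (labels with zero true positives are skipped in this simple version):

static void printScores(Counter<String> tp, Counter<String> fp, Counter<String> fn) {
  for (String label : tp.keySet()) {
    double p = tp.getCount(label) / (tp.getCount(label) + fp.getCount(label));
    double r = tp.getCount(label) / (tp.getCount(label) + fn.getCount(label));
    double f1 = (p + r > 0.0) ? 2.0 * p * r / (p + r) : 0.0;
    System.out.printf("%s: P=%.2f R=%.2f F1=%.2f%n", label, p, r, f1);
  }
}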
Use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.
The class AbstractSequenceClassifier, method classifyAndWriteAnswers.
/**
* Classifies the given documents, writes the answers with the supplied writer, and optionally scores them.
*
* @param documents The documents to classify
* @param printWriter Destination for the classified output
* @param readerWriter Formats the classified documents for output
* @param outputScores Whether to calculate and output the performance scores (P/R/F1) of the classifier
* @return A Triple of overall P/R/F1, if outputScores is true, else {@code null}. The scores are done
* on a 0-100 scale like percentages.
* @throws IOException If reading or writing a document fails
*/
public Triple<Double, Double, Double> classifyAndWriteAnswers(Collection<List<IN>> documents, PrintWriter printWriter, DocumentReaderAndWriter<IN> readerWriter, boolean outputScores) throws IOException {
if (flags.exportFeatures != null) {
dumpFeatures(documents);
}
Timing timer = new Timing();
Counter<String> entityTP = new ClassicCounter<>();
Counter<String> entityFP = new ClassicCounter<>();
Counter<String> entityFN = new ClassicCounter<>();
boolean resultsCounted = outputScores;
int numWords = 0;
int numDocs = 0;
final AtomicInteger threadCompletionCounter = new AtomicInteger(0);
ThreadsafeProcessor<List<IN>, List<IN>> threadProcessor = new ThreadsafeProcessor<List<IN>, List<IN>>() {
@Override
public List<IN> process(List<IN> doc) {
doc = classify(doc);
int completedNo = threadCompletionCounter.incrementAndGet();
if (flags.verboseMode)
log.info(completedNo + " examples completed");
return doc;
}
@Override
public ThreadsafeProcessor<List<IN>, List<IN>> newInstance() {
return this;
}
};
MulticoreWrapper<List<IN>, List<IN>> wrapper = null;
if (flags.multiThreadClassifier != 0) {
wrapper = new MulticoreWrapper<>(flags.multiThreadClassifier, threadProcessor);
}
for (List<IN> doc : documents) {
numWords += doc.size();
numDocs++;
if (wrapper != null) {
wrapper.put(doc);
while (wrapper.peek()) {
List<IN> results = wrapper.poll();
writeAnswers(results, printWriter, readerWriter);
resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN);
}
} else {
List<IN> results = threadProcessor.process(doc);
writeAnswers(results, printWriter, readerWriter);
resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN);
}
}
if (wrapper != null) {
wrapper.join();
while (wrapper.peek()) {
List<IN> results = wrapper.poll();
writeAnswers(results, printWriter, readerWriter);
resultsCounted = resultsCounted && countResults(results, entityTP, entityFP, entityFN);
}
}
long millis = timer.stop();
double wordspersec = numWords / (((double) millis) / 1000);
// format the words-per-second throughput to two decimal places
NumberFormat nf = new DecimalFormat("0.00");
log.info(StringUtils.getShortClassName(this) + " tagged " + numWords + " words in " + numDocs + " documents at " + nf.format(wordspersec) + " words per second.");
if (outputScores) {
return printResults(entityTP, entityFP, entityFN);
} else {
return null;
}
}
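A hypothetical usage sketch, assuming a trained CRF model on disk; the model and file names are illustrative, not fixed CoreNLP resources:

CRFClassifier<CoreLabel> crf = CRFClassifier.getClassifier("ner-model.ser.gz");
DocumentReaderAndWriter<CoreLabel> rw = crf.makeReaderAndWriter();
Collection<List<CoreLabel>> docs = crf.makeObjectBankFromFile("test.tsv", rw);
// writes tagged output to stdout and returns overall P/R/F1 on a 0-100 scale
Triple<Double, Double, Double> prf =
    crf.classifyAndWriteAnswers(docs, new PrintWriter(System.out, true), rw, true);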
Use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.
The class QuasiDeterminizer, method processGraph.
public TransducerGraph processGraph(TransducerGraph graph) {
// compute the lambda function; not destructive of the input graph
ClassicCounter lambda = computeLambda(graph);
// push the lambdas through the graph; creates a new graph
TransducerGraph result = pushLambdas(graph, lambda);
return result;
}
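Finally, since every snippet on this page centers on ClassicCounter, a self-contained demo of the handful of Counter calls used above; the feature names are made up for illustration:

import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;

public class ClassicCounterDemo {
  public static void main(String[] args) {
    Counter<String> c = new ClassicCounter<>();
    c.setCount("path_length", 4.0);      // real-valued feature
    c.incrementCount("span_word:win");   // adds 1.0, starting from 0.0
    c.incrementCount("span_word:win");
    System.out.println(c.getCount("span_word:win")); // 2.0
    c.remove("path_length");
    System.out.println(c.keySet()); // [span_word:win]
  }
}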