Search in sources :

Example 56 with ClassicCounter

use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.

the class Dictionaries method loadSignatures.

/**
 * Reads a tab-separated signature resource and stores one counter per line in {@code sigs}.
 *
 * <p>Each line is split on tab characters. Field 0 becomes the map key; the remaining fields are
 * consumed as alternating (name, count) pairs and written into a fresh {@code ClassicCounter}.
 * A later line with the same key replaces the earlier entry. A name that is not followed by a
 * count raises {@code ArrayIndexOutOfBoundsException}, and a count that is not a valid double
 * raises {@code NumberFormatException}.
 *
 * @param file location handed to {@code IOUtils.readerFromString}
 * @param sigs destination map, modified in place
 * @throws RuntimeException wrapping any {@code IOException} raised while opening or reading
 */
private static void loadSignatures(String file, Map<String, Counter<String>> sigs) {
    BufferedReader reader = null;
    try {
        reader = IOUtils.readerFromString(file);
        // A null return from readLine() is the end-of-stream signal. ready() only reports whether
        // a read would block, so it must not be used to decide when the input is exhausted.
        String line;
        while ((line = reader.readLine()) != null) {
            String[] split = line.split("\t");
            Counter<String> cntr = new ClassicCounter<>();
            sigs.put(split[0], cntr);
            // fields after the key come in (name, count) pairs
            for (int i = 1; i < split.length; i = i + 2) {
                cntr.setCount(split[i], Double.parseDouble(split[i + 1]));
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeIgnoringExceptions(reader);
    }
}
Also used : BufferedReader(java.io.BufferedReader) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) IOException(java.io.IOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Example 57 with ClassicCounter

use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.

the class BasicRelationFeatureFactory method addFeatures.

/**
   * Creates all features for the datum corresponding to this relation mention.
   * Note: this assumes binary relations where both arguments are EntityMention.
   * Each feature class named in {@code types} is handled by one {@code usingFeature} clause below;
   * a local checklist (a copy of {@code types}) is used to detect requested classes that no clause handled.
   *
   * @param features Stores all features; modified in place
   * @param rel The mention
   * @param types List of feature classes to use
   * @param logger If non-null, receives the full tree path of relations whose type is not
   *               {@code RelationMention.UNRELATED}; also passed on to the dependency path features
   * @return {@code false} if the relation does not have exactly two EntityMention arguments located in the
   *         relation's own sentence; {@code true} once the features have been added
   * @throws RuntimeException if the relation's sentence has no parse tree
   * @throws AssertionError if a requested feature class was not handled and "all" is not on the checklist
   */
public boolean addFeatures(Counter<String> features, RelationMention rel, List<String> types, Logger logger) {
    // sanity checks: must have two arguments, and each must be an entity mention
    if (rel.getArgs().size() != 2)
        return false;
    if (!(rel.getArg(0) instanceof EntityMention))
        return false;
    if (!(rel.getArg(1) instanceof EntityMention))
        return false;
    EntityMention arg0 = (EntityMention) rel.getArg(0);
    EntityMention arg1 = (EntityMention) rel.getArg(1);
    Tree tree = rel.getSentence().get(TreeAnnotation.class);
    if (tree == null) {
        throw new RuntimeException("ERROR: Relation extraction requires full syntactic analysis!");
    }
    List<Tree> leaves = tree.getLeaves();
    List<CoreLabel> tokens = rel.getSentence().get(TokensAnnotation.class);
    // this assumes that both args are in the same sentence as the relation object
    // let's check for this to be safe (identity comparison: the very same sentence object is expected)
    CoreMap relSentence = rel.getSentence();
    CoreMap arg0Sentence = arg0.getSentence();
    CoreMap arg1Sentence = arg1.getSentence();
    if (arg0Sentence != relSentence) {
        log.info("WARNING: Found relation with arg0 in a different sentence: " + rel);
        log.info("Relation sentence: " + relSentence.get(TextAnnotation.class));
        log.info("Arg0 sentence: " + arg0Sentence.get(TextAnnotation.class));
        return false;
    }
    if (arg1Sentence != relSentence) {
        log.info("WARNING: Found relation with arg1 in a different sentence: " + rel);
        log.info("Relation sentence: " + relSentence.get(TextAnnotation.class));
        log.info("Arg1 sentence: " + arg1Sentence.get(TextAnnotation.class));
        return false;
    }
    // Checklist keeps track of which features have been handled by an if clause
    // Should be empty after all the clauses have been gone through.
    List<String> checklist = new ArrayList<>(types);
    // arg_type: concatenation of the entity types of the two args
    if (usingFeature(types, checklist, "arg_type")) {
        features.setCount("arg1type=" + arg0.getType() + "_and_arg2type=" + arg1.getType(), 1.0);
    }
    // arg_subtype: similar, for entity subtypes
    if (usingFeature(types, checklist, "arg_subtype")) {
        features.setCount("arg1subtype=" + arg0.getSubType() + "_and_arg2subtype=" + arg1.getSubType(), 1.0);
    }
    // arg_order: which arg comes first in the sentence
    if (usingFeature(types, checklist, "arg_order")) {
        if (arg0.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition())
            features.setCount("arg1BeforeArg2", 1.0);
    }
    // same_head: whether the two args share the same syntactic head token
    if (usingFeature(types, checklist, "same_head")) {
        if (arg0.getSyntacticHeadTokenPosition() == arg1.getSyntacticHeadTokenPosition())
            features.setCount("arguments_have_same_head", 1.0);
    }
    // full_tree_path: labels on the tree path between the preterminals of the two heads
    // e.g., NNP -> PP -> NN <- NNP
    if (usingFeature(types, checklist, "full_tree_path")) {
        //log.info("SENTENCE: " + sentToString(arg0.getSentence()));
        if (arg0.getSyntacticHeadTokenPosition() < leaves.size() && arg1.getSyntacticHeadTokenPosition() < leaves.size()) {
            Tree arg0preterm = leaves.get(arg0.getSyntacticHeadTokenPosition()).parent(tree);
            Tree arg1preterm = leaves.get(arg1.getSyntacticHeadTokenPosition()).parent(tree);
            Tree join = tree.joinNode(arg0preterm, arg1preterm);
            StringBuilder pathStringBuilder = new StringBuilder();
            // walk from arg0's preterminal up to (but excluding) the join node...
            List<Tree> pathUp = join.dominationPath(arg0preterm);
            Collections.reverse(pathUp);
            for (Tree node : pathUp) {
                if (node != join) {
                    pathStringBuilder.append(node.label().value() + " <- ");
                }
            }
            // ...then from the join node down to arg1's preterminal
            for (Tree node : join.dominationPath(arg1preterm)) {
                pathStringBuilder.append(((node == join) ? "" : " -> ") + node.label().value());
            }
            String pathString = pathStringBuilder.toString();
            if (logger != null && !rel.getType().equals(RelationMention.UNRELATED))
                logger.info("full_tree_path: " + pathString);
            features.setCount("treepath:" + pathString, 1.0);
        } else {
            log.info("WARNING: found weird argument offsets. Most likely because arguments appear in different sentences than the relation:");
            log.info("ARG0: " + arg0);
            log.info("ARG0 HEAD: " + arg0.getSyntacticHeadTokenPosition());
            log.info("ARG0 SENTENCE: " + sentToString(arg0.getSentence()));
            log.info("ARG1: " + arg1);
            log.info("ARG1 HEAD: " + arg1.getSyntacticHeadTokenPosition());
            log.info("ARG1 SENTENCE: " + sentToString(arg1.getSentence()));
            log.info("RELATION TREE: " + tree);
        }
    }
    // Reuse the leaf list fetched above instead of asking the tree for its leaves two more times.
    // Note: this is computed whether or not a path_length feature is requested.
    int pathLength = tree.pathNodeToNode(leaves.get(arg0.getSyntacticHeadTokenPosition()), leaves.get(arg1.getSyntacticHeadTokenPosition())).size();
    // path_length: Length of the path in the phrase structure parse tree, integer-valued feature
    if (usingFeature(types, checklist, "path_length")) {
        features.setCount("path_length", pathLength);
    }
    // path_length_binary: Length of the path in the phrase structure parse tree, binary features
    if (usingFeature(types, checklist, "path_length_binary")) {
        features.setCount("path_length_" + pathLength, 1.0);
    }
    /* entity_order
           * This tells you for each of the two args
           * whether there are other entities before or after that arg.
           * In particular, it can tell whether an arg is the first entity of its type in the sentence
           * (which can be useful for example for telling the gameWinner and gameLoser in NFL).
           * TODO: restrict this feature so that it only looks for
           * entities of the same type?
           * */
    if (usingFeature(types, checklist, "entity_order")) {
        for (int i = 0; i < rel.getArgs().size(); i++) {
            // We already checked the class of the args at the beginning of the method
            EntityMention arg = (EntityMention) rel.getArgs().get(i);
            if (rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class) != null) {
                // may be null due to annotation error
                for (EntityMention otherArg : rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class)) {
                    String feature;
                    if (otherArg.getSyntacticHeadTokenPosition() > arg.getSyntacticHeadTokenPosition()) {
                        feature = "arg" + i + "_before_" + otherArg.getType();
                        features.setCount(feature, 1.0);
                    }
                    if (otherArg.getSyntacticHeadTokenPosition() < arg.getSyntacticHeadTokenPosition()) {
                        feature = "arg" + i + "_after_" + otherArg.getType();
                        features.setCount(feature, 1.0);
                    }
                }
            }
        }
    }
    // surface_distance: Number of tokens in the sentence between the two words, integer-valued feature
    int surfaceDistance = Math.abs(arg0.getSyntacticHeadTokenPosition() - arg1.getSyntacticHeadTokenPosition());
    if (usingFeature(types, checklist, "surface_distance")) {
        features.setCount("surface_distance", surfaceDistance);
    }
    // surface_distance_binary: Number of tokens in the sentence between the two words, binary features
    if (usingFeature(types, checklist, "surface_distance_binary")) {
        features.setCount("surface_distance_" + surfaceDistance, 1.0);
    }
    // surface_distance_bins: number of tokens between the two args, binned to several intervals
    if (usingFeature(types, checklist, "surface_distance_bins")) {
        if (surfaceDistance < 4) {
            features.setCount("surface_distance_bin" + surfaceDistance, 1.0);
        } else if (surfaceDistance < 6) {
            features.setCount("surface_distance_bin_lt6", 1.0);
        } else if (surfaceDistance < 10) {
            features.setCount("surface_distance_bin_lt10", 1.0);
        } else {
            features.setCount("surface_distance_bin_ge10", 1.0);
        }
    }
    // separate_surface_windows: windows of 1,2,3 tokens before and after args, for each arg separately
    // Separate features are generated for windows to the left and to the right of the args.
    // Features are concatenations of words in the window (or NULL for sentence boundary).
    //
    // conjunction_surface_windows: concatenation of the windows of the two args
    //
    // separate_surface_windows_POS: windows of POS tags of size 1,2,3 for each arg
    //
    // conjunction_surface_windows_POS: concatenation of windows of the args
    //
    // NOTE(review): three quirks below are deliberately left untouched because fixing them would change
    // the emitted feature strings (and so presumably invalidate existing models) -- confirm before changing:
    //   1. the window arrays start out holding null, so every window string contains the text "null";
    //   2. the left-boundary test is "windex > 0", so the token at index 0 is reported as NULL;
    //   3. "separate_surface_windows" emits the left windows (words and POS) while
    //      "separate_surface_windows_POS" emits the right windows (words and POS).
    List<EntityMention> args = new ArrayList<>();
    args.add(arg0);
    args.add(arg1);
    for (int windowSize = 1; windowSize <= 3; windowSize++) {
        String[] leftWindow, rightWindow, leftWindowPOS, rightWindowPOS;
        leftWindow = new String[2];
        rightWindow = new String[2];
        leftWindowPOS = new String[2];
        rightWindowPOS = new String[2];
        for (int argn = 0; argn <= 1; argn++) {
            int ind = args.get(argn).getSyntacticHeadTokenPosition();
            for (int winnum = 1; winnum <= windowSize; winnum++) {
                // grow the left window by prepending
                int windex = ind - winnum;
                if (windex > 0) {
                    leftWindow[argn] = leaves.get(windex).label().value() + "_" + leftWindow[argn];
                    leftWindowPOS[argn] = leaves.get(windex).parent(tree).label().value() + "_" + leftWindowPOS[argn];
                } else {
                    leftWindow[argn] = "NULL_" + leftWindow[argn];
                    leftWindowPOS[argn] = "NULL_" + leftWindowPOS[argn];
                }
                // grow the right window by appending
                windex = ind + winnum;
                if (windex < leaves.size()) {
                    rightWindow[argn] = rightWindow[argn] + "_" + leaves.get(windex).label().value();
                    rightWindowPOS[argn] = rightWindowPOS[argn] + "_" + leaves.get(windex).parent(tree).label().value();
                } else {
                    rightWindow[argn] = rightWindow[argn] + "_NULL";
                    rightWindowPOS[argn] = rightWindowPOS[argn] + "_NULL";
                }
            }
            if (usingFeature(types, checklist, "separate_surface_windows")) {
                features.setCount("left_window_" + windowSize + "_arg_" + argn + ": " + leftWindow[argn], 1.0);
                features.setCount("left_window_" + windowSize + "_POS_arg_" + argn + ": " + leftWindowPOS[argn], 1.0);
            }
            if (usingFeature(types, checklist, "separate_surface_windows_POS")) {
                features.setCount("right_window_" + windowSize + "_arg_" + argn + ": " + rightWindow[argn], 1.0);
                features.setCount("right_window_" + windowSize + "_POS_arg_" + argn + ": " + rightWindowPOS[argn], 1.0);
            }
        }
        if (usingFeature(types, checklist, "conjunction_surface_windows")) {
            features.setCount("left_windows_" + windowSize + ": " + leftWindow[0] + "__" + leftWindow[1], 1.0);
            features.setCount("right_windows_" + windowSize + ": " + rightWindow[0] + "__" + rightWindow[1], 1.0);
        }
        if (usingFeature(types, checklist, "conjunction_surface_windows_POS")) {
            features.setCount("left_windows_" + windowSize + "_POS: " + leftWindowPOS[0] + "__" + leftWindowPOS[1], 1.0);
            features.setCount("right_windows_" + windowSize + "_POS: " + rightWindowPOS[0] + "__" + rightWindowPOS[1], 1.0);
        }
    }
    // arg_words:  The actual arg tokens as separate features, and concatenated
    String word0 = leaves.get(arg0.getSyntacticHeadTokenPosition()).label().value();
    String word1 = leaves.get(arg1.getSyntacticHeadTokenPosition()).label().value();
    if (usingFeature(types, checklist, "arg_words")) {
        if (!doNotLexicalizeFirstArg)
            features.setCount("word_arg0: " + word0, 1.0);
        features.setCount("word_arg1: " + word1, 1.0);
        if (!doNotLexicalizeFirstArg)
            features.setCount("words: " + word0 + "__" + word1, 1.0);
    }
    // arg_POS:  POS tags of the args, as separate features and concatenated
    String pos0 = leaves.get(arg0.getSyntacticHeadTokenPosition()).parent(tree).label().value();
    String pos1 = leaves.get(arg1.getSyntacticHeadTokenPosition()).parent(tree).label().value();
    if (usingFeature(types, checklist, "arg_POS")) {
        features.setCount("POS_arg0: " + pos0, 1.0);
        features.setCount("POS_arg1: " + pos1, 1.0);
        features.setCount("POSs: " + pos0 + "__" + pos1, 1.0);
    }
    // adjacent_words: words immediately to the left and right of the args
    if (usingFeature(types, checklist, "adjacent_words")) {
        for (int i = 0; i < rel.getArgs().size(); i++) {
            Span s = ((EntityMention) rel.getArg(i)).getHead();
            if (s.start() > 0) {
                String v = tokens.get(s.start() - 1).word();
                features.setCount("leftarg" + i + "-" + v, 1.0);
            }
            if (s.end() < tokens.size()) {
                String v = tokens.get(s.end()).word();
                features.setCount("rightarg" + i + "-" + v, 1.0);
            }
        }
    }
    // entities_between_args: types of the entities located between the two args
    // e.g. "entity_between_args: Loc" means there is at least one entity of type Loc between the two args
    if (usingFeature(types, checklist, "entities_between_args")) {
        CoreMap sent = rel.getSentence();
        if (sent == null)
            throw new RuntimeException("NULL sentence for relation " + rel);
        List<EntityMention> relArgs = sent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (relArgs != null) {
            // may be null due to annotation errors!
            for (EntityMention arg : relArgs) {
                if ((arg.getSyntacticHeadTokenPosition() > arg0.getSyntacticHeadTokenPosition() && arg.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition()) || (arg.getSyntacticHeadTokenPosition() > arg1.getSyntacticHeadTokenPosition() && arg.getSyntacticHeadTokenPosition() < arg0.getSyntacticHeadTokenPosition())) {
                    features.setCount("entity_between_args: " + arg.getType(), 1.0);
                }
            }
        }
    }
    // entity_counts: For each type, the total number of entities of that type in the sentence (integer-valued feature)
    // entity_counts_binary: Counts of entity types as binary features.
    Counter<String> typeCounts = new ClassicCounter<>();
    if (rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class) != null) {
        // may be null due to annotation errors!
        for (EntityMention arg : rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class)) typeCounts.incrementCount(arg.getType());
        for (String type : typeCounts.keySet()) {
            if (usingFeature(types, checklist, "entity_counts"))
                features.setCount("entity_counts_" + type, typeCounts.getCount(type));
            if (usingFeature(types, checklist, "entity_counts_binary"))
                features.setCount("entity_counts_" + type + ": " + typeCounts.getCount(type), 1.0);
        }
    }
    // surface_path: concatenation of tokens between the two args
    // surface_path_POS: concatenation of POS tags between the args
    // surface_path_selective: concatenation of tokens between the args which are nouns or verbs
    StringBuilder sb = new StringBuilder();
    StringBuilder sbPOS = new StringBuilder();
    StringBuilder sbSelective = new StringBuilder();
    for (int i = Math.min(arg0.getSyntacticHeadTokenPosition(), arg1.getSyntacticHeadTokenPosition()) + 1; i < Math.max(arg0.getSyntacticHeadTokenPosition(), arg1.getSyntacticHeadTokenPosition()); i++) {
        String word = leaves.get(i).label().value();
        sb.append(word + "_");
        String pos = leaves.get(i).parent(tree).label().value();
        sbPOS.append(pos + "_");
        if (pos.equals("NN") || pos.equals("NNS") || pos.equals("NNP") || pos.equals("NNPS") || pos.equals("VB") || pos.equals("VBN") || pos.equals("VBD") || pos.equals("VBG") || pos.equals("VBP") || pos.equals("VBZ")) {
            sbSelective.append(word + "_");
        }
    }
    if (usingFeature(types, checklist, "surface_path")) {
        features.setCount("surface_path: " + sb, 1.0);
    }
    if (usingFeature(types, checklist, "surface_path_POS")) {
        features.setCount("surface_path_POS: " + sbPOS, 1.0);
    }
    if (usingFeature(types, checklist, "surface_path_selective")) {
        features.setCount("surface_path_selective: " + sbSelective, 1.0);
    }
    // Token range strictly between the two argument extents: from the end of whichever arg has the
    // earlier head to the start of the other one.
    int swStart, swEnd;
    if (arg0.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition()) {
        swStart = arg0.getExtentTokenEnd();
        swEnd = arg1.getExtentTokenStart();
    } else {
        swStart = arg1.getExtentTokenEnd();
        swEnd = arg0.getExtentTokenStart();
    }
    // span_words_unigrams: words that appear in between the two arguments
    if (usingFeature(types, checklist, "span_words_unigrams")) {
        for (int i = swStart; i < swEnd; i++) {
            features.setCount("span_word:" + tokens.get(i).word(), 1.0);
        }
    }
    // span_words_bigrams: bigrams of words that appear in between the two arguments
    if (usingFeature(types, checklist, "span_words_bigrams")) {
        for (int i = swStart; i < swEnd - 1; i++) {
            features.setCount("span_bigram:" + tokens.get(i).word() + "-" + tokens.get(i + 1).word(), 1.0);
        }
    }
    // span_words_trigger: counts, per trigger name, the tokens between the args whose trigger annotation starts with "B-"
    if (usingFeature(types, checklist, "span_words_trigger")) {
        for (int i = swStart; i < swEnd; i++) {
            String trigger = tokens.get(i).get(TriggerAnnotation.class);
            if (trigger != null && trigger.startsWith("B-"))
                features.incrementCount("span_words_trigger=" + trigger.substring(2));
        }
    }
    // arg2_number: range indicators for a second argument of type NUMBER whose value parses as an int
    if (usingFeature(types, checklist, "arg2_number")) {
        if (arg1.getType().equals("NUMBER")) {
            try {
                int value = Integer.parseInt(arg1.getValue());
                if (2 <= value && value <= 100)
                    features.setCount("arg2_number", 1.0);
                if (2 <= value && value <= 19)
                    features.setCount("arg2_number_2", 1.0);
                if (20 <= value && value <= 59)
                    features.setCount("arg2_number_20", 1.0);
                if (60 <= value && value <= 100)
                    features.setCount("arg2_number_60", 1.0);
                if (value >= 100)
                    features.setCount("arg2_number_100", 1.0);
            } catch (NumberFormatException ignored) {
                // value is not a plain integer: no number-range feature is produced
            }
        }
    }
    // arg2_date: range indicators for a second argument of type DATE whose value parses as an int
    if (usingFeature(types, checklist, "arg2_date")) {
        if (arg1.getType().equals("DATE")) {
            try {
                int value = Integer.parseInt(arg1.getValue());
                if (0 <= value && value <= 2010)
                    features.setCount("arg2_date", 1.0);
                if (0 <= value && value <= 999)
                    features.setCount("arg2_date_0", 1.0);
                if (1000 <= value && value <= 1599)
                    features.setCount("arg2_date_1000", 1.0);
                if (1600 <= value && value <= 1799)
                    features.setCount("arg2_date_1600", 1.0);
                if (1800 <= value && value <= 1899)
                    features.setCount("arg2_date_1800", 1.0);
                if (1900 <= value && value <= 1999)
                    features.setCount("arg2_date_1900", 1.0);
                if (value >= 2000)
                    features.setCount("arg2_date_2000", 1.0);
            } catch (NumberFormatException ignored) {
                // value is not a plain integer: no date-range feature is produced
            }
        }
    }
    // arg_gender: gender of the first token of each argument extent, plus same/different indicators
    if (usingFeature(types, checklist, "arg_gender")) {
        boolean arg0Male = false, arg0Female = false;
        boolean arg1Male = false, arg1Female = false;
        System.out.println("Adding gender annotations!");
        int index = arg0.getExtentTokenStart();
        String gender = tokens.get(index).get(GenderAnnotation.class);
        System.out.println(tokens.get(index).word() + " -- " + gender);
        // Constant-first comparisons: a token without a gender annotation yields null here, which
        // must simply mean "neither male nor female" rather than a NullPointerException.
        if ("MALE".equals(gender))
            arg0Male = true;
        else if ("FEMALE".equals(gender))
            arg0Female = true;
        index = arg1.getExtentTokenStart();
        gender = tokens.get(index).get(GenderAnnotation.class);
        if ("MALE".equals(gender))
            arg1Male = true;
        else if ("FEMALE".equals(gender))
            arg1Female = true;
        if (arg0Male)
            features.setCount("arg1_male", 1.0);
        if (arg0Female)
            features.setCount("arg1_female", 1.0);
        if (arg1Male)
            features.setCount("arg2_male", 1.0);
        if (arg1Female)
            features.setCount("arg2_female", 1.0);
        if ((arg0Male && arg1Male) || (arg0Female && arg1Female))
            features.setCount("arg_same_gender", 1.0);
        if ((arg0Male && arg1Female) || (arg0Female && arg1Male))
            features.setCount("arg_different_gender", 1.0);
    }
    // removeAll returns true only if it removed something, i.e. if types and dependencyFeatures overlap
    List<String> tempDepFeatures = new ArrayList<>(dependencyFeatures);
    if (tempDepFeatures.removeAll(types) || types.contains("all")) {
        // dependencyFeatures contains at least one of the features listed in types
        addDependencyPathFeatures(features, rel, arg0, arg1, types, checklist, logger);
    }
    if (!checklist.isEmpty() && !checklist.contains("all"))
        throw new AssertionError("RelationFeatureFactory: features not handled: " + checklist);
    return true;
}
Also used : GenderAnnotation(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.GenderAnnotation) ArrayList(java.util.ArrayList) CoreLabel(edu.stanford.nlp.ling.CoreLabel) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) Tree(edu.stanford.nlp.trees.Tree) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 58 with ClassicCounter

use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.

the class BasicEntityExtractor method runTestSet.

// TODO not called any more, but possibly useful as a reference
/**
   * Classifies every sentence of the given test set and tallies, per answer label,
   * true positives, false positives, false negatives and gold occurrences,
   * printing each non-background outcome to standard output.
   *
   * The tallies live in local counters only: the method returns nothing and
   * does not compute precision, recall or F1 itself.
   *
   * @param testSet sentences whose tokens carry the expected answer annotation
   */
public void runTestSet(List<List<CoreLabel>> testSet) {
    Counter<String> truePositives = new ClassicCounter<>();
    Counter<String> falsePositives = new ClassicCounter<>();
    Counter<String> falseNegatives = new ClassicCounter<>();
    Counter<String> goldCounts = new ClassicCounter<>();
    for (List<CoreLabel> sentence : testSet) {
        // Build answer-free copies: only the word annotation and the POS tag are carried over.
        List<CoreLabel> stripped = new ArrayList<>();
        for (CoreLabel goldToken : sentence) {
            CoreLabel copy = new CoreLabel();
            copy.set(annotationForWord, goldToken.get(annotationForWord));
            copy.set(PartOfSpeechAnnotation.class, goldToken.get(PartOfSpeechAnnotation.class));
            stripped.add(copy);
        }
        List<CoreLabel> predictions = this.classifier.classify(stripped);
        // Gold and predicted tokens are compared position by position.
        for (int pos = 0; pos < sentence.size(); pos++) {
            CoreLabel expectedLabel = sentence.get(pos);
            CoreLabel annotatedLabel = predictions.get(pos);
            String answer = annotatedLabel.get(AnswerAnnotation.class);
            String expectedAnswer = expectedLabel.get(AnswerAnnotation.class);
            goldCounts.incrementCount(expectedAnswer);
            boolean goldIsBackground = SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(expectedAnswer);
            boolean guessIsBackground = SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(answer);
            if (!goldIsBackground && expectedAnswer.equals(answer)) {
                // correct non-background prediction
                truePositives.incrementCount(answer);
                System.out.println("True Positive:" + annotatedLabel);
            } else if (!guessIsBackground) {
                // predicted an entity label that does not match the gold label
                falsePositives.incrementCount(answer);
                System.out.println("False Positive:" + annotatedLabel);
            } else if (!goldIsBackground) {
                // predicted background where the gold label is an entity
                falseNegatives.incrementCount(expectedAnswer);
                System.out.println("False Negative:" + expectedLabel);
            }
            // remaining case: both are background (true negative), nothing to record
        }
    }
    goldCounts.remove(SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) ArrayList(java.util.ArrayList) PartOfSpeechAnnotation(edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation)

Example 59 with ClassicCounter

use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.

the class AbstractSequenceClassifier method classifyAndWriteAnswers.

/**
   * Classifies each document, writes the answers, and optionally accumulates scoring counts.
   * When {@code flags.multiThreadClassifier} is non-zero the documents are fed through a
   * {@code MulticoreWrapper}; otherwise they are processed one at a time on the calling thread.
   * A throughput summary is logged at the end.
   *
   * @param documents The documents to classify
   * @param printWriter Passed to {@code writeAnswers} for every classified document
   * @param readerWriter Passed to {@code writeAnswers} for every classified document
   * @param outputScores Whether to calculate and output the performance scores (P/R/F1) of the classifier
   * @return A Triple of overall P/R/F1, if outputScores is true, else {@code null}. The scores are done
   *         on a 0-100 scale like percentages.
   * @throws IOException declared by this method; presumably raised while writing answers or dumping features
   */
public Triple<Double, Double, Double> classifyAndWriteAnswers(Collection<List<IN>> documents, PrintWriter printWriter, DocumentReaderAndWriter<IN> readerWriter, boolean outputScores) throws IOException {
    if (flags.exportFeatures != null) {
        dumpFeatures(documents);
    }
    Timing stopwatch = new Timing();
    Counter<String> truePositives = new ClassicCounter<>();
    Counter<String> falsePositives = new ClassicCounter<>();
    Counter<String> falseNegatives = new ClassicCounter<>();
    // Counting continues only while every countResults call so far has returned true;
    // the && below short-circuits, so countResults is skipped once this turns false.
    boolean keepCounting = outputScores;
    int wordTotal = 0;
    int docTotal = 0;
    final AtomicInteger finishedDocs = new AtomicInteger(0);
    ThreadsafeProcessor<List<IN>, List<IN>> worker = new ThreadsafeProcessor<List<IN>, List<IN>>() {

        @Override
        public List<IN> process(List<IN> doc) {
            List<IN> classified = classify(doc);
            int finishedSoFar = finishedDocs.incrementAndGet();
            if (flags.verboseMode) {
                log.info(finishedSoFar + " examples completed");
            }
            return classified;
        }

        @Override
        public ThreadsafeProcessor<List<IN>, List<IN>> newInstance() {
            // the same processor object is handed out for every requested instance
            return this;
        }
    };
    MulticoreWrapper<List<IN>, List<IN>> pool = (flags.multiThreadClassifier != 0)
            ? new MulticoreWrapper<List<IN>, List<IN>>(flags.multiThreadClassifier, worker)
            : null;
    for (List<IN> doc : documents) {
        wordTotal += doc.size();
        docTotal++;
        if (pool == null) {
            // single-threaded path: classify and report immediately
            List<IN> classified = worker.process(doc);
            writeAnswers(classified, printWriter, readerWriter);
            keepCounting = keepCounting && countResults(classified, truePositives, falsePositives, falseNegatives);
            continue;
        }
        // multi-threaded path: submit, then drain whatever results are already available
        pool.put(doc);
        while (pool.peek()) {
            List<IN> classified = pool.poll();
            writeAnswers(classified, printWriter, readerWriter);
            keepCounting = keepCounting && countResults(classified, truePositives, falsePositives, falseNegatives);
        }
    }
    if (pool != null) {
        // wait for outstanding work, then drain the remaining results
        pool.join();
        while (pool.peek()) {
            List<IN> classified = pool.poll();
            writeAnswers(classified, printWriter, readerWriter);
            keepCounting = keepCounting && countResults(classified, truePositives, falsePositives, falseNegatives);
        }
    }
    long elapsedMillis = stopwatch.stop();
    double wordsPerSecond = wordTotal / (elapsedMillis / 1000.0);
    NumberFormat twoDecimals = new DecimalFormat("0.00");
    log.info(StringUtils.getShortClassName(this) + " tagged " + wordTotal + " words in " + docTotal + " documents at " + twoDecimals.format(wordsPerSecond) + " words per second.");
    return outputScores ? printResults(truePositives, falsePositives, falseNegatives) : null;
}
Also used : DecimalFormat(java.text.DecimalFormat) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) NumberFormat(java.text.NumberFormat)

Example 60 with ClassicCounter

use of edu.stanford.nlp.stats.ClassicCounter in project CoreNLP by stanfordnlp.

the class QuasiDeterminizer method processGraph.

/**
 * Processes a graph in two steps: {@code computeLambda} derives a counter from the graph,
 * and {@code pushLambdas} combines the graph with that counter to produce the returned graph.
 *
 * @param graph the graph to process
 * @return the graph returned by {@code pushLambdas}
 */
public TransducerGraph processGraph(TransducerGraph graph) {
    // step 1: lambda values for the graph (noted in the original as non-destructive)
    ClassicCounter lambdas = computeLambda(graph);
    // step 2: push them through the graph (noted in the original as creating a new graph)
    return pushLambdas(graph, lambdas);
}
Also used : ClassicCounter(edu.stanford.nlp.stats.ClassicCounter)

Aggregations

ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)69 CoreLabel (edu.stanford.nlp.ling.CoreLabel)27 ArrayList (java.util.ArrayList)21 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)18 Tree (edu.stanford.nlp.trees.Tree)13 Pair (edu.stanford.nlp.util.Pair)11 Counter (edu.stanford.nlp.stats.Counter)10 List (java.util.List)10 Mention (edu.stanford.nlp.coref.data.Mention)8 Language (edu.stanford.nlp.international.Language)7 RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)7 CoreMap (edu.stanford.nlp.util.CoreMap)7 IOUtils (edu.stanford.nlp.io.IOUtils)6 Label (edu.stanford.nlp.ling.Label)6 TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams)6 PrintWriter (java.io.PrintWriter)6 java.util (java.util)6 HashSet (java.util.HashSet)6 RVFDatum (edu.stanford.nlp.ling.RVFDatum)5 DiskTreebank (edu.stanford.nlp.trees.DiskTreebank)5