Example 6 with Triple

Use of edu.stanford.nlp.util.Triple in project CoreNLP by stanfordnlp.

The class NERDemo, method main.

public static void main(String[] args) throws Exception {
    String serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz";
    if (args.length > 0) {
        serializedClassifier = args[0];
    }
    AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(serializedClassifier);
    if (args.length > 1) {
        /* For the file, it shows (1) how to run NER on a String, (2) how
           to get the entities in the String with character offsets, and
           (3) how to run NER on a whole file (without loading it into a String).
         */
        String fileContents = IOUtils.slurpFile(args[1]);
        List<List<CoreLabel>> out = classifier.classify(fileContents);
        for (List<CoreLabel> sentence : out) {
            for (CoreLabel word : sentence) {
                System.out.print(word.word() + '/' + word.get(CoreAnnotations.AnswerAnnotation.class) + ' ');
            }
            System.out.println();
        }
        System.out.println("---");
        out = classifier.classifyFile(args[1]);
        for (List<CoreLabel> sentence : out) {
            for (CoreLabel word : sentence) {
                System.out.print(word.word() + '/' + word.get(CoreAnnotations.AnswerAnnotation.class) + ' ');
            }
            System.out.println();
        }
        System.out.println("---");
        List<Triple<String, Integer, Integer>> list = classifier.classifyToCharacterOffsets(fileContents);
        for (Triple<String, Integer, Integer> item : list) {
            System.out.println(item.first() + ": " + fileContents.substring(item.second(), item.third()));
        }
        System.out.println("---");
        System.out.println("Ten best entity labelings");
        DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter();
        classifier.classifyAndWriteAnswersKBest(args[1], 10, readerAndWriter);
        System.out.println("---");
        System.out.println("Per-token marginalized probabilities");
        classifier.printProbs(args[1], readerAndWriter);
    // -- This code prints out the first order (token pair) clique probabilities.
    // -- But that output is a bit overwhelming, so we leave it commented out by default.
    // System.out.println("---");
    // System.out.println("First Order Clique Probabilities");
    // ((CRFClassifier) classifier).printFirstOrderProbs(args[1], readerAndWriter);
    } else {
        /* For the hard-coded String, it shows how to run it on a single
           sentence, and how to do this and produce several formats, including
           slash tags and an inline XML output format. It also shows the full
           contents of the {@code CoreLabel}s that are constructed by the
           classifier. And it shows getting out the probabilities of different
           assignments and an n-best list of classifications with probabilities.
         */
        String[] example = { "Good afternoon Rajat Raina, how are you today?", "I go to school at Stanford University, which is located in California." };
        for (String str : example) {
            System.out.println(classifier.classifyToString(str));
        }
        System.out.println("---");
        for (String str : example) {
            // This one puts in spaces and newlines between tokens, so just print not println.
            System.out.print(classifier.classifyToString(str, "slashTags", false));
        }
        System.out.println("---");
        for (String str : example) {
            // This one is best for dealing with the output as a TSV (tab-separated column) file.
            // The first column gives entities, the second their classes, and the third the remaining text in a document
            System.out.print(classifier.classifyToString(str, "tabbedEntities", false));
        }
        System.out.println("---");
        for (String str : example) {
            System.out.println(classifier.classifyWithInlineXML(str));
        }
        System.out.println("---");
        for (String str : example) {
            System.out.println(classifier.classifyToString(str, "xml", true));
        }
        System.out.println("---");
        for (String str : example) {
            System.out.print(classifier.classifyToString(str, "tsv", false));
        }
        System.out.println("---");
        // This gets out entities with character offsets
        int j = 0;
        for (String str : example) {
            j++;
            List<Triple<String, Integer, Integer>> triples = classifier.classifyToCharacterOffsets(str);
            for (Triple<String, Integer, Integer> trip : triples) {
                System.out.printf("%s over character offsets [%d, %d) in sentence %d.%n", trip.first(), trip.second(), trip.third(), j);
            }
        }
        System.out.println("---");
        // This prints out all the details of what is stored for each token
        int i = 0;
        for (String str : example) {
            for (List<CoreLabel> lcl : classifier.classify(str)) {
                for (CoreLabel cl : lcl) {
                    System.out.print(i++ + ": ");
                    System.out.println(cl.toShorterString());
                }
            }
        }
        System.out.println("---");
    }
}
Also used: Triple (edu.stanford.nlp.util.Triple), CoreLabel (edu.stanford.nlp.ling.CoreLabel), List (java.util.List)
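
As a primer on the utility class itself, here is a minimal, self-contained sketch of building and reading a Triple directly. It mirrors the accessor calls (first(), second(), third()) that NERDemo uses on the results of classifyToCharacterOffsets; the label and offsets below are made-up values for illustration.

import edu.stanford.nlp.util.Triple;

public class TripleBasics {
    public static void main(String[] args) {
        // classifyToCharacterOffsets returns (entity label, begin offset, end offset)
        Triple<String, Integer, Integer> span = new Triple<>("PERSON", 15, 26);
        System.out.println(span.first());   // PERSON
        System.out.println(span.second());  // 15
        System.out.println(span.third());   // 26
        // The elements are also exposed as public fields (span.first, span.second,
        // span.third), which is why bare field access compiles in some examples.
    }
}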

Example 7 with Triple

Use of edu.stanford.nlp.util.Triple in project CoreNLP by stanfordnlp.

The class ChineseSimWordAvgDepGrammar, method probTBwithSimWords.

/*
  ** An alternative kind of smoothing.
  ** The first one, "probSimilarWordAvg", was implemented by Galen.
  ** This one modifies "probTB" in MLEDependencyGrammar using the simWords list we have.
  ** -pichuan
  */
private double probTBwithSimWords(IntDependency dependency) {
    boolean leftHeaded = dependency.leftHeaded && directional;
    IntTaggedWord unknownHead = new IntTaggedWord(-1, dependency.head.tag);
    IntTaggedWord unknownArg = new IntTaggedWord(-1, dependency.arg.tag);
    if (verbose) {
        System.out.println("Generating " + dependency);
    }
    short distance = dependency.distance;
    // int hW = dependency.head.word;
    // int aW = dependency.arg.word;
    IntTaggedWord aTW = dependency.arg;
    // IntTaggedWord hTW = dependency.head;
    double pb_stop_hTWds = getStopProb(dependency);
    boolean isRoot = rootTW(dependency.head);
    if (dependency.arg.word == -2) {
        // did we generate stop?
        if (isRoot) {
            return 0.0;
        }
        return pb_stop_hTWds;
    }
    double pb_go_hTWds = 1.0 - pb_stop_hTWds;
    if (isRoot) {
        pb_go_hTWds = 1.0;
    }
    // generate the argument
    int valenceBinDistance = valenceBin(distance);
    // KEY:
    // c_     count of
    // p_     MLE prob of
    // pb_    MAP prob of
    // a      arg
    // h      head
    // T      tag
    // W      word
    // d      direction
    // ds     distance
    IntDependency temp = new IntDependency(dependency.head, dependency.arg, leftHeaded, valenceBinDistance);
    double c_aTW_hTWd = argCounter.getCount(temp);
    temp = new IntDependency(dependency.head, unknownArg, leftHeaded, valenceBinDistance);
    double c_aT_hTWd = argCounter.getCount(temp);
    temp = new IntDependency(dependency.head, wildTW, leftHeaded, valenceBinDistance);
    double c_hTWd = argCounter.getCount(temp);
    temp = new IntDependency(unknownHead, dependency.arg, leftHeaded, valenceBinDistance);
    double c_aTW_hTd = argCounter.getCount(temp);
    temp = new IntDependency(unknownHead, unknownArg, leftHeaded, valenceBinDistance);
    double c_aT_hTd = argCounter.getCount(temp);
    temp = new IntDependency(unknownHead, wildTW, leftHeaded, valenceBinDistance);
    double c_hTd = argCounter.getCount(temp);
    temp = new IntDependency(wildTW, dependency.arg, false, -1);
    double c_aTW = argCounter.getCount(temp);
    temp = new IntDependency(wildTW, unknownArg, false, -1);
    double c_aT = argCounter.getCount(temp);
    // do the magic
    double p_aTW_hTd = (c_hTd > 0.0 ? c_aTW_hTd / c_hTd : 0.0);
    double p_aT_hTd = (c_hTd > 0.0 ? c_aT_hTd / c_hTd : 0.0);
    double p_aTW_aT = (c_aTW > 0.0 ? c_aTW / c_aT : 1.0);
    // = (c_aTW_hTWd + smooth_aTW_hTWd * p_aTW_hTd) / (c_hTWd + smooth_aTW_hTWd);
    double pb_aTW_hTWd;
    double pb_aT_hTWd = (c_aT_hTWd + smooth_aT_hTWd * p_aT_hTd) / (c_hTWd + smooth_aT_hTWd);
    // = (interp * pb_aTW_hTWd + (1.0 - interp) * p_aTW_aT * pb_aT_hTWd) * pb_go_hTWds;
    double score;
    /* smooth by simWords -pichuan */
    List<Triple<Integer, String, Double>> sim2arg = simArgMap.get(new Pair<>(dependency.arg.word, stringBasicCategory(dependency.arg.tag)));
    List<Triple<Integer, String, Double>> sim2head = simHeadMap.get(new Pair<>(dependency.head.word, stringBasicCategory(dependency.head.tag)));
    List<Integer> simArg = new ArrayList<>();
    List<Integer> simHead = new ArrayList<>();
    if (sim2arg != null) {
        for (Triple<Integer, String, Double> t : sim2arg) {
            simArg.add(t.first);
        }
    }
    if (sim2head != null) {
        for (Triple<Integer, String, Double> t : sim2head) {
            simHead.add(t.first);
        }
    }
    double cSim_aTW_hTd = 0;
    double cSim_hTd = 0;
    for (int h : simHead) {
        IntTaggedWord hWord = new IntTaggedWord(h, dependency.head.tag);
        temp = new IntDependency(hWord, dependency.arg, dependency.leftHeaded, dependency.distance);
        cSim_aTW_hTd += argCounter.getCount(temp);
        temp = new IntDependency(hWord, wildTW, dependency.leftHeaded, dependency.distance);
        cSim_hTd += argCounter.getCount(temp);
    }
    // P(Wa,Ta|Th)
    double pSim_aTW_hTd = (cSim_hTd > 0.0 ? cSim_aTW_hTd / cSim_hTd : 0.0);
    if (debug) {
        //if (simHead.size() > 0 && cSim_hTd == 0.0) {
        if (pSim_aTW_hTd > 0.0) {
            //System.out.println("# simHead("+dependency.head.word+"-"+wordNumberer.object(dependency.head.word)+") =\t"+cSim_hTd);
            System.out.println(dependency + "\t" + pSim_aTW_hTd);
        //System.out.println(wordNumberer);
        }
    }
    //pb_aTW_hTWd = (c_aTW_hTWd + smooth_aTW_hTWd * pSim_aTW_hTd + smooth_aTW_hTWd * p_aTW_hTd) / (c_hTWd + smooth_aTW_hTWd + smooth_aTW_hTWd);
    //if (pSim_aTW_hTd > 0.0) {
    double smoothSim_aTW_hTWd = 17.7;
    double smooth_aTW_hTWd = 17.7 * 2;
    //smooth_aTW_hTWd = smooth_aTW_hTWd*2;
    pb_aTW_hTWd = (c_aTW_hTWd + smoothSim_aTW_hTWd * pSim_aTW_hTd + smooth_aTW_hTWd * p_aTW_hTd) / (c_hTWd + smoothSim_aTW_hTWd + smooth_aTW_hTWd);
    System.out.println(dependency);
    System.out.println(c_aTW_hTWd + " + " + smoothSim_aTW_hTWd + " * " + pSim_aTW_hTd + " + " + smooth_aTW_hTWd + " * " + p_aTW_hTd);
    System.out.println("--------------------------------  = " + pb_aTW_hTWd);
    System.out.println(c_hTWd + " + " + smoothSim_aTW_hTWd + " + " + smooth_aTW_hTWd);
    System.out.println();
    //}
    //pb_aT_hTWd = (c_aT_hTWd + smooth_aT_hTWd * p_aT_hTd) / (c_hTWd + smooth_aT_hTWd);
    score = (interp * pb_aTW_hTWd + (1.0 - interp) * p_aTW_aT * pb_aT_hTWd) * pb_go_hTWds;
    if (verbose) {
        NumberFormat nf = NumberFormat.getNumberInstance();
        nf.setMaximumFractionDigits(2);
        System.out.println("  c_aTW_hTWd: " + c_aTW_hTWd + "; c_aT_hTWd: " + c_aT_hTWd + "; c_hTWd: " + c_hTWd);
        System.out.println("  c_aTW_hTd: " + c_aTW_hTd + "; c_aT_hTd: " + c_aT_hTd + "; c_hTd: " + c_hTd);
        System.out.println("  Generated with pb_go_hTWds: " + nf.format(pb_go_hTWds) + " pb_aTW_hTWd: " + nf.format(pb_aTW_hTWd) + " p_aTW_aT: " + nf.format(p_aTW_aT) + " pb_aT_hTWd: " + nf.format(pb_aT_hTWd));
        System.out.println("  NoDist score: " + score);
    }
    if (op.testOptions.prunePunc && pruneTW(aTW)) {
        return 1.0;
    }
    if (Double.isNaN(score)) {
        score = 0.0;
    }
    if (score < MIN_PROBABILITY) {
        score = 0.0;
    }
    return score;
}
Also used: Triple (edu.stanford.nlp.util.Triple), NumberFormat (java.text.NumberFormat)
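
The heart of probTBwithSimWords is the three-way smoothed estimate assigned to pb_aTW_hTWd: the observed count is mixed with the similar-word estimate and the tag-level backoff, each weighted by a pseudo-count mass. Below is a minimal standalone sketch of just that formula; the method name and the sample numbers are illustrative, only the arithmetic comes from the code above.

public class SimSmoothingSketch {

    // MAP-style estimate: observed count plus two weighted backoff
    // probabilities, normalized by the context count plus both masses.
    static double smoothedProb(double count, double contextCount,
                               double pSim, double simMass,
                               double pBackoff, double backoffMass) {
        return (count + simMass * pSim + backoffMass * pBackoff)
                / (contextCount + simMass + backoffMass);
    }

    public static void main(String[] args) {
        // Mirrors pb_aTW_hTWd above with smoothSim_aTW_hTWd = 17.7 and
        // smooth_aTW_hTWd = 35.4; the counts here are invented.
        double pb = smoothedProb(3.0, 40.0, 0.05, 17.7, 0.02, 35.4);
        System.out.println(pb);  // (3 + 0.885 + 0.708) / 93.1, about 0.049
    }
}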

Example 8 with Triple

Use of edu.stanford.nlp.util.Triple in project CoreNLP by stanfordnlp.

The class LexicalizedParser, method main.

/**
   * A main program for using the parser with various options.
   * This program can be used for building and serializing
   * a parser from treebank data, for parsing sentences from a file
   * or URL using a serialized or text grammar parser,
   * and (mainly for parser quality testing)
   * for training and testing a parser on a treebank all in one go.
   *
   * <p>
   * Sample Usages:
   * <ul>
   *   <li> <b>Train a parser (saved to <i>serializedGrammarFilename</i>)
   *      from a directory of trees (<i>trainFilesPath</i>, with an optional <i>fileRange</i>, e.g., 0-1000):</b>
   *    {@code java -mx1500m edu.stanford.nlp.parser.lexparser.LexicalizedParser [-v] -train trainFilesPath [fileRange] -saveToSerializedFile serializedGrammarFilename}
   *   </li>
   *
   *   <li> <b>Train a parser (not saved) from a directory of trees, and test it (reporting scores) on a directory of trees</b>
   *    {@code java -mx1500m edu.stanford.nlp.parser.lexparser.LexicalizedParser [-v] -train trainFilesPath [fileRange] -testTreebank testFilePath [fileRange] }
   *   </li>
   *
   *   <li> <b>Parse one or more files, given a serialized grammar and a list of files</b>
   *    {@code java -mx512m edu.stanford.nlp.parser.lexparser.LexicalizedParser [-v] serializedGrammarPath filename [filename]*}
   *   </li>
   *
   *   <li> <b>Test and report scores for a serialized grammar on trees in an output directory</b>
   *    {@code java -mx512m edu.stanford.nlp.parser.lexparser.LexicalizedParser [-v] -loadFromSerializedFile serializedGrammarPath -testTreebank testFilePath [fileRange]}
   *   </li>
   * </ul>
   *
   *<p>
   * If the {@code serializedGrammarPath} ends in {@code .gz},
   * then the grammar is written and read as a compressed file (GZip).
   * If the {@code serializedGrammarPath} is a URL, starting with
   * {@code http://}, then the parser is read from the URL.
   * A fileRange specifies a numeric value that must be included within a
   * filename for it to be used in training or testing (this works well with
   * most current treebanks).  It can be specified like a range of pages to be
   * printed, for instance as {@code 200-2199} or
   * {@code 1-300,500-725,9000} or just as {@code 1} (if all your
   * trees are in a single file, either omit this parameter or just give a dummy
   * argument such as {@code 0}).
   * If the filename to parse is "-" then the parser parses from stdin.
   * If no files are supplied to parse, then a hardwired sentence
   * is parsed.
   *
   * <p>
   * The parser can write a grammar as either a serialized Java object file
   * or in a text format (or as both), specified with the following options:
   * <blockquote>{@code
   * java edu.stanford.nlp.parser.lexparser.LexicalizedParser
   * [-v] -train
   * trainFilesPath [fileRange] [-saveToSerializedFile grammarPath]
   * [-saveToTextFile grammarPath]
   * }</blockquote>
   *
   * <p>
   * In the same position as the verbose flag ({@code -v}), many other
   * options can be specified.  The most useful to an end user are:
   * <ul>
   * <LI>{@code -tLPP class} Specify a different
   * TreebankLangParserParams, for when using a different language or
   * treebank (the default is English Penn Treebank). <i>This option MUST occur
   * before any other language-specific options that are used (or else they
   * are ignored!).</i>
   * (It's usually a good idea to specify this option even when loading a
   * serialized grammar; it is necessary if the language pack specifies a
   * needed character encoding or you wish to specify language-specific
   * options on the command line.)</LI>
   * <LI>{@code -encoding charset} Specify the character encoding of the
   * input and output files.  This will override the value in the
   * {@code TreebankLangParserParams}, provided this option appears
   * <i>after</i> any {@code -tLPP} option.</LI>
   * <LI>{@code -tokenized} Says that the input is already separated
   * into whitespace-delimited tokens.  If this option is specified, any
   * tokenizer specified for the language is ignored, and a universal (Unicode)
   * tokenizer, which divides only on whitespace, is used.
   * Unless you also specify
   * {@code -escaper}, the tokens <i>must</i> all be correctly
   * tokenized tokens of the appropriate treebank for the parser to work
   * well (for instance, if using the Penn English Treebank, you must have
   * coded "(" as "-LRB-", "3/4" as "3\/4", etc.)</LI>
   * <li>{@code -escaper class} Specify a class of type
   * {@link Function}&lt;List&lt;HasWord&gt;,List&lt;HasWord&gt;&gt; to do
   * customized escaping of tokenized text.  This class will be run over the
   * tokenized text and can fix the representation of tokens. For instance,
   * it could change "(" to "-LRB-" for the Penn English Treebank.  A
   * provided escaper that does such things for the Penn English Treebank is
   * {@code edu.stanford.nlp.process.PTBEscapingProcessor}.</li>
   * <li>{@code -tokenizerFactory class} Specifies a
   * TokenizerFactory class to be used for tokenization</li>
   * <li>{@code -tokenizerOptions options} Specifies options to a
   * TokenizerFactory class to be used for tokenization.   A comma-separated
   * list. For PTBTokenizer, options of interest include
   * {@code americanize=false} and {@code asciiQuotes} (for German).
   * Note that any choice of tokenizer options that conflicts with the
   * tokenization used in the parser training data will likely degrade parser
   * performance. </li>
   * <li>{@code -sentences token } Specifies a token that marks sentence
   * boundaries.  A value of {@code newline} causes sentence breaking on
   * newlines.  A value of {@code onePerElement} causes each element
   * (using the XML {@code -parseInside} option) to be treated as a
   * sentence. All other tokens will be interpreted literally, and must be
   * exactly the same as tokens returned by the tokenizer.  For example,
   * you might specify "|||" and put that symbol sequence as a token between
   * sentences.
   * If no explicit sentence breaking option is chosen, sentence breaking
   * is done based on a set of language-particular sentence-ending patterns.
   * </li>
   * <LI>{@code -parseInside element} Specifies that parsing should only
   * be done for tokens inside the indicated XML-style
   * elements (done as simple pattern matching, rather than XML parsing).
   * For example, if this is specified as {@code sentence}, then
   * the text inside the {@code sentence} element
   * would be parsed.
   * Using "-parseInside s" gives you support for the input format of
   * Charniak's parser. Sentences cannot span elements. Whether the
   * contents of the element are treated as one sentence or potentially
   * multiple sentences is controlled by the {@code -sentences} flag.
   * The default is potentially multiple sentences.
   * This option gives support for extracting and parsing
   * text from very simple SGML and XML documents, and is provided as a
   * user convenience for that purpose. If you want to really parse XML
   * documents before NLP parsing them, you should use an XML parser, and then
   * call to a LexicalizedParser on appropriate CDATA.
   * <LI>{@code -tagSeparator char} Specifies to look for tags on words
   * following the word and separated from it by a special character
   * {@code char}.  For instance, many tagged corpora have the
   * representation "house/NN" and you would use {@code -tagSeparator /}.
   * Notes: This option requires that the input be pretokenized.
   * The separator has to be only a single character, and there is no
   * escaping mechanism. However, splitting is done on the <i>last</i>
   * instance of the character in the token, so that cases like
   * "3\/4/CD" are handled correctly.  The parser will in all normal
   * circumstances use the tag you provide, but will override it in the
   * case of very common words in cases where the tag that you provide
   * is not one that it regards as a possible tagging for the word.
   * The parser supports a format where only some of the words in a sentence
   * have a tag (if you are calling the parser programmatically, you indicate
   * them by having them implement the {@code HasTag} interface).
   * You can do this at the command-line by only having tags after some words,
   * but you are limited by the fact that there is no way to escape the
   * tagSeparator character.</LI>
   * <LI>{@code -maxLength leng} Specify the longest sentence that
   * will be parsed (and hence indirectly the amount of memory
   * needed for the parser). If this is not specified, the parser will
   * try to dynamically grow its parse chart when long sentences are
   * encountered, but may run out of memory trying to do so.</LI>
   * <LI>{@code -outputFormat styles} Choose the style(s) of output
   * sentences: {@code penn} for prettyprinting as in the Penn
   * treebank files, or {@code oneline} for printing sentences one
   * per line, {@code words}, {@code wordsAndTags},
   * {@code dependencies}, {@code typedDependencies},
   * or {@code typedDependenciesCollapsed}.
   * Multiple options may be specified as a comma-separated
   * list.  See TreePrint class for further documentation.</LI>
   * <LI>{@code -outputFormatOptions} Provide options that control the
   * behavior of various {@code -outputFormat} choices, such as
   * {@code lexicalize}, {@code stem}, {@code markHeadNodes},
   * or {@code xml}; see {@link edu.stanford.nlp.trees.TreePrint}.
   * Options are specified as a comma-separated list.</LI>
   * <LI>{@code -writeOutputFiles} Write output files corresponding
   * to the input files, with the same name but a {@code ".stp"}
   * file extension.  The format of these files depends on the
   * {@code outputFormat} option.  (If not specified, output is sent
   * to stdout.)</LI>
   * <LI>{@code -outputFilesExtension} The extension that is appended to
   * the filename that is being parsed to produce an output file name (with the
   * -writeOutputFiles option). The default is {@code stp}.  Don't
   * include the period.
   * <LI>{@code -outputFilesDirectory} The directory in which output
   * files are written (when the -writeOutputFiles option is specified).
   * If not specified, output files are written in the same directory as the
   * input files.
   * <LI>{@code -nthreads} Parsing files and testing on treebanks
   * can use multiple threads.  This option tells the parser how many
   * threads to use.  A negative number indicates to use as many
   * threads as the machine has cores.
   * </ul>
   * See also the package documentation for more details and examples of use.
   *
   * @param args Command line arguments, as above
   */
public static void main(String[] args) {
    boolean train = false;
    boolean saveToSerializedFile = false;
    boolean saveToTextFile = false;
    String serializedInputFileOrUrl = null;
    String textInputFileOrUrl = null;
    String serializedOutputFileOrUrl = null;
    String textOutputFileOrUrl = null;
    String treebankPath = null;
    Treebank testTreebank = null;
    Treebank tuneTreebank = null;
    String testPath = null;
    FileFilter testFilter = null;
    String tunePath = null;
    FileFilter tuneFilter = null;
    FileFilter trainFilter = null;
    String secondaryTreebankPath = null;
    double secondaryTreebankWeight = 1.0;
    FileFilter secondaryTrainFilter = null;
    // variables needed to process the files to be parsed
    TokenizerFactory<? extends HasWord> tokenizerFactory = null;
    String tokenizerOptions = null;
    String tokenizerFactoryClass = null;
    String tokenizerMethod = null;
    // whether or not the input file has already been tokenized
    boolean tokenized = false;
    Function<List<HasWord>, List<HasWord>> escaper = null;
    String tagDelimiter = null;
    String sentenceDelimiter = null;
    String elementDelimiter = null;
    int argIndex = 0;
    if (args.length < 1) {
        log.info("Basic usage (see Javadoc for more): java edu.stanford.nlp.parser.lexparser.LexicalizedParser parserFileOrUrl filename*");
        return;
    }
    Options op = new Options();
    List<String> optionArgs = new ArrayList<>();
    String encoding = null;
    // while loop through option arguments
    while (argIndex < args.length && args[argIndex].charAt(0) == '-') {
        if (args[argIndex].equalsIgnoreCase("-train") || args[argIndex].equalsIgnoreCase("-trainTreebank")) {
            train = true;
            Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-train");
            argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
            treebankPath = treebankDescription.first();
            trainFilter = treebankDescription.second();
        } else if (args[argIndex].equalsIgnoreCase("-train2")) {
            // train = true;     // cdm july 2005: should require -train for this
            Triple<String, FileFilter, Double> treebankDescription = ArgUtils.getWeightedTreebankDescription(args, argIndex, "-train2");
            argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
            secondaryTreebankPath = treebankDescription.first();
            secondaryTrainFilter = treebankDescription.second();
            secondaryTreebankWeight = treebankDescription.third();
        } else if (args[argIndex].equalsIgnoreCase("-tLPP") && (argIndex + 1 < args.length)) {
            try {
                op.tlpParams = (TreebankLangParserParams) Class.forName(args[argIndex + 1]).newInstance();
            } catch (ClassNotFoundException e) {
                log.info("Class not found: " + args[argIndex + 1]);
                throw new RuntimeException(e);
            } catch (InstantiationException e) {
                log.info("Couldn't instantiate: " + args[argIndex + 1] + ": " + e.toString());
                throw new RuntimeException(e);
            } catch (IllegalAccessException e) {
                log.info("Illegal access" + e);
                throw new RuntimeException(e);
            }
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-encoding")) {
            // sets encoding for TreebankLangParserParams
            // redone later to override any serialized parser one read in
            encoding = args[argIndex + 1];
            op.tlpParams.setInputEncoding(encoding);
            op.tlpParams.setOutputEncoding(encoding);
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-tokenized")) {
            tokenized = true;
            argIndex += 1;
        } else if (args[argIndex].equalsIgnoreCase("-escaper")) {
            try {
                escaper = ReflectionLoading.loadByReflection(args[argIndex + 1]);
            } catch (Exception e) {
                log.info("Couldn't instantiate escaper " + args[argIndex + 1] + ": " + e);
            }
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-tokenizerOptions")) {
            tokenizerOptions = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-tokenizerFactory")) {
            tokenizerFactoryClass = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-tokenizerMethod")) {
            tokenizerMethod = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-sentences")) {
            sentenceDelimiter = args[argIndex + 1];
            if (sentenceDelimiter.equalsIgnoreCase("newline")) {
                sentenceDelimiter = "\n";
            }
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-parseInside")) {
            elementDelimiter = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-tagSeparator")) {
            tagDelimiter = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-loadFromSerializedFile") || args[argIndex].equalsIgnoreCase("-model")) {
            // load the parser from a binary serialized file
            // the next argument must be the path to the parser file
            serializedInputFileOrUrl = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-loadFromTextFile")) {
            // load the parser from declarative text file
            // the next argument must be the path to the parser file
            textInputFileOrUrl = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-saveToSerializedFile")) {
            saveToSerializedFile = true;
            if (ArgUtils.numSubArgs(args, argIndex) < 1) {
                log.info("Missing path: -saveToSerialized filename");
            } else {
                serializedOutputFileOrUrl = args[argIndex + 1];
            }
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-saveToTextFile")) {
            // save the parser to declarative text file
            saveToTextFile = true;
            textOutputFileOrUrl = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-saveTrainTrees")) {
            // save the training trees to a binary file
            op.trainOptions.trainTreeFile = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-treebank") || args[argIndex].equalsIgnoreCase("-testTreebank") || args[argIndex].equalsIgnoreCase("-test")) {
            Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-test");
            argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
            testPath = treebankDescription.first();
            testFilter = treebankDescription.second();
        } else if (args[argIndex].equalsIgnoreCase("-tune")) {
            Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-tune");
            argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
            tunePath = treebankDescription.first();
            tuneFilter = treebankDescription.second();
        } else {
            int oldIndex = argIndex;
            argIndex = op.setOptionOrWarn(args, argIndex);
            optionArgs.addAll(Arrays.asList(args).subList(oldIndex, argIndex));
        }
    }
    if (tuneFilter != null || tunePath != null) {
        if (tunePath == null) {
            if (treebankPath == null) {
                throw new RuntimeException("No tune treebank path specified...");
            } else {
                log.info("No tune treebank path specified.  Using train path: \"" + treebankPath + '\"');
                tunePath = treebankPath;
            }
        }
        tuneTreebank = op.tlpParams.testMemoryTreebank();
        tuneTreebank.loadPath(tunePath, tuneFilter);
    }
    if (!train && op.testOptions.verbose) {
        StringUtils.logInvocationString(log, args);
    }
    // always initialized in next if-then-else block
    LexicalizedParser lp;
    if (train) {
        StringUtils.logInvocationString(log, args);
        // so we train a parser using the treebank
        GrammarCompactor compactor = null;
        if (op.trainOptions.compactGrammar() == 3) {
            compactor = new ExactGrammarCompactor(op, false, false);
        }
        Treebank trainTreebank = makeTreebank(treebankPath, op, trainFilter);
        Treebank secondaryTrainTreebank = null;
        if (secondaryTreebankPath != null) {
            secondaryTrainTreebank = makeSecondaryTreebank(secondaryTreebankPath, op, secondaryTrainFilter);
        }
        List<List<TaggedWord>> extraTaggedWords = null;
        if (op.trainOptions.taggedFiles != null) {
            extraTaggedWords = new ArrayList<>();
            List<TaggedFileRecord> fileRecords = TaggedFileRecord.createRecords(new Properties(), op.trainOptions.taggedFiles);
            for (TaggedFileRecord record : fileRecords) {
                for (List<TaggedWord> sentence : record.reader()) {
                    extraTaggedWords.add(sentence);
                }
            }
        }
        lp = getParserFromTreebank(trainTreebank, secondaryTrainTreebank, secondaryTreebankWeight, compactor, op, tuneTreebank, extraTaggedWords);
    } else if (textInputFileOrUrl != null) {
        // so we load the parser from a text grammar file
        lp = getParserFromTextFile(textInputFileOrUrl, op);
    } else {
        // so we load a serialized parser
        if (serializedInputFileOrUrl == null && argIndex < args.length) {
            // the next argument must be the path to the serialized parser
            serializedInputFileOrUrl = args[argIndex];
            argIndex++;
        }
        if (serializedInputFileOrUrl == null) {
            log.info("No grammar specified, exiting...");
            return;
        }
        String[] extraArgs = new String[optionArgs.size()];
        extraArgs = optionArgs.toArray(extraArgs);
        try {
            lp = loadModel(serializedInputFileOrUrl, op, extraArgs);
            op = lp.op;
        } catch (IllegalArgumentException e) {
            log.info("Error loading parser, exiting...");
            throw e;
        }
    }
    // set up tokenizerFactory with options if provided
    if (tokenizerFactoryClass != null || tokenizerOptions != null) {
        try {
            if (tokenizerFactoryClass != null) {
                Class<TokenizerFactory<? extends HasWord>> clazz = ErasureUtils.uncheckedCast(Class.forName(tokenizerFactoryClass));
                Method factoryMethod;
                if (tokenizerOptions != null) {
                    factoryMethod = clazz.getMethod(tokenizerMethod != null ? tokenizerMethod : "newWordTokenizerFactory", String.class);
                    tokenizerFactory = ErasureUtils.uncheckedCast(factoryMethod.invoke(null, tokenizerOptions));
                } else {
                    factoryMethod = clazz.getMethod(tokenizerMethod != null ? tokenizerMethod : "newTokenizerFactory");
                    tokenizerFactory = ErasureUtils.uncheckedCast(factoryMethod.invoke(null));
                }
            } else {
                // have options but no tokenizer factory.  use the parser
                // langpack's factory and set its options
                tokenizerFactory = lp.op.langpack().getTokenizerFactory();
                tokenizerFactory.setOptions(tokenizerOptions);
            }
        } catch (IllegalAccessException | InvocationTargetException | ClassNotFoundException | NoSuchMethodException e) {
            log.info("Couldn't instantiate TokenizerFactory " + tokenizerFactoryClass + " with options " + tokenizerOptions);
            throw new RuntimeException(e);
        }
    }
    // OVERWRITTEN BY ONE SPECIFIED IN SERIALIZED PARSER
    if (encoding != null) {
        op.tlpParams.setInputEncoding(encoding);
        op.tlpParams.setOutputEncoding(encoding);
    }
    if (testFilter != null || testPath != null) {
        if (testPath == null) {
            if (treebankPath == null) {
                throw new RuntimeException("No test treebank path specified...");
            } else {
                log.info("No test treebank path specified.  Using train path: \"" + treebankPath + '\"');
                testPath = treebankPath;
            }
        }
        testTreebank = op.tlpParams.testMemoryTreebank();
        testTreebank.loadPath(testPath, testFilter);
    }
    op.trainOptions.sisterSplitters = Generics.newHashSet(Arrays.asList(op.tlpParams.sisterSplitters()));
    // Now what do we do with the parser we've made
    if (saveToTextFile) {
        // save the parser to textGrammar format
        if (textOutputFileOrUrl != null) {
            lp.saveParserToTextFile(textOutputFileOrUrl);
        } else {
            log.info("Usage: must specify a text grammar output path");
        }
    }
    if (saveToSerializedFile) {
        if (serializedOutputFileOrUrl != null) {
            lp.saveParserToSerialized(serializedOutputFileOrUrl);
        } else if (textOutputFileOrUrl == null && testTreebank == null) {
            // no saving/parsing request has been specified
            log.info("usage: " + "java edu.stanford.nlp.parser.lexparser.LexicalizedParser " + "-train trainFilesPath [fileRange] -saveToSerializedFile serializedParserFilename");
        }
    }
    if (op.testOptions.verbose || train) {
        // Tell the user a little or a lot about what we have made
        // get lexicon size separately as it may have its own prints in it....
        String lexNumRules = lp.lex != null ? Integer.toString(lp.lex.numRules()) : "";
        log.info("Grammar\tStates\tTags\tWords\tUnaryR\tBinaryR\tTaggings");
        log.info("Grammar\t" + lp.stateIndex.size() + '\t' + lp.tagIndex.size() + '\t' + lp.wordIndex.size() + '\t' + (lp.ug != null ? lp.ug.numRules() : "") + '\t' + (lp.bg != null ? lp.bg.numRules() : "") + '\t' + lexNumRules);
        log.info("ParserPack is " + op.tlpParams.getClass().getName());
        log.info("Lexicon is " + lp.lex.getClass().getName());
        if (op.testOptions.verbose) {
            log.info("Tags are: " + lp.tagIndex);
        // log.info("States are: " + lp.pd.stateIndex); // This is too verbose. It was already printed out by the below printOptions command if the flag -printStates is given (at training time)!
        }
        printOptions(false, op);
    }
    if (testTreebank != null) {
        // test parser on treebank
        EvaluateTreebank evaluator = new EvaluateTreebank(lp);
        evaluator.testOnTreebank(testTreebank);
    } else if (argIndex >= args.length) {
        // no more arguments, so we just parse our own test sentence
        PrintWriter pwOut = op.tlpParams.pw();
        PrintWriter pwErr = op.tlpParams.pw(System.err);
        ParserQuery pq = lp.parserQuery();
        if (pq.parse(op.tlpParams.defaultTestSentence())) {
            lp.getTreePrint().printTree(pq.getBestParse(), pwOut);
        } else {
            pwErr.println("Error. Can't parse test sentence: " + op.tlpParams.defaultTestSentence());
        }
    } else {
        // We parse filenames given by the remaining arguments
        ParseFiles.parseFiles(args, argIndex, tokenized, tokenizerFactory, elementDelimiter, sentenceDelimiter, escaper, tagDelimiter, op, lp.getTreePrint(), lp);
    }
}
Also used: TaggedFileRecord (edu.stanford.nlp.tagger.io.TaggedFileRecord), Pair (edu.stanford.nlp.util.Pair), HasWord (edu.stanford.nlp.ling.HasWord), TokenizerFactory (edu.stanford.nlp.process.TokenizerFactory), Method (java.lang.reflect.Method), RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException), InvocationTargetException (java.lang.reflect.InvocationTargetException), Triple (edu.stanford.nlp.util.Triple), TaggedWord (edu.stanford.nlp.ling.TaggedWord), ParserQuery (edu.stanford.nlp.parser.common.ParserQuery)
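
The main method above is driven entirely by command-line flags. For programmatic use, the common pattern is far shorter: load a serialized grammar and parse a token list. A minimal sketch, assuming the standard English PCFG model is available on the classpath (the model path is the conventional distribution path, not something taken from the example above):

import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.Tree;

public class LexParserSketch {
    public static void main(String[] args) {
        // loadModel accepts a file path, URL, or classpath resource;
        // a .gz suffix means the grammar is read GZip-compressed.
        LexicalizedParser lp = LexicalizedParser.loadModel(
                "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        // Pre-tokenized input, as with the -tokenized flag described above.
        List<HasWord> sentence =
                Sentence.toWordList("The", "quick", "fox", "jumped", ".");
        Tree tree = lp.parse(sentence);
        tree.pennPrint();
    }
}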

Example 9 with Triple

Use of edu.stanford.nlp.util.Triple in project CoreNLP by stanfordnlp.

The class TreeAnnotatorAndBinarizer, method getAnnotatedBinaryTreebankFromTreebank.

/** @return A Triple of binaryTrainTreebank, binarySecondaryTreebank, binaryTuneTreebank.
   */
public static Triple<Treebank, Treebank, Treebank> getAnnotatedBinaryTreebankFromTreebank(Treebank trainTreebank, Treebank secondaryTreebank, Treebank tuneTreebank, Options op) {
    // setup tree transforms
    TreebankLangParserParams tlpParams = op.tlpParams;
    TreebankLanguagePack tlp = tlpParams.treebankLanguagePack();
    if (op.testOptions.verbose) {
        PrintWriter pwErr = tlpParams.pw(System.err);
        pwErr.print("Training ");
        pwErr.println(trainTreebank.textualSummary(tlp));
        if (secondaryTreebank != null) {
            pwErr.print("Secondary training ");
            pwErr.println(secondaryTreebank.textualSummary(tlp));
        }
    }
    CompositeTreeTransformer trainTransformer = new CompositeTreeTransformer();
    if (op.trainOptions.preTransformer != null) {
        trainTransformer.addTransformer(op.trainOptions.preTransformer);
    }
    if (op.trainOptions.collinsPunc) {
        CollinsPuncTransformer collinsPuncTransformer = new CollinsPuncTransformer(tlp);
        trainTransformer.addTransformer(collinsPuncTransformer);
    }
    log.info("Binarizing trees...");
    TreeAnnotatorAndBinarizer binarizer;
    if (!op.trainOptions.leftToRight) {
        binarizer = new TreeAnnotatorAndBinarizer(tlpParams, op.forceCNF, !op.trainOptions.outsideFactor(), !op.trainOptions.predictSplits, op);
    } else {
        binarizer = new TreeAnnotatorAndBinarizer(tlpParams.headFinder(), new LeftHeadFinder(), tlpParams, op.forceCNF, !op.trainOptions.outsideFactor(), !op.trainOptions.predictSplits, op);
    }
    trainTransformer.addTransformer(binarizer);
    if (op.wordFunction != null) {
        TreeTransformer wordFunctionTransformer = new TreeLeafLabelTransformer(op.wordFunction);
        trainTransformer.addTransformer(wordFunctionTransformer);
    }
    Treebank wholeTreebank;
    if (secondaryTreebank == null) {
        wholeTreebank = trainTreebank;
    } else {
        wholeTreebank = new CompositeTreebank(trainTreebank, secondaryTreebank);
    }
    if (op.trainOptions.selectiveSplit) {
        op.trainOptions.splitters = ParentAnnotationStats.getSplitCategories(wholeTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, tlp);
        removeDeleteSplittersFromSplitters(tlp, op);
        if (op.testOptions.verbose) {
            List<String> list = new ArrayList<>(op.trainOptions.splitters);
            Collections.sort(list);
            log.info("Parent split categories: " + list);
        }
    }
    if (op.trainOptions.selectivePostSplit) {
        // Do all the transformations once just to learn selective splits on annotated categories
        TreeTransformer myTransformer = new TreeAnnotator(tlpParams.headFinder(), tlpParams, op);
        wholeTreebank = wholeTreebank.transform(myTransformer);
        op.trainOptions.postSplitters = ParentAnnotationStats.getSplitCategories(wholeTreebank, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, tlp);
        if (op.testOptions.verbose) {
            log.info("Parent post annotation split categories: " + op.trainOptions.postSplitters);
        }
    }
    if (op.trainOptions.hSelSplit) {
        // We run through all the trees once just to gather counts for hSelSplit!
        int ptt = op.trainOptions.printTreeTransformations;
        op.trainOptions.printTreeTransformations = 0;
        binarizer.setDoSelectiveSplit(false);
        for (Tree tree : wholeTreebank) {
            trainTransformer.transformTree(tree);
        }
        binarizer.setDoSelectiveSplit(true);
        op.trainOptions.printTreeTransformations = ptt;
    }
    // we've done all the setup now. here's where the train treebank is transformed.
    trainTreebank = trainTreebank.transform(trainTransformer);
    if (secondaryTreebank != null) {
        secondaryTreebank = secondaryTreebank.transform(trainTransformer);
    }
    if (op.trainOptions.printAnnotatedStateCounts) {
        binarizer.printStateCounts();
    }
    if (op.trainOptions.printAnnotatedRuleCounts) {
        binarizer.printRuleCounts();
    }
    if (tuneTreebank != null) {
        tuneTreebank = tuneTreebank.transform(trainTransformer);
    }
    if (op.testOptions.verbose) {
        binarizer.dumpStats();
    }
    return new Triple<>(trainTreebank, secondaryTreebank, tuneTreebank);
}
Also used: Triple (edu.stanford.nlp.util.Triple), PrintWriter (java.io.PrintWriter)
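
On the calling side, the returned Triple is unpacked straight into its three treebanks. A brief hedged sketch of such a caller; the helper method and the array packaging are illustrative, only the static call and the meaning of the three slots come from the code above.

import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.TreeAnnotatorAndBinarizer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.util.Triple;

public class BinarizeCallerSketch {

    // secondary and tune may be null, in which case the corresponding
    // slots of the returned Triple are null as well.
    static Treebank[] binarizeAll(Treebank train, Treebank secondary,
                                  Treebank tune, Options op) {
        Triple<Treebank, Treebank, Treebank> banks =
                TreeAnnotatorAndBinarizer.getAnnotatedBinaryTreebankFromTreebank(
                        train, secondary, tune, op);
        // first = binary train, second = binary secondary, third = binary tune,
        // matching the @return documentation above.
        return new Treebank[] { banks.first(), banks.second(), banks.third() };
    }
}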

Example 10 with Triple

Use of edu.stanford.nlp.util.Triple in project CoreNLP by stanfordnlp.

The class Evalb, method main.

/**
   * Run the Evalb scoring metric on guess/gold input. The default language is English.
   *
   * @param args Command-line options, followed by the gold and guess tree files
   */
public static void main(String[] args) {
    if (args.length < minArgs) {
        log.info(usage());
        System.exit(-1);
    }
    Properties options = StringUtils.argsToProperties(args, optionArgDefs());
    Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
    final TreebankLangParserParams tlpp = language.params;
    final int maxGoldYield = PropertiesUtils.getInt(options, "y", Integer.MAX_VALUE);
    final boolean VERBOSE = PropertiesUtils.getBool(options, "v", false);
    final boolean sortByF1 = PropertiesUtils.hasProperty(options, "s");
    int worstKTreesToEmit = PropertiesUtils.getInt(options, "s", 0);
    PriorityQueue<Triple<Double, Tree, Tree>> queue = sortByF1 ? new PriorityQueue<>(2000, new F1Comparator()) : null;
    boolean doCatLevel = PropertiesUtils.getBool(options, "c", false);
    String labelRegex = options.getProperty("f", null);
    String encoding = options.getProperty("e", "UTF-8");
    String[] parsedArgs = options.getProperty("", "").split("\\s+");
    if (parsedArgs.length != minArgs) {
        log.info(usage());
        System.exit(-1);
    }
    String goldFile = parsedArgs[0];
    String guessFile = parsedArgs[1];
    // Command-line has been parsed. Configure the metric for evaluation.
    tlpp.setInputEncoding(encoding);
    final PrintWriter pwOut = tlpp.pw();
    final Treebank guessTreebank = tlpp.diskTreebank();
    guessTreebank.loadPath(guessFile);
    pwOut.println("GUESS TREEBANK:");
    pwOut.println(guessTreebank.textualSummary());
    final Treebank goldTreebank = tlpp.diskTreebank();
    goldTreebank.loadPath(goldFile);
    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());
    final Evalb metric = new Evalb("Evalb LP/LR", true);
    final EvalbByCat evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null;
    final TreeTransformer tc = tlpp.collinizer();
    //The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    final Iterator<Tree> guessItr = guessTreebank.iterator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;
    while (guessItr.hasNext() && goldItr.hasNext()) {
        Tree guessTree = guessItr.next();
        List<Label> guessYield = guessTree.yield();
        guessLineId++;
        Tree goldTree = goldItr.next();
        List<Label> goldYield = goldTree.yield();
        goldLineId++;
        // Check that we should evaluate this tree
        if (goldYield.size() > maxGoldYield) {
            skippedGuessTrees++;
            continue;
        }
        // Only trees with equal yields can be evaluated
        if (goldYield.size() != guessYield.size()) {
            pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
            skippedGuessTrees++;
            continue;
        }
        final Tree evalGuess = tc.transformTree(guessTree);
        final Tree evalGold = tc.transformTree(goldTree);
        metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
        if (doCatLevel)
            evalbCat.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
        if (sortByF1)
            storeTrees(queue, guessTree, goldTree, metric.getLastF1());
    }
    if (guessItr.hasNext() || goldItr.hasNext()) {
        System.err.printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
    }
    pwOut.println("================================================================================");
    if (skippedGuessTrees != 0)
        pwOut.printf("Unable to evaluate %d guess trees%n", skippedGuessTrees);
    metric.display(true, pwOut);
    pwOut.println();
    if (doCatLevel) {
        evalbCat.display(true, pwOut);
        pwOut.println();
    }
    if (sortByF1)
        emitSortedTrees(queue, worstKTreesToEmit, guessFile);
    pwOut.close();
}
Also used: Treebank (edu.stanford.nlp.trees.Treebank), Label (edu.stanford.nlp.ling.Label), TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams), Properties (java.util.Properties), Triple (edu.stanford.nlp.util.Triple), Language (edu.stanford.nlp.international.Language), Tree (edu.stanford.nlp.trees.Tree), TreeTransformer (edu.stanford.nlp.trees.TreeTransformer), PrintWriter (java.io.PrintWriter)
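
The F1Comparator used with the priority queue above is internal to Evalb, but an equivalent ordering over the Triple's first element is easy to write. A small sketch: the ascending direction is an assumption based on the queue being used to emit the worst-scoring trees, and the null trees stand in for real parses.

import java.util.Comparator;
import java.util.PriorityQueue;

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Triple;

public class F1QueueSketch {
    public static void main(String[] args) {
        // Order (F1, guess, gold) triples by ascending F1 so the
        // lowest-scoring tree pairs surface first.
        Comparator<Triple<Double, Tree, Tree>> byF1 =
                Comparator.comparingDouble(t -> t.first());
        PriorityQueue<Triple<Double, Tree, Tree>> queue =
                new PriorityQueue<>(2000, byF1);
        queue.add(new Triple<>(0.87, null, null));
        queue.add(new Triple<>(0.42, null, null));
        System.out.println(queue.peek().first());  // 0.42
    }
}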

Aggregations

Triple (edu.stanford.nlp.util.Triple): 12
CoreLabel (edu.stanford.nlp.ling.CoreLabel): 4
Pair (edu.stanford.nlp.util.Pair): 3
TwoDimensionalCounter (edu.stanford.nlp.stats.TwoDimensionalCounter): 2
Tree (edu.stanford.nlp.trees.Tree): 2
CollectionValuedMap (edu.stanford.nlp.util.CollectionValuedMap): 2
PrintWriter (java.io.PrintWriter): 2
ArrayList (java.util.ArrayList): 2
List (java.util.List): 2
TransducerGraph (edu.stanford.nlp.fsm.TransducerGraph): 1
Language (edu.stanford.nlp.international.Language): 1
RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException): 1
CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 1
HasWord (edu.stanford.nlp.ling.HasWord): 1
Label (edu.stanford.nlp.ling.Label): 1
TaggedWord (edu.stanford.nlp.ling.TaggedWord): 1
SequenceMatchResult (edu.stanford.nlp.ling.tokensregex.SequenceMatchResult): 1
TokenSequenceMatcher (edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher): 1
TokenSequencePattern (edu.stanford.nlp.ling.tokensregex.TokenSequencePattern): 1
ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 1