Search in sources:

Example 1 with StringLabel

use of edu.stanford.nlp.ling.StringLabel in project CoreNLP by stanfordnlp.

From the class NegraPennCollinizer, method transformTree:

/**
 * Transforms a Negra/Penn tree for evalb-style comparison: strips labels to
 * their basic categories, optionally deletes punctuation preterminals,
 * flattens TOPP constituents, and removes unary rewrites at the root.
 *
 * @param tree The tree to transform; may be mutated in place (TOPP children
 *             are spliced into their parent)
 * @return The transformed tree, or {@code null} if the entire subtree was
 *         deleted (e.g. it contained only punctuation)
 */
public Tree transformTree(Tree tree) {
    Label l = tree.label();
    if (tree.isLeaf()) {
        return tf.newLeaf(l);
    }
    String s = l.value();
    s = tlpp.treebankLanguagePack().basicCategory(s);
    if (deletePunct) {
        // evalb ignores punctuation anyway, so drop punctuation preterminals
        if (tree.isPreTerminal() && tlpp.treebankLanguagePack().isEvalBIgnoredPunctuationTag(s)) {
            return null;
        }
    }
    // TEMPORARY: eliminate the TOPP constituent.
    // Guard numChildren() > 0 so a malformed childless non-leaf node cannot
    // trigger an ArrayIndexOutOfBoundsException on children()[0].
    if (tree.numChildren() > 0 && tree.children()[0].label().value().equals("TOPP")) {
        log.info("Found a TOPP");
        tree.setChildren(tree.children()[0].children());
    }
    // Negra has lots of non-unary roots; delete unary roots
    if (tlpp.treebankLanguagePack().isStartSymbol(s) && tree.numChildren() == 1) {
        // NB: This deletes the boundary symbol, which is in the tree!
        return transformTree(tree.getChild(0));
    }
    List<Tree> children = new ArrayList<>();
    for (int cNum = 0, numC = tree.numChildren(); cNum < numC; cNum++) {
        Tree child = tree.getChild(cNum);
        Tree newChild = transformTree(child);
        if (newChild != null) {
            children.add(newChild);
        }
    }
    if (children.isEmpty()) {
        // every child was deleted (e.g. all punctuation): delete this node too
        return null;
    }
    return tf.newTreeNode(new StringLabel(s), children);
}
Also used : StringLabel(edu.stanford.nlp.ling.StringLabel) Label(edu.stanford.nlp.ling.Label) ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) StringLabel(edu.stanford.nlp.ling.StringLabel)

Example 2 with StringLabel

use of edu.stanford.nlp.ling.StringLabel in project CoreNLP by stanfordnlp.

From the class ChineseCollinizer, method transformTree:

/**
 * Collinizes one subtree: optionally deletes punctuation, strips functional
 * and machine-generated annotation from category labels, merges PRN into
 * ADVP, and removes unary ROOT rewrites.
 *
 * @param tree   The subtree to transform
 * @param isRoot Whether this node is the root (a root whose children were all
 *               deleted is kept rather than returned as {@code null})
 * @return The transformed subtree, or {@code null} if it was deleted
 */
private Tree transformTree(Tree tree, boolean isRoot) {
    String label = tree.label().value();
    if (tree.isLeaf()) {
        return (deletePunct && ctlp.isPunctuationWord(label)) ? null : tf.newLeaf(new StringLabel(label));
    }
    if (deletePunct && tree.isPreTerminal() && ctlp.isPunctuationTag(label)) {
        // System.out.println("Deleting punctuation");
        return null;
    }
    // keep non-unary roots for now
    if (label.matches("ROOT.*") && tree.numChildren() == 1) {
        return transformTree(tree.children()[0], true);
    }
    //System.out.println("Enhanced label is " + label);
    // remove all functional and machine-generated annotations
    label = label.replaceFirst("[^A-Z].*$", "");
    // merge parentheticals with adverb phrases
    label = label.replaceFirst("PRN", "ADVP");
    List<Tree> keptChildren = new ArrayList<>();
    for (Tree child : tree.children()) {
        Tree transformed = transformTree(child, false);
        if (transformed != null) {
            keptChildren.add(transformed);
        }
    }
    // Chinese treebank that only have punctuation in them!!!
    if (keptChildren.isEmpty() && !isRoot) {
        if (VERBOSE) {
            log.info("ChineseCollinizer: all children of " + label + " deleted; returning null");
        }
        return null;
    }
    return tf.newTreeNode(new StringLabel(label), keptChildren);
}
Also used : ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) StringLabel(edu.stanford.nlp.ling.StringLabel)

Example 3 with StringLabel

use of edu.stanford.nlp.ling.StringLabel in project CoreNLP by stanfordnlp.

From the class Treebank, method textualSummary:

/**
   * Return various statistics about the treebank (number of sentences,
   * words, tag set, etc.).
   *
   * @param tlp The TreebankLanguagePack used to determine punctuation and an
   *            appropriate character encoding
   * @return A big string for human consumption describing the treebank
   */
public String textualSummary(TreebankLanguagePack tlp) {
    // Tree-level statistics gathered in the first pass.
    int numTrees = 0;
    int numTreesLE40 = 0;
    int numNonUnaryRoots = 0;
    Tree nonUnaryEg = null;
    ClassicCounter<Tree> nonUnaries = new ClassicCounter<>();
    ClassicCounter<String> roots = new ClassicCounter<>();
    ClassicCounter<String> starts = new ClassicCounter<>();
    ClassicCounter<String> puncts = new ClassicCounter<>();
    // Counts of various malformed-tree shapes, with an example tree for each.
    int numUnenclosedLeaves = 0;
    int numLeaves = 0;
    int numNonPhrasal = 0;
    int numPreTerminalWithMultipleChildren = 0;
    int numWords = 0;
    int numTags = 0;
    int shortestSentence = Integer.MAX_VALUE;
    int longestSentence = 0;
    int numNullLabel = 0;
    Set<String> words = Generics.newHashSet();
    ClassicCounter<String> tags = new ClassicCounter<>();
    ClassicCounter<String> cats = new ClassicCounter<>();
    Tree leafEg = null;
    Tree preTerminalMultipleChildrenEg = null;
    Tree nullLabelEg = null;
    Tree rootRewritesAsTaggedWordEg = null;
    // First pass: walk every tree in the treebank, collecting counts and
    // capturing an example tree for each anomaly the first time it is seen.
    for (Tree t : this) {
        roots.incrementCount(t.value());
        numTrees++;
        int leng = t.yield().size();
        if (leng <= 40) {
            numTreesLE40++;
        }
        if (leng < shortestSentence) {
            shortestSentence = leng;
        }
        if (leng > longestSentence) {
            longestSentence = leng;
        }
        // Classify the root: non-unary, bare leaf, leaf rewrite, or tagged-word rewrite.
        if (t.numChildren() > 1) {
            if (numNonUnaryRoots == 0) {
                nonUnaryEg = t;
            }
            // only tally the local rewrite for the first 100 offenders
            if (numNonUnaryRoots < 100) {
                nonUnaries.incrementCount(t.localTree());
            }
            numNonUnaryRoots++;
        } else if (t.isLeaf()) {
            numUnenclosedLeaves++;
        } else {
            Tree t2 = t.firstChild();
            if (t2.isLeaf()) {
                numLeaves++;
                leafEg = t;
            } else if (t2.isPreTerminal()) {
                if (numNonPhrasal == 0) {
                    rootRewritesAsTaggedWordEg = t;
                }
                numNonPhrasal++;
            }
            starts.incrementCount(t2.value());
        }
        // Node-level statistics for every subtree; null/empty labels are
        // both counted and repaired in place.
        for (Tree subtree : t) {
            Label lab = subtree.label();
            if (lab == null || lab.value() == null || "".equals(lab.value())) {
                if (numNullLabel == 0) {
                    nullLabelEg = subtree;
                }
                numNullLabel++;
                if (lab == null) {
                    subtree.setLabel(new StringLabel(""));
                } else if (lab.value() == null) {
                    subtree.label().setValue("");
                }
            }
            if (subtree.isLeaf()) {
                numWords++;
                words.add(subtree.value());
            } else if (subtree.isPreTerminal()) {
                numTags++;
                tags.incrementCount(subtree.value());
                if (tlp != null && tlp.isPunctuationTag(subtree.value())) {
                    puncts.incrementCount(subtree.firstChild().value());
                }
            } else if (subtree.isPhrasal()) {
                // NOTE(review): despite the variable name, this counts phrasal
                // nodes that directly dominate a leaf (i.e. a category that
                // mixes words and subconstituents) — confirm intent.
                boolean hasLeafChild = false;
                for (Tree kt : subtree.children()) {
                    if (kt.isLeaf()) {
                        hasLeafChild = true;
                    }
                }
                if (hasLeafChild) {
                    numPreTerminalWithMultipleChildren++;
                    if (preTerminalMultipleChildrenEg == null) {
                        preTerminalMultipleChildrenEg = subtree;
                    }
                }
                cats.incrementCount(subtree.value());
            } else {
                throw new IllegalStateException("Treebank: Bad tree in treebank!: " + subtree);
            }
        }
    }
    // Second phase: format the gathered statistics into a human-readable report.
    StringWriter sw = new StringWriter(2000);
    PrintWriter pw = new PrintWriter(sw);
    NumberFormat nf = NumberFormat.getNumberInstance();
    nf.setMaximumFractionDigits(0);
    pw.println("Treebank has " + numTrees + " trees (" + numTreesLE40 + " of length <= 40) and " + numWords + " words (tokens)");
    if (numTrees > 0) {
        if (numTags != numWords) {
            pw.println("  Warning! numTags differs and is " + numTags);
        }
        if (roots.size() == 1) {
            String root = (String) roots.keySet().toArray()[0];
            pw.println("  The root category is: " + root);
        } else {
            pw.println("  Warning! " + roots.size() + " different roots in treebank: " + Counters.toString(roots, nf));
        }
        if (numNonUnaryRoots > 0) {
            pw.print("  Warning! " + numNonUnaryRoots + " trees without unary initial rewrite.  ");
            if (numNonUnaryRoots > 100) {
                pw.print("First 100 ");
            }
            pw.println("Rewrites: " + Counters.toString(nonUnaries, nf));
            pw.println("    Example: " + nonUnaryEg);
        }
        if (numUnenclosedLeaves > 0 || numLeaves > 0 || numNonPhrasal > 0) {
            pw.println("  Warning! Non-phrasal trees: " + numUnenclosedLeaves + " bare leaves; " + numLeaves + " root rewrites as leaf; and " + numNonPhrasal + " root rewrites as tagged word");
            if (numLeaves > 0) {
                pw.println("  Example bad root rewrites as leaf: " + leafEg);
            }
            if (numNonPhrasal > 0) {
                pw.println("  Example bad root rewrites as tagged word: " + rootRewritesAsTaggedWordEg);
            }
        }
        if (numNullLabel > 0) {
            pw.println("  Warning!  " + numNullLabel + " tree nodes with null or empty string labels, e.g.:");
            pw.println("    " + nullLabelEg);
        }
        if (numPreTerminalWithMultipleChildren > 0) {
            pw.println("  Warning! " + numPreTerminalWithMultipleChildren + " preterminal nodes with multiple children.");
            pw.println("    Example: " + preTerminalMultipleChildrenEg);
        }
        // integer arithmetic deliberately truncates the average to 2 decimals
        pw.println("  Sentences range from " + shortestSentence + " to " + longestSentence + " words, with an average length of " + (((numWords * 100) / numTrees) / 100.0) + " words.");
        pw.println("  " + cats.size() + " phrasal category types, " + tags.size() + " tag types, and " + words.size() + " word types");
        String[] empties = { "*", "0", "*T*", "*RNR*", "*U*", "*?*", "*EXP*", "*ICH*", "*NOT*", "*PPA*", "*OP*", "*pro*", "*PRO*" };
        // What a dopey choice using 0 as an empty element name!!
        // The problem with the below is that words aren't turned into a basic
        // category, but empties commonly are indexed....  Would need to look
        // for them with a suffix of -[0-9]+
        Set<String> knownEmpties = Generics.newHashSet(Arrays.asList(empties));
        Set<String> emptiesIntersection = Sets.intersection(words, knownEmpties);
        if (!emptiesIntersection.isEmpty()) {
            pw.println("  Caution! " + emptiesIntersection.size() + " word types are known empty elements: " + emptiesIntersection);
        }
        Set<String> joint = Sets.intersection(cats.keySet(), tags.keySet());
        if (!joint.isEmpty()) {
            pw.println("  Warning! " + joint.size() + " items are tags and categories: " + joint);
        }
        // '@' in a category/tag name conflicts with the parser's internal
        // binarization markers, so warn about the first occurrence of each.
        for (String cat : cats.keySet()) {
            if (cat != null && cat.contains("@")) {
                pw.println("  Warning!!  Stanford Parser does not work with categories containing '@' like: " + cat);
                break;
            }
        }
        for (String cat : tags.keySet()) {
            if (cat != null && cat.contains("@")) {
                pw.println("  Warning!!  Stanford Parser does not work with tags containing '@' like: " + cat);
                break;
            }
        }
        pw.println("    Cats: " + Counters.toString(cats, nf));
        pw.println("    Tags: " + Counters.toString(tags, nf));
        pw.println("    " + starts.size() + " start categories: " + Counters.toString(starts, nf));
        if (!puncts.isEmpty()) {
            pw.println("    Puncts: " + Counters.toString(puncts, nf));
        }
    }
    return sw.toString();
}
Also used : StringLabel(edu.stanford.nlp.ling.StringLabel) Label(edu.stanford.nlp.ling.Label) StringLabel(edu.stanford.nlp.ling.StringLabel) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) NumberFormat(java.text.NumberFormat)

Example 4 with StringLabel

use of edu.stanford.nlp.ling.StringLabel in project CoreNLP by stanfordnlp.

From the class ChineseCharacterBasedLexiconTraining, method main:

/**
 * Command-line driver for training/evaluating a Chinese character-based
 * lexicon. Supported flags (argument counts declared below): -parser, -lex,
 * -test, -out, -lengthPenalty, -penaltyType, -maxLength, -stats, plus
 * boolean flags -eval, -annotate, -norm, -combo, -rad, -segment-related
 * options handled by ChineseTreebankParserParams.
 *
 * @param args command-line arguments, also passed through to
 *             ChineseTreebankParserParams.setOptionFlag
 * @throws IOException if reading/writing treebanks or serialized models fails
 */
public static void main(String[] args) throws IOException {
    // Declare how many arguments each multi-argument flag consumes.
    Map<String, Integer> flagsToNumArgs = Generics.newHashMap();
    flagsToNumArgs.put("-parser", Integer.valueOf(3));
    flagsToNumArgs.put("-lex", Integer.valueOf(3));
    flagsToNumArgs.put("-test", Integer.valueOf(2));
    flagsToNumArgs.put("-out", Integer.valueOf(1));
    flagsToNumArgs.put("-lengthPenalty", Integer.valueOf(1));
    flagsToNumArgs.put("-penaltyType", Integer.valueOf(1));
    flagsToNumArgs.put("-maxLength", Integer.valueOf(1));
    flagsToNumArgs.put("-stats", Integer.valueOf(2));
    Map<String, String[]> argMap = StringUtils.argsToMap(args, flagsToNumArgs);
    boolean eval = argMap.containsKey("-eval");
    PrintWriter pw = null;
    // Optional output file, written in the GB18030 encoding.
    if (argMap.containsKey("-out")) {
        pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream((argMap.get("-out"))[0]), "GB18030"), true);
    }
    log.info("ChineseCharacterBasedLexicon called with args:");
    ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();
    // Let the parser params consume any flags they recognize.
    for (int i = 0; i < args.length; i++) {
        ctpp.setOptionFlag(args, i);
        log.info(" " + args[i]);
    }
    log.info();
    Options op = new Options(ctpp);
    // -stats mode: load (and optionally annotate) a treebank, print
    // statistics, and exit without training anything.
    if (argMap.containsKey("-stats")) {
        String[] statArgs = (argMap.get("-stats"));
        MemoryTreebank rawTrainTreebank = op.tlpParams.memoryTreebank();
        FileFilter trainFilt = new NumberRangesFileFilter(statArgs[1], false);
        rawTrainTreebank.loadPath(new File(statArgs[0]), trainFilt);
        log.info("Done reading trees.");
        MemoryTreebank trainTreebank;
        if (argMap.containsKey("-annotate")) {
            trainTreebank = new MemoryTreebank();
            TreeAnnotator annotator = new TreeAnnotator(ctpp.headFinder(), ctpp, op);
            for (Tree tree : rawTrainTreebank) {
                trainTreebank.add(annotator.transformTree(tree));
            }
            log.info("Done annotating trees.");
        } else {
            trainTreebank = rawTrainTreebank;
        }
        printStats(trainTreebank, pw);
        System.exit(0);
    }
    // Local sentence-length cap used when skipping test sentences below.
    int maxLength = 1000000;
    //    Test.verbose = true;
    if (argMap.containsKey("-norm")) {
        op.testOptions.lengthNormalization = true;
    }
    if (argMap.containsKey("-maxLength")) {
        maxLength = Integer.parseInt((argMap.get("-maxLength"))[0]);
    }
    // NOTE(review): hard-codes the parser's internal maxLength to 120
    // unconditionally; the -maxLength flag above only affects the local
    // sentence-skipping filter. Confirm this override is intentional.
    op.testOptions.maxLength = 120;
    boolean combo = argMap.containsKey("-combo");
    if (combo) {
        ctpp.useCharacterBasedLexicon = true;
        op.testOptions.maxSpanForTags = 10;
        op.doDep = false;
        op.dcTags = false;
    }
    LexicalizedParser lp = null;
    Lexicon lex = null;
    // -parser mode: either train a parser from a treebank (2-3 args) or
    // load a previously serialized parser (1 arg).
    if (argMap.containsKey("-parser")) {
        String[] parserArgs = (argMap.get("-parser"));
        if (parserArgs.length > 1) {
            FileFilter trainFilt = new NumberRangesFileFilter(parserArgs[1], false);
            lp = LexicalizedParser.trainFromTreebank(parserArgs[0], trainFilt, op);
            if (parserArgs.length == 3) {
                String filename = parserArgs[2];
                log.info("Writing parser in serialized format to file " + filename + " ");
                System.err.flush();
                ObjectOutputStream out = IOUtils.writeStreamFromString(filename);
                out.writeObject(lp);
                out.close();
                log.info("done.");
            }
        } else {
            String parserFile = parserArgs[0];
            lp = LexicalizedParser.loadModel(parserFile, op);
        }
        // A loaded/trained parser supplies its own lexicon, options, and params.
        lex = lp.getLexicon();
        op = lp.getOp();
        ctpp = (ChineseTreebankParserParams) op.tlpParams;
    }
    if (argMap.containsKey("-rad")) {
        ctpp.useUnknownCharacterModel = true;
    }
    if (argMap.containsKey("-lengthPenalty")) {
        ctpp.lengthPenalty = Double.parseDouble((argMap.get("-lengthPenalty"))[0]);
    }
    if (argMap.containsKey("-penaltyType")) {
        ctpp.penaltyType = Integer.parseInt((argMap.get("-penaltyType"))[0]);
    }
    // -lex mode: either train a lexicon from a treebank (2-3 args) or load
    // a previously serialized lexicon (1 arg).
    if (argMap.containsKey("-lex")) {
        String[] lexArgs = (argMap.get("-lex"));
        if (lexArgs.length > 1) {
            Index<String> wordIndex = new HashIndex<>();
            Index<String> tagIndex = new HashIndex<>();
            lex = ctpp.lex(op, wordIndex, tagIndex);
            MemoryTreebank rawTrainTreebank = op.tlpParams.memoryTreebank();
            FileFilter trainFilt = new NumberRangesFileFilter(lexArgs[1], false);
            rawTrainTreebank.loadPath(new File(lexArgs[0]), trainFilt);
            log.info("Done reading trees.");
            MemoryTreebank trainTreebank;
            if (argMap.containsKey("-annotate")) {
                trainTreebank = new MemoryTreebank();
                TreeAnnotator annotator = new TreeAnnotator(ctpp.headFinder(), ctpp, op);
                for (Tree tree : rawTrainTreebank) {
                    tree = annotator.transformTree(tree);
                    trainTreebank.add(tree);
                }
                log.info("Done annotating trees.");
            } else {
                trainTreebank = rawTrainTreebank;
            }
            lex.initializeTraining(trainTreebank.size());
            lex.train(trainTreebank);
            lex.finishTraining();
            log.info("Done training lexicon.");
            if (lexArgs.length == 3) {
                // NOTE(review): the ternary is redundant inside this
                // length == 3 branch; the default path is dead code.
                String filename = lexArgs.length == 3 ? lexArgs[2] : "parsers/chineseCharLex.ser.gz";
                log.info("Writing lexicon in serialized format to file " + filename + " ");
                System.err.flush();
                ObjectOutputStream out = IOUtils.writeStreamFromString(filename);
                out.writeObject(lex);
                out.close();
                log.info("done.");
            }
        } else {
            String lexFile = lexArgs.length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
            log.info("Reading Lexicon from file " + lexFile);
            ObjectInputStream in = IOUtils.readStreamFromString(lexFile);
            try {
                lex = (Lexicon) in.readObject();
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Bad serialized file: " + lexFile);
            }
            in.close();
        }
    }
    // -test mode: segment and/or parse each test sentence and, with -eval,
    // score the output against the gold trees (basic and collinized).
    if (argMap.containsKey("-test")) {
        boolean segmentWords = ctpp.segment;
        boolean parse = lp != null;
        assert (parse || segmentWords);
        //      WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
        //      WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
        WordSegmenter seg = null;
        if (segmentWords) {
            // assumes the trained lexicon doubles as a word segmenter
            seg = (WordSegmenter) lex;
        }
        String[] testArgs = (argMap.get("-test"));
        MemoryTreebank testTreebank = op.tlpParams.memoryTreebank();
        FileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false);
        testTreebank.loadPath(new File(testArgs[0]), testFilt);
        TreeTransformer subcategoryStripper = op.tlpParams.subcategoryStripper();
        TreeTransformer collinizer = ctpp.collinizer();
        WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
        WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
        EquivalenceClassEval basicEval = new EquivalenceClassEval(eqclass, eqcheck, "basic");
        EquivalenceClassEval collinsEval = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
        // Decide which bracket types (word/tag/category) to evaluate.
        List<String> evalTypes = new ArrayList<>(3);
        boolean goodPOS = false;
        if (segmentWords) {
            evalTypes.add(WordCatConstituent.wordType);
            if (ctpp.segmentMarkov && !parse) {
                evalTypes.add(WordCatConstituent.tagType);
                goodPOS = true;
            }
        }
        if (parse) {
            evalTypes.add(WordCatConstituent.tagType);
            evalTypes.add(WordCatConstituent.catType);
            if (combo) {
                evalTypes.add(WordCatConstituent.wordType);
                goodPOS = true;
            }
        }
        TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);
        log.info("Testing...");
        for (Tree goldTop : testTreebank) {
            Tree gold = goldTop.firstChild();
            List<HasWord> goldSentence = gold.yieldHasWord();
            if (goldSentence.size() > maxLength) {
                log.info("Skipping sentence; too long: " + goldSentence.size());
                continue;
            } else {
                log.info("Processing sentence; length: " + goldSentence.size());
            }
            List<HasWord> s;
            if (segmentWords) {
                // Re-segment the raw character string of the gold sentence.
                StringBuilder goldCharBuf = new StringBuilder();
                for (HasWord aGoldSentence : goldSentence) {
                    StringLabel word = (StringLabel) aGoldSentence;
                    goldCharBuf.append(word.value());
                }
                String goldChars = goldCharBuf.toString();
                s = seg.segment(goldChars);
            } else {
                s = goldSentence;
            }
            Tree tree;
            if (parse) {
                tree = lp.parseTree(s);
                if (tree == null) {
                    throw new RuntimeException("PARSER RETURNED NULL!!!");
                }
            } else {
                // segmentation-only mode: wrap the segmented words in a flat tree
                tree = Trees.toFlatTree(s);
                tree = subcategoryStripper.transformTree(tree);
            }
            // Emit the parse (or the segmented sentence) to the -out file.
            if (pw != null) {
                if (parse) {
                    tree.pennPrint(pw);
                } else {
                    Iterator sentIter = s.iterator();
                    for (; ; ) {
                        Word word = (Word) sentIter.next();
                        pw.print(word.word());
                        if (sentIter.hasNext()) {
                            pw.print(" ");
                        } else {
                            break;
                        }
                    }
                }
                pw.println();
            }
            if (eval) {
                Collection ourBrackets, goldBrackets;
                ourBrackets = proc.allBrackets(tree);
                goldBrackets = proc.allBrackets(gold);
                if (goodPOS) {
                    ourBrackets.addAll(proc.commonWordTagTypeBrackets(tree, gold));
                    goldBrackets.addAll(proc.commonWordTagTypeBrackets(gold, tree));
                }
                basicEval.eval(ourBrackets, goldBrackets);
                System.out.println("\nScores:");
                basicEval.displayLast();
                // Score again on collinized trees (annotation stripped).
                Tree collinsTree = collinizer.transformTree(tree);
                Tree collinsGold = collinizer.transformTree(gold);
                ourBrackets = proc.allBrackets(collinsTree);
                goldBrackets = proc.allBrackets(collinsGold);
                if (goodPOS) {
                    ourBrackets.addAll(proc.commonWordTagTypeBrackets(collinsTree, collinsGold));
                    goldBrackets.addAll(proc.commonWordTagTypeBrackets(collinsGold, collinsTree));
                }
                collinsEval.eval(ourBrackets, goldBrackets);
                System.out.println("\nCollinized scores:");
                collinsEval.displayLast();
                System.out.println();
            }
        }
        // Final aggregate scores over the whole test set.
        if (eval) {
            basicEval.display();
            System.out.println();
            collinsEval.display();
        }
    }
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) TaggedWord(edu.stanford.nlp.ling.TaggedWord) Word(edu.stanford.nlp.ling.Word) NumberRangesFileFilter(edu.stanford.nlp.io.NumberRangesFileFilter) ArrayList(java.util.ArrayList) ObjectOutputStream(java.io.ObjectOutputStream) StringLabel(edu.stanford.nlp.ling.StringLabel) TreeToBracketProcessor(edu.stanford.nlp.trees.TreeToBracketProcessor) WordSegmenter(edu.stanford.nlp.process.WordSegmenter) Iterator(java.util.Iterator) Tree(edu.stanford.nlp.trees.Tree) MemoryTreebank(edu.stanford.nlp.trees.MemoryTreebank) NumberRangesFileFilter(edu.stanford.nlp.io.NumberRangesFileFilter) FileFilter(java.io.FileFilter) PrintWriter(java.io.PrintWriter) HasWord(edu.stanford.nlp.ling.HasWord) WordCatEqualityChecker(edu.stanford.nlp.trees.WordCatEqualityChecker) HashIndex(edu.stanford.nlp.util.HashIndex) WordCatEquivalenceClasser(edu.stanford.nlp.trees.WordCatEquivalenceClasser) FileOutputStream(java.io.FileOutputStream) Collection(java.util.Collection) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) TreeTransformer(edu.stanford.nlp.trees.TreeTransformer) ObjectInputStream(java.io.ObjectInputStream)

Example 5 with StringLabel

use of edu.stanford.nlp.ling.StringLabel in project CoreNLP by stanfordnlp.

From the class ConstituentTest, method testConstituents:

/**
 * Tests that two LabeledScoredConstituents over the same span but with
 * different labels are distinct, and that equals/hashCode behave
 * consistently when both are stored in a HashSet.
 */
public void testConstituents() {
    Set<Constituent> set = new HashSet<>();
    Constituent c1 = new LabeledScoredConstituent(9, 15, new StringLabel("S"), 0);
    Constituent c2 = new LabeledScoredConstituent(9, 15, new StringLabel("VP"), 0);
    // same span, different labels: must be distinct objects and unequal
    assertNotSame(c1, c2);
    set.add(c1);
    assertTrue(set.contains(c1));
    assertFalse(set.contains(c2));
    set.add(c2);
    // after adding both, the set must contain each independently
    assertTrue(set.contains(c1));
    assertTrue(set.contains(c2));
    // assertEquals gives a useful expected/actual message on failure
    assertEquals(2, set.size());
    for (Constituent c : set) {
        // every element must be equal to exactly one of the two we added
        assertTrue((c.equals(c1) || c.equals(c2)));
    }
// there used to be a parallel test for Constituents in TreeSets,
// but given that Constituents do not implement Comparable(),
// this test just always failed.
}
Also used : StringLabel(edu.stanford.nlp.ling.StringLabel)

Aggregations

StringLabel (edu.stanford.nlp.ling.StringLabel)5 Tree (edu.stanford.nlp.trees.Tree)3 ArrayList (java.util.ArrayList)3 Label (edu.stanford.nlp.ling.Label)2 NumberRangesFileFilter (edu.stanford.nlp.io.NumberRangesFileFilter)1 HasWord (edu.stanford.nlp.ling.HasWord)1 TaggedWord (edu.stanford.nlp.ling.TaggedWord)1 Word (edu.stanford.nlp.ling.Word)1 WordSegmenter (edu.stanford.nlp.process.WordSegmenter)1 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)1 MemoryTreebank (edu.stanford.nlp.trees.MemoryTreebank)1 TreeToBracketProcessor (edu.stanford.nlp.trees.TreeToBracketProcessor)1 TreeTransformer (edu.stanford.nlp.trees.TreeTransformer)1 WordCatEqualityChecker (edu.stanford.nlp.trees.WordCatEqualityChecker)1 WordCatEquivalenceClasser (edu.stanford.nlp.trees.WordCatEquivalenceClasser)1 HashIndex (edu.stanford.nlp.util.HashIndex)1 File (java.io.File)1 FileFilter (java.io.FileFilter)1 FileOutputStream (java.io.FileOutputStream)1 ObjectInputStream (java.io.ObjectInputStream)1