Search in sources :

Example 6 with CollectionValuedMap

Use of edu.stanford.nlp.util.CollectionValuedMap in project CoreNLP by stanfordnlp.

From the class ExtractPhraseFromPattern, method printSubGraph.

/**
 * Extracts the phrase spanned by the sub-graph rooted at {@code w} and records it, optionally
 * recursing into the nodes of that sub-graph.
 *
 * <p>The recorded index pair is {@code (min - 1, max - 1)}, where {@code min}/{@code max} are the
 * smallest/largest {@code IndexedWord.index()} among the collected words; the end index is
 * therefore inclusive. The phrase text is {@code textTokens.subList(min - 1, max)} joined by a
 * single space, so {@code index()} is treated as 1-based relative to {@code textTokens}.
 * Spans longer than {@code maxPhraseLength} are truncated from the right.
 *
 * <p>Any exception raised while processing is printed to stderr and otherwise ignored, so a
 * failure on one node does not abort the caller.
 *
 * @param g the graph being traversed
 * @param w the node whose sub-graph is turned into a phrase
 * @param additionalCutOffRels extra cut-off relations, added to {@code cutoffRelations}; may be
 *     {@code null}
 * @param textTokens the sentence tokens the phrase text is sliced from
 * @param listOfOutput receives each newly found phrase string; also used to skip duplicates
 * @param listOfOutputIndices receives the index pair of each newly found span
 * @param seenNodes nodes already visited; {@code w} is added to it
 * @param doNotAddThese nodes to skip; the nodes found via {@code "conj_and"} are added to it
 * @param findSubTrees if {@code true}, recurse into every not-yet-seen word of the phrase
 * @param extractedPhrases receives an {@code ExtractedPhrase} for each newly found span
 * @param pattern the pattern stored on each created {@code ExtractedPhrase}
 * @param acceptWord passed through to {@code descendants} (presumably a token filter)
 */
public void printSubGraph(SemanticGraph g, IndexedWord w, List<String> additionalCutOffRels, List<String> textTokens, Collection<String> listOfOutput, Collection<IntPair> listOfOutputIndices, List<IndexedWord> seenNodes, List<IndexedWord> doNotAddThese, boolean findSubTrees, Collection<ExtractedPhrase> extractedPhrases, SemgrexPattern pattern, Function<CoreLabel, Boolean> acceptWord) {
    try {
        if (seenNodes.contains(w)) {
            return;
        }
        // Mark as seen before the exclusion test so an excluded node is never revisited.
        seenNodes.add(w);
        if (doNotAddThese.contains(w)) {
            return;
        }

        // Nodes collected for the "conj_and" relation are processed as phrases of their own
        // first, then excluded from the phrase built for w.
        List<IndexedWord> andNodes = new ArrayList<>();
        descendantsWithReln(g, w, "conj_and", new ArrayList<>(), andNodes);
        for (IndexedWord andNode : andNodes) {
            printSubGraph(g, andNode, additionalCutOffRels, textTokens, listOfOutput, listOfOutputIndices, seenNodes, doNotAddThese, findSubTrees, extractedPhrases, pattern, acceptWord);
        }
        doNotAddThese.addAll(andNodes);

        List<String> allCutOffRels = new ArrayList<>();
        if (additionalCutOffRels != null) {
            allCutOffRels.addAll(additionalCutOffRels);
        }
        allCutOffRels.addAll(cutoffRelations);

        CollectionValuedMap<Integer, String> featPerToken = new CollectionValuedMap<>();
        Collection<String> feat = new ArrayList<>();
        GetPatternsFromDataMultiClass.getFeatures(g, w, true, feat, null);
        Set<IndexedWord> words = descendants(g, w, allCutOffRels, doNotAddThese, ignoreCommonTags, acceptWord, featPerToken);
        if (words.isEmpty()) {
            return;
        }

        int min = Integer.MAX_VALUE;
        int max = -1;
        for (IndexedWord word : words) {
            min = Math.min(min, word.index());
            max = Math.max(max, word.index());
        }
        // Truncate over-long spans from the right.
        if ((max - min + 1) > maxPhraseLength) {
            max = min + maxPhraseLength - 1;
        }

        IntPair indices = new IntPair(min - 1, max - 1);
        // subList's end is exclusive, so (min - 1, max) covers the same inclusive span as indices.
        String phrase = StringUtils.join(textTokens.subList(min - 1, max), " ").trim();
        feat.add("LENGTH-" + (max - min + 1));
        for (int i = min; i <= max; i++) {
            feat.addAll(featPerToken.get(i));
        }
        ExtractedPhrase extractedPh = new ExtractedPhrase(min - 1, max - 1, pattern, phrase, Counters.asCounter(feat));

        // The original also tested doNotAddThese.contains(phrase); that list holds IndexedWord
        // elements, so a String lookup could never match and the test has been dropped.
        if (listOfOutput.contains(phrase)) {
            return;
        }
        listOfOutput.add(phrase);
        if (!listOfOutputIndices.contains(indices)) {
            listOfOutputIndices.add(indices);
            extractedPhrases.add(extractedPh);
        }
        if (findSubTrees) {
            for (IndexedWord word : words) {
                if (!seenNodes.contains(word)) {
                    printSubGraph(g, word, additionalCutOffRels, textTokens, listOfOutput, listOfOutputIndices, seenNodes, doNotAddThese, findSubTrees, extractedPhrases, pattern, acceptWord);
                }
            }
        }
    } catch (Exception e) {
        // Best-effort: report the failure and let the caller carry on with other nodes.
        e.printStackTrace();
    }
}
Also used : CollectionValuedMap(edu.stanford.nlp.util.CollectionValuedMap) ArrayList(java.util.ArrayList) IntPair(edu.stanford.nlp.util.IntPair) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Aggregations

CollectionValuedMap (edu.stanford.nlp.util.CollectionValuedMap)6 IntPair (edu.stanford.nlp.util.IntPair)3 CoreLabel (edu.stanford.nlp.ling.CoreLabel)2 TwoDimensionalCounter (edu.stanford.nlp.stats.TwoDimensionalCounter)2 CoreMap (edu.stanford.nlp.util.CoreMap)2 Triple (edu.stanford.nlp.util.Triple)2 ArrayList (java.util.ArrayList)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)1 IndexedWord (edu.stanford.nlp.ling.IndexedWord)1 SequenceMatchResult (edu.stanford.nlp.ling.tokensregex.SequenceMatchResult)1 TokenSequenceMatcher (edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher)1 TokenSequencePattern (edu.stanford.nlp.ling.tokensregex.TokenSequencePattern)1 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)1 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)1 Pair (edu.stanford.nlp.util.Pair)1 Collection (java.util.Collection)1 List (java.util.List)1 Map (java.util.Map)1