Use of edu.stanford.nlp.util.CollectionValuedMap in the CoreNLP project by stanfordnlp: the printSubGraph method of the class ExtractPhraseFromPattern.
// Here, the index pair (startIndex, endIndex) seems to be inclusive of endIndex.
public void printSubGraph(SemanticGraph g, IndexedWord w, List<String> additionalCutOffRels,
                          List<String> textTokens, Collection<String> listOfOutput,
                          Collection<IntPair> listOfOutputIndices, List<IndexedWord> seenNodes,
                          List<IndexedWord> doNotAddThese, boolean findSubTrees,
                          Collection<ExtractedPhrase> extractedPhrases, SemgrexPattern pattern,
                          Function<CoreLabel, Boolean> acceptWord) {
  try {
    // Skip nodes that were already visited or explicitly excluded.
    if (seenNodes.contains(w))
      return;
    seenNodes.add(w);
    if (doNotAddThese.contains(w))
      return;

    // Extract phrases for "conj_and" conjuncts first, then exclude those
    // nodes from the phrase rooted at w itself.
    List<IndexedWord> andNodes = new ArrayList<>();
    descendantsWithReln(g, w, "conj_and", new ArrayList<>(), andNodes);
    for (IndexedWord w1 : andNodes) {
      printSubGraph(g, w1, additionalCutOffRels, textTokens, listOfOutput, listOfOutputIndices,
                    seenNodes, doNotAddThese, findSubTrees, extractedPhrases, pattern, acceptWord);
    }
    doNotAddThese.addAll(andNodes);

    List<String> allCutOffRels = new ArrayList<>();
    if (additionalCutOffRels != null)
      allCutOffRels.addAll(additionalCutOffRels);
    allCutOffRels.addAll(cutoffRelations);

    // featPerToken accumulates, for each token index, the collection of
    // features fired on that token while collecting descendants.
    CollectionValuedMap<Integer, String> featPerToken = new CollectionValuedMap<>();
    Collection<String> feat = new ArrayList<>();
    GetPatternsFromDataMultiClass.getFeatures(g, w, true, feat, null);

    Set<IndexedWord> words = descendants(g, w, allCutOffRels, doNotAddThese, ignoreCommonTags,
                                         acceptWord, featPerToken);
    if (!words.isEmpty()) {
      // Compute the token span [min, max] covered by the descendant set.
      int min = Integer.MAX_VALUE, max = -1;
      for (IndexedWord word : words) {
        if (word.index() < min)
          min = word.index();
        if (word.index() > max)
          max = word.index();
      }
      // Truncate overly long spans to maxPhraseLength tokens.
      if ((max - min + 1) > maxPhraseLength) {
        max = min + maxPhraseLength - 1;
      }
      // Token indices are 1-based, so subtract 1 for the 0-based IntPair and subList.
      IntPair indices = new IntPair(min - 1, max - 1);
      String phrase = StringUtils.join(textTokens.subList(min - 1, max), " ");
      phrase = phrase.trim();

      feat.add("LENGTH-" + (max - min + 1));
      for (int i = min; i <= max; i++)
        feat.addAll(featPerToken.get(i));

      ExtractedPhrase extractedPh = new ExtractedPhrase(min - 1, max - 1, pattern, phrase,
                                                        Counters.asCounter(feat));
      // Note: doNotAddThese holds IndexedWords, so contains(phrase) with a String
      // appears to never match; only the listOfOutput check is effective here.
      if (!listOfOutput.contains(phrase) && !doNotAddThese.contains(phrase)) {
        listOfOutput.add(phrase);
        if (!listOfOutputIndices.contains(indices)) {
          listOfOutputIndices.add(indices);
          extractedPhrases.add(extractedPh);
        }
        // Optionally recurse to extract phrases rooted at each descendant too.
        if (findSubTrees) {
          for (IndexedWord word : words)
            if (!seenNodes.contains(word))
              printSubGraph(g, word, additionalCutOffRels, textTokens, listOfOutput,
                            listOfOutputIndices, seenNodes, doNotAddThese, findSubTrees,
                            extractedPhrases, pattern, acceptWord);
        }
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
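
CollectionValuedMap<K, V> behaves like a Map<K, Collection<V>> that creates the value collection on demand, which is what lets featPerToken above accumulate any number of feature strings per token index; the feat.addAll(featPerToken.get(i)) loop also relies on get returning an empty collection rather than null for absent keys. A minimal standalone sketch of that behavior (the demo class name is hypothetical; it assumes the default ArrayList-backed constructor):

import edu.stanford.nlp.util.CollectionValuedMap;

public class CollectionValuedMapDemo {
  public static void main(String[] args) {
    // Each key accumulates values in a collection created on first use.
    CollectionValuedMap<Integer, String> featPerToken = new CollectionValuedMap<>();
    featPerToken.add(3, "TAG-NN");
    featPerToken.add(3, "WORD-cat");
    featPerToken.add(4, "TAG-VBZ");

    System.out.println(featPerToken.get(3)); // [TAG-NN, WORD-cat]
    // A missing key yields an empty collection rather than null, so the
    // feat.addAll(featPerToken.get(i)) pattern above is NPE-safe.
    System.out.println(featPerToken.get(99)); // []
  }
}

This avoids the boilerplate of a plain HashMap<Integer, List<String>>, where every add would first need a null check or computeIfAbsent to create the list.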