use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class LexicalizedParserQuery method getBestPCFGParse.
/**
 * Returns the best parse found by the PCFG parser, debinarized and with the
 * original words restored.
 *
 * @param stripSubcategories if {@code true}, the debinarized tree is additionally
 *        run through {@code subcategoryStripper} before being returned
 * @return the resulting tree, or {@code null} when there is no PCFG parser, the
 *         parse was skipped or unparsable, or the parser has no best parse
 */
public Tree getBestPCFGParse(boolean stripSubcategories) {
  final boolean noParseAvailable = pparser == null || parseSkipped || parseUnparsable;
  if (noParseAvailable) {
    return null;
  }

  final Tree binarized = pparser.getBestParse();
  if (binarized == null) {
    return null;
  }

  final Tree debinarized = debinarizer.transformTree(binarized);
  final Tree result = stripSubcategories
      ? subcategoryStripper.transformTree(debinarized)
      : debinarized;

  // Called on the final tree, after any subcategory stripping.
  restoreOriginalWords(result);
  return result;
}
use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class Tdiff method main.
/**
 * Command-line entry point. Reads the first tree from each of the two given
 * files, marks the differences with {@code markDiff}, prints the second tree
 * and then every constituent that {@code markDiff} returned.
 *
 * <p>Both input files are closed before the method returns, whether or not
 * reading succeeds.
 *
 * @param args exactly two elements: the path of the first tree file and the
 *        path of the second tree file; otherwise a usage line is printed
 */
public static void main(String[] args) {
  if (args.length != 2) {
    System.out.println("Usage: java Tdiff tree1 tree2");
    return;
  }
  File tree1Path = new File(args[0]);
  File tree2Path = new File(args[1]);
  // try-with-resources closes both underlying readers on every exit path;
  // a FileNotFoundException raised while opening either one is still handled
  // by the catch clauses below.
  try (BufferedReader in1 = new BufferedReader(new FileReader(tree1Path));
       BufferedReader in2 = new BufferedReader(new FileReader(tree2Path))) {
    TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
    TreeReader tR1 = trf.newTreeReader(in1);
    TreeReader tR2 = trf.newTreeReader(in2);
    Tree t1 = tR1.readTree();
    Tree t2 = tR2.readTree();
    // markDiff accepts null for either tree.
    Set<Constituent> t1Diff = markDiff(t1, t2);
    // Guard the print: a reader that yields no tree (presumably null, e.g. for an
    // empty file) would otherwise cause a NullPointerException here.
    if (t2 != null) {
      System.out.println(t2.pennString());
    }
    System.out.println();
    for (Constituent c : t1Diff) {
      System.out.println(c);
    }
  } catch (FileNotFoundException e) {
    log.info("File not found!");
  } catch (IOException e) {
    log.info("Unable to read file!");
  }
}
use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class Tdiff method markDiff.
/**
 * Compares the bracketings of two trees.
 *
 * <p>Every phrasal node of {@code t2} gets its {@code DoAnnotation} set:
 * {@code true} when its bracket is not among the constituents of {@code t1},
 * {@code false} when it is. Spans of {@code t2} are (re)computed first.
 *
 * @param t1 the reference tree; may be {@code null}, in which case it
 *        contributes no constituents
 * @param t2 the tree whose phrasal nodes are annotated; may be {@code null},
 *        in which case nothing is annotated
 * @return the set of constituents of {@code t1} that matched no phrasal
 *         node of {@code t2}
 */
public static Set<Constituent> markDiff(Tree t1, Tree t2) {
  Set<Constituent> unmatched;
  if (t1 == null) {
    unmatched = Generics.<Constituent>newHashSet();
  } else {
    unmatched = t1.constituents(cf);
  }

  if (t2 == null) {
    return unmatched;
  }

  t2.setSpans();
  for (Tree node : t2) {
    if (!node.isPhrasal()) {
      continue;
    }
    IntPair span = node.getSpan();
    Constituent bracket = cf.newConstituent(span.getSource(), span.getTarget(), node.label(), 0.0);
    // remove() reports whether the bracket was present, so one call both tests
    // membership and consumes the match.
    boolean sharedWithT1 = unmatched.remove(bracket);
    ((CoreLabel) node.label()).set(CoreAnnotations.DoAnnotation.class, !sharedWithT1);
  }
  return unmatched;
}
use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class ArabicTreeNormalizer method normalizeWholeTree.
/**
 * Normalizes a complete tree in several passes:
 * <ol>
 * <li>prunes with {@code emptyFilter} and splices out with {@code aOverAFilter};</li>
 * <li>visits every node: leaves carrying a morphological annotation are split into
 * word and analysis, preterminals copy their value into the tag, and leaves that
 * hang directly under a phrasal node are wrapped in a "DUMMYTAG" node;</li>
 * <li>if {@code markPRDverb} is set, relabels nodes matched by {@code prdVerbPattern};</li>
 * <li>if {@code retainNPSbj} is set, relabels nodes matched by {@code npSbjPattern} as "NP";</li>
 * <li>wraps a bare CC / PUNC* / CONJ preterminal in "FRAG";</li>
 * <li>strips empty-labelled wrapper nodes and adds {@code rootLabel} as root if missing.</li>
 * </ol>
 *
 * @param tree the tree to normalize
 * @param tf factory used to create the nodes that are spliced in
 * @return the normalized tree, or {@code null} when stripping the empty-labelled
 *         wrappers leaves nothing
 */
@Override
public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
tree = tree.prune(emptyFilter, tf).spliceOut(aOverAFilter, tf);
for (Tree t : tree) {
if (t.isLeaf()) {
// Strip the morphological analysis off the word and store it as the label's original text.
if (t.value().contains(MorphoFeatureSpecification.MORPHO_MARK)) {
// NOTE(review): String.split interprets MORPHO_MARK as a regular expression — confirm it has no metacharacters.
String[] toks = t.value().split(MorphoFeatureSpecification.MORPHO_MARK);
// Exactly two parts are expected: toks[0] is the word, toks[1] the annotation.
if (toks.length != 2)
System.err.printf("%s: Word contains malformed morph annotation: %s%n", this.getClass().getName(), t.value());
else if (t.label() instanceof CoreLabel) {
((CoreLabel) t.label()).setValue(toks[0].trim().intern());
((CoreLabel) t.label()).setWord(toks[0].trim().intern());
Pair<String, String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(toks[0], toks[1]);
String lemma = lemmaMorph.first();
String morphAnalysis = lemmaMorph.second();
// When the lemma equals the word itself, the raw annotation is stored unchanged.
if (lemma.equals(toks[0])) {
((CoreLabel) t.label()).setOriginalText(toks[1].trim().intern());
} else {
// TODO(speneg): Does this help?
String newLemma = lexMapper.map(null, lemma);
// Fall back to the unmapped lemma when the mapper yields null or only whitespace.
if (newLemma == null || newLemma.trim().length() == 0) {
newLemma = lemma;
}
String newMorphAnalysis = newLemma + MorphoFeatureSpecification.LEMMA_MARK + morphAnalysis;
((CoreLabel) t.label()).setOriginalText(newMorphAnalysis.intern());
}
} else {
System.err.printf("%s: Cannot store morph analysis in non-CoreLabel: %s%n", this.getClass().getName(), t.label().getClass().getName());
}
}
} else if (t.isPreTerminal()) {
// Preterminals: report a missing tag, otherwise mirror the node value into the label's tag.
if (t.value() == null || t.value().equals("")) {
System.err.printf("%s: missing tag for\n%s\n", this.getClass().getName(), t.pennString());
} else if (t.label() instanceof HasTag) {
((HasTag) t.label()).setTag(t.value());
}
} else {
//Phrasal nodes
// there are some nodes "/" missing preterminals. We'll splice in a tag for these.
int nk = t.numChildren();
List<Tree> newKids = new ArrayList<>(nk);
for (int j = 0; j < nk; j++) {
Tree child = t.getChild(j);
if (child.isLeaf()) {
System.err.printf("%s: Splicing in DUMMYTAG for%n%s%n", this.getClass().getName(), t.toString());
newKids.add(tf.newTreeNode("DUMMYTAG", Collections.singletonList(child)));
} else {
newKids.add(child);
}
}
// The child list is replaced even when no DUMMYTAG was needed.
t.setChildren(newKids);
}
}
// special global coding for moving PRD annotation from constituent to verb tag.
if (markPRDverb) {
TregexMatcher m = prdVerbPattern.matcher(tree);
Tree match = null;
while (m.find()) {
// Skip a match on the same node as the previous one so "-PRDverb" is not appended twice in a row.
if (m.getMatch() != match) {
match = m.getMatch();
match.label().setValue(match.label().value() + "-PRDverb");
Tree prd = m.getNode("prd");
prd.label().setValue(super.normalizeNonterminal(prd.label().value()));
}
}
}
//Mark *only* subjects in verb-initial clauses
if (retainNPSbj) {
TregexMatcher m = npSbjPattern.matcher(tree);
while (m.find()) {
Tree match = m.getMatch();
match.label().setValue("NP");
}
}
if (tree.isPreTerminal()) {
// The whole tree is a bare tag: bad!
String val = tree.label().value();
if (val.equals("CC") || val.startsWith("PUNC") || val.equals("CONJ")) {
System.err.printf("%s: Bare tagged word being wrapped in FRAG\n%s\n", this.getClass().getName(), tree.pennString());
tree = tf.newTreeNode("FRAG", Collections.singletonList(tree));
} else {
System.err.printf("%s: Bare tagged word\n%s\n", this.getClass().getName(), tree.pennString());
}
}
// Descend through empty-labelled nodes that have at most one child. If nothing is left
// (firstChild() presumably yields null for a childless node) this method
// will return null. In this case, readers e.g. PennTreeReader will try to read the next tree.
while (tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1) tree = tree.firstChild();
// Make sure the returned tree is rooted at rootLabel.
// NOTE(review): tree.value() may still be null here for an empty-labelled node with two or more children — confirm this cannot occur.
if (tree != null && !tree.value().equals(rootLabel))
tree = tf.newTreeNode(rootLabel, Collections.singletonList(tree));
return tree;
}
use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class FrenchXMLTreeReader method readTree.
/**
 * Reads the next usable tree from the parsed XML sentences.
 *
 * <p>Sentences are consumed one at a time, advancing {@code sentIdx}, until one
 * yields a non-null tree after conversion and normalization. When the root label
 * is a {@code CoreLabel}, the sentence's {@code ATTR_NUMBER} attribute is stored
 * on it as the sentence id.
 *
 * @return the next normalized tree, or {@code null} when {@code sentences} is
 *         {@code null} or no remaining sentence produces a tree
 */
public Tree readTree() {
  Tree t = null;
  while (t == null && sentences != null && sentIdx < sentences.getLength()) {
    Node sentRoot = sentences.item(sentIdx++);
    t = getTreeFromXML(sentRoot);
    if (t != null) {
      t = treeNormalizer.normalizeWholeTree(t, treeFactory);
      // A normalizer may reject the tree by returning null; in that case skip the
      // annotation and let the loop move on to the next sentence instead of
      // failing with a NullPointerException.
      if (t != null && t.label() instanceof CoreLabel) {
        String ftbId = ((Element) sentRoot).getAttribute(ATTR_NUMBER);
        ((CoreLabel) t.label()).set(CoreAnnotations.SentenceIDAnnotation.class, ftbId);
      }
    }
  }
  return t;
}
Aggregations