use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class NegraPennCollinizer method transformTree.
/**
 * Builds a normalized copy of {@code tree}.
 *
 * <p>Leaves are copied with their original label. Internal nodes get a fresh
 * {@code StringLabel} holding the basic category of the original label. When
 * {@code deletePunct} is set, preterminals whose tag is evalb-ignored punctuation
 * are dropped. A "TOPP" first child is spliced out (its children replace the
 * children of {@code tree}, which mutates the argument), and a start-symbol node
 * with exactly one child is replaced by the transformation of that child.
 *
 * @param tree the tree to transform
 * @return the transformed tree, or {@code null} if the node was deleted as
 *         punctuation or every one of its children was deleted
 */
public Tree transformTree(Tree tree) {
  final Label nodeLabel = tree.label();
  if (tree.isLeaf()) {
    return tf.newLeaf(nodeLabel);
  }

  final String category = tlpp.treebankLanguagePack().basicCategory(nodeLabel.value());

  // Punctuation preterminals are removed, since the evaluation ignores punctuation anyway.
  if (deletePunct
      && tree.isPreTerminal()
      && tlpp.treebankLanguagePack().isEvalBIgnoredPunctuationTag(category)) {
    return null;
  }

  // TEMPORARY: eliminate the TOPP constituent by hoisting its children.
  final Tree firstChild = tree.children()[0];
  if (firstChild.label().value().equals("TOPP")) {
    log.info("Found a TOPP");
    tree.setChildren(firstChild.children());
  }

  // Negra has lots of non-unary roots; only unary roots are deleted.
  final boolean unaryRoot =
      tlpp.treebankLanguagePack().isStartSymbol(category) && tree.numChildren() == 1;
  if (unaryRoot) {
    // NB: This deletes the boundary symbol, which is in the tree!
    return transformTree(tree.getChild(0));
  }

  // Transform the children, keeping only the ones that survive.
  final List<Tree> keptChildren = new ArrayList<>();
  for (final Tree child : tree.children()) {
    final Tree transformed = transformTree(child);
    if (transformed != null) {
      keptChildren.add(transformed);
    }
  }

  return keptChildren.isEmpty()
      ? null
      : tf.newTreeNode(new StringLabel(category), keptChildren);
}
use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class NodePruner method helper.
/**
 * Prunes each tree of {@code treeList} in order and collects the results.
 *
 * <p>The first tree is pruned with offset {@code start}; the offset handed to each
 * following tree is increased by the yield size of the tree before it.
 *
 * @param treeList the trees to prune
 * @param start the offset passed to {@code prune} for the first tree
 * @return a new list holding the result of {@code prune} for every input tree
 */
private List<Tree> helper(List<Tree> treeList, int start) {
  final List<Tree> pruned = new ArrayList<>(treeList.size());
  int offset = start;
  for (final Tree subtree : treeList) {
    // The yield size is read before prune is called, matching the original order
    // (in case prune alters the tree it is given).
    final int yieldLength = subtree.yield().size();
    pruned.add(prune(subtree, offset));
    offset += yieldLength;
  }
  return pruned;
}
use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class HTKLatticeReader method main.
/**
 * Command-line entry point: builds an {@code HTKLatticeReader} for the file named by the
 * first argument and, when a parser model is supplied with {@code -parser}, parses the
 * lattice and prints the best parse.
 *
 * <p>Recognized flags (case-insensitive): {@code -debug}, {@code -useMax}, {@code -useSum},
 * {@code -noPrettyPrint}, and {@code -parser parserFile}. A missing file name or a
 * {@code -parser} flag without its value prints the usage message and exits instead of
 * failing with an {@code ArrayIndexOutOfBoundsException}.
 *
 * @param args the lattice file name followed by optional flags
 * @throws Exception whatever the lattice reader or parser calls throw
 */
public static void main(String[] args) throws Exception {
  final String usage = "usage: java LatticeReader <file> [ -debug ] [ -useMax ] [ -useSum ] [ -noPrettyPrint ] [ -parser parserFile ]";

  // The lattice file name is mandatory.
  if (args.length == 0) {
    log.info(usage);
    System.exit(1);
  }

  boolean mergeType = USESUM;
  boolean prettyPrint = true;
  boolean debug = false;
  String parseGram = null;
  String filename = args[0];
  for (int i = 1; i < args.length; i++) {
    if (args[i].equalsIgnoreCase("-debug")) {
      debug = true;
    } else if (args[i].equalsIgnoreCase("-useMax")) {
      mergeType = USEMAX;
    } else if (args[i].equalsIgnoreCase("-useSum")) {
      mergeType = USESUM;
    } else if (args[i].equalsIgnoreCase("-noPrettyPrint")) {
      prettyPrint = false;
    } else if (args[i].equalsIgnoreCase("-parser")) {
      // The flag needs a following value; without this check args[++i] would run off the array.
      if (i + 1 >= args.length) {
        log.info("missing argument for flag: " + args[i]);
        log.info(usage);
        System.exit(1);
      }
      parseGram = args[++i];
    } else {
      log.info("unrecognized flag: " + args[i]);
      log.info(usage);
      // Exit status 0 is kept here for backward compatibility with existing callers.
      System.exit(0);
    }
  }
  HTKLatticeReader lr = new HTKLatticeReader(filename, mergeType, debug, prettyPrint);
  if (parseGram != null) {
    Options op = new Options();
    // TODO: these options all get clobbered by the Options object
    // stored in the LexicalizedParser (unless it's a text file?)
    op.doDep = false;
    op.testOptions.maxLength = 80;
    op.testOptions.maxSpanForTags = 80;
    LexicalizedParser lp = LexicalizedParser.loadModel(parseGram, op);
    // TODO: somehow merge this into ParserQuery instead of being
    // LexicalizedParserQuery specific
    LexicalizedParserQuery pq = lp.lexicalizedParserQuery();
    pq.parse(lr);
    Tree t = pq.getBestParse();
    t.pennPrint();
  }
  //lr.processLattice();
}
use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class CollinsDepEval method main.
/**
 * Command-line entry point: scores a guess treebank against a gold treebank with
 * {@code CollinsDepEval} and prints the result.
 *
 * <p>Options read from the parsed arguments: {@code v} (verbose output, default false),
 * {@code l} (language, default English), {@code g} (maximum gold yield) and {@code y}
 * (maximum guess yield). The remaining positional arguments are the gold file followed by
 * the guess file. Usage is logged and the JVM exits with status -1 when the argument count
 * is wrong.
 *
 * <p>Tree pairs whose collinized form is {@code null}, or whose collinized yields differ in
 * length, are reported and counted as skipped instead of being evaluated.
 *
 * @param args command-line options followed by the gold and guess treebank paths
 */
public static void main(String[] args) {
  if (args.length < MIN_ARGS) {
    log.info(usage());
    System.exit(-1);
  }
  Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  final boolean verbose = PropertiesUtils.getBool(options, "v", false);
  final Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
  final int maxGoldYield = PropertiesUtils.getInt(options, "g", Integer.MAX_VALUE);
  final int maxGuessYield = PropertiesUtils.getInt(options, "y", Integer.MAX_VALUE);
  String[] parsedArgs = options.getProperty("", "").split("\\s+");
  if (parsedArgs.length != MIN_ARGS) {
    log.info(usage());
    System.exit(-1);
  }
  File goldFile = new File(parsedArgs[0]);
  File guessFile = new File(parsedArgs[1]);
  final TreebankLangParserParams tlpp = language.params;
  final PrintWriter pwOut = tlpp.pw();
  final Treebank guessTreebank = tlpp.diskTreebank();
  guessTreebank.loadPath(guessFile);
  pwOut.println("GUESS TREEBANK:");
  pwOut.println(guessTreebank.textualSummary());
  final Treebank goldTreebank = tlpp.diskTreebank();
  goldTreebank.loadPath(goldFile);
  pwOut.println("GOLD TREEBANK:");
  pwOut.println(goldTreebank.textualSummary());
  final CollinsDepEval depEval = new CollinsDepEval("CollinsDep", true, tlpp.headFinder(), tlpp.treebankLanguagePack().startSymbol());
  final TreeTransformer tc = tlpp.collinizer();
  //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
  //don't match, we need to keep looking for the next gold tree that matches.
  //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
  //status as follows:
  //
  // 0 - Ok (yields match)
  // 1 - length mismatch
  // 2 - null parse e.g. (()).
  //
  //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
  final Iterator<Tree> goldItr = goldTreebank.iterator();
  int goldLineId = 0;
  int skippedGuessTrees = 0;
  for (final Tree guess : guessTreebank) {
    // The transform runs before the yield check, as in the original, because a
    // transformer is allowed to modify the tree it is handed.
    final Tree evalGuess = tc.transformTree(guess);
    if (guess.yield().size() > maxGuessYield) {
      skippedGuessTrees++;
      continue;
    }
    boolean doneEval = false;
    while (goldItr.hasNext() && !doneEval) {
      final Tree gold = goldItr.next();
      final Tree evalGold = tc.transformTree(gold);
      goldLineId++;
      if (gold.yield().size() > maxGoldYield) {
        continue;
      } else if (evalGuess == null || evalGold == null) {
        // A collinizer may delete a whole tree and return null; such a pair cannot be
        // scored, so it takes the same path as a yield mismatch instead of throwing.
        pwOut.println("Null tree after collinization at gold line " + goldLineId);
        skippedGuessTrees++;
        break;
      } else if (evalGold.yield().size() != evalGuess.yield().size()) {
        pwOut.println("Yield mismatch at gold line " + goldLineId);
        skippedGuessTrees++;
        //Default evalb behavior -- skip this guess tree
        break;
      }
      depEval.evaluate(evalGuess, evalGold, (verbose ? pwOut : null));
      //Move to the next guess parse
      doneEval = true;
    }
  }
  pwOut.println("================================================================================");
  if (skippedGuessTrees != 0) {
    // %n keeps the line terminator consistent with the println calls above.
    pwOut.printf("%s %d guess trees%n", ((maxGuessYield < Integer.MAX_VALUE) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
  }
  depEval.display(true, pwOut);
  pwOut.close();
}
use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.
the class LeafAncestorEval method main.
/**
 * Command-line entry point: scores a guess treebank against a gold treebank with the
 * {@code LeafAncestorEval} metric and prints the result. Execute with no arguments for usage.
 *
 * <p>Gold and guess trees are read in lockstep. A pair is counted as skipped, and not
 * evaluated, when the gold yield exceeds {@code MAX_GOLD_YIELD} or when the two yields
 * differ in length. A warning is written to standard error if one treebank still has trees
 * left after the other is exhausted.
 *
 * @param args command-line arguments, checked by {@code validateCommandLine}
 */
public static void main(String[] args) {
  if (!validateCommandLine(args)) {
    log.info(USAGE);
    System.exit(-1);
  }
  final TreebankLangParserParams tlpp = LANGUAGE.params;
  final PrintWriter pwOut = tlpp.pw();
  final Treebank guessTreebank = tlpp.diskTreebank();
  guessTreebank.loadPath(guessFile);
  pwOut.println("GUESS TREEBANK:");
  pwOut.println(guessTreebank.textualSummary());
  final Treebank goldTreebank = tlpp.diskTreebank();
  goldTreebank.loadPath(goldFile);
  pwOut.println("GOLD TREEBANK:");
  pwOut.println(goldTreebank.textualSummary());
  final LeafAncestorEval metric = new LeafAncestorEval("LeafAncestor");
  final TreeTransformer tc = tlpp.collinizer();
  //The evalb ref implementation assigns status for each tree pair as follows:
  //
  // 0 - Ok (yields match)
  // 1 - length mismatch
  // 2 - null parse e.g. (()).
  //
  //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
  final Iterator<Tree> goldItr = goldTreebank.iterator();
  final Iterator<Tree> guessItr = guessTreebank.iterator();
  int goldLineId = 0;
  int guessLineId = 0;
  int skippedGuessTrees = 0;
  while (guessItr.hasNext() && goldItr.hasNext()) {
    Tree guessTree = guessItr.next();
    List<Label> guessYield = guessTree.yield();
    guessLineId++;
    Tree goldTree = goldItr.next();
    List<Label> goldYield = goldTree.yield();
    goldLineId++;
    // Check that we should evaluate this tree
    if (goldYield.size() > MAX_GOLD_YIELD) {
      skippedGuessTrees++;
      continue;
    }
    // Only trees with equal yields can be evaluated
    if (goldYield.size() != guessYield.size()) {
      pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
      skippedGuessTrees++;
      continue;
    }
    final Tree evalGuess = tc.transformTree(guessTree);
    final Tree evalGold = tc.transformTree(goldTree);
    metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
  }
  if (guessItr.hasNext() || goldItr.hasNext()) {
    // The counts are the number of trees read from each file so far.
    // The period now comes before the line break; it used to start the next line.
    System.err.printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
  }
  pwOut.println("================================================================================");
  if (skippedGuessTrees != 0) {
    pwOut.printf("%s %d guess trees%n", "Unable to evaluate", skippedGuessTrees);
  }
  metric.display(true, pwOut);
  pwOut.close();
}
Aggregations