use of edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams in project CoreNLP by stanfordnlp.
the class TaggingEval method main.
/**
* Run the scoring metric on guess/gold input. This method performs "Collinization."
* The default language is English.
*
* @param args
*/
public static void main(String[] args) {
if (args.length < minArgs) {
System.out.println(usage.toString());
System.exit(-1);
}
TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
int maxGoldYield = Integer.MAX_VALUE;
boolean VERBOSE = false;
String encoding = "UTF-8";
String guessFile = null;
String goldFile = null;
Map<String, String[]> argsMap = StringUtils.argsToMap(args, optionArgDefs);
for (Map.Entry<String, String[]> opt : argsMap.entrySet()) {
if (opt.getKey() == null)
continue;
if (opt.getKey().equals("-l")) {
Language lang = Language.valueOf(opt.getValue()[0].trim());
tlpp = lang.params;
} else if (opt.getKey().equals("-y")) {
maxGoldYield = Integer.parseInt(opt.getValue()[0].trim());
} else if (opt.getKey().equals("-v")) {
VERBOSE = true;
} else if (opt.getKey().equals("-c")) {
TaggingEval.doCatLevelEval = true;
} else if (opt.getKey().equals("-e")) {
encoding = opt.getValue()[0];
} else {
log.info(usage.toString());
System.exit(-1);
}
//Non-option arguments located at key null
String[] rest = argsMap.get(null);
if (rest == null || rest.length < minArgs) {
log.info(usage.toString());
System.exit(-1);
}
goldFile = rest[0];
guessFile = rest[1];
}
tlpp.setInputEncoding(encoding);
final PrintWriter pwOut = tlpp.pw();
final Treebank guessTreebank = tlpp.diskTreebank();
guessTreebank.loadPath(guessFile);
pwOut.println("GUESS TREEBANK:");
pwOut.println(guessTreebank.textualSummary());
final Treebank goldTreebank = tlpp.diskTreebank();
goldTreebank.loadPath(goldFile);
pwOut.println("GOLD TREEBANK:");
pwOut.println(goldTreebank.textualSummary());
final TaggingEval metric = new TaggingEval("Tagging LP/LR");
final TreeTransformer tc = tlpp.collinizer();
//The evalb ref implementation assigns status for each tree pair as follows:
//
// 0 - Ok (yields match)
// 1 - length mismatch
// 2 - null parse e.g. (()).
//
//In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
final Iterator<Tree> goldItr = goldTreebank.iterator();
final Iterator<Tree> guessItr = guessTreebank.iterator();
int goldLineId = 0;
int guessLineId = 0;
int skippedGuessTrees = 0;
while (guessItr.hasNext() && goldItr.hasNext()) {
Tree guessTree = guessItr.next();
List<Label> guessYield = guessTree.yield();
guessLineId++;
Tree goldTree = goldItr.next();
List<Label> goldYield = goldTree.yield();
goldLineId++;
// Check that we should evaluate this tree
if (goldYield.size() > maxGoldYield) {
skippedGuessTrees++;
continue;
}
// Only trees with equal yields can be evaluated
if (goldYield.size() != guessYield.size()) {
pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
skippedGuessTrees++;
continue;
}
final Tree evalGuess = tc.transformTree(guessTree);
final Tree evalGold = tc.transformTree(goldTree);
metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
}
if (guessItr.hasNext() || goldItr.hasNext()) {
System.err.printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId);
}
pwOut.println("================================================================================");
if (skippedGuessTrees != 0)
pwOut.printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
metric.display(true, pwOut);
pwOut.println();
pwOut.close();
}
use of edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams in project CoreNLP by stanfordnlp.
the class UnlabeledAttachmentEval method main.
/**
* Run the Evalb scoring metric on guess/gold input. The default language is English.
*
* @param args
*/
public static void main(String[] args) {
TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
int maxGoldYield = Integer.MAX_VALUE;
boolean VERBOSE = false;
String encoding = "UTF-8";
String guessFile = null;
String goldFile = null;
Map<String, String[]> argsMap = StringUtils.argsToMap(args, optionArgDefs);
for (Map.Entry<String, String[]> opt : argsMap.entrySet()) {
if (opt.getKey() == null)
continue;
if (opt.getKey().equals("-l")) {
Language lang = Language.valueOf(opt.getValue()[0].trim());
tlpp = lang.params;
} else if (opt.getKey().equals("-y")) {
maxGoldYield = Integer.parseInt(opt.getValue()[0].trim());
} else if (opt.getKey().equals("-v")) {
VERBOSE = true;
} else if (opt.getKey().equals("-e")) {
encoding = opt.getValue()[0];
} else {
log.info(usage.toString());
System.exit(-1);
}
//Non-option arguments located at key null
String[] rest = argsMap.get(null);
if (rest == null || rest.length < minArgs) {
log.info(usage.toString());
System.exit(-1);
}
goldFile = rest[0];
guessFile = rest[1];
}
tlpp.setInputEncoding(encoding);
final PrintWriter pwOut = tlpp.pw();
final Treebank guessTreebank = tlpp.diskTreebank();
guessTreebank.loadPath(guessFile);
pwOut.println("GUESS TREEBANK:");
pwOut.println(guessTreebank.textualSummary());
final Treebank goldTreebank = tlpp.diskTreebank();
goldTreebank.loadPath(goldFile);
pwOut.println("GOLD TREEBANK:");
pwOut.println(goldTreebank.textualSummary());
final UnlabeledAttachmentEval metric = new UnlabeledAttachmentEval("UAS LP/LR", true, tlpp.headFinder());
final TreeTransformer tc = tlpp.collinizer();
//The evalb ref implementation assigns status for each tree pair as follows:
//
// 0 - Ok (yields match)
// 1 - length mismatch
// 2 - null parse e.g. (()).
//
//In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
final Iterator<Tree> goldItr = goldTreebank.iterator();
final Iterator<Tree> guessItr = guessTreebank.iterator();
int goldLineId = 0;
int guessLineId = 0;
int skippedGuessTrees = 0;
while (guessItr.hasNext() && goldItr.hasNext()) {
Tree guessTree = guessItr.next();
List<Label> guessYield = guessTree.yield();
guessLineId++;
Tree goldTree = goldItr.next();
List<Label> goldYield = goldTree.yield();
goldLineId++;
// Check that we should evaluate this tree
if (goldYield.size() > maxGoldYield) {
skippedGuessTrees++;
continue;
}
// Only trees with equal yields can be evaluated
if (goldYield.size() != guessYield.size()) {
pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
skippedGuessTrees++;
continue;
}
final Tree evalGuess = tc.transformTree(guessTree);
evalGuess.indexLeaves(true);
final Tree evalGold = tc.transformTree(goldTree);
evalGold.indexLeaves(true);
metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
}
if (guessItr.hasNext() || goldItr.hasNext()) {
System.err.printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId);
}
pwOut.println("================================================================================");
if (skippedGuessTrees != 0)
pwOut.printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
metric.display(true, pwOut);
pwOut.println();
pwOut.close();
}
use of edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams in project CoreNLP by stanfordnlp.
the class SemgrexDemo method main.
public static void main(String[] args) {
String treeString = "(ROOT (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
// Typically the tree is constructed by parsing or reading a
// treebank. This is just for example purposes
Tree tree = Tree.valueOf(treeString);
// This creates English uncollapsed dependencies as a
// SemanticGraph. If you are creating many SemanticGraphs, you
// should use a GrammaticalStructureFactory and use it to generate
// the intermediate GrammaticalStructure instead
SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree);
// Alternatively, this could have been the Chinese params or any
// other language supported. As of 2014, only English and Chinese
TreebankLangParserParams params = new EnglishTreebankParserParams();
GrammaticalStructureFactory gsf = params.treebankLanguagePack().grammaticalStructureFactory(params.treebankLanguagePack().punctuationWordRejectFilter(), params.typedDependencyHeadFinder());
GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
log.info(graph);
SemgrexPattern semgrex = SemgrexPattern.compile("{}=A <<nsubj {}=B");
SemgrexMatcher matcher = semgrex.matcher(graph);
// ancestor of both "dog" and "my" via the nsubj relation
while (matcher.find()) {
log.info(matcher.getNode("A") + " <<nsubj " + matcher.getNode("B"));
}
}
Aggregations