Use of edu.illinois.cs.cogcomp.nlp.utilities.POSFromParse in project cogcomp-nlp by CogComp.
The class PennTreebankReader, method findNextTree.
/**
 * Reads the next bracketed tree from {@code lines}, starting at {@code currentLineId}, and
 * wraps it in a {@link TextAnnotation} that carries a parse view and a view produced by
 * {@link POSFromParse}.
 *
 * <p>Lines are consumed (and {@code currentLineId} advanced) until the running count of
 * {@code '('} minus {@code ')'} returns to zero. Empty lines are skipped. The first
 * non-empty line has {@code TOP_LABEL} inserted directly after its first {@code '('}.
 * Consumed lines are concatenated without any separator. {@code treeInFile} is incremented
 * once per tree read.
 *
 * @return the annotation built from the tokens and tree of the next bracketed block
 * @throws AnnotatorException if {@code lines} is exhausted before the brackets balance
 *         (previously this surfaced as an unchecked {@code IndexOutOfBoundsException});
 *         may also be propagated from the annotation utilities called here
 */
private TextAnnotation findNextTree() throws AnnotatorException {
    StringBuilder treeText = new StringBuilder();
    int depth = 0;
    boolean awaitingFirstLine = true;

    while (true) {
        // Guard against truncated/unbalanced input instead of running off the end of the list.
        if (currentLineId >= lines.size()) {
            throw new AnnotatorException("Reached end of input at line " + currentLineId
                    + " before the tree was complete (unbalanced bracket depth: " + depth + ")");
        }

        String line = lines.get(currentLineId++);
        if (line.isEmpty()) {
            continue;
        }

        if (awaitingFirstLine) {
            awaitingFirstLine = false;
            // Insert the root label right after the first '('. If the line has no '(',
            // indexOf yields -1 and the label ends up prefixed to the line, as before.
            int afterFirstOpen = line.indexOf('(') + 1;
            line = line.substring(0, afterFirstOpen) + TOP_LABEL + line.substring(afterFirstOpen);
        }

        // Track bracket nesting with a single pass over the line; the net change is the
        // number of '(' minus the number of ')'.
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '(') {
                depth++;
            } else if (c == ')') {
                depth--;
            }
        }

        treeText.append(line);

        // The check happens only after a non-empty line has been appended, so at least one
        // line is always consumed per tree.
        if (depth == 0) {
            break;
        }
    }

    // Un-escape "\/" to "/" before handing the text to the tree parser.
    String bracketed = treeText.toString().replace("\\/", "/");
    Tree<String> tree = TreeParserFactory.getStringTreeParser().parse(bracketed);
    String[] text = ParseUtils.getTerminalStringSentence(tree);

    // The "- 1" offsets suggest the section/file counters have already been advanced past
    // the current entry by the time this runs -- NOTE(review): confirm against the callers.
    String id = "wsj/" + sections[currentSectionId - 1] + "/"
            + currentSectionFiles[currentFileId - 1] + ":" + treeInFile;
    treeInFile++;

    // The whole tree is passed as a single token array, i.e. one sentence.
    TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
            PENN_TREEBANK_WSJ, id, Collections.singletonList(text));

    TreeView parse = new TreeView(parseViewName, "PTB-GOLD", ta, 1.0);
    parse.setParseTree(0, tree);
    ta.addView(parseViewName, parse);

    // Presumably derives the part-of-speech view from the parse view just added -- see POSFromParse.
    POSFromParse pos = new POSFromParse(parseViewName);
    ta.addView(pos);

    return ta;
}
Aggregations