Search in sources :

Example 1 with POSFromParse

Use of edu.illinois.cs.cogcomp.nlp.utilities.POSFromParse in the project cogcomp-nlp by CogComp.

From the class PennTreebankReader, method findNextTree.

/**
 * Reads the next bracketed parse tree from {@code lines}, starting at {@code currentLineId},
 * and wraps it in a {@link TextAnnotation}.
 *
 * <p>Non-empty lines are concatenated until the running count of {@code '('} minus {@code ')'}
 * returns to zero. {@code TOP_LABEL} is inserted immediately after the first {@code '('} of the
 * first non-empty line, and every {@code "\/"} sequence in the assembled string is replaced by
 * {@code "/"} before parsing. The resulting annotation receives a {@link TreeView} under
 * {@code parseViewName} and a second view produced by a {@link POSFromParse} built with that
 * same view name.
 *
 * <p>Side effects: advances {@code currentLineId} past the consumed lines and increments
 * {@code treeInFile}.
 *
 * @return the annotation built from the tokens of the tree that was read
 * @throws AnnotatorException declared by this method; presumably raised while adding the POS
 *         view — TODO confirm against {@code TextAnnotation#addView}
 */
private TextAnnotation findNextTree() throws AnnotatorException {
    StringBuilder treeText = new StringBuilder();
    int depth = 0;
    boolean awaitingFirstLine = true;

    // NOTE(review): there is no bounds check on currentLineId. If the parentheses never
    // balance (or only blank lines remain), lines.get(...) is eventually called past the
    // end; presumably the caller guarantees a complete tree is available — confirm.
    while (true) {
        String line = lines.get(currentLineId++);
        if (line.isEmpty()) {
            continue;
        }

        if (awaitingFirstLine) {
            awaitingFirstLine = false;
            // Splice the root label in right after the opening bracket. When the line has no
            // '(' at all, indexOf yields -1 and the label ends up at the very start.
            int insertAt = line.indexOf('(') + 1;
            line = line.substring(0, insertAt) + TOP_LABEL + line.substring(insertAt);
        }

        depth += parenBalance(line);
        treeText.append(line);

        if (depth == 0) {
            break;
        }
    }

    // The source text writes a slash as "\/"; restore the plain character before parsing.
    String bracketed = treeText.toString().replace("\\/", "/");
    Tree<String> tree = TreeParserFactory.getStringTreeParser().parse(bracketed);
    String[] text = ParseUtils.getTerminalStringSentence(tree);

    // The id uses the current value of treeInFile, which is then bumped for the next tree.
    String id = "wsj/" + sections[currentSectionId - 1] + "/"
            + currentSectionFiles[currentFileId - 1] + ":" + treeInFile;
    treeInFile++;

    TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
            PENN_TREEBANK_WSJ, id, Collections.singletonList(text));

    TreeView parse = new TreeView(parseViewName, "PTB-GOLD", ta, 1.0);
    parse.setParseTree(0, tree);
    ta.addView(parseViewName, parse);

    POSFromParse pos = new POSFromParse(parseViewName);
    ta.addView(pos);
    return ta;
}

/** Returns the number of {@code '('} characters in {@code line} minus the number of {@code ')'}. */
private static int parenBalance(String line) {
    int balance = 0;
    for (int i = 0; i < line.length(); i++) {
        char c = line.charAt(i);
        if (c == '(') {
            balance++;
        } else if (c == ')') {
            balance--;
        }
    }
    return balance;
}
Also used : TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) POSFromParse(edu.illinois.cs.cogcomp.nlp.utilities.POSFromParse)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 1, TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView): 1, POSFromParse (edu.illinois.cs.cogcomp.nlp.utilities.POSFromParse): 1