Use of edu.stanford.nlp.trees.TreeFactory in project CoreNLP by stanfordnlp.
Class ManipulateTopBracket, method main.
/**
 * Command-line entry point that adds or removes the top-level bracket of every
 * tree in the given treebank files and prints the resulting trees.
 *
 * <p>Options are parsed with {@code StringUtils.argsToProperties}. Option
 * {@code l} selects the language (default {@code Language.English}), option
 * {@code b} switches from adding the start-symbol bracket to removing it, and
 * the remaining (unnamed) arguments are the treebank paths to load.
 *
 * @param args command-line arguments; the usage message is printed and the
 *             JVM exits with status -1 if there are too few arguments or no
 *             input paths
 */
public static void main(String[] args) {
  if (args.length < minArgs) {
    System.out.println(usage());
    System.exit(-1);
  }

  Properties options = StringUtils.argsToProperties(args, argDefs());
  Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
  TreebankLangParserParams tlpp = language.params;

  // The encoding used to be read from key "l", which is the language option
  // (see above), so passing a language silently became the charset name.
  // NOTE(review): "e" is assumed to be the encoding flag -- confirm against
  // argDefs()/usage(). If it is not defined, the encoding is simply UTF-8.
  String encoding = options.getProperty("e", "UTF-8");
  boolean removeBracket = PropertiesUtils.getBool(options, "b", false);
  tlpp.setInputEncoding(encoding);
  tlpp.setOutputEncoding(encoding);
  DiskTreebank tb = tlpp.diskTreebank();

  // "".split("\\s+") yields a one-element array containing "", so the file
  // list must be checked for emptiness before it is split.
  String fileList = options.getProperty("", "").trim();
  if (fileList.isEmpty()) {
    log.info(usage());
    System.exit(-1);
  }
  for (String filename : fileList.split("\\s+")) {
    tb.loadPath(filename);
  }

  String startSymbol = tlpp.treebankLanguagePack().startSymbol();
  TreeFactory tf = new LabeledScoredTreeFactory();
  int nTrees = 0;
  PrintWriter pwo = tlpp.pw();
  try {
    for (Tree t : tb) {
      // Compare from the start symbol's side so an unlabeled root does not
      // cause a NullPointerException.
      boolean hasTopBracket = startSymbol.equals(t.value());
      if (removeBracket) {
        if (hasTopBracket) {
          // A root without children has nothing to unwrap; keep it as it is
          // rather than printing a null tree.
          Tree child = t.firstChild();
          if (child != null) {
            t = child;
          }
        }
      } else if (!hasTopBracket) {
        // Add a bracket if it isn't already there
        t = tf.newTreeNode(startSymbol, Collections.singletonList(t));
      }
      pwo.println(t.toString());
      nTrees++;
    }
  } finally {
    // Close (and thereby flush) the writer even if reading a tree fails.
    pwo.close();
  }
  System.err.printf("Processed %d trees.%n", nTrees);
}
Use of edu.stanford.nlp.trees.TreeFactory in project CoreNLP by stanfordnlp.
Class MultiWordPreprocessor, method resolveDummyTags.
/**
 * Reads every tree from {@code treeFile}, fixes it with {@code traverseAndFix},
 * expands multiword phrases, optionally normalizes the whole tree, and writes
 * the result, one tree per line, to a sibling file named
 * {@code treeFile + ".fixed"}. Both files are handled as UTF-8.
 *
 * <p>I/O failures are reported with a stack trace and otherwise ignored, as
 * before; the reader and writer are now closed even when processing fails.
 *
 * @param treeFile      file of trees to read
 * @param unigramTagger counter handed through to {@code traverseAndFix}
 * @param retainNER     flag handed through to {@code traverseAndFix}
 * @param tn            normalizer passed to the phrase expander; when
 *                      non-null it is also applied to each whole tree
 */
private static void resolveDummyTags(File treeFile, TwoDimensionalCounter<String, String> unigramTagger, boolean retainNER, TreeNormalizer tn) {
  TreeFactory tf = new LabeledScoredTreeFactory();
  MultiWordTreeExpander expander = new MultiWordTreeExpander();
  TreeReaderFactory trf = new SpanishTreeReaderFactory();

  // try-with-resources closes the writer first, then the tree reader, then the
  // underlying file reader, on both the normal and the exceptional path.
  try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
       TreeReader tr = trf.newTreeReader(br);
       PrintWriter pw = new PrintWriter(new PrintStream(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"))) {
    int nTrees = 0;
    for (Tree t; (t = tr.readTree()) != null; nTrees++) {
      traverseAndFix(t, null, unigramTagger, retainNER);

      // Now "decompress" further the expanded trees formed by
      // multiword token splitting
      t = expander.expandPhrases(t, tn, tf);

      if (tn != null) {
        t = tn.normalizeWholeTree(t, tf);
      }

      pw.println(t.toString());
    }
    System.out.println("Processed " + nTrees + " trees");
  } catch (IOException e) {
    // UnsupportedEncodingException and FileNotFoundException are both
    // IOExceptions, so a single handler covers the three original clauses.
    e.printStackTrace();
  }
}
Use of edu.stanford.nlp.trees.TreeFactory in project CoreNLP by stanfordnlp.
Class ParserUtils, method xTree.
/**
 * Builds a flat fallback tree for a sentence that could not be parsed.
 * Every word becomes a leaf under its own preterminal, and all preterminals
 * hang directly off a single root labeled {@code X}.
 *
 * @param words Words of the sentence that didn't parse
 * @return A tree rooted at {@code X}. Each preterminal carries the word's tag
 *         when the word is a {@code HasTag} with a non-null tag, and
 *         {@code XX} otherwise.
 */
public static Tree xTree(List<? extends HasWord> words) {
  TreeFactory factory = new LabeledScoredTreeFactory();
  List<Tree> preterminals = new ArrayList<>();
  for (HasWord word : words) {
    Tree leaf = factory.newLeaf(word.word());

    // Fall back to the placeholder tag unless the word supplies a real one.
    String label = "XX";
    if (word instanceof HasTag && ((HasTag) word).tag() != null) {
      label = ((HasTag) word).tag();
    }

    preterminals.add(factory.newTreeNode(label, Collections.singletonList(leaf)));
  }
  return factory.newTreeNode("X", preterminals);
}
Aggregations