Use of edu.stanford.nlp.ling.HasIndex in the CoreNLP project by stanfordnlp:
the class Tree, method dependencies().
/**
 * Return a set of TaggedWord-TaggedWord dependencies, represented as
 * Dependency objects, for the Tree. This will only give
 * useful results if the internal tree node labels support HasWord and
 * head percolation has already been done (see percolateHeads()).
 *
 * @param f Dependencies are excluded for which the Dependency is not
 * accepted by the Filter
 * @return Set of dependencies (each a Dependency)
 */
public Set<Dependency<Label, Label, Object>> dependencies(Predicate<Dependency<Label, Label, Object>> f, boolean isConcrete, boolean copyLabel, boolean copyPosTag) {
  Set<Dependency<Label, Label, Object>> result = Generics.newHashSet();
  for (Tree subtree : this) {
    // Leaves and unary rewrites contribute no head-dependent pairs.
    if (subtree.isLeaf() || subtree.children().length < 2) {
      continue;
    }
    // Build the head label; assumes percolateHeads has already run.
    Label head = makeDependencyLabel(subtree.label(), copyLabel, isConcrete, copyPosTag);
    String headWord = ((HasWord) head).word();
    if (headWord == null) {
      headWord = head.value();
    }
    int headIdx = -1;
    if (isConcrete && head instanceof HasIndex) {
      headIdx = ((HasIndex) head).index();
    }
    // Every child with a different (or repeated) head is an argument;
    // the first child matching the head (by index and word) is skipped.
    boolean matchedHead = false;
    for (Tree kid : subtree.children()) {
      Label dep = makeDependencyLabel(kid.label(), copyLabel, isConcrete, copyPosTag);
      String depWord = ((HasWord) dep).word();
      if (depWord == null) {
        depWord = dep.value();
      }
      int depIdx = -1;
      if (isConcrete && dep instanceof HasIndex) {
        depIdx = ((HasIndex) dep).index();
      }
      if (!matchedHead && headIdx == depIdx && headWord.equals(depWord)) {
        matchedHead = true;
        continue;
      }
      // Concrete dependencies require distinct indices; otherwise fall back
      // to an unnamed (non-concrete) dependency.
      Dependency<Label, Label, Object> dependency;
      if (isConcrete && depIdx != headIdx) {
        dependency = new UnnamedConcreteDependency(head, dep);
      } else {
        dependency = new UnnamedDependency(head, dep);
      }
      if (f.test(dependency)) {
        result.add(dependency);
      }
    }
  }
  return result;
}
Use of edu.stanford.nlp.ling.HasIndex in the CoreNLP project by stanfordnlp:
the class Tree, method percolateHeads().
/**
 * Finds the heads of the tree. This code assumes that the label
 * does store and return sensible values for the category, word, and tag.
 * It will be a no-op otherwise. The tree is modified. The routine
 * assumes the Tree has word leaves and tag preterminals, and copies
 * their category to word and tag respectively, if they have a null
 * value.
 *
 * @param hf The headfinding algorithm to use
 */
public void percolateHeads(HeadFinder hf) {
  Label myLabel = label();
  if (isLeaf()) {
    // Sanity check: word() is usually set by the TreeReader; backfill
    // a missing word from the label's value.
    if (myLabel instanceof HasWord) {
      HasWord hw = (HasWord) myLabel;
      if (hw.word() == null) {
        hw.setWord(myLabel.value());
      }
    }
    return;
  }
  // Percolate bottom-up: process all children first.
  for (Tree child : children()) {
    child.percolateHeads(hf);
  }
  Tree headChild = hf.determineHead(this);
  if (headChild == null) {
    log.info("Head is null: " + this);
    return;
  }
  Label headLabel = headChild.label();
  // Determine the head tag; if the head is a leaf, this node is the
  // preterminal, so its own value serves as the tag.
  String tag = (headLabel instanceof HasTag) ? ((HasTag) headLabel).tag() : null;
  if (tag == null && headChild.isLeaf()) {
    tag = myLabel.value();
  }
  // Determine the head word; for a leaf head whose label type doesn't
  // support word(), fall back to its value.
  String word = (headLabel instanceof HasWord) ? ((HasWord) headLabel).word() : null;
  if (word == null && headChild.isLeaf()) {
    word = headLabel.value();
  }
  int idx = (headLabel instanceof HasIndex) ? ((HasIndex) headLabel).index() : -1;
  // Copy the head's word, tag, and index onto this node's label,
  // where the label type supports them.
  if (myLabel instanceof HasWord) {
    ((HasWord) myLabel).setWord(word);
  }
  if (myLabel instanceof HasTag) {
    ((HasTag) myLabel).setTag(tag);
  }
  if (myLabel instanceof HasIndex && idx >= 0) {
    ((HasIndex) myLabel).setIndex(idx);
  }
}
Use of edu.stanford.nlp.ling.HasIndex in the CoreNLP project by stanfordnlp:
the class MemoryTreebank, method load().
/**
 * Load a collection of parse trees from a Reader.
 * Each tree may optionally be encased in parens to allow for Penn
 * Treebank style trees.
 *
 * @param r The reader to read trees from. (If you want it buffered,
 * you should already have buffered it!)
 * @param id An ID for where these files come from (arbitrary, but
 * something like a filename. Can be <code>null</code> for none.
 */
public void load(Reader r, String id) {
  try {
    // could throw an IO exception?
    TreeReader reader = treeReaderFactory().newTreeReader(r);
    int index = 0;
    Tree tree;
    while ((tree = reader.readTree()) != null) {
      // Stamp provenance (doc ID and sentence index) on labels that
      // support indexing, so we can trace where this tree came from.
      if (tree.label() instanceof HasIndex) {
        HasIndex indexed = (HasIndex) tree.label();
        if (id != null) {
          indexed.setDocID(id);
        }
        indexed.setSentIndex(index);
      }
      parseTrees.add(tree);
      index++;
    }
  } catch (IOException e) {
    log.info("load IO Exception: " + e);
  }
}
Use of edu.stanford.nlp.ling.HasIndex in the CoreNLP project by stanfordnlp:
the class MemoryTreebank, method processFile().
/**
 * Load a collection of parse trees from the file of given name.
 * Each tree may optionally be encased in parens to allow for Penn
 * Treebank style trees.
 * This methods implements the <code>FileProcessor</code> interface.
 *
 * <p>If an SRL map is configured and matches this file, semantic-role
 * labels are attached to the loaded trees: the predicate terminal gets
 * CoNLLPredicateAnnotation, and each argument constituent gets an entry
 * (verb index -> argument type) in its CoNLLSRLAnnotation map.
 *
 * @param file file to load a tree from
 */
public void processFile(File file) {
  TreeReader tr = null;
  // SRL stuff: find the SRL entries whose suffix key matches this file's path.
  CollectionValuedMap<Integer, String> srlMap = null;
  if (this.srlMap != null) {
    // there must be a better way ... (linear scan over suffix keys)
    String filename = file.getAbsolutePath();
    for (String suffix : this.srlMap.keySet()) {
      if (filename.endsWith(suffix)) {
        srlMap = this.srlMap.get(suffix);
        break;
      }
    }
    if (srlMap == null) {
      log.info("could not find SRL entries for file: " + file);
    }
  }
  try {
    // maybe print file name to stdout to get some feedback
    if (PRINT_FILENAMES) {
      log.info(file);
    }
    // could throw an IO exception if can't open for reading
    tr = treeReaderFactory().newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding())));
    int sentIndex = 0;
    Tree pt;
    while ((pt = tr.readTree()) != null) {
      if (pt.label() instanceof HasIndex) {
        // so we can trace where this tree came from
        HasIndex hi = (HasIndex) pt.label();
        hi.setDocID(file.getName());
        hi.setSentIndex(sentIndex);
      }
      if (srlMap == null) {
        parseTrees.add(pt);
      } else {
        // NOTE(review): assumes srlMap.get(sentIndex) returns an empty
        // collection (not null) for a missing key — confirm against
        // CollectionValuedMap semantics, else srls.isEmpty() can NPE.
        Collection<String> srls = srlMap.get(sentIndex);
        // pt.pennPrint();
        // log.info(srls);
        parseTrees.add(pt);
        if (srls.isEmpty()) {
          // parseTrees.add(pt);
        } else {
          for (String srl : srls) {
            // Tree t = pt.deepCopy();
            // SRL record format (whitespace-separated fields):
            // bits[0] = terminal index of the predicate verb,
            // bits[2] = "lemma.frame", bits[4..] = argument specs.
            String[] bits = srl.split("\\s+");
            int verbIndex = Integer.parseInt(bits[0]);
            // NOTE(review): lemma is computed but never used below.
            String lemma = bits[2].split("\\.")[0];
            // Tree verb = Trees.getTerminal(t, verbIndex);
            Tree verb = Trees.getTerminal(pt, verbIndex);
            // Mark the predicate terminal on the tree's CoreLabel.
            // ((CoreLabel)verb.label()).set(SRLIDAnnotation.class, SRL_ID.REL);
            ((CoreLabel) verb.label()).set(CoreAnnotations.CoNLLPredicateAnnotation.class, true);
            // Each argument spec is "locs-ARGTYPE", where locs is a
            // [*,]-separated list of "term:height" locations.
            for (int i = 4; i < bits.length; i++) {
              String arg = bits[i];
              String[] bits1;
              // NOTE(review): both branches split identically on "-"; the
              // ARGM check has no effect — likely a leftover from code that
              // once handled ARGM subtypes differently. Confirm intent.
              if (arg.indexOf("ARGM") >= 0) {
                bits1 = arg.split("-");
              } else {
                bits1 = arg.split("-");
              }
              String locs = bits1[0];
              String argType = bits1[1];
              if (argType.equals("rel")) {
                continue;
              }
              // Resolve each location: start at the preterminal for `term`
              // and climb `height` parents to reach the argument constituent.
              for (String loc : locs.split("[*,]")) {
                bits1 = loc.split(":");
                int term = Integer.parseInt(bits1[0]);
                int height = Integer.parseInt(bits1[1]);
                // Tree t1 = Trees.getPreTerminal(t, term);
                Tree t1 = Trees.getPreTerminal(pt, term);
                for (int j = 0; j < height; j++) {
                  // t1 = t1.parent(t);
                  t1 = t1.parent(pt);
                }
                // Lazily create the role map and record verbIndex -> argType.
                Map<Integer, String> roleMap = ((CoreLabel) t1.label()).get(CoreAnnotations.CoNLLSRLAnnotation.class);
                if (roleMap == null) {
                  roleMap = Generics.newHashMap();
                  ((CoreLabel) t1.label()).set(CoreAnnotations.CoNLLSRLAnnotation.class, roleMap);
                }
                roleMap.put(verbIndex, argType);
                // ((CoreLabel)t1.label()).set(SRLIDAnnotation.class, SRL_ID.ARG);
              }
            }
            // Legacy (disabled) code: marked non-argument nodes with
            // SRLIDAnnotation values on a deep copy of the tree.
            // for (Tree t1 : t) {
            // if (t1.isLeaf()) { continue; }
            // CoreLabel fl = (CoreLabel)t1.label();
            // if (fl.value() == null) { continue; }
            // if (!fl.has(SRLIDAnnotation.class)) {
            // boolean allNone = true;
            // for (Tree t2 : t1) {
            // SRL_ID s = ((CoreLabel)t2.label()).get(SRLIDAnnotation.class);
            // if (s == SRL_ID.ARG || s == SRL_ID.REL) {
            // allNone = false;
            // break;
            // }
            // }
            // if (allNone) {
            // fl.set(SRLIDAnnotation.class, SRL_ID.ALL_NO);
            // } else {
            // fl.set(SRLIDAnnotation.class, SRL_ID.NO);
            // }
            // }
            // }
            // parseTrees.add(t);
          }
        }
      }
      sentIndex++;
    }
  } catch (IOException e) {
    throw new RuntimeIOException("MemoryTreebank.processFile IOException in file " + file, e);
  } finally {
    IOUtils.closeIgnoringExceptions(tr);
  }
}
End of aggregated usage examples.