use of opennlp.tools.parser.Parse in project lucida by claritylab.
the class NETagger method tagNes.
/**
 * Performs named entity tagging on an array of full parses of sentences.
 *
 * <p>Each sentence is handed to every configured finder. Consecutive tokens
 * tagged as belonging to one name are grouped into a {@code Span} of token
 * indices (start and end both inclusive) and attached to the sentence's parse
 * tree through {@code addNames}. The parses are modified in place.
 *
 * <p>One map of previously assigned tags is kept per finder and carried over
 * from sentence to sentence within a single call.
 *
 * @param parses array of full parses of sentences
 */
// TODO only works with OpenNLP taggers so far
@SuppressWarnings("unchecked")
public static void tagNes(Parse[] parses) {
    // one "previous tags" map per finder, shared across all sentences of this call
    Map[] prevTokenMaps = new HashMap[finders.length];
    for (int i = 0; i < finders.length; i++) {
        prevTokenMaps[i] = new HashMap();
    }

    for (Parse parse : parses) {
        // get tokens
        Parse[] tokens = parse.getTagNodes();

        // find named entities: run every finder before any map is updated
        String[][] finderTags = new String[finders.length][];
        for (int i = 0; i < finders.length; i++) {
            finderTags[i] = finders[i].find(tokens, prevTokenMaps[i]);
        }

        // update prevTokenMaps with the tags assigned in this sentence
        // NOTE(review): the maps are keyed by the Parse token objects themselves,
        // not by their text - confirm this is what the finders look up.
        for (int i = 0; i < prevTokenMaps.length; i++) {
            for (int j = 0; j < tokens.length; j++) {
                prevTokenMaps[i].put(tokens[j], finderTags[i][j]);
            }
        }

        for (int i = 0; i < finders.length; i++) {
            // index of the first token of the name currently open, or -1 if none
            int start = -1;
            List<Span> names = new ArrayList<Span>(5);

            // determine spans of tokens that are named entities
            for (int j = 0; j < tokens.length; j++) {
                String tokenTag = finderTags[i][j];
                boolean startsName = tokenTag.equals(NameFinderME.START);
                if (startsName || tokenTag.equals(NameFinderME.OTHER)) {
                    // a START or OTHER tag closes any name that is still open
                    if (start != -1) {
                        names.add(new Span(start, j - 1));
                    }
                    start = startsName ? j : -1;
                }
            }
            // a name that runs up to the last token is closed here
            if (start != -1) {
                names.add(new Span(start, tokens.length - 1));
            }

            // add name entity information to parse
            addNames(finderNames[i], names, tokens);
        }
    }
}
use of opennlp.tools.parser.Parse in project lucida by claritylab.
the class NETagger method addNames.
// ==========
// NE tagging
// ==========
/**
 * Adds named entity information to parses.
 *
 * <p>For every name span the lowest common parent of its first and last token
 * is looked up, and a new node of type {@code tag} is inserted below it:
 * <ul>
 * <li>if the name covers exactly the common parent's span, or does not cross
 *     any of the parent's children, the node covers the name's span;</li>
 * <li>if the name crosses children and the parent is an {@code NP} whose first
 *     child has more than one child, the last of which lies inside the name,
 *     the node covers the whole parent span;</li>
 * <li>otherwise the name is dropped.</li>
 * </ul>
 * Names whose tokens have no common parent are skipped.
 *
 * @param tag named entity type
 * @param names spans of tokens that are named entities; each element must be a
 *        {@code Span} whose start and end are both used as indices into
 *        {@code tokens}
 * @param tokens parses for the tokens
 */
private static void addNames(String tag, List names, Parse[] tokens) {
    for (int i = 0; i < names.size(); i++) {
        Span nameTokenSpan = (Span) names.get(i);
        Parse startToken = tokens[nameTokenSpan.getStart()];
        Parse endToken = tokens[nameTokenSpan.getEnd()];
        Parse commonP = startToken.getCommonParent(endToken);
        if (commonP == null) {
            continue;
        }

        // character span of the name, from the first token's start to the last token's end
        Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());
        if (nameSpan.equals(commonP.getSpan())) {
            // common parent matches exactly the named entity
            commonP.insert(new Parse(commonP.getText(), nameSpan, tag, 1.0));
            continue;
        }

        // common parent includes the named entity
        Parse[] kids = commonP.getChildren();
        boolean crossingKids = false;
        for (int j = 0; j < kids.length; j++) {
            if (nameSpan.crosses(kids[j].getSpan())) {
                crossingKids = true;
                break; // one crossing child is enough
            }
        }

        if (!crossingKids) {
            // named entity does not cross children
            commonP.insert(new Parse(commonP.getText(), nameSpan, tag, 1.0));
        } else if (commonP.getType().equals("NP")) {
            // NE crosses children
            Parse[] grandKids = kids[0].getChildren();
            // Only touch the last grandchild after the length check, so a first
            // child without children cannot cause an out-of-bounds access.
            if (grandKids.length > 1
                    && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) {
                commonP.insert(new Parse(commonP.getText(), commonP.getSpan(), tag, 1.0));
            }
        }
    }
}
use of opennlp.tools.parser.Parse in project lucida by claritylab.
the class NETagger method extractNesRec.
/**
 * Recursive helper for <code>extractNes(Parse)</code>: walks a parse tree that
 * has been augmented with NE tags and collects the text of every NE node.
 *
 * <p>A node counts as an NE node when its type starts with <code>"NE"</code>.
 * Its covered text is trimmed and appended to the list selected by the first
 * ID that <code>getNeIds</code> returns for that type. Nodes are visited
 * parent first, then children in order.
 *
 * @param parse a node of a parse tree
 * @param nes NEs found so far, one list per NE ID
 */
private static void extractNesRec(Parse parse, ArrayList<String>[] nes) {
    String nodeType = parse.getType();
    if (nodeType.startsWith("NE")) {
        // cut the node's covered text out of the full sentence text
        String sentenceText = parse.getText();
        int from = parse.getSpan().getStart();
        int to = parse.getSpan().getEnd();
        String covered = sentenceText.substring(from, to);

        int neId = getNeIds(nodeType)[0];
        nes[neId].add(covered.trim());
    }

    // descend into the subtree
    Parse[] children = parse.getChildren();
    for (int c = 0; c < children.length; c++) {
        extractNesRec(children[c], nes);
    }
}
use of opennlp.tools.parser.Parse in project lucida by claritylab.
the class OpenNLP method link.
/**
 * Identifies coreferences in an array of full parses of sentences.
 *
 * <p>Currently this only collects the mentions of every sentence. For each
 * mention that has no parse attached, a new <code>"NML"</code> node covering
 * the mention's span is inserted into the sentence's parse tree and attached
 * to the mention. The collected mentions are not yet passed to the linker and
 * nothing is returned (see the TODO at the end).
 *
 * @param parses array of full parses of sentences
 */
public static void link(Parse[] parses) {
    List<Mention> document = new ArrayList<Mention>();

    // the array index doubles as the sentence number
    for (int sentenceNumber = 0; sentenceNumber < parses.length; sentenceNumber++) {
        Parse sentence = parses[sentenceNumber];
        DefaultParse wrapped = new DefaultParse(sentence, sentenceNumber);
        Mention[] mentions = linker.getMentionFinder().getMentions(wrapped);

        // construct new parses for mentions which do not have constituents
        for (Mention mention : mentions) {
            if (mention.getParse() != null) {
                continue;
            }
            Parse constituent = new Parse(sentence.getText(), mention.getSpan(), "NML", 1.0);
            sentence.insert(constituent);
            mention.setParse(new DefaultParse(constituent, sentenceNumber));
        }

        document.addAll(Arrays.asList(mentions));
    }

    if (!document.isEmpty()) {
        // Mention[] ms = document.toArray(new Mention[document.size()]);
        // DiscourseEntity[] entities = linker.getEntities(ms);
        // TODO return results in an appropriate data structure
    }
}