Search in sources :

Example 1 with Parse

use of opennlp.tools.parser.Parse in project lucida by claritylab.

the class NETagger method tagNes.

/**
 * Performs named entity tagging on an array of full parses of sentences.
 * <p>
 * For every sentence, each configured finder tags the sentence's tag nodes.
 * Runs of tokens that begin with a <code>NameFinderME.START</code> tag are
 * converted into token spans and handed to <code>addNames</code> together
 * with the entity type name of the finder that produced them.
 *
 * @param parses array of full parses of sentences
 */
// TODO only works with OpenNLP taggers so far
// NOTE(review): raw Map/HashMap are kept because the parameter type expected by
// finders[i].find(...) is not visible from this method - confirm before generifying.
@SuppressWarnings("unchecked")
public static void tagNes(Parse[] parses) {
    // one token -> tag map per finder; created once so it carries over from
    // sentence to sentence
    Map[] prevTokenMaps = new HashMap[finders.length];
    for (int i = 0; i < finders.length; i++) {
        prevTokenMaps[i] = new HashMap();
    }
    for (Parse parse : parses) {
        // get tokens
        Parse[] tokens = parse.getTagNodes();
        // find named entities
        String[][] finderTags = new String[finders.length][];
        for (int i = 0; i < finders.length; i++) {
            finderTags[i] = finders[i].find(tokens, prevTokenMaps[i]);
        }
        // update prevTokenMaps with the tags assigned in this sentence
        for (int i = 0; i < prevTokenMaps.length; i++) {
            for (int j = 0; j < tokens.length; j++) {
                prevTokenMaps[i].put(tokens[j], finderTags[i][j]);
            }
        }
        for (int i = 0; i < finders.length; i++) {
            // index of the first token of the name currently being read, or -1
            int start = -1;
            List<Span> names = new ArrayList<Span>(5);
            // determine spans of tokens that are named entities
            for (int j = 0; j < tokens.length; j++) {
                String tokenTag = finderTags[i][j];
                boolean startsName = tokenTag.equals(NameFinderME.START);
                // START and OTHER both terminate a name that is still open;
                // any other tag leaves the open name untouched
                if (startsName || tokenTag.equals(NameFinderME.OTHER)) {
                    if (start != -1) {
                        // span end is the index of the last token of the name (inclusive)
                        names.add(new Span(start, j - 1));
                    }
                    start = -1;
                }
                if (startsName) {
                    start = j;
                }
            }
            // a name that is still open at the end of the sentence runs to the last token
            if (start != -1) {
                names.add(new Span(start, tokens.length - 1));
            }
            // add name entity information to parse
            addNames(finderNames[i], names, tokens);
        }
    }
}
Also used : HashMap(java.util.HashMap) Parse(opennlp.tools.parser.Parse) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map) Span(opennlp.tools.util.Span)

Example 2 with Parse

use of opennlp.tools.parser.Parse in project lucida by claritylab.

the class NETagger method addNames.

// ==========
// NE tagging
// ==========
/**
 * Adds named entity information to parses.
 * <p>
 * For each token span, the common parent of the first and last token is
 * looked up and a new node of type <code>tag</code> is inserted into it when
 * the name either covers the parent exactly or does not cross any of the
 * parent's children. When the name does cross children, a node is only
 * inserted if the parent is an "NP" and the name contains the last child of
 * the parent's first child; in that case the node covers the whole parent.
 * Spans whose tokens have no common parent are skipped.
 *
 * @param tag named entity type
 * @param names spans of tokens that are named entities; both span bounds are
 *              used as indices into <code>tokens</code>
 * @param tokens parses for the tokens
 */
private static void addNames(String tag, List names, Parse[] tokens) {
    for (int i = 0; i < names.size(); i++) {
        Span nameTokenSpan = (Span) names.get(i);
        Parse startToken = tokens[nameTokenSpan.getStart()];
        Parse endToken = tokens[nameTokenSpan.getEnd()];
        Parse commonP = startToken.getCommonParent(endToken);
        if (commonP == null) {
            continue;
        }
        // span of the name, built from the spans of its first and last token
        Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());
        if (nameSpan.equals(commonP.getSpan())) {
            // common parent matches exactly the named entity
            commonP.insert(new Parse(commonP.getText(), nameSpan, tag, 1.0));
            continue;
        }
        // common parent includes the named entity
        Parse[] kids = commonP.getChildren();
        boolean crossingKids = false;
        for (int j = 0; j < kids.length; j++) {
            if (nameSpan.crosses(kids[j].getSpan())) {
                crossingKids = true;
                // one crossing child is enough to decide
                break;
            }
        }
        if (!crossingKids) {
            // named entity does not cross children
            commonP.insert(new Parse(commonP.getText(), nameSpan, tag, 1.0));
        } else if (commonP.getType().equals("NP")) {
            // NE crosses children
            Parse[] grandKids = kids[0].getChildren();
            // check the length before indexing so that a first child without
            // children cannot cause an ArrayIndexOutOfBoundsException
            if (grandKids.length > 1) {
                Parse last = grandKids[grandKids.length - 1];
                if (nameSpan.contains(last.getSpan())) {
                    commonP.insert(new Parse(commonP.getText(), commonP.getSpan(), tag, 1.0));
                }
            }
        }
    }
}
Also used : Parse(opennlp.tools.parser.Parse) Span(opennlp.tools.util.Span)

Example 3 with Parse

use of opennlp.tools.parser.Parse in project lucida by claritylab.

the class NETagger method extractNesRec.

/**
 * Recursive helper for <code>extractNes(Parse)</code>: walks a parse tree
 * that has been augmented with NE tags and collects the text of every node
 * whose type starts with "NE".
 * <p>
 * The node itself is handled before its children. The trimmed text covered
 * by a matching node is appended to the list selected by the first ID that
 * <code>getNeIds</code> returns for the node type.
 *
 * @param parse a node of a parse tree
 * @param nes NEs found so far
 */
private static void extractNesRec(Parse parse, ArrayList<String>[] nes) {
    final String nodeType = parse.getType();
    if (nodeType.startsWith("NE")) {
        // cut the node's covered text out of the full text of the parse
        final String fullText = parse.getText();
        final int from = parse.getSpan().getStart();
        final int to = parse.getSpan().getEnd();
        final String covered = fullText.substring(from, to);
        nes[getNeIds(nodeType)[0]].add(covered.trim());
    }
    // descend into all children, in order
    final Parse[] children = parse.getChildren();
    for (int c = 0; c < children.length; c++) {
        extractNesRec(children[c], nes);
    }
}
Also used : Parse(opennlp.tools.parser.Parse)

Example 4 with Parse

use of opennlp.tools.parser.Parse in project lucida by claritylab.

the class OpenNLP method link.

/**
 * Identifies coreferences in an array of full parses of sentences.
 * <p>
 * Mentions are looked up sentence by sentence. A mention that has no parse
 * attached gets a new "NML" node inserted into its sentence parse, and that
 * node becomes the mention's parse. The mentions are collected, but the
 * actual entity resolution is not performed yet (see the TODO below).
 *
 * @param parses array of full parses of sentences
 */
public static void link(Parse[] parses) {
    List<Mention> mentions = new ArrayList<Mention>();
    // the array index doubles as the sentence number
    for (int sentenceIdx = 0; sentenceIdx < parses.length; sentenceIdx++) {
        Parse sentence = parses[sentenceIdx];
        DefaultParse wrapped = new DefaultParse(sentence, sentenceIdx);
        Mention[] found = linker.getMentionFinder().getMentions(wrapped);
        for (Mention mention : found) {
            if (mention.getParse() == null) {
                // construct a new parse for a mention which does not have a constituent
                Parse constituent = new Parse(sentence.getText(), mention.getSpan(), "NML", 1.0);
                sentence.insert(constituent);
                mention.setParse(new DefaultParse(constituent, sentenceIdx));
            }
            mentions.add(mention);
        }
    }
    if (!mentions.isEmpty()) {
        // Intended follow-up, not enabled yet:
        //   Mention[] ms = document.toArray(new Mention[document.size()]);
        //   DiscourseEntity[] entities = linker.getEntities(ms);
        // TODO return results in an appropriate data structure
    }
}
Also used : Parse(opennlp.tools.parser.Parse) DefaultParse(opennlp.tools.coref.mention.DefaultParse) Mention(opennlp.tools.coref.mention.Mention) ArrayList(java.util.ArrayList) DefaultParse(opennlp.tools.coref.mention.DefaultParse)

Aggregations

Parse (opennlp.tools.parser.Parse)4 ArrayList (java.util.ArrayList)2 Span (opennlp.tools.util.Span)2 HashMap (java.util.HashMap)1 Map (java.util.Map)1 DefaultParse (opennlp.tools.coref.mention.DefaultParse)1 Mention (opennlp.tools.coref.mention.Mention)1