Search in sources :

Example 16 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class CurrencyIndicator method addCurrencyView.

/**
 * Adds the {@code VIEW_NAME} span view to the given text annotation, labelling every span that
 * matches one of the known currency patterns as {@code "CURRENCY"}.
 *
 * <p>The currency list is loaded on first use via {@code loadCurrency}. If the annotation already
 * has the view, the method returns without changing it. A matching span that lies entirely inside
 * a span already added to the view is skipped, so nested constituents of the same type are not
 * produced.
 *
 * @param ta the text annotation to add the view to; also used as the lock while the view is built
 * @throws Exception if loading the currency list or matching spans fails
 */
private void addCurrencyView(TextAnnotation ta) throws Exception {
    if (!loaded) {
        synchronized (this) {
            // the currency list is now loaded from the datastore; the flag is checked again
            // under the lock so that concurrent callers do not both trigger the load.
            if (!loaded) {
                loadCurrency(gzip, true);
            }
        }
    }
    synchronized (ta) {
        if (ta.hasView(VIEW_NAME)) {
            return;
        }
        // collect the spans matching any currency pattern, in pattern order
        List<IntPair> matches = new ArrayList<>();
        for (String pattern : currencies) {
            matches.addAll(ta.getSpansMatching(pattern));
        }
        SpanLabelView view = new SpanLabelView(VIEW_NAME, "Gazetteer", ta, 1.0);
        Set<IntPair> added = new LinkedHashSet<>();
        for (IntPair p : matches) {
            // don't add nested constituents of the same type
            boolean foundContainer = false;
            for (IntPair p1 : added) {
                if (p1 == p) {
                    continue;
                }
                if (p1.getFirst() <= p.getFirst() && p1.getSecond() >= p.getSecond()) {
                    foundContainer = true;
                    break;
                }
            }
            if (!foundContainer) {
                view.addSpanLabel(p.getFirst(), p.getSecond(), "CURRENCY", 1.0);
                added.add(p);
            }
        }
        ta.addView(VIEW_NAME, view);
    }
}
Also used : SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair)

Example 17 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class SimpleGazetteerAnnotatorTest method testAddView.

/**
 * Test method for
 * {@link edu.illinois.cs.cogcomp.edison.annotators.SimpleGazetteerAnnotator#addView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)}
 * .
 *
 * <p>Runs the annotator three times: with the default configuration, with a configuration that
 * sets the phrase length to 4, and on a mixed-case sentence, checking the surface form and label
 * of the gazetteer constituents each time.
 *
 * @throws URISyntaxException
 * @throws IOException
 * @throws AnnotatorException
 */
@Test
public void testAddView() throws IOException, URISyntaxException, AnnotatorException {
    // default configuration
    SimpleGazetteerAnnotator sga = new SimpleGazetteerAnnotator(defaultRm);
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionaries.size());
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionariesIgnoreCase.size());
    TextAnnotation ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
    sga.addView(ta);
    SpanLabelView view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    List<Constituent> entities = view.getConstituents();
    Constituent c1 = entities.get(0);
    assertEquals("university of illinois", c1.toString());
    Constituent c2 = entities.get(1);
    assertEquals("university of illinois at champaign urbana", c2.toString());
    Constituent c3 = entities.get(2);
    assertEquals("illinois", c3.toString());
    Constituent c4 = entities.get(3);
    assertEquals("champaign", c4.toString());
    Constituent c5 = entities.get(4);
    assertEquals("urbana", c5.toString());
    assertEquals("organizations(IC)", c1.getLabel());
    assertEquals("organizations(IC)", c2.getLabel());
    assertEquals("places(IC)", c3.getLabel());
    assertEquals("places(IC)", c4.getLabel());
    assertEquals("places(IC)", c5.getLabel());

    // explicit configuration: phrase length 4, eager initialization
    Properties props = new Properties();
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "4");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key, SimpleGazetteerAnnotatorConfigurator.FALSE);
    sga = new SimpleGazetteerAnnotator(new ResourceManager(props));
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionaries.size());
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionariesIgnoreCase.size());
    ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
    sga.addView(ta);
    view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    entities = view.getConstituents();
    c1 = entities.get(0);
    assertEquals("university of illinois", c1.toString());
    c2 = entities.get(1);
    assertEquals("illinois", c2.toString());
    c3 = entities.get(2);
    assertEquals("champaign", c3.toString());
    c4 = entities.get(3);
    assertEquals("urbana", c4.toString());
    assertEquals("organizations(IC)", c1.getLabel());
    assertEquals("places(IC)", c2.getLabel());
    assertEquals("places(IC)", c3.getLabel());
    assertEquals("places(IC)", c4.getLabel());

    // mixed-case input with the same annotator
    ta = tab.createTextAnnotation("I hail from the University of Illinois at champaign urbana.");
    sga.addView(ta);
    view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    entities = view.getConstituents();
    c1 = entities.get(0);
    assertEquals("University of Illinois", c1.toString());
    assertEquals("organizations", c1.getLabel());
    // NOTE(review): c2 is fetched (so a second constituent must exist) but the two assertions
    // below inspect c1 again. This looks like a copy-paste slip; the intended expectations for
    // c2 are not visible here, so the checks are left as they were -- confirm and fix.
    c2 = entities.get(1);
    assertEquals("University of Illinois", c1.toString());
    assertEquals("organizations", c1.getLabel());
}
Also used : ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Properties(java.util.Properties) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 18 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class TestBrownClusterViewGenerator method testCharniakParseViewGenerator.

/**
 * Test the configuration of normalizing tokens in the brown clusters.
 *
 * <p>The view is generated twice for the same sentence: first with the default configuration
 * (normalization on), then with {@code NORMALIZE_TOKEN} set to false. Any exception raised while
 * building a generator or adding its view fails the test immediately with the cause attached,
 * instead of being printed and surfacing later as an unrelated failure.
 */
@Test
public final void testCharniakParseViewGenerator() {
    String sentence = "a test .";
    TextAnnotation ta = TextAnnotationUtilities.createFromTokenizedString(sentence);
    // The default configuration: do normalization
    BrownClusterViewGenerator viewGenerator = null;
    try {
        viewGenerator = new BrownClusterViewGenerator(BrownClusterViewGenerator.file100, BrownClusterViewGenerator.file100);
        viewGenerator.addView(ta);
    } catch (Exception e) {
        throw new AssertionError("Could not generate the brown cluster view with the default configuration", e);
    }
    SpanLabelView view = (SpanLabelView) ta.getView(viewGenerator.getViewName());
    assertEquals("a", view.getConstituents().get(0).getSurfaceForm());
    assertEquals("111011111", view.getConstituents().get(0).getLabel());
    assertEquals("a", view.getConstituents().get(1).getSurfaceForm());
    assertEquals("10010", view.getConstituents().get(1).getLabel());
    assertEquals("test", view.getConstituents().get(2).getSurfaceForm());
    assertEquals("001110", view.getConstituents().get(2).getLabel());
    // Don't normalize tokens in the brown clusters
    Properties props = new Properties();
    props.setProperty(BrownClusterViewGeneratorConfigurator.NORMALIZE_TOKEN.key, Configurator.FALSE);
    ResourceManager rm = new ResourceManager(props);
    try {
        viewGenerator = new BrownClusterViewGenerator(BrownClusterViewGenerator.file100, BrownClusterViewGenerator.file100, rm);
        viewGenerator.addView(ta);
    } catch (Exception e) {
        throw new AssertionError("Could not generate the brown cluster view with token normalization disabled", e);
    }
    view = (SpanLabelView) ta.getView(viewGenerator.getViewName());
    assertEquals("a", view.getConstituents().get(0).getSurfaceForm());
    assertEquals("10010", view.getConstituents().get(0).getLabel());
    assertEquals("test", view.getConstituents().get(1).getSurfaceForm());
    assertEquals("001110", view.getConstituents().get(1).getLabel());
}
Also used : ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Properties(java.util.Properties) Test(org.junit.Test)

Example 19 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class CommaTest method setUp.

/**
 * Builds the fixture sentence with hand-written POS, constituency-parse, NER and shallow-parse
 * views, wraps it in a {@code CommaSRLSentence} and stores that sentence's commas in
 * {@code commas}.
 */
@Override
public void setUp() throws Exception {
    String[] words = "Says Gayle Key , a mathematics teacher , `` Hello world . ''".split("\\s+");
    TextAnnotation annotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(Collections.singletonList(words));

    // POS view: one tag per token, in token order
    String[] posTags = { "VBZ", "NNP", "NNP", ",", "DT", "NN", "NN", ",", "``", "UH", "NN", ".", "''" };
    TokenLabelView posView = new TokenLabelView(ViewNames.POS, "Test", annotation, 1.0);
    for (int tokenId = 0; tokenId < posTags.length; tokenId++) {
        posView.addTokenLabel(tokenId, posTags[tokenId], 1d);
    }

    // constituency parse view for the single sentence
    TreeView parseView = new TreeView(ViewNames.PARSE_STANFORD, "Test", annotation, 1.0);
    String bracketedTree = "(ROOT"
            + "  (SINV"
            + "    (VP (VBZ Says))"
            + "    (NP (NNP Gayle) (NNP Key))"
            + "    (, ,)"
            + "    (S"
            + "      (NP (DT a) (NNS mathematics))"
            + "      (VP (VBZ teacher) (, ,) (`` ``)"
            + "        (NP"
            + "          (INTJ (UH Hello))"
            + "          (NP (NN world)))))"
            + "    (. .) ('' '')))";
    parseView.setParseTree(0, TreeParserFactory.getStringTreeParser().parse(bracketedTree));

    // named-entity view: a single PER span
    SpanLabelView nerView = new SpanLabelView(ViewNames.NER_CONLL, "Test", annotation, 1.0);
    nerView.addSpanLabel(1, 3, "PER", 1.0);

    // shallow-parse view: three NP chunks
    SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "Test", annotation, 1.0);
    chunkView.addSpanLabel(0, 3, "NP", 1.0);
    chunkView.addSpanLabel(4, 7, "NP", 1.0);
    chunkView.addSpanLabel(9, 11, "NP", 1.0);

    // TODO dependency parse
    // TODO SRL view
    annotation.addView(posView.getViewName(), posView);
    annotation.addView(parseView.getViewName(), parseView);
    annotation.addView(nerView.getViewName(), nerView);
    annotation.addView(chunkView.getViewName(), chunkView);

    // refined labels, one list per comma in the sentence
    List<String> labelsForFirstComma = Collections.singletonList("Substitute");
    List<String> labelsForSecondComma = Arrays.asList("Substitute", "Quotation");
    CommaSRLSentence commaSentence = new CommaSRLSentence(annotation, null, Arrays.asList(labelsForFirstComma, labelsForSecondComma));

    List<Comma> extracted = commaSentence.getCommas();
    Comma[] asArray = new Comma[extracted.size()];
    commas = extracted.toArray(asArray);
}
Also used : Comma(edu.illinois.cs.cogcomp.comma.datastructures.Comma) TokenLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) CommaSRLSentence(edu.illinois.cs.cogcomp.comma.datastructures.CommaSRLSentence) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)

Example 20 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class NERAnnotator method addView.

/**
 * Generate the view representing the list of extracted entities and add it to the
 * {@link TextAnnotation}.
 *
 * <p>The tokens of each sentence are converted into the word vectors the NER package works on
 * (zero-length tokens are logged and skipped), the data is annotated, and every entity found in
 * the predictions becomes a labelled span in a new {@link SpanLabelView}. If annotation throws,
 * the error is logged and the method returns without adding a view.
 *
 * @param ta the text annotation to read tokens from and to add the view to
 */
@Override
public void addView(TextAnnotation ta) {
    // convert this data structure into one the NER package can deal with.
    ArrayList<LinkedVector> sentences = new ArrayList<>();
    String[] tokens = ta.getTokens();
    // tokenindices[k] is the TextAnnotation token index of the k-th word handed to the NER
    // package; the two differ only when zero-length tokens were skipped.
    int[] tokenindices = new int[tokens.length];
    int tokenIndex = 0;
    int neWordIndex = 0;
    for (int i = 0; i < ta.getNumberOfSentences(); i++) {
        Sentence sentence = ta.getSentence(i);
        String[] wtoks = sentence.getTokens();
        LinkedVector words = new LinkedVector();
        for (String w : wtoks) {
            if (w.length() > 0) {
                NEWord.addTokenToSentence(words, w, "unlabeled");
                tokenindices[neWordIndex] = tokenIndex;
                neWordIndex++;
            } else {
                logger.error("Bad (zero length) token.");
            }
            tokenIndex++;
        }
        if (words.size() > 0)
            sentences.add(words);
    }
    // Do the annotation.
    Data data = new Data(new NERDocument(sentences, "input"));
    try {
        ExpressiveFeaturesAnnotator.annotate(data);
        Decoder.annotateDataBIO(data, t1, t2);
    } catch (Exception e) {
        logger.error("Cannot annotate the text, the exception was: ", e);
        return;
    }
    // now we have the parsed entities, construct the view object.
    ArrayList<LinkedVector> nerSentences = data.documents.get(0).sentences;
    SpanLabelView nerView = new SpanLabelView(getViewName(), ta);
    // the data always has a single document
    // each LinkedVector in data corresponds to a sentence.
    int tokenoffset = 0;
    for (LinkedVector vector : nerSentences) {
        boolean open = false;
        // there should be a 1:1 mapping btw sentence tokens in record and words/predictions
        // from NER.
        int startIndex = -1;
        String label = null;
        for (int j = 0; j < vector.size(); j++, tokenoffset++) {
            NEWord neWord = (NEWord) (vector.get(j));
            String prediction = neWord.neTypeLevel2;
            // inefficient, use enums, or nominalized indexes for this sort of thing.
            if (prediction.startsWith("B-")) {
                startIndex = tokenoffset;
                label = prediction.substring(2);
                open = true;
            } else if (j > 0) {
                // an "I-" tag whose type differs from the previous word's also starts an entity
                String previous_prediction = ((NEWord) vector.get(j - 1)).neTypeLevel2;
                if (prediction.startsWith("I-") && (!previous_prediction.endsWith(prediction.substring(2)))) {
                    startIndex = tokenoffset;
                    label = prediction.substring(2);
                    open = true;
                }
            }
            if (open) {
                // the entity ends here if this is the last word of the sentence, or the next
                // word starts a new entity, is outside any entity, or has a different type
                boolean close = false;
                if (j == vector.size() - 1) {
                    close = true;
                } else {
                    String next_prediction = ((NEWord) vector.get(j + 1)).neTypeLevel2;
                    if (next_prediction.startsWith("B-"))
                        close = true;
                    if (next_prediction.equals("O"))
                        close = true;
                    if (next_prediction.indexOf('-') > -1 && (!prediction.endsWith(next_prediction.substring(2))))
                        close = true;
                }
                if (close) {
                    int s = tokenindices[startIndex];
                    // The span end is exclusive (one past the last token). The current word is
                    // the entity's last word, so its token index plus one is the end. Looking
                    // up the *next* word's index instead would run past the filled part of
                    // tokenindices at the end of the text and cut the last token off a
                    // multi-token entity. Since startIndex <= tokenoffset and tokenindices is
                    // increasing, e > s always holds.
                    int e = tokenindices[tokenoffset] + 1;
                    nerView.addSpanLabel(s, e, label, 1d);
                    open = false;
                }
            }
        }
    }
    ta.addView(viewName, nerView);
}
Also used : LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector) ArrayList(java.util.ArrayList) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) IOException(java.io.IOException) Sentence(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Sentence)

Aggregations

SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)24 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)12 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)12 ArrayList (java.util.ArrayList)5 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)4 Test (org.junit.Test)3 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)2 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)2 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)2 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)2 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)2 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)2 Annotation (edu.stanford.nlp.pipeline.Annotation)2 CoreMap (edu.stanford.nlp.util.CoreMap)2 LinkedHashSet (java.util.LinkedHashSet)2 Properties (java.util.Properties)2 Comma (edu.illinois.cs.cogcomp.comma.datastructures.Comma)1 CommaSRLSentence (edu.illinois.cs.cogcomp.comma.datastructures.CommaSRLSentence)1 Option (edu.illinois.cs.cogcomp.core.datastructures.Option)1