Search in sources :

Example 11 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

The method initializeDummySentenceVerb of the class VerbSenseLabeler.

/**
 * Builds a small hand-annotated TextAnnotation for the single sentence "I do .".
 *
 * The returned annotation carries four views, added in this order: POS (one tag per
 * token), NER (no spans), SHALLOW_PARSE (an NP chunk over token 0 and a VP chunk over
 * token 1) and LEMMA (one lemma per token).
 *
 * @return the TextAnnotation with the POS, NER, SHALLOW_PARSE and LEMMA views attached
 */
protected TextAnnotation initializeDummySentenceVerb() {
    final String[] words = { "I", "do", "." };
    final String[] posTags = { "PRP", "VBP", "." };
    final String[] lemmas = { "i", "do", "." };

    List<String[]> sentences = new ArrayList<>();
    sentences.add(words);
    TextAnnotation dummy = BasicTextAnnotationBuilder.createTextAnnotationFromTokens("", "", sentences);

    // Part-of-speech tags, one per token.
    TokenLabelView posView = new TokenLabelView(ViewNames.POS, "Test", dummy, 1.0);
    for (int i = 0; i < posTags.length; i++) {
        posView.addTokenLabel(i, posTags[i], 1d);
    }
    dummy.addView(ViewNames.POS, posView);

    // The NER view is attached without any spans.
    SpanLabelView nerView = new SpanLabelView(ViewNames.NER, "test", dummy, 1d);
    dummy.addView(ViewNames.NER, nerView);

    // Shallow parse: two single-token chunks; the final "." is left unchunked.
    SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "test", dummy, 1d);
    chunkView.addSpanLabel(0, 1, "NP", 1d);
    chunkView.addSpanLabel(1, 2, "VP", 1d);
    dummy.addView(ViewNames.SHALLOW_PARSE, chunkView);

    // Lemmas, one per token.
    TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, "test", dummy, 1d);
    for (int i = 0; i < lemmas.length; i++) {
        lemmaView.addTokenLabel(i, lemmas[i], 1d);
    }
    dummy.addView(ViewNames.LEMMA, lemmaView);

    return dummy;
}
Also used : ArrayList(java.util.ArrayList) TokenLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)

Example 12 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

The method buildTextAnnotation of the class TokenizerTextAnnotationBuilder.

/**
 * Instantiate a TextAnnotation and attach explicit {@link ViewNames#SENTENCE} and
 * {@link ViewNames#TOKENS} views to it.
 *
 * Both views are created with the same generator name and view score
 * ({@code sentenceViewGenerator} / {@code sentenceViewScore}); every individual span in
 * either view is added with a score of 1.
 *
 * @param corpusId a field in TextAnnotation that can be used by the client for book-keeping
 *        (e.g. track texts from the same corpus)
 * @param textId a field in TextAnnotation that can be used by the client for book-keeping (e.g.
 *        identify a specific document by some reference string)
 * @param text the plain English text to process
 * @param tokens the token Strings, in order from original text
 * @param sentenceEndPositions token offsets of sentence ends (one-past-the-end indexing); must
 *        be non-empty and its last element must equal {@code tokens.length}
 * @param sentenceViewGenerator the name of the source of the sentence split
 * @param sentenceViewScore a score that may indicate how reliable the sentence split
 *        information is
 * @return a TextAnnotation object with {@link ViewNames#TOKENS} and {@link ViewNames#SENTENCE}
 *         views.
 * @throws IllegalArgumentException if {@code sentenceEndPositions} is empty or its last element
 *         is not the number of tokens
 */
public static TextAnnotation buildTextAnnotation(String corpusId, String textId, String text, String[] tokens, int[] sentenceEndPositions, String sentenceViewGenerator, double sentenceViewScore) {
    // Guard the empty case explicitly: indexing [length - 1] on an empty array would otherwise
    // surface as an ArrayIndexOutOfBoundsException instead of a meaningful argument error.
    if (sentenceEndPositions.length == 0 || sentenceEndPositions[sentenceEndPositions.length - 1] != tokens.length) {
        throw new IllegalArgumentException("Invalid sentence boundary. Last element should be the number of tokens");
    }
    IntPair[] offsets = TokenUtils.getTokenOffsets(text, tokens);
    assert offsets.length == tokens.length;
    TextAnnotation ta = new TextAnnotation(corpusId, textId, text, offsets, tokens, sentenceEndPositions);

    // Sentence view: each sentence spans from the previous sentence end to its own end.
    SpanLabelView view = new SpanLabelView(ViewNames.SENTENCE, sentenceViewGenerator, ta, sentenceViewScore);
    int start = 0;
    for (int s : sentenceEndPositions) {
        view.addSpanLabel(start, s, ViewNames.SENTENCE, 1d);
        start = s;
    }
    ta.addView(ViewNames.SENTENCE, view);

    // Token view: one single-token span per token, labelled with the token string itself.
    SpanLabelView tokView = new SpanLabelView(ViewNames.TOKENS, sentenceViewGenerator, ta, sentenceViewScore);
    for (int tokIndex = 0; tokIndex < tokens.length; ++tokIndex) {
        tokView.addSpanLabel(tokIndex, tokIndex + 1, tokens[tokIndex], 1d);
    }
    ta.addView(ViewNames.TOKENS, tokView);
    return ta;
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair)

Example 13 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

The method addTokenView of the class TokenizerUtilities.

/**
 * Sentence-splits the text of {@code input}, tokenizes every sentence with {@code tokenizer},
 * and builds a {@link ViewNames#TOKENS} view holding one single-token span per token, labelled
 * with the token string.
 *
 * NOTE(review): a {@link ViewNames#SENTENCE} view is also built and populated here, but within
 * this method it is neither attached to {@code input} nor returned; likewise the returned token
 * view is not attached to {@code input} here. Confirm whether callers are expected to do that.
 *
 * @param input the annotation whose text is split and tokenized
 * @param tokenizer the tokenizer applied to each sentence's text
 * @param source the generator name recorded on the created views
 * @return the populated TOKENS view
 * @throws IllegalArgumentException if the number of collected tokens differs from the number of
 *         collected character offsets
 */
public static SpanLabelView addTokenView(TextAnnotation input, Tokenizer tokenizer, String source) {
    Sentence[] splitSentences = new SentenceSplitter(new String[] { input.getText() }).splitAll();

    List<String> allTokens = new ArrayList<>();
    List<IntPair> absoluteOffsets = new ArrayList<>();
    List<IntPair> sentenceTokenSpans = new ArrayList<>();

    int firstTokenOfSentence = 0;
    for (Sentence sentence : splitSentences) {
        Pair<String[], IntPair[]> tokenized = tokenizer.tokenizeSentence(sentence.text);
        final int tokenCount = tokenized.getFirst().length;
        int idx = 0;
        while (idx < tokenCount) {
            allTokens.add(tokenized.getFirst()[idx]);
            // Tokenizer offsets are relative to the sentence; shift them by the sentence start.
            IntPair local = tokenized.getSecond()[idx];
            int shiftedBegin = local.getFirst() + sentence.start;
            int shiftedEnd = local.getSecond() + sentence.start;
            absoluteOffsets.add(new IntPair(shiftedBegin, shiftedEnd));
            idx++;
        }
        int onePastLastToken = allTokens.size();
        sentenceTokenSpans.add(new IntPair(firstTokenOfSentence, onePastLastToken));
        firstTokenOfSentence = onePastLastToken;
    }

    if (allTokens.size() != absoluteOffsets.size()) {
        throw new IllegalArgumentException("tokens (" + allTokens.size() + ") must equal charOffsets (" + absoluteOffsets.size() + "), but does not.");
    }

    SpanLabelView tokenView = new SpanLabelView(ViewNames.TOKENS, source, input, 1.0);
    SpanLabelView sentenceView = new SpanLabelView(ViewNames.SENTENCE, source, input, 1.0);

    int position = 0;
    for (String token : allTokens) {
        tokenView.addSpanLabel(position, position + 1, token, 1d);
        position++;
    }
    for (IntPair sentenceSpan : sentenceTokenSpans) {
        sentenceView.addSpanLabel(sentenceSpan.getFirst(), sentenceSpan.getSecond(), ViewNames.SENTENCE, 1d);
    }
    return tokenView;
}
Also used : SentenceSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter) ArrayList(java.util.ArrayList) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Sentence(edu.illinois.cs.cogcomp.lbjava.nlp.Sentence) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair)

Example 14 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

The method testAddView of the class SimpleGazetteerAnnotatorTest.

/**
 * Test method for
 * {@link edu.illinois.cs.cogcomp.edison.annotators.SimpleGazetteerAnnotator#addView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)}
 * .
 *
 * Runs the annotator three times: once with the default resource manager, once with an explicit
 * configuration (phrase length 4, eager initialization, dictionaries under
 * {@code /testgazetteers/}) on the same lower-cased sentence, and once with that explicit
 * configuration on a sentence containing the capitalized "University of Illinois". Each run
 * checks the surface text and label of the leading constituents of the TREE_GAZETTEER view.
 *
 * @throws URISyntaxException declared by the test; the raising call is not visible here
 * @throws IOException declared by the test; the raising call is not visible here
 * @throws AnnotatorException declared by the test; the raising call is not visible here
 */
@Test
public void testAddView() throws IOException, URISyntaxException, AnnotatorException {
    // --- Run 1: default configuration ---
    SimpleGazetteerAnnotator sga = new SimpleGazetteerAnnotator(defaultRm);
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionaries.size());
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionariesIgnoreCase.size());
    TextAnnotation ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
    sga.addView(ta);
    SpanLabelView view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    List<Constituent> entities = view.getConstituents();
    Constituent c1 = entities.get(0);
    assertEquals("university of illinois", c1.toString());
    Constituent c2 = entities.get(1);
    assertEquals("university of illinois at champaign urbana", c2.toString());
    Constituent c3 = entities.get(2);
    assertEquals("illinois", c3.toString());
    Constituent c4 = entities.get(3);
    assertEquals("champaign", c4.toString());
    Constituent c5 = entities.get(4);
    assertEquals("urbana", c5.toString());
    assertEquals("organizations(IC)", c1.getLabel());
    assertEquals("organizations(IC)", c2.getLabel());
    assertEquals("places(IC)", c3.getLabel());
    assertEquals("places(IC)", c4.getLabel());
    assertEquals("places(IC)", c5.getLabel());

    // --- Run 2: explicit configuration with phrase length 4 ---
    // The seven-token phrase matched in run 1 is no longer expected among the results.
    Properties props = new Properties();
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "4");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key, SimpleGazetteerAnnotatorConfigurator.FALSE);
    sga = new SimpleGazetteerAnnotator(new ResourceManager(props));
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionaries.size());
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionariesIgnoreCase.size());
    ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
    sga.addView(ta);
    view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    entities = view.getConstituents();
    c1 = entities.get(0);
    assertEquals("university of illinois", c1.toString());
    c2 = entities.get(1);
    assertEquals("illinois", c2.toString());
    c3 = entities.get(2);
    assertEquals("champaign", c3.toString());
    c4 = entities.get(3);
    assertEquals("urbana", c4.toString());
    assertEquals("organizations(IC)", c1.getLabel());
    assertEquals("places(IC)", c2.getLabel());
    assertEquals("places(IC)", c3.getLabel());
    assertEquals("places(IC)", c4.getLabel());

    // --- Run 3: same annotator, capitalized organization name ---
    ta = tab.createTextAnnotation("I hail from the University of Illinois at champaign urbana.");
    sga.addView(ta);
    view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    entities = view.getConstituents();
    c1 = entities.get(0);
    assertEquals("University of Illinois", c1.toString());
    assertEquals("organizations", c1.getLabel());
    // Fetching index 1 verifies that at least two constituents exist.
    c2 = entities.get(1);
    // NOTE(review): the two assertions below re-check c1, not c2. The expected text/label for
    // c2 cannot be determined from this file, so they are kept as-is -- confirm the intent.
    assertEquals("University of Illinois", c1.toString());
    assertEquals("organizations", c1.getLabel());
}
Also used : ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Properties(java.util.Properties) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 15 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

The method testCharniakParseViewGenerator of the class TestBrownClusterViewGenerator.

/**
 * Test the configuration of normalizing tokens in the brown clusters.
 *
 * The view is generated twice for the tokenized sentence "a test .": first with the default
 * configuration (normalization on), then with
 * {@link BrownClusterViewGeneratorConfigurator#NORMALIZE_TOKEN} set to false. With normalization
 * the token "a" is expected to yield two constituents; without it, one.
 *
 * Any exception raised while constructing or running the generator fails the test immediately
 * with the original exception attached as the cause.
 */
@Test
public final void testCharniakParseViewGenerator() {
    String sentence = "a test .";
    TextAnnotation ta = TextAnnotationUtilities.createFromTokenizedString(sentence);
    // The default configuration: do normalization
    BrownClusterViewGenerator viewGenerator;
    try {
        viewGenerator = new BrownClusterViewGenerator(BrownClusterViewGenerator.file100, BrownClusterViewGenerator.file100);
        viewGenerator.addView(ta);
    } catch (Exception e) {
        // Fail here with the real cause rather than continuing to a misleading
        // NullPointerException or missing-view error further down.
        throw new AssertionError("Failed to build the Brown cluster view with the default configuration", e);
    }
    SpanLabelView view = (SpanLabelView) ta.getView(viewGenerator.getViewName());
    assertEquals("a", view.getConstituents().get(0).getSurfaceForm());
    assertEquals("111011111", view.getConstituents().get(0).getLabel());
    assertEquals("a", view.getConstituents().get(1).getSurfaceForm());
    assertEquals("10010", view.getConstituents().get(1).getLabel());
    assertEquals("test", view.getConstituents().get(2).getSurfaceForm());
    assertEquals("001110", view.getConstituents().get(2).getLabel());
    // Don't normalize tokens in the brown clusters
    Properties props = new Properties();
    props.setProperty(BrownClusterViewGeneratorConfigurator.NORMALIZE_TOKEN.key, Configurator.FALSE);
    ResourceManager rm = new ResourceManager(props);
    try {
        viewGenerator = new BrownClusterViewGenerator(BrownClusterViewGenerator.file100, BrownClusterViewGenerator.file100, rm);
        viewGenerator.addView(ta);
    } catch (Exception e) {
        throw new AssertionError("Failed to build the Brown cluster view with token normalization disabled", e);
    }
    view = (SpanLabelView) ta.getView(viewGenerator.getViewName());
    assertEquals("a", view.getConstituents().get(0).getSurfaceForm());
    assertEquals("10010", view.getConstituents().get(0).getLabel());
    assertEquals("test", view.getConstituents().get(1).getSurfaceForm());
    assertEquals("001110", view.getConstituents().get(1).getLabel());
}
Also used : ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Properties(java.util.Properties) Test(org.junit.Test)

Aggregations

SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) 24, Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) 12, TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) 12, ArrayList (java.util.ArrayList) 5, IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair) 4, Test (org.junit.Test) 3, AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException) 2, Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) 2, TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) 2, View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) 2, ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) 2, EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException) 2, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations) 2, Annotation (edu.stanford.nlp.pipeline.Annotation) 2, CoreMap (edu.stanford.nlp.util.CoreMap) 2, LinkedHashSet (java.util.LinkedHashSet) 2, Properties (java.util.Properties) 2, Comma (edu.illinois.cs.cogcomp.comma.datastructures.Comma) 1, CommaSRLSentence (edu.illinois.cs.cogcomp.comma.datastructures.CommaSRLSentence) 1, Option (edu.illinois.cs.cogcomp.core.datastructures.Option) 1