Search in sources :

Example 61 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class FeatureManifestTest method testCreateFex.

/**
 * Builds a feature extractor from the manifest in {@code file} (compressed names, with
 * {@code *default-parser*} bound to the Stanford parse view) and checks the name of the first
 * feature it produces for the first token of the first text annotation.
 */
@Test
public void testCreateFex() throws Exception {
    FeatureManifest manifest = new FeatureManifest(file);
    manifest.useCompressedName();
    manifest.setVariable("*default-parser*", ViewNames.PARSE_STANFORD);
    FeatureExtractor extractor = manifest.createFex();

    // The constituent under test is the very first token; its surface form is "My".
    Constituent firstToken = tas.get(0).getView(ViewNames.TOKENS).getConstituents().get(0);
    assertEquals("My", firstToken.getSurfaceForm());

    // Only the first element handed out by the set's iterator is inspected, so this
    // assertion depends on the iteration order of the returned set.
    Feature firstFeature = extractor.getFeatures(firstToken).iterator().next();
    assertEquals("f:#ctxt#:context1::#wd:mother-in-law", firstFeature.getName());
}
Also used : FeatureExtractor(edu.illinois.cs.cogcomp.edison.features.FeatureExtractor) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 62 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class CreateTestFeaturesResource method addFeatCollection.

/**
 * Runs a feature collection (conflated POS, gerund marker, nominalization marker) over every
 * token of every text annotation in {@code tas} and writes the non-empty results to
 * {@code FEATURE_COLLECTION_FILE}.
 *
 * The stored map goes from the hash code of {@code "<tokenized text>:<token id>"} to the
 * string form of that token's feature set. NOTE(review): because the key is a hash code, two
 * different ids that hash to the same value would overwrite each other.
 *
 * @throws EdisonException declared by the feature extraction calls
 * @throws IOException declared for the write of the serialized map
 */
private void addFeatCollection() throws EdisonException, IOException {
    FeatureCollection collection = new FeatureCollection("features");
    collection.addFeatureExtractor(WordFeatureExtractorFactory.conflatedPOS);
    collection.addFeatureExtractor(WordFeatureExtractorFactory.gerundMarker);
    collection.addFeatureExtractor(WordFeatureExtractorFactory.nominalizationMarker);

    Map<Integer, String> featuresByKey = new HashMap<>();
    for (TextAnnotation ta : tas) {
        for (int tokenId = 0; tokenId < ta.size(); tokenId++) {
            // Unnamed, unlabeled single-token constituent covering [tokenId, tokenId + 1).
            Constituent token = new Constituent("", "", ta, tokenId, tokenId + 1);
            Set<Feature> extracted = collection.getFeatures(token);
            if (extracted.isEmpty()) {
                // Tokens without any feature are not recorded.
                continue;
            }
            String key = ta.getTokenizedText() + ":" + tokenId;
            featuresByKey.put(key.hashCode(), extracted.toString());
        }
    }
    IOUtils.writeObject(featuresByKey, FEATURE_COLLECTION_FILE);
}
Also used : HashMap(java.util.HashMap) FeatureCollection(edu.illinois.cs.cogcomp.edison.features.FeatureCollection) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 63 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class Main method produceBracketedAnnotations.

/**
 * Render a string representing the original data with embedded labels in the text.
 * <p>
 * The constituents of the NER view are visited in the order given by
 * {@code TextAnnotationUtilities.constituentStartComparator}. For each one, the raw text
 * between the previous constituent and this one is copied, followed by
 * {@code [LABEL tokenized-surface-form ] }. Any raw text after the last constituent is
 * appended unchanged.
 * <p>
 * A constituent that starts before the end of an earlier one (nested or overlapping spans)
 * is still emitted as a bracketed label, but no gap text is copied for it and the read
 * position in the raw text is never moved backwards.
 *
 * @param nerView the NER label view.
 * @param ta the text annotation.
 * @return the original text marked up with the annotations.
 */
private String produceBracketedAnnotations(View nerView, TextAnnotation ta) {
    StringBuilder sb = new StringBuilder();
    List<Constituent> constituents = new ArrayList<>(nerView.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    String text = ta.getText();
    // Offset in the raw text up to which everything has already been rendered.
    int where = 0;
    for (Constituent c : constituents) {
        // append everything up to this token.
        int start = c.getStartCharOffset();
        // Guard: with overlapping constituents start can be smaller than where, and an
        // unconditional substring(where, start) would throw StringIndexOutOfBoundsException.
        if (start > where) {
            sb.append(text, where, start);
        }
        // append the bracketed label.
        sb.append('[');
        sb.append(c.getLabel());
        sb.append(' ');
        sb.append(c.getTokenizedSurfaceForm());
        sb.append(" ] ");
        // Never step backwards, so text already covered is not emitted again as gap text.
        where = Math.max(where, c.getEndCharOffset());
    }
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
Also used : ArrayList(java.util.ArrayList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 64 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class Main method produceCoNLL2002Annotations.

/**
 * Render the text annotation as one token per line, each line holding the token's surface
 * form, a space and an entity tag: {@code B-label} when the token starts at the same character
 * offset as the current entity, {@code I-label} when it starts after the entity's start and
 * ends no later than the entity's end, and {@code O} otherwise. An extra empty line is written
 * after every token whose end offset reaches the end offset of the current sentence.
 *
 * @param nerView the NER label view.
 * @param ta the text annotation.
 * @return the tokens in column format, one per line, each followed by its tag.
 */
private String produceCoNLL2002Annotations(View nerView, TextAnnotation ta) {
    StringBuilder out = new StringBuilder();

    // Tokens, sentences and entities, each copied and sorted with the start/end comparator.
    List<Constituent> tokens = new ArrayList<>(ta.getView(ViewNames.TOKENS).getConstituents());
    Collections.sort(tokens, TextAnnotationUtilities.constituentStartEndComparator);
    List<Constituent> sentences = new ArrayList<>(ta.getView(ViewNames.SENTENCE).getConstituents());
    Collections.sort(sentences, TextAnnotationUtilities.constituentStartEndComparator);
    List<Constituent> entities = new ArrayList<>(nerView.getConstituents());
    Collections.sort(entities, TextAnnotationUtilities.constituentStartEndComparator);

    int entityCursor = 0;
    int sentenceCursor = 0;
    // Reads the first sentence unconditionally, so the sentence list must not be empty.
    int sentenceEnd = sentences.get(sentenceCursor).getEndCharOffset();

    for (Constituent token : tokens) {
        int tokenStart = token.getStartCharOffset();
        int tokenEnd = token.getEndCharOffset();

        // Skip entities that neither start at/after this token nor extend to its end.
        while (entityCursor < entities.size()) {
            Constituent candidate = entities.get(entityCursor);
            if (tokenStart <= candidate.getStartCharOffset()
                    || tokenEnd <= candidate.getEndCharOffset()) {
                break;
            }
            entityCursor++;
        }

        out.append(token.getSurfaceForm());
        out.append(' ');

        // Default tag; replaced below only when the token lines up with the current entity.
        String tag = "O";
        if (entityCursor < entities.size()) {
            Constituent entity = entities.get(entityCursor);
            int entityStart = entity.getStartCharOffset();
            int entityEnd = entity.getEndCharOffset();
            if (tokenStart == entityStart) {
                // Same start offset: always a B- tag, whatever the relative end offsets are.
                tag = "B-" + entity.getLabel();
                if (tokenEnd > entityEnd) {
                    System.err.println("Odd. There is an entity enclosed within a single token!");
                }
            } else if (tokenStart > entityStart && tokenEnd <= entityEnd) {
                tag = "I-" + entity.getLabel();
            }
        }
        out.append(tag);
        out.append('\n');

        if (tokenEnd >= sentenceEnd) {
            // Blank line between sentences; stay on the last sentence once it is reached.
            out.append('\n');
            if (sentenceCursor < sentences.size() - 1) {
                sentenceCursor++;
            }
            sentenceEnd = sentences.get(sentenceCursor).getEndCharOffset();
        }
    }
    return out.toString();
}
Also used : ArrayList(java.util.ArrayList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 65 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class LemmatizerTATest method testCreateTextAnnotationLemmaView.

/**
 * Creates a lemma view for {@code inputTa} and checks the labels (lemmas) of the constituents
 * at positions 0, 1, 2, 6 and 15 of that view.
 */
@Test
public void testCreateTextAnnotationLemmaView() {
    View lemmaView = null;
    TextAnnotation ta = inputTa;
    try {
        lemmaView = lem.createLemmaView(ta);
    } catch (IOException e) {
        // fail() only carries the message, so the stack trace is printed as well.
        e.printStackTrace();
        fail(e.getMessage());
    }
    // The test must not pass silently when no view was produced.
    assertTrue("lemma view was not created", null != lemmaView);

    List<Constituent> spans = lemmaView.getConstituents();
    printConstituents(System.out, spans);

    // NOTE(review): the previous local names and "orig ..." comments did not agree with the
    // asserted values, so the checks are expressed by position only. Confirm the
    // corresponding words against the text behind inputTa.
    // Expected value goes first so that JUnit failure messages read correctly.
    assertEquals("the", spans.get(0).getLabel());
    assertEquals("cia", spans.get(1).getLabel());
    assertEquals("think", spans.get(2).getLabel());
    assertEquals("have", spans.get(6).getLabel());
    assertEquals("be", spans.get(15).getLabel());
}
Also used : IOException(java.io.IOException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Aggregations

Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)176 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)95 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)51 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)44 Test (org.junit.Test)39 ArrayList (java.util.ArrayList)29 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)25 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)24 LinkedHashSet (java.util.LinkedHashSet)22 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)20 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)20 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)17 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)14 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)13 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)12 HashSet (java.util.HashSet)12 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)11