Search in sources :

Example 96 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class ViewConstructorPipelineTest method main.

/**
 * Command-line smoke test: reads the text stored in {@code textFile}, builds an annotator service
 * with {@code PipelineFactory.buildPipeline(ViewNames.POS)}, annotates the text, and prints the
 * number of constituents in the resulting POS view.
 *
 * <p>The process exits with status -1 if the input file is not found, the pipeline cannot be
 * built, or annotation fails.
 *
 * @param args command-line arguments (not read by this method)
 */
public static void main(String[] args) {
    String input = null;
    try {
        input = LineIO.slurp(textFile);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    System.out.println("input from " + textFile + " is " + input.length() + " characters long.");
    AnnotatorService as = null;
    try {
        as = PipelineFactory.buildPipeline(ViewNames.POS);
    } catch (IOException | AnnotatorException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    TextAnnotation ta = null;
    try {
        // NOTE(review): the two "test" arguments look like corpus/text identifiers -- confirm
        // against AnnotatorService.createAnnotatedTextAnnotation.
        ta = as.createAnnotatedTextAnnotation("test", "test", input);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    // Print the constituent count; concatenating the list itself would dump every constituent
    // into a message that is worded as a count.
    System.out.println("found " + ta.getView(ViewNames.POS).getConstituents().size() + " POS constituents.");
}
Also used : AnnotatorService(edu.illinois.cs.cogcomp.annotation.AnnotatorService) FileNotFoundException(java.io.FileNotFoundException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) IOException(java.io.IOException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 97 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class NERAnnotatorTest method testTokenization.

/**
 * Tests that annotating tokenized text produces the expected number of NER constituents:
 * 2 for {@code TOKEN_TEST} and 3 for a longer sentence built inside the test.
 */
@Test
public void testTokenization() {
    TextAnnotation ta = tab.createTextAnnotation(TOKEN_TEST);
    View nerView = null;
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the failure
    // message report the right "expected" value.
    assertEquals(2, nerView.getConstituents().size());
    String tokTestB = "Grigory Pasko, crusading Russian journalist who documented Russian Navy's mishandling of " + "nuclear waste, is released on parole after serving two-thirds of his four-year prison sentence.";
    ta = tab.createTextAnnotation(tokTestB);
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertEquals(3, nerView.getNumberOfConstituents());
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Test(org.junit.Test)

Example 98 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class NERAnnotatorTest method evaluatePerformance.

/**
 * Checks that NER annotation runs in reasonable time. The average per-document runtime over a
 * fixed number of iterations is compared against the value returned by
 * {@code measureMachinePerformance()}, so the threshold is relative to the machine running the
 * test. Every constituent produced during the timed runs must also appear in {@code entities}.
 */
// @Test
public void evaluatePerformance() {
    final int iterations = 100;

    // Warm-up: trigger any lazy loading before the clock starts.
    TextAnnotation warmUp = tab.createTextAnnotation(TEST_INPUT);
    try {
        getView(warmUp);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    final long budget = this.measureMachinePerformance();
    logger.info("Expect " + budget);

    // One untimed run to confirm a view is actually produced.
    TextAnnotation probe = tab.createTextAnnotation(TEST_INPUT);
    View probeView = null;
    try {
        probeView = getView(probe);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(probeView != null);

    // Timed section.
    final long startedAt = System.currentTimeMillis();
    for (int round = 0; round < iterations; round++) {
        TextAnnotation timed = tab.createTextAnnotation(TEST_INPUT);
        View timedView = null;
        try {
            timedView = getView(timed);
        } catch (AnnotatorException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
        assertTrue(timedView != null);
        for (Constituent found : timedView.getConstituents()) {
            assertTrue("No entity named \"" + found.toString() + "\"", entities.contains(found.toString()));
        }
    }

    // Integer average of the elapsed milliseconds per iteration.
    final long averageMillis = (System.currentTimeMillis() - startedAt) / iterations;
    final double normalized = (double) averageMillis / (double) budget;
    System.out.printf("For text size = %d, average NER runtime = %d, normalized = %f", TEST_INPUT.length(), averageMillis, normalized);
    assertTrue(averageMillis <= budget);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 99 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class NerInitTest method testInit.

/**
 * Verifies that an NER annotator for {@code ViewNames.NER_CONLL} can be built from a
 * configuration in which {@code GAZETTEER_FEATURES} and {@code BROWN_CLUSTER_PATHS} are both set
 * to "0", and that annotating {@code TESTSTR} adds an NER_CONLL view with exactly 2 constituents.
 */
@Test
public void testInit() {
    Properties props = new Properties();
    // NOTE(review): "0" presumably switches these feature sources off -- confirm against
    // NerBaseConfigurator.
    props.setProperty(NerBaseConfigurator.GAZETTEER_FEATURES, "0");
    props.setProperty(NerBaseConfigurator.BROWN_CLUSTER_PATHS, "0");
    ResourceManager rm = (new NerBaseConfigurator()).getConfig(new ResourceManager(props));
    NERAnnotator ner = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
    assertNotNull(ner);
    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = tab.createTextAnnotation(TESTSTR);
    try {
        ner.getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // Use a JUnit failure rather than the `assert` keyword: `assert` is a no-op unless the JVM
    // is started with -ea, so the check could silently never run.
    if (!ta.hasView(ViewNames.NER_CONLL)) {
        fail("Expected view " + ViewNames.NER_CONLL + " to be present after annotation.");
    }
    // assertEquals takes (expected, actual).
    assertEquals(2, ta.getView(ViewNames.NER_CONLL).getConstituents().size());
}
Also used : NerBaseConfigurator(edu.illinois.cs.cogcomp.ner.config.NerBaseConfigurator) TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) Properties(java.util.Properties) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Test(org.junit.Test)

Example 100 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class PrepSRLDataReader method readData.

/**
 * Reads every {@code instance} element from the XML files of the selected corpus split and turns
 * each one into a {@link TextAnnotation}.
 *
 * <p>The data directory is {@code file} plus the train or test sub-directory, chosen by whether
 * {@code corpusName} equals "train". For the "test" corpus a matching {@code .key} file is loaded
 * into {@code keys} before the instances of each XML file are processed.
 *
 * @return the result of {@code consolidate(...)} over all annotations that were created, or
 *         {@code null} if an XML file cannot be parsed or a key file is not found
 */
@Override
public List<TextAnnotation> readData() {
    lazyReadMaps();
    List<TextAnnotation> textAnnotations = new ArrayList<>();
    String dataDir = file + File.separator;
    dataDir += (corpusName.equals("train") ? semevalTrainDataDirectory : semevalTestDataDirectory);
    for (String currentFile : getFiles(dataDir)) {
        NodeList instanceNodeList;
        try {
            // read the xml
            Document dom = XMLUtils.getXMLDOM(dataDir + File.separator + currentFile + ".xml");
            Element docElem = dom.getDocumentElement();
            instanceNodeList = docElem.getElementsByTagName("instance");
        } catch (Exception ex) {
            System.err.println("Unable to get the DOM" + ex);
            return null;
        }
        // read the key file
        if (corpusName.equals("test")) {
            String keyFileName;
            // The key file name is the part of the file name between the first '-' and the first '.'.
            int start = currentFile.indexOf('-') + 1;
            int end = currentFile.indexOf('.');
            keyFileName = currentFile.substring(start, end);
            try {
                keys = new Hashtable<>();
                LineIO.read(file + File.separator + semevalKeyDirectory + File.separator + keyFileName + ".key", new ITransformer<String, Void>() {

                    public Void transform(String input) {
                        // Each key line is space-separated; field 1 is stored as the key and
                        // field 2 as its value.
                        String[] parts = input.split(" ");
                        keys.put(parts[1], parts[2]);
                        return null;
                    }
                });
            } catch (FileNotFoundException e) {
                System.err.println("File " + semevalKeyDirectory + File.separator + keyFileName + ".key not found" + e);
                return null;
            }
        }
        // Start from the first instance of THIS file. Without the reset the index keeps the value
        // it reached on the previous file, so the leading instances of every later file would be
        // skipped.
        currentNodeId = 0;
        while (currentNodeId < instanceNodeList.getLength()) {
            TextAnnotation ta = makeNewTextAnnotation((Element) instanceNodeList.item(currentNodeId));
            if (ta == null) {
                // Skip instances that could not be converted, but leave a trace in the log.
                logger.error("{} returned null.", instanceNodeList.item(currentNodeId));
            } else {
                textAnnotations.add(ta);
            }
            currentNodeId++;
        }
    }
    return consolidate(textAnnotations);
}
Also used : NodeList(org.w3c.dom.NodeList) Element(org.w3c.dom.Element) FileNotFoundException(java.io.FileNotFoundException) Document(org.w3c.dom.Document) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) FileNotFoundException(java.io.FileNotFoundException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)218 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)95 Test (org.junit.Test)65 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)49 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)48 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)29 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)28 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)25 ArrayList (java.util.ArrayList)23 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)22 LinkedHashSet (java.util.LinkedHashSet)21 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)16 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)16 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)16 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)14 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)12 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)11