Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class ClauseViewGeneratorTest, method testClauseViewGenerator.
// public void setUp() throws Exception {
// super.setUp();
// }
/**
 * Attaches a gold parse tree to a single pre-tokenized sentence, asks a
 * {@link ClauseViewGenerator} to add its "clauses" view on top of that parse, and logs the
 * resulting view. The test fails if the generator raises an {@link AnnotatorException}.
 */
@Test
public final void testClauseViewGenerator() {
    final String sentence =
            "Freeport-McMoRan Inc. said it will convert its Freeport-McMoRan Energy Partners Ltd. "
                    + "partnership into a publicly traded company through the exchange of units of the partnership "
                    + "for common shares .";

    // Bracketed gold parse for the sentence above; the pieces are concatenated verbatim.
    final String bracketedParse =
            "(S1 (S (NP-SBJ (NNP Freeport-McMoRan) (NNP Inc.)) (VP (VBD said)"
                    + " (SBAR (-NONE- 0) (S (NP-SBJ (PRP it)) "
                    + "(VP (MD will) (VP (VB convert) "
                    + "(NP (PRP$ its) (NNP Freeport-McMoRan) "
                    + " (NNP Energy) (NNPS Partners)"
                    + " (NNP Ltd.) (NN partnership)) "
                    + " (PP-CLR (IN into) (NP (DT a)"
                    + " (ADJP (RB publicly)"
                    + " (VBN traded))"
                    + " (NN company))) "
                    + " (PP-MNR (IN through) "
                    + " (NP (NP (DT the) "
                    + " (NN exchange)) "
                    + " (PP (IN of) "
                    + " (NP (NP (NNS units)) "
                    + " (PP (IN of) "
                    + " (NP (DT the) "
                    + " (NN partnership))))) "
                    + " (PP (IN for) (NP (JJ common) "
                    + " (NNS shares)))))))))) (. .)))";

    TextAnnotation annotation = TextAnnotationUtilities.createFromTokenizedString(sentence);
    Tree<String> goldTree = TreeParserFactory.getStringTreeParser().parse(bracketedParse);

    // Register the tree as the gold parse of sentence 0.
    TreeView goldParseView = new TreeView("", annotation);
    goldParseView.setParseTree(0, goldTree);
    annotation.addView(ViewNames.PARSE_GOLD, goldParseView);

    ClauseViewGenerator clauseGenerator =
            new ClauseViewGenerator(ViewNames.PARSE_GOLD, "clauses");
    try {
        annotation.addView(clauseGenerator);
    } catch (AnnotatorException annotatorFailure) {
        fail(annotatorFailure.getMessage());
    }

    logger.info(annotation.getView("clauses").toString());
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class SimpleGazetteerAnnotatorTest, method testMultiThreading.
/**
 * Test method for
 * {@link edu.illinois.cs.cogcomp.edison.annotators.SimpleGazetteerAnnotator#addView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)}
 * when a single annotator instance is shared by many threads.
 * <p>
 * Twenty worker threads repeatedly annotate the same sentence with one shared annotator and check
 * the five expected gazetteer matches (surface form and label) for about ten seconds each. Any
 * failure raised inside a worker - checked, unchecked, or an assertion error - is recorded and
 * re-raised on the test thread once that worker has been joined, so a worker can never die
 * silently and leave the test green.
 *
 * @throws URISyntaxException declared for the annotator construction on the test thread
 * @throws IOException declared for the annotator construction on the test thread
 * @throws AnnotatorException declared for the annotator construction on the test thread
 */
@Test
public void testMultiThreading() throws IOException, URISyntaxException, AnnotatorException {
    final SimpleGazetteerAnnotator sga = new SimpleGazetteerAnnotator(defaultRm);
    // Each worker keeps annotating until this many milliseconds have elapsed (ten seconds).
    final long runMillis = 10000L;

    class TestThread extends Thread {
        /**
         * First failure seen by this worker, or null if it finished cleanly. Read by the test
         * thread only after join(), which makes the write visible.
         */
        Throwable throwable;

        @Override
        public void run() {
            final long start = System.currentTimeMillis();
            try {
                do {
                    final TextAnnotation ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
                    sga.addView(ta);
                    SpanLabelView view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
                    List<Constituent> entities = view.getConstituents();

                    Constituent c1 = entities.get(0);
                    assertEquals("university of illinois", c1.toString());
                    Constituent c2 = entities.get(1);
                    assertEquals("university of illinois at champaign urbana", c2.toString());
                    Constituent c3 = entities.get(2);
                    assertEquals("illinois", c3.toString());
                    Constituent c4 = entities.get(3);
                    assertEquals("champaign", c4.toString());
                    Constituent c5 = entities.get(4);
                    assertEquals("urbana", c5.toString());

                    assertEquals("organizations(IC)", c1.getLabel());
                    assertEquals("organizations(IC)", c2.getLabel());
                    assertEquals("places(IC)", c3.getLabel());
                    assertEquals("places(IC)", c4.getLabel());
                    assertEquals("places(IC)", c5.getLabel());
                } while ((System.currentTimeMillis() - start) <= runMillis);
            } catch (Throwable failure) {
                // Catch everything: an uncaught RuntimeException (for example an index error when
                // fewer entities than expected are found) would otherwise end the thread with
                // throwable still null and the test would pass incorrectly.
                throwable = failure;
            }
        }
    }

    final int numthreads = 20;
    TestThread[] threads = new TestThread[numthreads];
    for (int i = 0; i < numthreads; i++) {
        threads[i] = new TestThread();
        threads[i].start();
    }
    logger.info("Begin multithreaded test.");

    boolean interrupted = false;
    try {
        for (TestThread worker : threads) {
            // Keep waiting for this worker even if the test thread is interrupted; the
            // interrupt is remembered and restored once all waiting is done.
            while (true) {
                try {
                    worker.join();
                    break;
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            }
            if (worker.throwable != null) {
                // Attach the worker's failure as the cause so its stack trace is reported.
                throw new AssertionError(
                        "Exception during multithreading test : " + worker.throwable,
                        worker.throwable);
            }
        }
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class POSBaseLineCounter, method buildTableHelper.
/**
 * Reads one source corpus file and feeds every (token, POS label) pair it contains to
 * {@code count}, so the table ends up holding how often each form-POS association appears in
 * that file.
 *
 * @param fileName file name of the source corpus
 **/
private void buildTableHelper(String fileName) {
    PennTreebankPOSReader corpusReader = new PennTreebankPOSReader(this.corpusName);
    corpusReader.readFile(fileName);

    for (TextAnnotation sentence : corpusReader.getTextAnnotations()) {
        int tokenCount = sentence.size();
        for (int position = 0; position < tokenCount; position++) {
            String form = sentence.getToken(position);
            // The POS view is looked up per token, exactly as many times as there are tokens.
            SpanLabelView posView = (SpanLabelView) sentence.getView(ViewNames.POS);
            String tag = posView.getLabel(position);
            count(form, tag);
        }
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class POSMikheevCounter, method buildTableHelper.
/**
 * A table is built from a given source corpus file by counting the number of times that each
 * suffix-POS association appears in that corpus.
 * <p>
 * Only tokens of at least five characters whose last three characters are all letters are
 * counted. Capitalized tokens go to {@code firstCapitalized} when they start their sentence and
 * to {@code notFirstCapitalized} otherwise; non-capitalized tokens go to {@code table}, except
 * that non-capitalized tokens containing a hyphen are skipped. For each counted token the
 * lower-cased three-letter suffix is recorded, and also the four-letter suffix when the token
 * has at least six characters and the fourth character from the end is a letter.
 *
 * @param fileName file name of the source corpus
 * @throws Exception declared by the signature; NOTE(review): presumably propagated from the
 *         three-argument {@code count} - confirm against its declaration
 **/
private void buildTableHelper(String fileName) throws Exception {
    PennTreebankPOSReader reader = new PennTreebankPOSReader(this.corpusName);
    reader.readFile(fileName);
    List<TextAnnotation> tas = reader.getTextAnnotations();
    for (TextAnnotation ta : tas) {
        for (int tokenId = 0; tokenId < ta.size(); tokenId++) {
            String form = ta.getToken(tokenId);
            String tag = ((SpanLabelView) ta.getView(ViewNames.POS)).getLabel(tokenId);
            if (form.length() < 5) {
                continue;
            }

            // The three-character suffix must consist of letters only.
            boolean allLetters = true;
            for (int i = form.length() - 3; i < form.length() && allLetters; ++i) {
                allLetters = Character.isLetter(form.charAt(i));
            }
            if (!allLetters) {
                continue;
            }

            HashMap<String, TreeMap<String, Integer>> t;
            if (WordHelpers.isCapitalized(ta, tokenId)) {
                int headOfSentence = ta.getSentence(ta.getSentenceId(tokenId)).getStartSpan();
                if (tokenId == headOfSentence) {
                    t = firstCapitalized;
                } else {
                    t = notFirstCapitalized;
                }
            } else {
                if (form.contains("-")) {
                    // Skip just this token. Returning here would abandon every remaining
                    // token and sentence of the corpus at the first hyphenated word.
                    continue;
                }
                t = table;
            }

            form = form.toLowerCase();
            count(t, form.substring(form.length() - 3), tag);
            if (form.length() >= 6 && Character.isLetter(form.charAt(form.length() - 4))) {
                count(t, form.substring(form.length() - 4), tag);
            }
        }
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class TestWordFeatureFactory, method testBrownFeatures.
/**
 * Runs the shared {@code runTest} check over every text annotation in {@code tas} using a Brown
 * feature generator obtained from {@link WordFeatureExtractorFactory}.
 *
 * @throws EdisonException if building the generator or running a check raises it
 */
@Test
public final void testBrownFeatures() throws EdisonException {
    logger.info("\tTesting Brown cluster features");

    // Integer arguments handed to the factory unchanged.
    final int[] brownParams = {4, 5};
    final WordFeatureExtractor brownExtractor =
            WordFeatureExtractorFactory.getBrownFeatureGenerator("", "brownBllipClusters", brownParams);

    for (TextAnnotation annotation : tas) {
        runTest(annotation, brownExtractor);
    }
}
Aggregations