use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class FeatureManifestTest method testCreateFex.
/**
 * Builds a feature extractor from the manifest in {@code file} (compressed names,
 * {@code *default-parser*} bound to the Stanford parse view) and checks the name of
 * the first feature it yields for the first token of the first test annotation.
 */
@Test
public void testCreateFex() throws Exception {
    // Configure the manifest before asking it for an extractor.
    FeatureManifest manifest = new FeatureManifest(file);
    manifest.useCompressedName();
    manifest.setVariable("*default-parser*", ViewNames.PARSE_STANFORD);
    FeatureExtractor extractor = manifest.createFex();

    // The constituent under test is the first token of the first annotation.
    Constituent firstToken = tas.get(0).getView(ViewNames.TOKENS).getConstituents().get(0);
    assertEquals("My", firstToken.getSurfaceForm());

    // Only the first feature handed back by the set's iterator is inspected.
    Feature firstFeature = extractor.getFeatures(firstToken).iterator().next();
    assertEquals("f:#ctxt#:context1::#wd:mother-in-law", firstFeature.getName());
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class CreateTestFeaturesResource method addFeatCollection.
/**
 * Runs a three-extractor {@link FeatureCollection} (conflated POS, gerund marker,
 * nominalization marker) over every token of every annotation in {@code tas} and
 * writes the non-empty results to {@code FEATURE_COLLECTION_FILE}.
 *
 * <p>Each entry maps the hash code of {@code "<tokenized text>:<token index>"} to the
 * string form of the extracted feature set.
 * NOTE(review): because the key is a {@code String.hashCode()}, two distinct ids that
 * hash to the same value would overwrite each other -- confirm this is acceptable.
 *
 * @throws EdisonException declared by the method; presumably raised by feature extraction
 * @throws IOException declared by the method; presumably raised while writing the map
 */
private void addFeatCollection() throws EdisonException, IOException {
    FeatureCollection extractors = new FeatureCollection("features");
    extractors.addFeatureExtractor(WordFeatureExtractorFactory.conflatedPOS);
    extractors.addFeatureExtractor(WordFeatureExtractorFactory.gerundMarker);
    extractors.addFeatureExtractor(WordFeatureExtractorFactory.nominalizationMarker);

    Map<Integer, String> featuresByIdHash = new HashMap<>();
    for (TextAnnotation ta : tas) {
        for (int index = 0; index < ta.size(); index++) {
            // A single-token constituent covering [index, index + 1).
            Constituent token = new Constituent("", "", ta, index, index + 1);
            Set<Feature> extracted = extractors.getFeatures(token);
            if (extracted.isEmpty()) {
                continue;
            }
            String key = ta.getTokenizedText() + ":" + index;
            featuresByIdHash.put(key.hashCode(), extracted.toString());
        }
    }
    IOUtils.writeObject(featuresByIdHash, FEATURE_COLLECTION_FILE);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class Main method produceBracketedAnnotations.
/**
 * Render a string representing the original data with embedded labels in the text.
 * Each constituent of the NER view is emitted as
 * {@code [LABEL tokenized-surface-form ] } and the raw text between constituents is
 * copied through unchanged.
 *
 * <p>Constituents are processed in the order given by
 * {@code TextAnnotationUtilities.constituentStartComparator}. If a constituent starts
 * before the end of one already emitted (overlapping or nested spans), no gap text is
 * copied for it and the text cursor is never moved backwards, so the method does not
 * fail with a {@link StringIndexOutOfBoundsException} on such input.
 *
 * @param nerView the NER label view.
 * @param ta the text annotation.
 * @return the original text marked up with the annotations.
 */
private String produceBracketedAnnotations(View nerView, TextAnnotation ta) {
    StringBuilder sb = new StringBuilder();
    List<Constituent> constituents = new ArrayList<>(nerView.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    String text = ta.getText();
    // Character offset in the raw text up to which output has already been produced.
    int where = 0;
    for (Constituent c : constituents) {
        // Append everything between the previous constituent and this one. When this
        // constituent begins inside an already-emitted span there is no gap to copy.
        int start = c.getStartCharOffset();
        if (start > where) {
            sb.append(text, where, start);
        }
        // Append the bracketed label.
        sb.append('[');
        sb.append(c.getLabel());
        sb.append(' ');
        sb.append(c.getTokenizedSurfaceForm());
        sb.append(" ] ");
        // Never move the cursor backwards, otherwise text already covered by an
        // enclosing constituent would be emitted a second time.
        where = Math.max(where, c.getEndCharOffset());
    }
    // Copy whatever raw text follows the last constituent.
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class Main method produceCoNLL2002Annotations.
/**
 * Render the annotation in a column format: one line per token holding the token's
 * surface form, a space, and a tag, with an extra blank line emitted once a token
 * reaches the end offset of the current sentence.
 *
 * <p>The tag is {@code B-<label>} when the token starts exactly where the current
 * entity starts, {@code I-<label>} when it starts after the entity's start and ends no
 * later than the entity's end, and {@code O} otherwise.
 *
 * @param nerView the NER label view.
 * @param ta the text annotation.
 * @return the token-per-line rendering described above.
 */
private String produceCoNLL2002Annotations(View nerView, TextAnnotation ta) {
    // Tokens, sentences and entities, each sorted with the start/end comparator.
    List<Constituent> tokens = new ArrayList<>(ta.getView(ViewNames.TOKENS).getConstituents());
    Collections.sort(tokens, TextAnnotationUtilities.constituentStartEndComparator);
    List<Constituent> sentences = new ArrayList<>(ta.getView(ViewNames.SENTENCE).getConstituents());
    Collections.sort(sentences, TextAnnotationUtilities.constituentStartEndComparator);
    List<Constituent> entities = new ArrayList<>(nerView.getConstituents());
    Collections.sort(entities, TextAnnotationUtilities.constituentStartEndComparator);

    StringBuilder out = new StringBuilder();
    int entityCursor = 0;
    int sentenceCursor = 0;
    int sentenceEnd = sentences.get(sentenceCursor).getEndCharOffset();

    for (Constituent token : tokens) {
        // Skip entities that this token neither precedes/starts with nor ends within.
        while (entityCursor < entities.size()) {
            Constituent candidate = entities.get(entityCursor);
            if (token.getStartCharOffset() <= candidate.getStartCharOffset()
                    || token.getEndCharOffset() <= candidate.getEndCharOffset()) {
                break;
            }
            entityCursor++;
        }

        out.append(token.getSurfaceForm()).append(' ');

        if (entityCursor >= entities.size()) {
            // No entities left to match against.
            out.append('O');
        } else {
            Constituent entity = entities.get(entityCursor);
            if (token.getStartCharOffset() == entity.getStartCharOffset()) {
                // Token opens the entity, whatever the relation of the end offsets.
                out.append("B-").append(entity.getLabel());
                if (token.getEndCharOffset() > entity.getEndCharOffset()) {
                    System.err.println("Odd. There is an entity enclosed within a single token!");
                }
            } else if (token.getStartCharOffset() > entity.getStartCharOffset()
                    && token.getEndCharOffset() <= entity.getEndCharOffset()) {
                // Token lies inside the entity, after its first token.
                out.append("I-").append(entity.getLabel());
            } else {
                out.append('O');
            }
        }
        out.append('\n');

        // Sentence boundary: emit a blank line and move to the next sentence, staying
        // on the last one when there is no next.
        if (token.getEndCharOffset() >= sentenceEnd) {
            out.append('\n');
            sentenceCursor = Math.min(sentenceCursor + 1, sentences.size() - 1);
            sentenceEnd = sentences.get(sentenceCursor).getEndCharOffset();
        }
    }
    return out.toString();
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class LemmatizerTATest method testCreateTextAnnotationLemmaView.
/**
 * Creates a lemma view for {@code inputTa} and checks the lemma labels at five fixed
 * constituent positions (0, 1, 2, 6 and 15). The test fails if view creation throws
 * or returns {@code null}.
 */
@Test
public void testCreateTextAnnotationLemmaView() {
    View lemmaView = null;
    TextAnnotation ta = inputTa;
    try {
        lemmaView = lem.createLemmaView(ta);
    } catch (IOException e) {
        // fail() only carries the message, so the stack trace is printed as well.
        e.printStackTrace();
        fail(e.getMessage());
    }
    boolean isTested = false;
    if (null != lemmaView) {
        List<Constituent> spans = lemmaView.getConstituents();
        printConstituents(System.out, spans);
        // NOTE(review): the expected lemmas come from the assertions below; the surface
        // tokens at these positions are not visible here -- confirm against inputTa.
        String lemmaAt0 = spans.get(0).getLabel();
        String lemmaAt1 = spans.get(1).getLabel();
        String lemmaAt2 = spans.get(2).getLabel();
        String lemmaAt6 = spans.get(6).getLabel();
        String lemmaAt15 = spans.get(15).getLabel();
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes
        // failure messages report the two values the right way round.
        assertEquals("the", lemmaAt0);
        assertEquals("cia", lemmaAt1);
        assertEquals("think", lemmaAt2);
        assertEquals("have", lemmaAt6);
        assertEquals("be", lemmaAt15);
        isTested = true;
    }
    // Guards against the assertions being skipped because no view was produced.
    assertTrue(isTested);
}
Aggregations