use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class ColumnFormatReader method outputToColumnFormatFile.
/**
 * Writes one view of a {@link TextAnnotation} to a tab-separated column-format file.
 *
 * <p>One line is produced per constituent of the {@code ViewNames.TOKENS} view, with nine
 * tab-separated columns: the BIO tag, {@code 0}, the token's start span, {@code O}, {@code O},
 * the token's string form, {@code x}, {@code x} and {@code 0}. The tag is {@code O} when no
 * constituent of {@code viewName} covers the token; otherwise it is {@code B-<label>} if the
 * first covering constituent starts at this token and {@code I-<label>} if it started earlier.
 *
 * <p>Any exception raised while opening or writing the file is printed to standard error and
 * not propagated (best-effort write).
 *
 * @param ta the annotation whose tokens and labelled constituents are written
 * @param viewName name of the view that supplies the constituent labels
 * @param outputFilePath path of the file to create or overwrite
 */
public static void outputToColumnFormatFile(TextAnnotation ta, String viewName, String outputFilePath) {
    List<Integer> sentenceEnds = new ArrayList<Integer>();
    for (int i = 0; i < ta.getNumberOfSentences(); i++) {
        sentenceEnds.add(ta.getSentence(i).getEndSpan());
    }

    // A StringBuilder avoids re-copying the whole output for every token.
    StringBuilder outputContent = new StringBuilder();
    View tokenView = ta.getView(ViewNames.TOKENS);
    View constituentView = ta.getView(viewName);
    for (Constituent token : tokenView) {
        String consTag = "O";
        List<Constituent> covering = constituentView.getConstituentsCoveringToken(token.getStartSpan());
        if (!covering.isEmpty()) {
            // Only the first covering constituent determines the tag.
            Constituent hit = covering.get(0);
            String prefix = (hit.getStartSpan() == token.getStartSpan()) ? "B-" : "I-";
            consTag = prefix + hit.getLabel();
        }
        outputContent.append(consTag)
                .append("\t0\t")
                .append(token.getStartSpan())
                .append("\tO\tO\t")
                .append(token.toString())
                .append("\tx\tx\t0\n");
        // NOTE(review): the blank separator line is keyed on the token's START span matching a
        // sentence END span. If end spans are exclusive this fires one token into the next
        // sentence -- confirm whether token.getEndSpan() was intended. Behavior kept as-is.
        if (sentenceEnds.contains(token.getStartSpan())) {
            outputContent.append("\n");
        }
    }

    // try-with-resources closes the stream even when write() throws.
    // NOTE(review): getBytes() uses the platform default charset -- confirm that readers of this
    // file expect the same encoding.
    try (FileOutputStream out = new FileOutputStream(outputFilePath)) {
        out.write(outputContent.toString().getBytes());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class ExtentTester method getFullMention.
/**
 * Expands a mention head into its full extent by asking the extent classifier, token by token,
 * whether each neighbouring token belongs to the mention.
 *
 * <p>The head is first decorated via {@code addHeadAttributes}. The scan then walks leftwards
 * from the token just before the head, and rightwards from the token just after it. Each
 * candidate token is decorated via {@code addExtentAttributes}, paired with the head in an
 * {@code "UNKNOWN"} {@link Relation}, and classified; a direction stops at the first token the
 * classifier labels {@code "false"} or at the edge of the token view.
 *
 * @param classifier The extent classifier
 * @param head The head Constituent
 * @param gazetteers gazetteers passed through to the attribute helpers
 * @param brownClusters brown clusters passed through to the attribute helpers
 * @param wordnet wordnet manager passed through to the attribute helpers
 * @return a new Constituent in {@code ViewNames.MENTION} carrying the head's label and the
 *         attributes "EntityHeadStartSpan", "EntityHeadEndSpan", "EntityType" and
 *         "EntityMentionType" (the last two copied from the head)
 */
public static Constituent getFullMention(extent_classifier classifier, Constituent head, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    addHeadAttributes(head, gazetteers, brownClusters, wordnet);
    View tokens = head.getTextAnnotation().getView(ViewNames.TOKENS);

    // Grow the extent to the left; extentStart is always the leftmost accepted token.
    int extentStart = head.getStartSpan();
    for (int idx = extentStart - 1; idx >= tokens.getStartSpan(); idx--) {
        Constituent neighbour = tokens.getConstituentsCoveringToken(idx).get(0);
        addExtentAttributes(neighbour, gazetteers, brownClusters, wordnet);
        Relation pair = new Relation("UNKNOWN", neighbour, head, 1.0f);
        String verdict = classifier.discreteValue(pair);
        if (verdict.equals("false")) {
            break;
        }
        extentStart = idx;
    }
    // Never start before the first token of the view.
    extentStart = Math.max(extentStart, tokens.getStartSpan());

    // Grow the extent to the right; extentEnd is exclusive (one past the last accepted token).
    int extentEnd = head.getEndSpan();
    for (int idx = extentEnd; idx < tokens.getEndSpan(); idx++) {
        Constituent neighbour = tokens.getConstituentsCoveringToken(idx).get(0);
        addExtentAttributes(neighbour, gazetteers, brownClusters, wordnet);
        Relation pair = new Relation("UNKNOWN", neighbour, head, 1.0f);
        String verdict = classifier.discreteValue(pair);
        if (verdict.equals("false")) {
            break;
        }
        extentEnd = idx + 1;
    }
    // Never run past the end of the view.
    extentEnd = Math.min(extentEnd, tokens.getEndSpan());

    Constituent mention = new Constituent(head.getLabel(), 1.0f, ViewNames.MENTION, head.getTextAnnotation(), extentStart, extentEnd);
    mention.addAttribute("EntityHeadStartSpan", Integer.toString(head.getStartSpan()));
    mention.addAttribute("EntityHeadEndSpan", Integer.toString(head.getEndSpan()));
    mention.addAttribute("EntityType", head.getAttribute("EntityType"));
    mention.addAttribute("EntityMentionType", head.getAttribute("EntityMentionType"));
    return mention;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class LemmatizerTATest method testCreateTextAnnotationLemmaView.
/**
 * Verifies that the lemmatizer produces a lemma view for the shared input annotation and that
 * the lemmas at a handful of fixed token positions have the expected values.
 */
@Test
public void testCreateTextAnnotationLemmaView() {
    View lemmaView = null;
    TextAnnotation ta = inputTa;
    try {
        lemmaView = lem.createLemmaView(ta);
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // A missing view must fail the test rather than silently skip the assertions.
    if (null == lemmaView) {
        fail("createLemmaView returned null");
    }

    List<Constituent> spans = lemmaView.getConstituents();
    printConstituents(System.out, spans);

    // Lemma labels at the token positions under test.
    String lemma0 = spans.get(0).getLabel();
    String lemma1 = spans.get(1).getLabel();
    String lemma2 = spans.get(2).getLabel();
    String lemma6 = spans.get(6).getLabel();
    String lemma15 = spans.get(15).getLabel();

    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals("the", lemma0);
    assertEquals("cia", lemma1);
    assertEquals("think", lemma2);
    assertEquals("have", lemma6);
    assertEquals("be", lemma15);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class IllinoisLemmatizer method createLemmaView.
/**
 * Builds a {@code ViewNames.LEMMA} view for the given TextAnnotation, with one single-token
 * constituent per token labelled by {@code getLemma}, attaches the view to the annotation and
 * returns it.
 *
 * @param inputTa the annotation to lemmatize; the new view is added to it
 * @return the lemma view that was added to {@code inputTa}
 * @throws IOException declared by this method's signature
 */
public View createLemmaView(TextAnnotation inputTa) throws IOException {
    final int tokenCount = inputTa.getTokens().length;
    final TokenLabelView view = new TokenLabelView(ViewNames.LEMMA, NAME, inputTa, 1.0);

    int position = 0;
    while (position < tokenCount) {
        // Each lemma spans exactly the token it was derived from.
        view.addConstituent(
                new Constituent(getLemma(inputTa, position), ViewNames.LEMMA, inputTa, position, position + 1));
        position++;
    }

    inputTa.addView(ViewNames.LEMMA, view);
    return view;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class Contains method getFeatures.
/**
 * Emits exactly one feature: {@code Y} when some constituent of the {@code viewName} view that
 * covers {@code instance} has a tokenized surface form or a label found in {@code contained},
 * and {@code N} otherwise (including when nothing covers the instance).
 *
 * @param instance the constituent to test
 * @return a one-element set holding either {@code Y} or {@code N}
 * @throws EdisonException declared by the overridden signature
 */
@Override
public Set<Feature> getFeatures(Constituent instance) throws EdisonException {
    Set<Feature> result = new LinkedHashSet<Feature>();
    View targetView = instance.getTextAnnotation().getView(viewName);
    List<Constituent> covering = targetView.getConstituentsCovering(instance);

    // Default to N; it is upgraded to Y on the first matching covering constituent.
    Feature answer = N;
    for (Constituent candidate : covering) {
        boolean surfaceMatches = contained.contains(candidate.getTokenizedSurfaceForm());
        if (surfaceMatches || contained.contains(candidate.getLabel())) {
            answer = Y;
            break;
        }
    }

    result.add(answer);
    return result;
}
Aggregations