Use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in the cogcomp-nlp project by CogComp: the getFeatures method of the ParseLabelIdentifier class.
/**
 * Fires a single discrete feature (this extractor's configured label) when the
 * parse phrase covering the given constituent carries a valid label.
 *
 * @param c the constituent to extract features for
 * @return a set containing at most one discrete feature
 * @throws EdisonException if the parse phrase for {@code c} cannot be retrieved
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    // Navigate to the parse phrase for this constituent; any lookup failure
    // is surfaced to the caller as an EdisonException (cause preserved).
    TreeView parseView = (TreeView) c.getTextAnnotation().getView(parseViewName);
    String phraseLabel;
    try {
        phraseLabel = parseView.getParsePhrase(c).getLabel();
    } catch (Exception e) {
        throw new EdisonException(e);
    }

    Set<Feature> result = new LinkedHashSet<>();
    if (isLabelValid(phraseLabel)) {
        result.add(DiscreteFeature.create(label));
    }
    return result;
}
Use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in the cogcomp-nlp project by CogComp: the getFeatures method of the SyntacticFrame class.
/**
 * Builds three string encodings of the syntactic frame around the predicate that
 * governs this constituent: a specific form, a "general:"-prefixed form, and a
 * "lemma:"-prefixed form, each emitted as one discrete feature. Returns an empty
 * set when the constituent has no incoming relation (no governing predicate).
 *
 * @param c the argument constituent; its first incoming relation's source is
 *          taken as the predicate
 * @return up to three discrete features describing the syntactic frame
 * @throws EdisonException if the parse phrases for predicate or argument cannot
 *                         be retrieved
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> features = new LinkedHashSet<>();
    List<Relation> incomingRelations = c.getIncomingRelations();
    if (!incomingRelations.isEmpty()) {
        Constituent pred = incomingRelations.get(0).getSource();
        TextAnnotation ta = c.getTextAnnotation();
        TreeView parse = (TreeView) ta.getView(parseViewName);
        Constituent predicate, arg;
        try {
            predicate = parse.getParsePhrase(pred);
            arg = parse.getParsePhrase(c);
        } catch (Exception e) {
            throw new EdisonException(e);
        }
        Constituent vp = TreeView.getParent(predicate);
        // Three parallel frame encodings accumulated below.
        // NOTE(review): StringBuffer retained (not StringBuilder) because the
        // addToFeature helper's signature is declared elsewhere and presumably
        // takes StringBuffer — confirm before modernizing.
        StringBuffer sb1 = new StringBuffer();
        StringBuffer sb2 = new StringBuffer();
        StringBuffer sb3 = new StringBuffer();
        // go over VP's siblings before it
        if (!TreeView.isRoot(vp)) {
            Constituent vpParent = TreeView.getParent(vp);
            for (Relation sibling : vpParent.getOutgoingRelations()) {
                Constituent target = sibling.getTarget();
                // Intentional reference equality: stop at the VP node itself.
                if (target == vp)
                    break;
                addToFeature(target, arg, sb1, sb2, sb3);
            }
        }
        // Walk the VP's children; the predicate itself is encoded as "v",
        // with its lemma used in the lemma-based encoding.
        for (Relation child : vp.getOutgoingRelations()) {
            Constituent target = child.getTarget();
            if (target.getSpan().equals(predicate.getSpan())) {
                sb1.append("v-");
                sb2.append("v-");
                sb3.append(WordHelpers.getLemma(ta, target.getStartSpan())).append("-");
            } else {
                addToFeature(target, arg, sb1, sb2, sb3);
            }
        }
        features.add(DiscreteFeature.create(sb1.toString()));
        features.add(DiscreteFeature.create("general:" + sb2.toString()));
        features.add(DiscreteFeature.create("lemma:" + sb3.toString()));
    }
    return features;
}
Use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in the cogcomp-nlp project by CogComp: the getFeatures method of the MixedChunkWindowTwoBeforePOSWindowThreeBefore class.
/**
 * This feature extractor assumes that the TOKENS, POS and SHALLOW_PARSE views have
 * been generated for the constituent's TextAnnotation. It uses the constituent's own
 * POS tag as well as the POS tags and the SHALLOW_PARSE (chunk) labels of the previous
 * two tokens, and returns them combined as three discrete features.
 *
 * NOTE(review): the views are cached into instance fields (TOKENS, POS,
 * SHALLOW_PARSE), which makes this extractor stateful and presumably not
 * thread-safe — confirm it is only used single-threaded.
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
TextAnnotation ta = c.getTextAnnotation();
try {
// A missing view is only logged here; the code below will then dereference a
// null field — NOTE(review): consider rethrowing as EdisonException instead.
TOKENS = ta.getView(ViewNames.TOKENS);
POS = ta.getView(ViewNames.POS);
SHALLOW_PARSE = ta.getView(ViewNames.SHALLOW_PARSE);
} catch (Exception e) {
e.printStackTrace();
}
// We can assume that the constituent in this case is a Word(Token) described by the LBJ
// chunk definition
int startspan = c.getStartSpan();
int endspan = c.getEndSpan();
// All our constituents are words(tokens): fetch the two tokens immediately
// before this one (k = -2).
int k = -2;
List<Constituent> wordstwobefore = getwordskfrom(TOKENS, startspan, endspan, k);
// NOTE(review): returns null — not an empty set — when there is no full
// two-token window; callers must null-check. Confirm this contract before
// changing it to return an empty collection.
if (wordstwobefore.size() != 2)
return null;
String[] tags = new String[3]; // POS tags: two previous tokens + current token
String[] labels = new String[2]; // chunk labels of the two previous tokens
int i = 0;
for (Constituent token : wordstwobefore) {
// Should only be one POS tag and one chunk label for each token; a mismatch
// is only logged, and get(0) below will still throw if a list is empty.
List<String> POS_tag = POS.getLabelsCoveringSpan(token.getStartSpan(), token.getEndSpan());
List<String> Chunk_label = SHALLOW_PARSE.getLabelsCoveringSpan(token.getStartSpan(), token.getEndSpan());
if (POS_tag.size() != 1 || Chunk_label.size() != 1) {
logger.warn("Error token has more than one POS tag or Chunk Label.");
}
labels[i] = Chunk_label.get(0);
tags[i] = POS_tag.get(0);
i++;
}
// POS tag of the current token itself (third slot).
tags[i] = POS.getLabelsCoveringSpan(startspan, endspan).get(0);
Set<Feature> __result = new LinkedHashSet<Feature>();
String classifier = "MixedChunkWindowTwoBeforePOSWindowThreeBefore";
// Feature "ll": chunk labels of the two previous tokens.
String __id = classifier + ":" + "ll";
String __value = "(" + (labels[0] + "_" + labels[1]) + ")";
logger.info(__id + __value);
__result.add(new DiscreteFeature(__id + __value));
// Feature "lt1": first previous token's chunk label + second previous token's POS tag.
__id = classifier + ":" + "lt1";
__value = "(" + (labels[0] + "_" + tags[1]) + ")";
logger.info(__id + __value);
__result.add(new DiscreteFeature(__id + __value));
// Feature "lt2": second previous token's chunk label + current token's POS tag.
// NOTE(review): unlike "ll" and "lt1", this value is not parenthesized — confirm
// whether the asymmetry is intentional.
__id = classifier + ":" + "lt2";
__value = "" + (labels[1] + "_" + tags[2]);
logger.info(__id + __value);
__result.add(new DiscreteFeature(__id + __value));
return __result;
}
Use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in the cogcomp-nlp project by CogComp: the getFeatures method of the CurrencyIndicator class.
/**
 * Fires the CURRENCY feature when a currency mention inside the constituent is
 * preceded by a number word, or is followed within the constituent by a token the
 * number normalizer recognizes. Lazily loads the currency gazetteer and builds the
 * currency view on the TextAnnotation if absent.
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
try {
// NOTE(review): double-checked locking — only safe if the `loaded` field is
// declared volatile; the declaration is not visible here, confirm it.
if (!loaded)
synchronized (this) {
// now its changed to be loaded from datastore.
if (!loaded)
loadCurrency(gzip, true);
}
} catch (Exception ex) {
throw new EdisonException(ex);
}
TextAnnotation ta = c.getTextAnnotation();
// Build the currency view on demand; a failure is only printed, so the
// getView call below can still throw if the view was never added.
if (!ta.hasView(VIEW_NAME)) {
try {
addCurrencyView(ta);
} catch (Exception e) {
e.printStackTrace();
}
}
SpanLabelView view = (SpanLabelView) ta.getView(VIEW_NAME);
Set<Feature> features = new LinkedHashSet<>();
// Scan currency mentions fully contained in c.
for (Constituent cc : view.where(Queries.containedInConstituent(c))) {
if (cc.getEndSpan() == c.getEndSpan()) {
// NOTE(review): since cc is contained in c, cc.getStartSpan() - 1 can
// never exceed c.getEndSpan(), so this branch looks unreachable — was
// `> c.getStartSpan()` intended? Verify against the original design.
if (cc.getStartSpan() - 1 > c.getEndSpan()) {
// check if the token immediately before the mention is a number word
if (WordLists.NUMBERS.contains(ta.getToken(cc.getStartSpan() - 1).toLowerCase())) {
features.add(CURRENCY);
break;
}
}
} else if (WordFeatureExtractorFactory.numberNormalizer.getWordFeatures(ta, cc.getEndSpan()).size() > 0) {
features.add(CURRENCY);
break;
}
}
return features;
}
Use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in the cogcomp-nlp project by CogComp: the test method of the TestBrownClusterFeatureExtractor class.
/**
 * Exercises three BrownClusterFeatureExtractor instances (default 1000-cluster,
 * bllip, and wiki resources) over every token of a small sentence, then checks
 * that the wiki brown-cluster view was added and that the sorted, comma-joined
 * feature strings match the expected gold output.
 */
@Test
public final void test() {
    // Prefix lengths at which brown-cluster bit strings are truncated.
    final int[] prefixLengths = { 4, 6, 10, 20 };

    BrownClusterFeatureExtractor bcfex1 = BrownClusterFeatureExtractor.instance1000;
    BrownClusterFeatureExtractor bcfex2 = null;
    BrownClusterFeatureExtractor bcfex3 = null;
    try {
        bcfex2 = new BrownClusterFeatureExtractor("bllip", "brownBllipClusters", prefixLengths);
        bcfex3 = new BrownClusterFeatureExtractor("wiki", "brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt", prefixLengths);
    } catch (EdisonException e) {
        // A missing resource aborts the test immediately.
        e.printStackTrace();
        fail(e.getMessage());
    }

    TokenizerTextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = builder.createTextAnnotation("test", "test", "This test sentence has Joynt and Lieberknecht and Fibonnaci in it " + "just to exercise possible brown cluster hits in resources used by NER.");

    // Pool the features produced by all three extractors over every token.
    Set<Feature> pooled = new HashSet<>();
    for (int tokenIndex = 0; tokenIndex < ta.size(); ++tokenIndex) {
        try {
            pooled.addAll(bcfex1.getWordFeatures(ta, tokenIndex));
            pooled.addAll(bcfex2.getWordFeatures(ta, tokenIndex));
            pooled.addAll(bcfex3.getWordFeatures(ta, tokenIndex));
        } catch (EdisonException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
    }
    assertTrue(ta.hasView(ViewNames.BROWN_CLUSTERS + "_wiki"));

    // Render, sort, and join the features deterministically for comparison.
    String[] rendered = new String[pooled.size()];
    int next = 0;
    for (Feature feature : pooled) {
        rendered[next] = feature.toString();
        next++;
    }
    Arrays.sort(rendered);
    String actualOutput = StringUtils.join(",", rendered);
    assertEquals(expectedOutput, actualOutput);
}
Aggregations