use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class LabeledDepFeatureGenerator method PrefixConj.
/**
 * Builds lemma-prefix conjunction features for the arc {@code head -> dep}.
 *
 * <p>Each lemma is cut down to at most its first five characters. Three features are
 * produced, all ending in the arc direction flag ({@code head < dep}) and the relation label:
 * head prefix with dependent POS, head POS with dependent prefix, and both prefixes.
 *
 * @param head index of the head token in {@code sent}
 * @param dep index of the dependent token in {@code sent}
 * @param sent instance supplying the {@code strLemmas} and {@code strPos} arrays
 * @param deprel dependency relation label appended to every feature
 * @return the set of generated features
 */
private Set<Feature> PrefixConj(int head, int dep, DepInst sent, String deprel) {
    final int prefixLimit = 5;
    final String tag = "Prefix: ";

    // Lemmas are read before POS tags, head before dependent.
    String headLemma = sent.strLemmas[head];
    String headPrefix = headLemma.substring(0, Math.min(headLemma.length(), prefixLimit)) + " ";
    String depLemma = sent.strLemmas[dep];
    String depPrefix = depLemma.substring(0, Math.min(depLemma.length(), prefixLimit)) + " ";

    String headPos = sent.strPos[head] + " ";
    String depPos = sent.strPos[dep] + " ";

    // Every feature shares the same trailing "direction + relation" part.
    String suffix = "Arc-dir: " + (head < dep) + " " + deprel;

    Set<Feature> out = new HashSet<>();
    out.add(new DiscreteFeature(tag + headPrefix + depPos + suffix));
    out.add(new DiscreteFeature(tag + headPos + depPrefix + suffix));
    out.add(new DiscreteFeature(tag + headPrefix + depPrefix + suffix));
    return out;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class LabeledDepFeatureGenerator method ChunkConj.
/**
 * Builds chunk-label and POS conjunction features for the arc {@code head -> dep}.
 *
 * <p>Four features are produced: the chunk pair alone and the chunk+POS pair, each combined
 * once with the arc direction flag ({@code head < dep}) and once with the signed arc length
 * ({@code head - dep}). Every feature ends with the relation label.
 *
 * @param head index of the head token in {@code sent}
 * @param dep index of the dependent token in {@code sent}
 * @param sent instance supplying the {@code strChunk} and {@code strPos} arrays
 * @param deprel dependency relation label appended to every feature
 * @return the set of generated features
 */
private Set<Feature> ChunkConj(int head, int dep, DepInst sent, String deprel) {
    final String tag = "POSChunk: ";

    String headChunk = sent.strChunk[head] + " ";
    String headPos = sent.strPos[head] + " ";
    String depChunk = sent.strChunk[dep] + " ";
    String depPos = sent.strPos[dep] + " ";

    // The two alternative suffixes. The "Arc-length " label (no colon) is kept exactly as-is:
    // altering it would change the emitted feature strings.
    String dirSuffix = "Arc-dir: " + (head < dep) + " " + deprel;
    String lenSuffix = "Arc-length " + (head - dep) + " " + deprel;

    String chunkPair = tag + headChunk + depChunk;
    String chunkPosPair = tag + headChunk + headPos + depChunk + depPos;

    Set<Feature> out = new HashSet<>();
    out.add(new DiscreteFeature(chunkPair + dirSuffix));
    out.add(new DiscreteFeature(chunkPair + lenSuffix));
    out.add(new DiscreteFeature(chunkPosPair + dirSuffix));
    out.add(new DiscreteFeature(chunkPosPair + lenSuffix));
    return out;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class LabeledDepFeatureGenerator method POSwinConj.
/**
 * Builds POS-window conjunction features for the arc {@code head -> dep}.
 *
 * <p>For both tokens the POS tag itself and the tags of its immediate left and right
 * neighbours are used; a neighbour that falls before index 0 or past the end of
 * {@code sent.strPos} is rendered as the literal text {@code null}. Three features are
 * produced: center tags with arc direction, center tags with signed arc length
 * ({@code head - dep}), and the full two-sided windows with arc direction.
 *
 * @param head index of the head token in {@code sent}
 * @param dep index of the dependent token in {@code sent}
 * @param sent instance supplying the {@code strPos} array
 * @param deprel dependency relation label appended to every feature
 * @return the set of generated features
 */
private Set<Feature> POSwinConj(int head, int dep, DepInst sent, String deprel) {
    final String tag = "POSwin: ";
    final String missing = "null";

    // Window around the head token.
    String leftOfHead = missing;
    if (head > 0) {
        leftOfHead = sent.strPos[head - 1];
    }
    String headMid = "Center: " + sent.strPos[head] + " ";
    String rightOfHead = missing;
    if (head + 1 < sent.strPos.length) {
        rightOfHead = sent.strPos[head + 1];
    }

    // Window around the dependent token.
    String leftOfDep = missing;
    if (dep > 0) {
        leftOfDep = sent.strPos[dep - 1];
    }
    String depMid = "Center: " + sent.strPos[dep] + " ";
    String rightOfDep = missing;
    if (dep + 1 < sent.strPos.length) {
        rightOfDep = sent.strPos[dep + 1];
    }

    String headWindow = "Left: " + leftOfHead + " " + headMid + "Right: " + rightOfHead + " ";
    String depWindow = "Left: " + leftOfDep + " " + depMid + "Right: " + rightOfDep + " ";

    String dirSuffix = "Arc-dir: " + (head < dep) + " " + deprel;
    String lenSuffix = "Arc-length: " + (head - dep) + " " + deprel;

    Set<Feature> out = new HashSet<>();
    out.add(new DiscreteFeature(tag + headMid + depMid + dirSuffix));
    out.add(new DiscreteFeature(tag + headMid + depMid + lenSuffix));
    out.add(new DiscreteFeature(tag + headWindow + depWindow + dirSuffix));
    return out;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class POSWindowTwo method getFeatures.
/**
 * Produces one discrete feature per non-null POS tag in a window of {@code k = 2} around the
 * constituent.
 *
 * <p>The TOKENS and POS views must already be present on the constituent's
 * {@code TextAnnotation}. The window tags are obtained from {@code getwindowtagskfrom};
 * each non-null entry at index {@code i} yields the feature
 * {@code "POSWindowTwo:" + i + "(" + tag + ")"}. Null entries are skipped, so the index in
 * the feature name always refers to the position in the returned tag array.
 *
 * @param c the constituent to extract features for (treated as a single word/token)
 * @return the features in window order
 * @throws EdisonException if the TOKENS or POS view cannot be obtained
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    String classifier = "POSWindowTwo";
    TextAnnotation ta = c.getTextAnnotation();

    View TOKENS;
    View POS;
    try {
        TOKENS = ta.getView(ViewNames.TOKENS);
        POS = ta.getView(ViewNames.POS);
    } catch (Exception e) {
        // Previously the failure was printed and extraction continued with null views.
        // Surface it through the declared exception instead, keeping the original cause.
        EdisonException failure = new EdisonException(
                classifier + ": unable to obtain the TOKENS/POS views: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();

    // words two before & after
    int k = 2;
    String[] tags = getwindowtagskfrom(TOKENS, POS, startspan, endspan, k);

    Set<Feature> result = new LinkedHashSet<Feature>();
    for (int i = 0; i < tags.length; i++) {
        if (tags[i] == null) {
            continue;
        }
        String feature = classifier + ":" + i + "(" + tags[i] + ")";
        // Per-feature tracing is diagnostic output only, so it is logged at debug level.
        logger.debug(feature);
        result.add(new DiscreteFeature(feature));
    }
    return result;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class POSandPositionWindowThree method getFeatures.
/**
 * Produces POS n-gram features over a window of up to three tokens before and three tokens
 * after the constituent.
 *
 * <p>The TOKENS and POS views must already be present on the constituent's
 * {@code TextAnnotation}. The POS tags of the preceding tokens, the constituent itself and
 * the following tokens are stored, in that order, in a 7-slot array. For every start index
 * {@code i} and every {@code j} in {@code 0..2}, the tags at {@code i .. i+j} (cut off at
 * the end of the array) are joined with {@code "_"} and emitted as
 * {@code "POSandPositionWindowThree:" + i + "_" + j + "(" + joined + ")"}.
 *
 * <p>Slots that were never filled stay {@code null} and are rendered as the text
 * {@code null} in the feature value (presumably when {@code getwordskfrom} returns fewer
 * than three tokens near a sentence boundary; confirm against that helper).
 *
 * @param c the constituent to extract features for (treated as a single word/token)
 * @return the features in generation order
 * @throws EdisonException if a token in the window has no POS label at all
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    String classifier = "POSandPositionWindowThree";
    TextAnnotation ta = c.getTextAnnotation();
    // TOKENS and POS are declared outside this method; they are assigned here as before.
    TOKENS = ta.getView(ViewNames.TOKENS);
    POS = ta.getView(ViewNames.POS);

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();
    int before = 3;
    int after = 3;
    String[] tags = new String[before + after + 1];

    int i = 0;
    // three words before (a negative k asks getwordskfrom for the preceding tokens)
    for (Constituent token : getwordskfrom(TOKENS, startspan, endspan, -before)) {
        tags[i++] = singlePosLabel(token.getStartSpan(), token.getEndSpan());
    }
    // the constituent itself
    tags[i++] = singlePosLabel(startspan, endspan);
    // three words after
    for (Constituent token : getwordskfrom(TOKENS, startspan, endspan, after)) {
        tags[i++] = singlePosLabel(token.getStartSpan(), token.getEndSpan());
    }

    Set<Feature> result = new LinkedHashSet<Feature>();
    int contextmax = 3;
    for (int j = 0; j < contextmax; j++) {
        for (i = 0; i < tags.length; i++) {
            // Join tags[i .. i+j], stopping early at the end of the window.
            StringBuilder joined = new StringBuilder();
            for (int context = 0; context <= j && i + context < tags.length; context++) {
                if (context != 0) {
                    joined.append("_");
                }
                joined.append(tags[i + context]);
            }
            String id = i + "_" + j;
            result.add(new DiscreteFeature(classifier + ":" + id + "(" + joined + ")"));
        }
    }
    return result;
}

/**
 * Returns the first POS label covering the given span, warning when the span carries more
 * than one label.
 *
 * @param start start of the token span
 * @param end end of the token span
 * @return the first label reported by the POS view for the span
 * @throws EdisonException if the POS view reports no label for the span
 */
private String singlePosLabel(int start, int end) throws EdisonException {
    List<String> labels = POS.getLabelsCoveringSpan(start, end);
    if (labels.isEmpty()) {
        // Previously this case fell through to get(0) and failed with an index error.
        throw new EdisonException(
                "No POS tag found for token span start=" + start + " end=" + end);
    }
    if (labels.size() > 1) {
        logger.warn("Error token has more than one POS tag.");
    }
    return labels.get(0);
}
Aggregations