use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.
the class EnglishFeatureExtractor method addSyntacticFeatures.
/**
 * Adds syntactic features derived from the parse of a question to the instance.
 *
 * <ul>
 * <li>MAIN_VERB: the head word of the parse tree, replaced by its verb lemma when the
 * dictionary lookup yields one, or "-" when the tree has no head word.</li>
 * <li>WH_DET: added when one of the wh-patterns, followed by the focus text, matches the
 * whole question.</li>
 * <li>FOCUS_ADJ: the head word of the first RB|JJ preterminal preceded by a WRB
 * preterminal, if such a node is found.</li>
 * </ul>
 *
 * Nothing is added (and an error is logged) when <code>parseTree</code> is null.
 *
 * @param instance the instance the binary features are added to
 * @param terms the terms of the question; their texts are joined with single spaces
 * @param parseTree the syntactic parse of the question, may be null
 * @param focusTerm the focus term of the question, may be null
 */
private static void addSyntacticFeatures(MutableInstance instance, List<Term> terms, String parseTree, Term focusTerm) {
    if (parseTree == null) {
        log.error("Syntactic parse of the question is null.");
        return;
    }
    Tree tree = TreeHelper.buildTree(parseTree, Tree.ENGLISH);

    // MAIN_VERB
    TreeHelper.markHeadNode(tree);
    String mainVerb = tree.getHeadWord();
    if (mainVerb == null) {
        // no head word: skip the dictionary lookup instead of relying on the catch block
        mainVerb = "-";
    } else {
        try {
            IndexWord word = Dictionary.getInstance().lookupIndexWord(POS.VERB, mainVerb);
            String lemma = (word != null) ? word.getLemma() : null;
            if (lemma != null) {
                mainVerb = lemma;
            }
        } catch (Exception e) {
            // best effort: keep the surface form if the lookup fails
            log.warn("Failed to get lemma for verb '" + mainVerb + "'", e);
        }
    }
    instance.addBinary(new Feature("MAIN_VERB" + "." + mainVerb));

    // WH_DET
    if (focusTerm != null && focusTerm.getText() != null) {
        // the focus is literal text, not a pattern: quote it so that regex
        // metacharacters (e.g. "+", "(", "?") cannot break or alter the expression
        String focus = Pattern.quote(focusTerm.getText());
        StringBuilder joined = new StringBuilder();
        for (Term term : terms) {
            joined.append(term.getText()).append(' ');
        }
        String question = joined.toString().trim();
        for (String ptrn : whPtrns) {
            Matcher m = Pattern.compile(ptrn + SPACE_PTRN + focus + REST_PTRN).matcher(question);
            if (m.matches()) {
                instance.addBinary(new Feature("WH_DET" + ".+"));
                break;
            }
        }
    }

    // FOCUS_ADJ
    Tree focusNode = TreeHelper.findFirstPreterminalWithPrecedingPreterminal(tree, "RB|JJ", "WRB");
    if (focusNode != null) {
        instance.addBinary(new Feature("FOCUS_ADJ" + "." + focusNode.getHeadWord()));
    }
}
use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.
the class EnglishFeatureExtractor method addSemanticFeatures.
/**
 * Adds the FOCUS_TYPE feature to the instance: the answer type that
 * <code>WordNetAnswerTypeMapping</code> returns for the focus term, or "-" when it
 * returns null.
 *
 * @param instance the instance the binary feature is added to
 * @param focusTerm the focus term handed to the answer type mapping
 */
private static void addSemanticFeatures(MutableInstance instance, Term focusTerm) {
    String mappedType = WordNetAnswerTypeMapping.getAnswerType(focusTerm);
    String typeLabel = (mappedType != null) ? mappedType : "-";
    Feature focusTypeFeature = new Feature("FOCUS_TYPE" + "." + typeLabel);
    instance.addBinary(focusTypeFeature);
}
use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.
the class EnglishFeatureExtractor method addWordLevelFeatures.
/**
 * Adds word-level features of a question to the instance.
 *
 * <ul>
 * <li>UNIGRAM / BIGRAM: one feature per term and per pair of adjacent terms; whitespace
 * inside a term is replaced by "_" and a term without text is represented as "-".</li>
 * <li>WH_WORD: the first capture group of the first wh-pattern that matches, lower-cased
 * with whitespace replaced by "_". Patterns are first tried anchored at the beginning
 * of the question and, if none matches, anywhere in the question.</li>
 * <li>OF_HEAD: the first word from <code>OF_HEAD_WORDS</code> that occurs in the question
 * as "&lt;word&gt;[s] of &lt;focus&gt;". Skipped when the focus or its text is null.</li>
 * </ul>
 *
 * @param instance the instance the binary features are added to
 * @param terms the terms of the question
 * @param focus the focus term of the question, may be null
 */
private static void addWordLevelFeatures(MutableInstance instance, List<Term> terms, Term focus) {
    String[] words = new String[terms.size()];
    for (int i = 0; i < words.length; i++) {
        String text = terms.get(i).getText();
        words[i] = (text != null) ? text.replaceAll("\\s+", "_") : "-";
    }

    // UNIGRAM
    for (String word : words) {
        instance.addBinary(new Feature("UNIGRAM" + "." + word));
    }

    // BIGRAM
    for (int i = 0; i < words.length - 1; i++) {
        instance.addBinary(new Feature("BIGRAM" + "." + words[i] + "-" + words[i + 1]));
    }

    // WH_WORD
    StringBuilder joined = new StringBuilder();
    for (Term term : terms) {
        joined.append(term.getText()).append(' ');
    }
    String question = joined.toString().trim();
    String whWord = null;
    // first look at sentence beginning
    // NOTE(review): group(1) assumes every entry of whPtrns has a capturing group - confirm
    for (String ptrn : whPtrns) {
        Matcher m = Pattern.compile("^" + ptrn + REST_PTRN).matcher(question);
        if (m.matches()) {
            whWord = m.group(1).toLowerCase().replaceAll("\\s+", "_");
            instance.addBinary(new Feature("WH_WORD" + "." + whWord));
            break;
        }
    }
    if (whWord == null) {
        // then look anywhere in the sentence
        for (String ptrn : whPtrns) {
            Matcher m = Pattern.compile(ptrn + REST_PTRN).matcher(question);
            if (m.find()) {
                whWord = m.group(1).toLowerCase().replaceAll("\\s+", "_");
                instance.addBinary(new Feature("WH_WORD" + "." + whWord));
                break;
            }
        }
    }

    // OF_HEAD
    if (focus == null || focus.getText() == null) {
        // without focus text there is nothing to look for; concatenating a null text
        // would otherwise search for the literal word "null"
        return;
    }
    // the focus is literal text, not a pattern: quote it so that regex
    // metacharacters cannot break or alter the expression
    String quotedFocus = Pattern.quote(focus.getText());
    for (String word : OF_HEAD_WORDS) {
        Matcher m = Pattern.compile(word + "s? of " + quotedFocus).matcher(question);
        if (m.find()) {
            instance.addBinary(new Feature("OF_HEAD" + "." + word));
            break;
        }
    }
}
use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.
the class FeatureExtractor method printFeaturesFromQuestions.
/**
 * Prints the features generated for each question in an input file to standard output,
 * one line per question, followed by the question itself.
 *
 * If <code>features</code> is non-empty, only binary features whose first name component
 * is contained in it are printed, each as its name components joined by ".". Otherwise
 * the whole instance is printed.
 *
 * The file content is split on line feed, carriage return and form feed characters;
 * blank lines (such as the empty strings produced by "\r\n" line endings) are skipped.
 *
 * @param questionSetFileName the name of the file containing the dataset to load
 * @param features a List of the feature types to print, empty to print all features
 */
public void printFeaturesFromQuestions(String questionSetFileName, List<String> features) {
    String questions = IOUtil.readFile(questionSetFileName);
    boolean printAll = features.isEmpty();
    for (String question : questions.split("[\\n\\r\\f]")) {
        if (question.trim().length() == 0) {
            // not a question, just an artifact of splitting on each separator character
            continue;
        }
        Instance instance = createInstance(question);
        if (printAll) {
            System.out.println(instance + " " + question);
            continue;
        }
        StringBuilder sb = new StringBuilder();
        for (Iterator it = instance.binaryFeatureIterator(); it.hasNext(); ) {
            Feature feat = (Feature) it.next();
            String[] parts = feat.getName();
            if (parts.length == 0 || !features.contains(parts[0])) {
                continue;
            }
            // join the name components with "." (no regex-based stripping of a leading dot)
            for (int i = 0; i < parts.length; i++) {
                if (i > 0) {
                    sb.append('.');
                }
                sb.append(parts[i]);
            }
            sb.append(' ');
        }
        System.out.println(sb.toString() + " " + question);
    }
}
use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.
the class ScoreNormalizationFilter method addAnswerTypeFeatures.
/**
 * Adds one binary feature per answer type of the analyzed question behind the given
 * result. Each answer type string is split at "->" and the resulting components form
 * the name of the feature.
 *
 * @param instance the instance the binary features are added to
 * @param result the result whose query supplies the answer types
 */
private static void addAnswerTypeFeatures(MutableInstance instance, Result result) {
    String[] types = result.getQuery().getAnalyzedQuestion().getAnswerTypes();
    for (int i = 0; i < types.length; i++) {
        String[] nameParts = types[i].split("->");
        instance.addBinary(new Feature(nameParts));
    }
}
Aggregations