Search in sources :

Example 1 with Feature

use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.

the class EnglishFeatureExtractor method addSyntacticFeatures.

/**
 * Adds the syntactic features MAIN_VERB, WH_DET and FOCUS_ADJ to the given instance.
 * <ul>
 * <li>MAIN_VERB: the head word of the parse tree, replaced by its WordNet verb lemma
 * when the lookup yields one, or "-" when there is no head word.</li>
 * <li>WH_DET: added when one of the wh-patterns, followed by whitespace and the literal
 * focus text, matches the question.</li>
 * <li>FOCUS_ADJ: the head word of the node returned by
 * {@code TreeHelper.findFirstPreterminalWithPrecedingPreterminal(tree, "RB|JJ", "WRB")},
 * if any.</li>
 * </ul>
 * If {@code parseTree} is null an error is logged and no feature is added.
 *
 * @param instance the instance the binary features are added to
 * @param terms the terms of the question; their texts are joined with single spaces
 * @param parseTree the syntactic parse of the question, may be null
 * @param focusTerm the focus term of the question, may be null
 */
private static void addSyntacticFeatures(MutableInstance instance, List<Term> terms, String parseTree, Term focusTerm) {
    if (parseTree == null) {
        log.error("Syntactic parse of the question is null.");
        return;
    }
    Tree tree = TreeHelper.buildTree(parseTree, Tree.ENGLISH);
    // MAIN_VERB
    TreeHelper.markHeadNode(tree);
    String mainVerb = tree.getHeadWord();
    try {
        IndexWord word = Dictionary.getInstance().lookupIndexWord(POS.VERB, mainVerb);
        String lemma = null;
        if (word != null)
            lemma = word.getLemma();
        if (lemma != null)
            mainVerb = lemma;
    } catch (Exception e) {
        // best effort: keep the unlemmatized head word if the dictionary lookup fails
        log.warn("Failed to get lemma for verb '" + mainVerb + "'", e);
    }
    if (mainVerb == null)
        mainVerb = "-";
    instance.addBinary(new Feature("MAIN_VERB" + "." + mainVerb));
    // WH_DET
    if (focusTerm != null && focusTerm.getText() != null) {
        // The focus is plain question text, not a pattern: quote it so that characters
        // such as '?', '(' or '+' neither break compilation nor change what is matched.
        String quotedFocus = Pattern.quote(focusTerm.getText());
        StringBuilder questionBuilder = new StringBuilder();
        for (Term term : terms) questionBuilder.append(term.getText()).append(' ');
        String question = questionBuilder.toString().trim();
        for (String ptrn : whPtrns) {
            Matcher m = Pattern.compile(ptrn + SPACE_PTRN + quotedFocus + REST_PTRN).matcher(question);
            if (m.matches()) {
                instance.addBinary(new Feature("WH_DET" + ".+"));
                break;
            }
        }
    }
    // FOCUS_ADJ
    Tree focusNode = TreeHelper.findFirstPreterminalWithPrecedingPreterminal(tree, "RB|JJ", "WRB");
    if (focusNode != null)
        instance.addBinary(new Feature("FOCUS_ADJ" + "." + focusNode.getHeadWord()));
}
Also used : Matcher(java.util.regex.Matcher) Tree(edu.cmu.lti.chineseNLP.util.Tree) Term(edu.cmu.lti.javelin.qa.Term) IndexWord(net.didion.jwnl.data.IndexWord) Feature(edu.cmu.minorthird.classify.Feature)

Example 2 with Feature

use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.

the class EnglishFeatureExtractor method addSemanticFeatures.

/**
 * Adds the semantic feature FOCUS_TYPE to the given instance.
 * The feature value is the answer type that {@code WordNetAnswerTypeMapping}
 * returns for the focus term, or "-" when it returns null.
 *
 * @param instance the instance the binary feature is added to
 * @param focusTerm the focus term passed to the answer type mapping
 */
private static void addSemanticFeatures(MutableInstance instance, Term focusTerm) {
    // FOCUS_TYPE
    String mappedType = WordNetAnswerTypeMapping.getAnswerType(focusTerm);
    String typeSuffix = (mappedType != null) ? mappedType : "-";
    instance.addBinary(new Feature("FOCUS_TYPE" + "." + typeSuffix));
}
Also used : Feature(edu.cmu.minorthird.classify.Feature)

Example 3 with Feature

use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.

the class EnglishFeatureExtractor method addWordLevelFeatures.

/**
 * Adds the word-level features UNIGRAM, BIGRAM, WH_WORD and OF_HEAD to the given instance.
 * <ul>
 * <li>UNIGRAM / BIGRAM: one feature per term text and per pair of adjacent term texts;
 * whitespace inside a term is replaced by "_" and a null text by "-".</li>
 * <li>WH_WORD: group 1 of the first wh-pattern that matches the whole question from its
 * beginning; if none does, of the first wh-pattern found anywhere in the question.</li>
 * <li>OF_HEAD: the first word from {@code OF_HEAD_WORDS} that occurs in the question as
 * "&lt;word&gt; of &lt;focus&gt;" or "&lt;word&gt;s of &lt;focus&gt;".</li>
 * </ul>
 *
 * @param instance the instance the binary features are added to
 * @param terms the terms of the question
 * @param focus the focus term, may be null (then no OF_HEAD feature is added)
 */
private static void addWordLevelFeatures(MutableInstance instance, List<Term> terms, Term focus) {
    String[] words = new String[terms.size()];
    for (int i = 0; i < terms.size(); i++) {
        Term term = terms.get(i);
        if (term.getText() != null)
            words[i] = term.getText().replaceAll("\\s+", "_");
        else
            words[i] = "-";
    }
    // UNIGRAM
    for (int i = 0; i < words.length; i++) {
        instance.addBinary(new Feature("UNIGRAM" + "." + words[i]));
    }
    // BIGRAM
    for (int i = 0; i < words.length - 1; i++) {
        instance.addBinary(new Feature("BIGRAM" + "." + words[i] + "-" + words[i + 1]));
    }
    // WH_WORD
    StringBuilder questionBuilder = new StringBuilder();
    for (Term term : terms) questionBuilder.append(term.getText()).append(' ');
    String question = questionBuilder.toString().trim();
    String whWord = null;
    // first look at sentence beginning
    for (String ptrn : whPtrns) {
        Matcher m = Pattern.compile("^" + ptrn + REST_PTRN).matcher(question);
        if (m.matches()) {
            whWord = m.group(1).toLowerCase().replaceAll("\\s+", "_");
            instance.addBinary(new Feature("WH_WORD" + "." + whWord));
            break;
        }
    }
    if (whWord == null) {
        // then look anywhere in the sentence
        for (String ptrn : whPtrns) {
            Matcher m = Pattern.compile(ptrn + REST_PTRN).matcher(question);
            if (m.find()) {
                whWord = m.group(1).toLowerCase().replaceAll("\\s+", "_");
                instance.addBinary(new Feature("WH_WORD" + "." + whWord));
                break;
            }
        }
    }
    // OF_HEAD
    // Without a focus text there is nothing to look for; previously a null text was
    // concatenated into the pattern as the literal string "null".
    if (focus == null || focus.getText() == null)
        return;
    // The focus is plain question text, not a pattern: quote it so that regex
    // metacharacters in it are matched literally instead of being interpreted.
    String quotedFocus = Pattern.quote(focus.getText());
    for (String word : OF_HEAD_WORDS) {
        Matcher m = Pattern.compile(word + "s? of " + quotedFocus).matcher(question);
        if (m.find()) {
            instance.addBinary(new Feature("OF_HEAD" + "." + word));
            break;
        }
    }
}
Also used : Matcher(java.util.regex.Matcher) Term(edu.cmu.lti.javelin.qa.Term) Feature(edu.cmu.minorthird.classify.Feature)

Example 4 with Feature

use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.

the class FeatureExtractor method printFeaturesFromQuestions.

/**
 * Prints the features generated for each question in an input file, one line
 * per question, to standard output.
 * <p>
 * The file content is split into questions at newline, carriage-return and
 * form-feed characters. If <code>features</code> is non-empty, only the binary
 * features whose first name component is contained in it are printed, as
 * dot-separated names each followed by two spaces, then the question itself.
 * Otherwise the whole instance is printed followed by the question.
 *
 * @param questionSetFileName the name of the file containing the dataset to load
 * @param features a List of the feature types to print; empty to print all features
 */
public void printFeaturesFromQuestions(String questionSetFileName, List<String> features) {
    String questions = IOUtil.readFile(questionSetFileName);
    for (String question : questions.split("[\\n\\r\\f]")) {
        Instance instance = createInstance(question);
        if (features.isEmpty()) {
            System.out.println(instance + " " + question);
            continue;
        }
        StringBuilder sb = new StringBuilder();
        for (Iterator<?> it = instance.binaryFeatureIterator(); it.hasNext(); ) {
            Feature feat = (Feature) it.next();
            String[] nameParts = feat.getName();
            // filter on the feature type before doing any work on the name
            if (!features.contains(nameParts[0]))
                continue;
            // Join the name components with '.' directly; the previous code prepended a
            // '.' to every component and stripped the first one with the regex ".",
            // which matches any character and only worked by coincidence.
            for (int i = 0; i < nameParts.length; i++) {
                if (i > 0)
                    sb.append('.');
                sb.append(nameParts[i]);
            }
            sb.append("  ");
        }
        System.out.println(sb.toString() + " " + question);
    }
}
Also used : Instance(edu.cmu.minorthird.classify.Instance) Iterator(java.util.Iterator) Feature(edu.cmu.minorthird.classify.Feature)

Example 5 with Feature

use of edu.cmu.minorthird.classify.Feature in project lucida by claritylab.

the class ScoreNormalizationFilter method addAnswerTypeFeatures.

/**
 * Adds one binary feature per answer type of the analyzed question behind the
 * given result. Each answer type string is split at "->" and the resulting
 * parts form the name of the feature.
 *
 * @param instance the instance the binary features are added to
 * @param result the result whose query provides the answer types
 */
private static void addAnswerTypeFeatures(MutableInstance instance, Result result) {
    String[] types = result.getQuery().getAnalyzedQuestion().getAnswerTypes();
    for (int i = 0; i < types.length; i++) {
        String[] nameParts = types[i].split("->");
        instance.addBinary(new Feature(nameParts));
    }
}
Also used : Feature(edu.cmu.minorthird.classify.Feature)

Aggregations

Feature (edu.cmu.minorthird.classify.Feature): 15; Result (info.ephyra.search.Result): 4; Iterator (java.util.Iterator): 3; Term (edu.cmu.lti.javelin.qa.Term): 2; Example (edu.cmu.minorthird.classify.Example): 2; MutableInstance (edu.cmu.minorthird.classify.MutableInstance): 2; Matcher (java.util.regex.Matcher): 2; Tree (edu.cmu.lti.chineseNLP.util.Tree): 1; BasicDataset (edu.cmu.minorthird.classify.BasicDataset): 1; Dataset (edu.cmu.minorthird.classify.Dataset): 1; Instance (edu.cmu.minorthird.classify.Instance): 1; CrossValidatedDataset (edu.cmu.minorthird.classify.experiments.CrossValidatedDataset): 1; StringReader (java.io.StringReader): 1; DocumentBuilder (javax.xml.parsers.DocumentBuilder): 1; DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory): 1; IndexWord (net.didion.jwnl.data.IndexWord): 1; Document (org.w3c.dom.Document): 1; InputSource (org.xml.sax.InputSource): 1