Search in sources:

Example 1 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

In the class PredicateG, the method getQueryString:

/**
 * Forms a query string from the predicates, terms and individual keywords.
 * 
 * @param predicates predicates in the question
 * @param terms terms in the question
 * @param kws keywords in the question
 * @return query string
 */
public String getQueryString(Predicate[] predicates, Term[] terms, String[] kws) {
    ArrayList<String> phrases = new ArrayList<String>();
    HashSet<String> seenNorms = new HashSet<String>();
    // collect verbs and arguments from all predicates
    for (Predicate predicate : predicates) {
        for (String verbArg : predicate.getVerbArgs()) {
            for (String token : verbArg.split("\t")) {
                // skip words on the ignore list
                if (token.matches("(?i)" + IGNORE))
                    continue;
                // skip function words
                if (FunctionWords.lookup(token))
                    continue;
                // skip duplicates (compared in normalized form)
                if (!seenNorms.add(StringUtils.normalize(token)))
                    continue;
                // drop existing quotation marks, then re-quote compound
                // phrases (anything containing whitespace)
                String phrase = token.replace("\"", "");
                if (phrase.matches(".*?\\s.*+"))
                    phrase = "\"" + phrase + "\"";
                // NOTE: expansion of phrases/terms/keywords via TermExpander
                // is disabled in this version; only the raw phrase is used
                phrases.add(phrase);
            }
        }
    }
    // build the query string by joining all collected phrases
    String queryString = StringUtils.concatWithSpaces(
            phrases.toArray(new String[phrases.size()]));
    // append context keywords not already covered by the query string
    for (String kw : kws) {
        if (!StringUtils.equalsCommonNorm(queryString, kw))
            queryString += " " + kw;
    }
    return queryString;
}
Also used : ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 2 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

In the class PredicateG, the method generateQueries:

/**
 * Generates queries from predicate-argument structures extracted from the
 * question string.
 * 
 * @param aq analyzed question
 * @return <code>Query</code> objects
 */
public Query[] generateQueries(AnalyzedQuestion aq) {
    Predicate[] predicates = aq.getPredicates();
    // without predicates there is nothing to build a query from
    if (predicates.length == 0)
        return new Query[0];
    // assemble the query string from predicates, terms and keywords
    String queryString =
            getQueryString(predicates, aq.getTerms(), aq.getKeywords());
    // wrap it in a single query carrying this generator's score and
    // extraction techniques
    Query query = new Query(queryString, aq, SCORE);
    query.setExtractionTechniques(EXTRACTION_TECHNIQUES);
    return new Query[] { query };
}
Also used : Query(info.ephyra.querygeneration.Query) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 3 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

In the class PredicateExtractor, the method getPredicates:

/**
 * Extracts the predicates from a question string.
 * 
 * @param qn normalized question string
 * @param verbMod question string with modified verbs
 * @param ats expected answer types
 * @param terms question terms
 * @return predicate-argument structures
 */
public static Predicate[] getPredicates(String qn, String verbMod, String[] ats, Term[] terms) {
    // bail out early if the question contains no predicate at all
    if (!containsPredicate(qn))
        return new Predicate[0];
    // rephrase the question as a statement for the semantic parser
    String statement = questionToStatement(qn, verbMod, ats);
    // run ASSERT on the statement and take its annotations (if any)
    String[][] allAnnotations = ASSERT.annotatePredicates(new String[] { statement });
    String[] annotations = (allAnnotations.length > 0) ? allAnnotations[0] : new String[0];
    // turn each annotation into a Predicate, skipping unparseable ones
    List<Predicate> predicates = new ArrayList<Predicate>();
    for (String annotation : annotations) {
        try {
            predicates.add(new Predicate(statement, annotation, terms));
        } catch (ParseException e) {
            // annotation could not be parsed into a predicate; ignore it
        }
    }
    // drop all placeholder arguments from every predicate and remember
    // whether any predicate actually lost an argument
    String[] placeholders = { PERSON_R, THING_R, DATE_TIME_R, DURATION_R,
            LOCATION_R, PURPOSE_R, MANNER_R, QUANTIFICATION_R, UNKNOWN_R };
    boolean missingArgs = false;
    for (Predicate p : predicates)
        for (String placeholder : placeholders)
            if (p.dropArgs(placeholder))
                missingArgs = true;
    // only return the predicates if at least one argument was dropped
    // (else the answer extraction does not work)
    return (missingArgs) ? predicates.toArray(new Predicate[predicates.size()]) : new Predicate[0];
}
Also used : ArrayList(java.util.ArrayList) ParseException(java.text.ParseException) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 4 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

In the class BagOfTermsG, the method generateQueries:

/**
 * Generates a "bag of terms" query from the terms in the question string.
 * 
 * @param aq analyzed question
 * @return <code>Query</code> objects
 */
public Query[] generateQueries(AnalyzedQuestion aq) {
    // only generate a query if the answer type is known, predicates could
    // be extracted or the question is not a factoid question
    boolean noAnswerTypes = (aq.getAnswerTypes().length == 0);
    boolean noPredicates = (aq.getPredicates().length == 0);
    if (noAnswerTypes && noPredicates && aq.isFactoid())
        return new Query[0];
    // assemble the query string from the terms and keywords
    String queryString = getQueryString(aq.getTerms(), aq.getKeywords());
    // wrap it in a single query carrying this generator's score and
    // extraction techniques
    Query query = new Query(queryString, aq, SCORE);
    query.setExtractionTechniques(EXTRACTION_TECHNIQUES);
    return new Query[] { query };
}
Also used : Query(info.ephyra.querygeneration.Query) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 5 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

In the class WebDocumentFetcher, the method apply:

/**
 * Fetches the top <code>MAX_DOCS</code> documents containing the given
 * search engine snippets. The original snippets are dropped.
 * 
 * @param results array of <code>Result</code> objects containing snippets
 * @return array of <code>Result</code> objects containing entire documents
 */
public Result[] apply(Result[] results) {
    // documents containing the search engine snippets
    docs = new ArrayList<Result>();
    // URLs already scheduled for fetching (deduplication)
    HashSet<String> urls = new HashSet<String>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing
        // approach: query must extract with the predicates filter, the
        // question must have predicates, and the result must be an
        // unscored snippet (score == Float.NEGATIVE_INFINITY)
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(FactoidsFromPredicatesFilter.ID) || ps.length == 0 || result.getScore() > Float.NEGATIVE_INFINITY)
            continue;
        // if result is not a web document then just make a copy
        // (doc IDs of web documents are URLs and thus contain a ':')
        if (!result.getDocID().contains(":")) {
            Result newResult = result.getCopy();
            newResult.setScore(0);
            docs.add(newResult);
            continue;
        }
        // fetch at most MAX_DOCS documents
        if (urls.size() >= MAX_DOCS)
            break;
        String url = result.getDocID();
        // no forbidden document type
        if (url.matches("(?i).*?" + FORBIDDEN_DOCS))
            continue;
        // only HTTP connections (openConnection does not yet connect)
        try {
            URLConnection conn = (new URL(url)).openConnection();
            if (!(conn instanceof HttpURLConnection))
                continue;
        } catch (IOException e) {
            continue;
        }
        // no duplicate document
        if (!urls.add(url))
            continue;
        // if caching is enabled, try to read document from cache
        if (CACHING) {
            FileCache cache = new FileCache(CACHE_DIR);
            String[] entries = cache.read(url);
            if (entries != null) {
                // rebuild the document text from the cached lines
                StringBuilder sb = new StringBuilder();
                for (String entry : entries) {
                    sb.append(entry);
                    sb.append("\n");
                }
                Result doc = new Result(sb.toString(), result.getQuery(), url, result.getHitPos());
                doc.setScore(0);
                docs.add(doc);
                continue;
            }
        }
        // fetch the document asynchronously; waitForDocs() joins below
        (new WebDocumentFetcher()).start(this, result);
    }
    // wait until all fetchers are done
    waitForDocs();
    // keep old results and append the fetched documents
    Result[] newResults = docs.toArray(new Result[docs.size()]);
    Result[] allResults = new Result[results.length + newResults.length];
    System.arraycopy(results, 0, allResults, 0, results.length);
    System.arraycopy(newResults, 0, allResults, results.length, newResults.length);
    return allResults;
}
Also used : Query(info.ephyra.querygeneration.Query) IOException(java.io.IOException) HttpURLConnection(java.net.HttpURLConnection) URLConnection(java.net.URLConnection) URL(java.net.URL) Result(info.ephyra.search.Result) Predicate(info.ephyra.nlp.semantics.Predicate) FileCache(info.ephyra.util.FileCache) HttpURLConnection(java.net.HttpURLConnection) HashSet(java.util.HashSet)

Aggregations

Predicate (info.ephyra.nlp.semantics.Predicate)12 Query (info.ephyra.querygeneration.Query)6 Term (info.ephyra.questionanalysis.Term)6 ArrayList (java.util.ArrayList)4 Result (info.ephyra.search.Result)3 HashSet (java.util.HashSet)3 IOException (java.io.IOException)2 ParseException (java.text.ParseException)2 Hashtable (java.util.Hashtable)2 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)1 Dictionary (info.ephyra.util.Dictionary)1 FileCache (info.ephyra.util.FileCache)1 FileOutputStream (java.io.FileOutputStream)1 PrintWriter (java.io.PrintWriter)1 HttpURLConnection (java.net.HttpURLConnection)1 URL (java.net.URL)1 URLConnection (java.net.URLConnection)1