Search in sources :

Example 16 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

the class Logger method logQueryStrings.

/**
	 * Logs the query strings.
	 * <p>
	 * Each query string is appended to the log file wrapped in a
	 * <code>&lt;querystring&gt;</code> element. The writer is always closed,
	 * even if writing is interrupted by an exception.
	 * 
	 * @param queries the queries
	 * @return true, iff logging was successful; false if logging is disabled,
	 *         no log file is specified, the log file could not be opened or
	 *         the writer reported an I/O error
	 */
public static boolean logQueryStrings(Query[] queries) {
    // logging is disabled or log file is not specified
    if (!enabled || logfile == null)
        return false;
    PrintWriter out = null;
    try {
        // open in append mode so that earlier log entries are preserved
        out = new PrintWriter(new FileOutputStream(logfile, true));
        for (Query query : queries) {
            out.println("\t<querystring>");
            out.println("\t\t" + query.getQueryString());
            out.println("\t</querystring>");
        }
        // PrintWriter never throws on write errors, it only records them;
        // checkError() flushes and reports whether any write has failed
        return !out.checkError();
    } catch (IOException e) {
        // the log file could not be opened for writing
        return false;
    } finally {
        // release the file handle on every path, including runtime exceptions
        if (out != null)
            out.close();
    }
}
Also used : Query(info.ephyra.querygeneration.Query) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 17 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

the class EphyraTREC13To16 method askOther.

// Layout 2
//	/**
//	 * Initializes the pipeline for 'other' questions.
//	 */
//	protected void initOther() {
//		// query generation
//		QueryGeneration.clearQueryGenerators();
//		
//		// search
//		// - knowledge miners for unstructured knowledge sources
//		Search.clearKnowledgeMiners();
//		for (String[] indriIndices : IndriKM.getIndriIndices())
//			Search.addKnowledgeMiner(new IndriKM(indriIndices, false));
//		for (String[] indriServers : IndriKM.getIndriServers())
//			Search.addKnowledgeMiner(new IndriKM(indriServers, true));
//		// - knowledge annotators for (semi-)structured knowledge sources
//		Search.clearKnowledgeAnnotators();
//		
//		// answer extraction and selection
//		// (the filters are applied in this order)
//		AnswerSelection.clearFilters();
//		
//		//	initialize scores
//		AnswerSelection.addFilter(new ScoreResetterFilter());
//		
//		//	extract sentences from snippets
//		AnswerSelection.addFilter(new SentenceExtractionFilter());
//		
//		//	cut meaningless introductions from sentences
//		AnswerSelection.addFilter(new CutKeywordsFilter());
//		AnswerSelection.addFilter(new CutStatementProviderFilter());
//		AnswerSelection.addFilter(new SentenceSplitterFilter());
//		AnswerSelection.addFilter(new CutKeywordsFilter());
//		
//		//	remove duplicates
//		AnswerSelection.addFilter(new DuplicateSnippetFilter());
//		
//		//	throw out enumerations of proper names
//		AnswerSelection.addFilter(new ProperNameFilter());
//		
//		//	throw out direct speech snippets, rarely contain useful information
//		AnswerSelection.addFilter(new DirectSpeechFilter());
//		
//		AnswerSelection.addFilter(
//				new WikipediaGoogleWebTermImportanceFilter(
//					WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
//					WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
//					false
//				)
//			);
//		AnswerSelection.addFilter(new ScoreSorterFilter());
//		
//		//	cut off result
//		AnswerSelection.addFilter(new ResultLengthFilter(3000));
//	}
// Layout 3
//	/**
//	 * Initializes the pipeline for 'other' questions.
//	 */
//	protected void initOther() {
//		// query generation
//		QueryGeneration.clearQueryGenerators();
//		
//		// search
//		// - knowledge miners for unstructured knowledge sources
//		Search.clearKnowledgeMiners();
//		for (String[] indriIndices : IndriKM.getIndriIndices())
//			Search.addKnowledgeMiner(new IndriDocumentKM(indriIndices, false));
//		for (String[] indriServers : IndriKM.getIndriServers())
//			Search.addKnowledgeMiner(new IndriDocumentKM(indriServers, true));
//		// - knowledge annotators for (semi-)structured knowledge sources
//		Search.clearKnowledgeAnnotators();
//		
//		// answer extraction and selection
//		// (the filters are applied in this order)
//		AnswerSelection.clearFilters();
//		
//		//	initialize scores
//		AnswerSelection.addFilter(new ScoreResetterFilter());
//		
//		//	extract sentences from snippets
//		AnswerSelection.addFilter(new SentenceExtractionFilter());
//		
//		//	cut meaningless introductions from sentences
//		AnswerSelection.addFilter(new CutKeywordsFilter());
//		AnswerSelection.addFilter(new CutStatementProviderFilter());
//		AnswerSelection.addFilter(new SentenceSplitterFilter());
//		AnswerSelection.addFilter(new CutKeywordsFilter());
//		
//		//	remove duplicates
//		AnswerSelection.addFilter(new DuplicateSnippetFilter());
//		
//		//	throw out enumerations of proper names
//		AnswerSelection.addFilter(new ProperNameFilter());
//		
//		//	throw out direct speech snippets, rarely contain useful information
//		AnswerSelection.addFilter(new DirectSpeechFilter());
//		
//		//	sort out snippets containing no new terms
//		AnswerSelection.addFilter(new TermFilter());
//		
//		AnswerSelection.addFilter(
//				new WikipediaGoogleWebTermImportanceFilter(
//					WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
//					WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
//					false
//				)
//			);
//		AnswerSelection.addFilter(new ScoreSorterFilter());
//		
//		//	cut off result
//		AnswerSelection.addFilter(new ResultLengthFilter(3000));
//	}
/**
	 * Runs the 'other' question pipeline on a question: initializes the
	 * pipeline, generates bag-of-words queries from the normalized question,
	 * searches with them and passes the hits through answer selection.
	 * 
	 * @param question other question
	 * @return array of results
	 */
public final Result[] askOther(String question) {
    // set up the pipeline for 'other' questions
    initOther();

    // query generation: normalize the question, then print and log the
    // normalized form
    MsgPrinter.printGeneratingQueries();
    String normalized = QuestionNormalizer.normalize(question);
    MsgPrinter.printNormalization(normalized);
    Logger.logNormalization(normalized);

    // keywords come from the normalized string, whereas the analyzed
    // question is built from the original question string
    String[] keywords = KeywordExtractor.getKeywords(normalized);
    AnalyzedQuestion analyzed = new AnalyzedQuestion(question);
    analyzed.setKeywords(keywords);
    analyzed.setFactoid(false);

    BagOfWordsG generator = new BagOfWordsG();
    Query[] queries = generator.generateQueries(analyzed);
    // attach the original question to every generated query
    for (Query query : queries) {
        query.setOriginalQueryString(question);
    }

    // print and log the query strings
    MsgPrinter.printQueryStrings(queries);
    Logger.logQueryStrings(queries);

    // search
    MsgPrinter.printSearching();
    Result[] hits = Search.doSearch(queries);

    // answer selection
    MsgPrinter.printSelectingAnswers();
    return AnswerSelection.getResults(hits, Integer.MAX_VALUE, 0);
}
Also used : Query(info.ephyra.querygeneration.Query) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) Result(info.ephyra.search.Result)

Example 18 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

the class TREC13To16Parser method loadResults.

/**
	 * Loads the results for a question from a log file.
	 * <p>
	 * The entries of the log file are read once per question type and then
	 * cached in a static field. NOTE(review): the cache is selected by the
	 * question type only, so a later call with a different
	 * <code>logfile</code> reuses the entries of the first file.
	 * <p>
	 * Entries are searched from the last to the first, so the most recent
	 * entry for the question wins.
	 * 
	 * @param question the question
	 * @param type the type of question ("factoid", "list" or "other"),
	 * 		   compared case-insensitively
	 * @param logfile the log file
	 * @return array of results or <code>null</code> if the question could not
	 * 		   be found in the log file or the log file could not be read or
	 * 		   parsed
	 */
public static Result[] loadResults(String question, String type, String logfile) {
    try {
        // the documented type names are lower-case, so the checks must not
        // depend on the case; otherwise a lower-case type would read and
        // fill the cache of the 'other' questions
        boolean factoid = type.equalsIgnoreCase("FACTOID");
        boolean list = type.equalsIgnoreCase("LIST");
        // get cached entries for given question type
        ArrayList<String> entries;
        if (factoid)
            entries = factoidEntries;
        else if (list)
            entries = listEntries;
        else
            entries = otherEntries;
        // get entries from log file if not cached
        if (entries == null) {
            entries = readEntries(type, logfile);
            // cache entries
            if (factoid)
                factoidEntries = entries;
            else if (list)
                listEntries = entries;
            else
                otherEntries = entries;
        }
        // one DOM parser is sufficient for all entries
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder parser = factory.newDocumentBuilder();
        // traverse entries in reverse order
        for (int i = entries.size() - 1; i >= 0; i--) {
            // parse entry and build tree
            Document entryD = parser.parse(new InputSource(new StringReader(entries.get(i))));
            // Is this the question we are looking for?
            Element questionE = (Element) entryD.getElementsByTagName("question").item(0);
            if (!textOf(questionE).equals(question))
                continue;
            // get results
            ArrayList<Result> results = new ArrayList<Result>();
            NodeList resultsL = entryD.getElementsByTagName("result");
            for (int j = 0; j < resultsL.getLength(); j++) {
                Element resultE = (Element) resultsL.item(j);
                String answerS = textOf((Element) resultE.getElementsByTagName("answer").item(0));
                float scoreF = Float.parseFloat(textOf((Element) resultE.getElementsByTagName("score").item(0)));
                String docidS = textOf((Element) resultE.getElementsByTagName("docid").item(0));
                // the interpretation is optional
                Element qiE = (Element) resultE.getElementsByTagName("interpretation").item(0);
                QuestionInterpretation qi = null;
                if (qiE != null) {
                    String propertyS = textOf((Element) qiE.getElementsByTagName("property").item(0));
                    String targetS = textOf((Element) qiE.getElementsByTagName("target").item(0));
                    NodeList contextL = qiE.getElementsByTagName("context");
                    String[] contextS = new String[contextL.getLength()];
                    for (int k = 0; k < contextS.length; k++)
                        contextS[k] = textOf((Element) contextL.item(k));
                    qi = new QuestionInterpretation(targetS, contextS, propertyS);
                }
                // the query string is not restored, only the interpretation
                Query query = new Query(null);
                query.setInterpretation(qi);
                Result result = new Result(answerS, query, docidS);
                result.setScore(scoreF);
                results.add(result);
            }
            return results.toArray(new Result[results.size()]);
        }
        // question not found
        return null;
    } catch (Exception e) {
        MsgPrinter.printErrorMsg("Failed to load or parse log file:");
        MsgPrinter.printErrorMsg(e.toString());
        return null;
    }
}

/**
	 * Reads all entries of the given question type from a log file. An entry
	 * starts at a line that consists of the opening tag for the type and is
	 * completed by a line that consists of the closing tag.
	 * 
	 * @param type the type of question, lower-cased to form the tag name
	 * @param logfile the log file
	 * @return the entries in the order in which they appear in the log file
	 * @throws IOException if the log file could not be opened or read
	 */
private static ArrayList<String> readEntries(String type, String logfile) throws IOException {
    // lower-case independently of the default locale (e.g. "LIST" must not
    // become "l\u0131st" in a Turkish locale)
    String tag = type.toLowerCase(java.util.Locale.ROOT);
    String openTag = "<" + tag + ">";
    String closeTag = "</" + tag + ">";
    ArrayList<String> entries = new ArrayList<String>();
    StringBuilder entry = new StringBuilder();
    BufferedReader in = new BufferedReader(new FileReader(logfile));
    try {
        String line;
        // readLine() returns null at the end of the file
        while ((line = in.readLine()) != null) {
            // handle characters that are not allowed in XML
            for (int i = 0; i < SPECIALCHARS.length; i++)
                line = line.replace(SPECIALCHARS[i], REPLACEMENTS[i]);
            // an opening tag discards everything collected so far
            if (line.equals(openTag))
                entry.setLength(0);
            entry.append(line).append('\n');
            if (line.equals(closeTag))
                entries.add(entry.toString());
        }
    } finally {
        // release the file handle even if reading fails
        in.close();
    }
    return entries;
}

/**
	 * Returns the trimmed value of the first child node of an element.
	 * 
	 * @param element a DOM element whose first child holds the text
	 * @return the trimmed node value of the first child
	 */
private static String textOf(Element element) {
    return element.getFirstChild().getNodeValue().trim();
}
Also used : InputSource(org.xml.sax.InputSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation) Query(info.ephyra.querygeneration.Query) Element(org.w3c.dom.Element) NodeList(org.w3c.dom.NodeList) ArrayList(java.util.ArrayList) Document(org.w3c.dom.Document) IOException(java.io.IOException) Result(info.ephyra.search.Result) DocumentBuilder(javax.xml.parsers.DocumentBuilder) BufferedReader(java.io.BufferedReader) StringReader(java.io.StringReader) FileReader(java.io.FileReader)

Example 19 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

the class Search method doSearch.

/**
	 * Dispatches a set of alternative queries to the registered searchers and
	 * collects what they return. Only the first query is passed to the
	 * KnowledgeAnnotators, while every query is passed to the
	 * KnowledgeMiners.
	 * 
	 * @param queries queries to be processed
	 * @return results returned by the searchers
	 */
public static Result[] doSearch(Query[] queries) {
    // start with an empty result list and no pending queries
    results = new ArrayList<Result>();
    pending = 0;

    int numQueries = queries.length;
    System.out.println("queries.length == " + numQueries);

    // the KnowledgeAnnotators only get the first query
    if (numQueries != 0) {
        queryKAs(queries[0]);
    }
    // the KnowledgeMiners get all queries
    for (int i = 0; i < numQueries; i++) {
        queryKMs(queries[i]);
    }

    // wait until all queries have been completed
    waitForResults();

    // drop duplicates among results from KnowledgeMiners
    results = dropDuplicates(results);
    Result[] aggregated = new Result[results.size()];
    return results.toArray(aggregated);
}
Also used : Query(info.ephyra.querygeneration.Query)

Aggregations

Query (info.ephyra.querygeneration.Query)19 Result (info.ephyra.search.Result)11 ArrayList (java.util.ArrayList)8 Predicate (info.ephyra.nlp.semantics.Predicate)6 QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation)4 Term (info.ephyra.questionanalysis.Term)4 IOException (java.io.IOException)4 BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)3 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)3 Hashtable (java.util.Hashtable)3 TRECTarget (info.ephyra.trec.TRECTarget)2 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 HashSet (java.util.HashSet)2 QuestionReformulator (info.ephyra.querygeneration.QuestionReformulator)1 QuestionInterpretationG (info.ephyra.querygeneration.generators.QuestionInterpretationG)1 FileCache (info.ephyra.util.FileCache)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 PrintWriter (java.io.PrintWriter)1