use of info.ephyra.querygeneration.Query in project lucida by claritylab.
the class Logger method logQueryStrings.
/**
 * Appends the query strings to the log file, one <code>querystring</code>
 * element per query.
 *
 * @param queries the queries
 * @return true, iff logging was successful; false if logging is disabled,
 *         no log file is specified, the log file could not be opened or
 *         writing to it failed
 */
public static boolean logQueryStrings(Query[] queries) {
    // logging is disabled or log file is not specified
    if (!enabled || logfile == null)
        return false;

    PrintWriter out = null;
    try {
        // open in append mode so that existing log content is kept
        out = new PrintWriter(new FileOutputStream(logfile, true));
        for (Query query : queries) {
            out.println("\t<querystring>");
            out.println("\t\t" + query.getQueryString());
            out.println("\t</querystring>");
        }
        // PrintWriter does not throw on write failures, it only records
        // them; checkError() flushes the stream and reports the error flag
        return !out.checkError();
    } catch (IOException e) {
        // the log file could not be opened
        return false;
    } finally {
        // release the file handle even if printing a query failed with a
        // runtime exception
        if (out != null)
            out.close();
    }
}
use of info.ephyra.querygeneration.Query in project lucida by claritylab.
the class EphyraTREC13To16 method askOther.
// Layout 2
// /**
// * Initializes the pipeline for 'other' questions.
// */
// protected void initOther() {
// // query generation
// QueryGeneration.clearQueryGenerators();
//
// // search
// // - knowledge miners for unstructured knowledge sources
// Search.clearKnowledgeMiners();
// for (String[] indriIndices : IndriKM.getIndriIndices())
// Search.addKnowledgeMiner(new IndriKM(indriIndices, false));
// for (String[] indriServers : IndriKM.getIndriServers())
// Search.addKnowledgeMiner(new IndriKM(indriServers, true));
// // - knowledge annotators for (semi-)structured knowledge sources
// Search.clearKnowledgeAnnotators();
//
// // answer extraction and selection
// // (the filters are applied in this order)
// AnswerSelection.clearFilters();
//
// // initialize scores
// AnswerSelection.addFilter(new ScoreResetterFilter());
//
// // extract sentences from snippets
// AnswerSelection.addFilter(new SentenceExtractionFilter());
//
// // cut meaningless introductions from sentences
// AnswerSelection.addFilter(new CutKeywordsFilter());
// AnswerSelection.addFilter(new CutStatementProviderFilter());
// AnswerSelection.addFilter(new SentenceSplitterFilter());
// AnswerSelection.addFilter(new CutKeywordsFilter());
//
// // remove duplicates
// AnswerSelection.addFilter(new DuplicateSnippetFilter());
//
// // throw out enumerations of proper names
// AnswerSelection.addFilter(new ProperNameFilter());
//
// // throw out direct speech snippets, rarely contain useful information
// AnswerSelection.addFilter(new DirectSpeechFilter());
//
// AnswerSelection.addFilter(
// new WikipediaGoogleWebTermImportanceFilter(
// WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
// WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
// false
// )
// );
// AnswerSelection.addFilter(new ScoreSorterFilter());
//
// // cut off result
// AnswerSelection.addFilter(new ResultLengthFilter(3000));
// }
// Layout 3
// /**
// * Initializes the pipeline for 'other' questions.
// */
// protected void initOther() {
// // query generation
// QueryGeneration.clearQueryGenerators();
//
// // search
// // - knowledge miners for unstructured knowledge sources
// Search.clearKnowledgeMiners();
// for (String[] indriIndices : IndriKM.getIndriIndices())
// Search.addKnowledgeMiner(new IndriDocumentKM(indriIndices, false));
// for (String[] indriServers : IndriKM.getIndriServers())
// Search.addKnowledgeMiner(new IndriDocumentKM(indriServers, true));
// // - knowledge annotators for (semi-)structured knowledge sources
// Search.clearKnowledgeAnnotators();
//
// // answer extraction and selection
// // (the filters are applied in this order)
// AnswerSelection.clearFilters();
//
// // initialize scores
// AnswerSelection.addFilter(new ScoreResetterFilter());
//
// // extract sentences from snippets
// AnswerSelection.addFilter(new SentenceExtractionFilter());
//
// // cut meaningless introductions from sentences
// AnswerSelection.addFilter(new CutKeywordsFilter());
// AnswerSelection.addFilter(new CutStatementProviderFilter());
// AnswerSelection.addFilter(new SentenceSplitterFilter());
// AnswerSelection.addFilter(new CutKeywordsFilter());
//
// // remove duplicates
// AnswerSelection.addFilter(new DuplicateSnippetFilter());
//
// // throw out enumerations of proper names
// AnswerSelection.addFilter(new ProperNameFilter());
//
// // throw out direct speech snippets, rarely contain useful information
// AnswerSelection.addFilter(new DirectSpeechFilter());
//
// // sort out snippets containing no new terms
// AnswerSelection.addFilter(new TermFilter());
//
// AnswerSelection.addFilter(
// new WikipediaGoogleWebTermImportanceFilter(
// WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
// WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
// false
// )
// );
// AnswerSelection.addFilter(new ScoreSorterFilter());
//
// // cut off result
// AnswerSelection.addFilter(new ResultLengthFilter(3000));
// }
/**
 * Asks Ephyra an 'other' question.
 * <p>
 * The question is normalized, keywords are extracted from the normalized
 * string, bag-of-words queries are generated and sent to the search, and the
 * search results are passed through the answer selection.
 *
 * @param question other question
 * @return array of results
 */
public final Result[] askOther(String question) {
    // set up the pipeline for 'other' questions
    initOther();

    // --- query generation ---
    MsgPrinter.printGeneratingQueries();
    String normalized = QuestionNormalizer.normalize(question);
    // report the normalized question on the console and in the log
    MsgPrinter.printNormalization(normalized);
    Logger.logNormalization(normalized);

    String[] keywords = KeywordExtractor.getKeywords(normalized);
    AnalyzedQuestion analyzed = new AnalyzedQuestion(question);
    analyzed.setKeywords(keywords);
    analyzed.setFactoid(false);

    Query[] queries = new BagOfWordsG().generateQueries(analyzed);
    // every generated query remembers the question it was derived from
    for (Query query : queries) {
        query.setOriginalQueryString(question);
    }
    // report the query strings on the console and in the log
    MsgPrinter.printQueryStrings(queries);
    Logger.logQueryStrings(queries);

    // --- search ---
    MsgPrinter.printSearching();
    Result[] found = Search.doSearch(queries);

    // --- answer selection ---
    MsgPrinter.printSelectingAnswers();
    return AnswerSelection.getResults(found, Integer.MAX_VALUE, 0);
}
use of info.ephyra.querygeneration.Query in project lucida by claritylab.
the class TREC13To16Parser method loadResults.
/**
 * Loads the results for a question from a log file.
 * <p>
 * The entries of the requested type are read from the log file the first
 * time they are needed and are then cached per question type. While a cache
 * for a type is set, this method does not read <code>logfile</code> again,
 * even if a different file name is passed.
 *
 * @param question the question
 * @param type the type of question: "FACTOID" and "LIST" select the
 *             respective entries, any other value is handled as 'other';
 *             the lower-cased type is used as the tag name that delimits
 *             an entry in the log file
 * @param logfile the log file
 * @return array of results or <code>null</code> if the question could not
 *         be found in the log file or the log file could not be loaded or
 *         parsed
 */
public static Result[] loadResults(String question, String type, String logfile) {
    try {
        // get cached entries for given question type
        ArrayList<String> entries;
        if (type.equals("FACTOID"))
            entries = factoidEntries;
        else if (type.equals("LIST"))
            entries = listEntries;
        else
            entries = otherEntries;

        // get entries from log file if not cached
        if (entries == null) {
            entries = new ArrayList<String>();

            // lines that open and close an entry of the given type
            // (still applied as regular expressions, as before)
            String openTag = "<" + type.toLowerCase() + ">";
            String closeTag = "</" + type.toLowerCase() + ">";

            StringBuilder entry = new StringBuilder();
            BufferedReader in = new BufferedReader(new FileReader(logfile));
            try {
                String line;
                // readLine() returns null at the end of the file
                while ((line = in.readLine()) != null) {
                    // handle characters that are not allowed in XML
                    for (int i = 0; i < SPECIALCHARS.length; i++)
                        line = line.replace(SPECIALCHARS[i], REPLACEMENTS[i]);

                    // an opening tag discards whatever was collected so far
                    if (line.matches(openTag))
                        entry.setLength(0);
                    entry.append(line).append("\n");
                    // a closing tag completes the current entry
                    if (line.matches(closeTag))
                        entries.add(entry.toString());
                }
            } finally {
                // release the file handle, also if reading failed
                in.close();
            }

            // cache entries
            if (type.equals("FACTOID"))
                factoidEntries = entries;
            else if (type.equals("LIST"))
                listEntries = entries;
            else
                otherEntries = entries;
        }

        // create the DOM parser once and reuse it for all entries
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder parser = factory.newDocumentBuilder();

        // traverse entries in reverse order, so that the last entry in the
        // log file that matches the question is the one that is returned
        for (int i = entries.size() - 1; i >= 0; i--) {
            // parse entry and build tree
            Document entryD = parser.parse(new InputSource(new StringReader(entries.get(i))));

            // Is this the question we are looking for?
            Element questionE = (Element) entryD.getElementsByTagName("question").item(0);
            String questionS = questionE.getFirstChild().getNodeValue().trim();
            if (!questionS.equals(question))
                continue;

            // get results
            ArrayList<Result> results = new ArrayList<Result>();
            NodeList resultsL = entryD.getElementsByTagName("result");
            for (int j = 0; j < resultsL.getLength(); j++) {
                Element resultE = (Element) resultsL.item(j);

                Element answerE = (Element) resultE.getElementsByTagName("answer").item(0);
                String answerS = answerE.getFirstChild().getNodeValue().trim();
                Element scoreE = (Element) resultE.getElementsByTagName("score").item(0);
                float scoreF = Float.parseFloat(scoreE.getFirstChild().getNodeValue().trim());
                Element docidE = (Element) resultE.getElementsByTagName("docid").item(0);
                String docidS = docidE.getFirstChild().getNodeValue().trim();

                // the interpretation is optional, it stays null if the
                // result has no "interpretation" element
                Element qiE = (Element) resultE.getElementsByTagName("interpretation").item(0);
                QuestionInterpretation qi = null;
                if (qiE != null) {
                    Element propertyE = (Element) qiE.getElementsByTagName("property").item(0);
                    String propertyS = propertyE.getFirstChild().getNodeValue().trim();
                    Element targetE = (Element) qiE.getElementsByTagName("target").item(0);
                    String targetS = targetE.getFirstChild().getNodeValue().trim();
                    NodeList contextL = qiE.getElementsByTagName("context");
                    String[] contextS = new String[contextL.getLength()];
                    for (int k = 0; k < contextS.length; k++) {
                        Element contextE = (Element) contextL.item(k);
                        contextS[k] = contextE.getFirstChild().getNodeValue().trim();
                    }
                    qi = new QuestionInterpretation(targetS, contextS, propertyS);
                }

                // the query is only a carrier for the interpretation
                Query query = new Query(null);
                query.setInterpretation(qi);
                Result result = new Result(answerS, query, docidS);
                result.setScore(scoreF);
                results.add(result);
            }
            return results.toArray(new Result[results.size()]);
        }

        // question not found
        return null;
    } catch (Exception e) {
        // covers I/O errors, malformed XML and entries that lack an
        // expected element or text node
        MsgPrinter.printErrorMsg("Failed to load or parse log file:");
        MsgPrinter.printErrorMsg(e.toString());
        return null;
    }
}
use of info.ephyra.querygeneration.Query in project lucida by claritylab.
the class Search method doSearch.
/**
 * Dispatches a set of alternative queries to the registered searchers and
 * returns the combined results.
 * <p>
 * Only the first query is passed to the KnowledgeAnnotators, whereas every
 * query is passed to the KnowledgeMiners. The method waits until all queries
 * have been completed before it drops duplicates and returns.
 *
 * @param queries queries to be processed
 * @return results returned by the searchers
 */
public static Result[] doSearch(Query[] queries) {
    // start with an empty result list and no pending queries
    results = new ArrayList<Result>();
    pending = 0;
    System.out.println("queries.length == " + queries.length);

    final int numQueries = queries.length;

    // KnowledgeAnnotators: first query only
    if (numQueries != 0) {
        queryKAs(queries[0]);
    }

    // KnowledgeMiners: all queries
    for (int i = 0; i < numQueries; i++) {
        queryKMs(queries[i]);
    }

    // block until all queries have been completed
    waitForResults();

    // drop duplicates among results from KnowledgeMiners
    results = dropDuplicates(results);

    Result[] aggregated = new Result[results.size()];
    return results.toArray(aggregated);
}
Aggregations