Use of org.apache.lucene.search.TermQuery in the elasticsearch project by elastic.
Class XMoreLikeThis, method addToQuery.
/**
 * Drains the given priority queue and appends one optional ({@code SHOULD}) term clause per
 * popped entry to the supplied boolean query builder.
 * <p>
 * When boosting is enabled, every clause is wrapped in a {@link BoostQuery} whose boost is
 * {@code boostFactor * score / referenceScore}, where the reference score is the score of the
 * first entry popped from the queue. Draining stops early as soon as the builder rejects a
 * clause with {@link BooleanQuery.TooManyClauses}.
 *
 * @param q     queue of scored terms; it is consumed by this method
 * @param query builder that receives the generated clauses
 */
private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery.Builder query) {
    // -1 marks "no reference score captured yet".
    float referenceScore = -1;
    for (ScoreTerm entry = q.pop(); entry != null; entry = q.pop()) {
        Query clause = new TermQuery(new Term(entry.topField, entry.word));
        if (boost) {
            if (referenceScore == -1) {
                referenceScore = entry.score;
            }
            clause = new BoostQuery(clause, boostFactor * entry.score / referenceScore);
        }
        try {
            query.add(clause, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            // The builder cannot take more clauses; keep what has been added so far.
            break;
        }
    }
}
Use of org.apache.lucene.search.TermQuery in the elasticsearch project by elastic.
Class AllTermQuery, method rewrite.
/**
 * Rewrites this query against the given reader.
 * <p>
 * If the superclass rewrite produces a different query, that result is returned unchanged.
 * Otherwise the reader's leaves are scanned for payloads on the term's field: if any leaf
 * reports payloads this query is kept, and if none does a plain {@link TermQuery} on the same
 * term is returned instead.
 *
 * @param reader the index reader to rewrite against
 * @return the rewritten query, or {@code this} when payloads are present
 * @throws IOException if reading term metadata from a leaf fails
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    final Query superRewritten = super.rewrite(reader);
    if (superRewritten != this) {
        return superRewritten;
    }
    for (LeafReaderContext leaf : reader.leaves()) {
        final Terms fieldTerms = leaf.reader().terms(term.field());
        if (fieldTerms != null && fieldTerms.hasPayloads()) {
            // At least one leaf carries payloads for this field, so keep this query.
            return this;
        }
    }
    // No leaf reported payloads (this includes a reader with no leaves or without the field):
    // fall back to a plain term query.
    return new TermQuery(term);
}
Use of org.apache.lucene.search.TermQuery in the elasticsearch project by elastic.
Class XMoreLikeThisTests, method testTopN.
/**
 * Checks that a More Like This query capped at {@code topN} query terms contains exactly
 * {@code topN} clauses and that every clause is a term query on one of the expected terms
 * produced by {@code generateStrSeq(numDocs - topN, topN)}.
 *
 * @throws Exception if indexing, query generation, or resource cleanup fails
 */
public void testTopN() throws Exception {
    int numDocs = 100;
    int topN = 25;
    // add series of docs with terms of decreasing df
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; i++) {
        addDoc(writer, generateStrSeq(0, i + 1));
    }
    IndexReader reader = writer.getReader();
    writer.close();
    try {
        // setup MLT query
        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
        mlt.setMaxQueryTerms(topN);
        mlt.setMinDocFreq(1);
        mlt.setMinTermFreq(1);
        mlt.setMinWordLen(1);
        mlt.setFieldNames(new String[] { "text" });
        // perform MLT query; build the like-text with a StringBuilder instead of repeated
        // String concatenation (each term is still followed by a single space)
        StringBuilder likeText = new StringBuilder();
        for (String text : generateStrSeq(0, numDocs)) {
            likeText.append(text).append(' ');
        }
        BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText.toString()));
        // check best terms are topN of highest idf
        List<BooleanClause> clauses = query.clauses();
        assertEquals("Expected " + topN + " clauses only!", topN, clauses.size());
        Term[] expectedTerms = new Term[topN];
        int idx = 0;
        for (String text : generateStrSeq(numDocs - topN, topN)) {
            expectedTerms[idx++] = new Term("text", text);
        }
        // Wrap the array once rather than on every loop iteration.
        List<Term> expected = Arrays.asList(expectedTerms);
        for (BooleanClause clause : clauses) {
            Term term = ((TermQuery) clause.getQuery()).getTerm();
            assertTrue("Unexpected term in MLT query: " + term, expected.contains(term));
        }
    } finally {
        // clean up even when an assertion above fails
        reader.close();
        dir.close();
    }
}
Use of org.apache.lucene.search.TermQuery in the OpenGrok project by OpenGrok.
Class HistoryContextTest, method testGetContext_3args.
/**
 * Exercises the three-argument {@code HistoryContext.getContext(filename, path, hits)} against
 * {@code /mercurial/Makefile} with four kinds of query: a single term, a phrase, a term that
 * matches nothing, and a boolean OR of two terms. For each query it checks the return value,
 * the number of collected hits, and the highlighted hit lines.
 */
@Test
public void testGetContext_3args() throws Exception {
    final String path = "/mercurial/Makefile";
    final String filename = repositories.getSourceRoot() + path;
    ArrayList<Hit> hits = new ArrayList<>();

    // Single term: equivalent to hist:dummy
    TermQuery dummyTerm = new TermQuery(new Term("hist", "dummy"));
    HistoryContext termContext = new HistoryContext(dummyTerm);
    assertTrue(termContext.getContext(filename, path, hits));
    assertEquals(1, hits.size());
    assertTrue(hits.get(0).getLine().contains("Created a small <b>dummy</b> program"));

    // Phrase: equivalent to hist:"dummy program"
    PhraseQuery.Builder phraseBuilder = new PhraseQuery.Builder();
    phraseBuilder.add(new Term("hist", "dummy"));
    phraseBuilder.add(new Term("hist", "program"));
    hits.clear();
    HistoryContext phraseContext = new HistoryContext(phraseBuilder.build());
    assertTrue(phraseContext.getContext(filename, path, hits));
    assertEquals(1, hits.size());
    assertTrue(hits.get(0).getLine().contains("Created a small <b>dummy program</b>"));

    // A term that does not occur must yield no context and no hits
    TermQuery missingTerm = new TermQuery(new Term("hist", "term_does_not_exist"));
    hits.clear();
    HistoryContext missingContext = new HistoryContext(missingTerm);
    assertFalse(missingContext.getContext(filename, path, hits));
    assertEquals(0, hits.size());

    // Disjunction with multiple hits: hist:small OR hist:target
    BooleanQuery.Builder eitherBuilder = new BooleanQuery.Builder();
    eitherBuilder.add(new TermQuery(new Term("hist", "small")), Occur.SHOULD);
    eitherBuilder.add(new TermQuery(new Term("hist", "target")), Occur.SHOULD);
    hits.clear();
    HistoryContext eitherContext = new HistoryContext(eitherBuilder.build());
    assertTrue(eitherContext.getContext(filename, path, hits));
    assertEquals(2, hits.size());
    assertTrue(hits.get(0).getLine().contains("Add lint make <b>target</b> and fix lint warnings"));
    assertTrue(hits.get(1).getLine().contains("Created a <b>small</b> dummy program"));
}
Use of org.apache.lucene.search.TermQuery in the ansj_seg project by NLPchina.
Class PhraseTest, method main.
/**
 * Small demo: registers two dictionary words, prints the tokens the index analyzer produces
 * for a sample sentence, indexes that sentence into an in-memory directory under the field
 * {@code "test"}, and prints the hit counts of a term query and a parsed phrase query.
 *
 * @param args unused
 * @throws IOException    if analysis, indexing, or searching fails
 * @throws ParseException if the phrase query cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    DicLibrary.insert(DicLibrary.DEFAULT, "上网人");
    DicLibrary.insert(DicLibrary.DEFAULT, "网人");
    try (AnsjAnalyzer ansjAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
         RAMDirectory directory = new RAMDirectory()) {
        // Analyzer.tokenStream takes (fieldName, text): the field is "test" and the sentence is
        // the text to analyze, matching the document indexed below.
        try (TokenStream tokenStream = ansjAnalyzer.tokenStream("test", "上网人员测试")) {
            // TokenStream consumer workflow: reset() -> incrementToken()* -> end() -> close().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(tokenStream.getAttribute(CharTermAttribute.class));
            }
            tokenStream.end();
        }
        IndexWriterConfig config = new IndexWriterConfig(ansjAnalyzer);
        try (IndexWriter writer = new IndexWriter(directory, config)) {
            Document doc = new Document();
            doc.add(new TextField("test", "上网人员测试", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(writer);
                 AnsjAnalyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                System.out.println(searcher.count(new TermQuery(new Term("test", "网人"))));
                Query q = new QueryParser("test", queryAnalyzer).parse("\"上网人\"");
                System.out.println(q);
                System.out.println(searcher.count(q));
            }
        }
    }
}
Aggregations