Example use of org.apache.lucene.search.spans.SpanTermQuery in the Apache lucene-solr project.
From the class ChooseOneWordQueryBuilder, method implGetQuery.
/**
 * Builds a single-term query from the longest {@code Word} child of the given element.
 *
 * <p>The field is read from the {@code fieldName} attribute via
 * {@code DOMUtils.getAttributeWithInheritanceOrFail}. Each direct child element named
 * {@code Word} contributes one candidate term; the candidate with the longest text wins, and on
 * a tie the earliest one is kept.
 *
 * @param e the element whose {@code Word} children are the candidate terms
 * @param span if {@code true} a {@link SpanTermQuery} is returned, otherwise a {@link TermQuery}
 * @return a query on the chosen term
 * @throws ParserException if the element has no {@code Word} child element (and presumably also
 *     when the {@code DOMUtils} "OrFail" helpers reject the input - confirm against DOMUtils)
 */
public Query implGetQuery(Element e, boolean span) throws ParserException {
  final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  Term term = null;
  for (Node node = e.getFirstChild(); node != null; node = node.getNextSibling()) {
    if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals("Word")) {
      final String word = DOMUtils.getNonBlankTextOrFail((Element) node);
      final Term t = new Term(fieldName, word);
      // Strictly-longer comparison: the first of several equally long words is kept.
      if (term == null || term.text().length() < t.text().length()) {
        term = t;
      }
    }
  }
  // Without this guard a null term would be handed to the query constructors, surfacing as an
  // unhelpful failure far from the malformed XML that caused it.
  if (term == null) {
    throw new ParserException("<" + e.getTagName() + "> must contain at least one <Word> child element");
  }
  return span ? new SpanTermQuery(term) : new TermQuery(term);
}
Example use of org.apache.lucene.search.spans.SpanTermQuery in the Apache lucene-solr project.
From the class QueryBuilder, method analyzeGraphPhrase.
/**
 * Turns a graph token stream into a span query that matches it as a phrase.
 *
 * <p>The graph is walked from one articulation point to the next. Each segment contributes at
 * most one clause: when the segment start has a side path, every finite string of the segment is
 * converted with {@code createSpanQuery} and the non-null results are OR-ed together; otherwise
 * the terms at the start state are used directly (a single {@link SpanTermQuery}, or a
 * {@link SpanOrQuery} over all of them).
 *
 * @param source the token stream to analyze; it is reset before being read
 * @param field the field the generated queries target
 * @param phraseSlop the slop passed to the resulting {@link SpanNearQuery}
 * @return {@code null} if no clause was produced, the lone clause if exactly one was produced,
 *     otherwise a {@link SpanNearQuery} over all clauses, built with {@code phraseSlop} and
 *     {@code true} as its last constructor argument
 * @throws IOException declared for the token-stream operations used here
 */
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
  source.reset();
  final GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
  final List<SpanQuery> clauses = new ArrayList<>();
  final int[] points = graph.articulationPoints();

  int from = 0;
  // One iteration more than there are articulation points: the final pass handles the tail.
  for (int p = 0; p <= points.length; p++) {
    // The tail segment has no closing articulation point and is passed on with end == -1.
    final int to = (p < points.length) ? points[p] : -1;

    SpanQuery segment = null;
    if (graph.hasSidePath(from)) {
      final List<SpanQuery> alternatives = new ArrayList<>();
      for (Iterator<TokenStream> paths = graph.getFiniteStrings(from, to); paths.hasNext(); ) {
        final SpanQuery pathQuery = createSpanQuery(paths.next(), field);
        if (pathQuery != null) {
          alternatives.add(pathQuery);
        }
      }
      if (!alternatives.isEmpty()) {
        segment = new SpanOrQuery(alternatives.toArray(new SpanQuery[0]));
      }
    } else {
      final Term[] terms = graph.getTerms(field, from);
      assert terms.length > 0;
      if (terms.length == 1) {
        segment = new SpanTermQuery(terms[0]);
      } else {
        final SpanTermQuery[] termQueries = new SpanTermQuery[terms.length];
        int slot = 0;
        for (Term term : terms) {
          termQueries[slot++] = new SpanTermQuery(term);
        }
        segment = new SpanOrQuery(termQueries);
      }
    }

    if (segment != null) {
      clauses.add(segment);
    }
    from = to;
  }

  if (clauses.isEmpty()) {
    return null;
  }
  if (clauses.size() == 1) {
    return clauses.get(0);
  }
  return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), phraseSlop, true);
}
Example use of org.apache.lucene.search.spans.SpanTermQuery in the Apache lucene-solr project.
From the class TestBooleanQuery, method testBooleanSpanQuery.
// LUCENE-4477 / LUCENE-4401:
// Indexes a single document and searches it with a BooleanQuery made of two SHOULD span-term
// clauses, one matching term ("clockwork") and one misspelled term ("clckwork"). Exactly one hit
// is expected.
public void testBooleanSpanQuery() throws Exception {
  final String field = "content";
  // try-with-resources so the directory and reader are released even when an assertion fails.
  try (Directory directory = newDirectory()) {
    Analyzer indexerAnalyzer = new MockAnalyzer(random());
    IndexWriterConfig config = new IndexWriterConfig(indexerAnalyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
      Document d = new Document();
      d.add(new TextField(field, "clockwork orange", Field.Store.YES));
      writer.addDocument(d);
    }

    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = newSearcher(indexReader);
      BooleanQuery.Builder query = new BooleanQuery.Builder();
      SpanQuery sq1 = new SpanTermQuery(new Term(field, "clockwork"));
      SpanQuery sq2 = new SpanTermQuery(new Term(field, "clckwork"));
      query.add(sq1, BooleanClause.Occur.SHOULD);
      query.add(sq2, BooleanClause.Occur.SHOULD);

      TopScoreDocCollector collector = TopScoreDocCollector.create(1000);
      searcher.search(query.build(), collector);

      // Fetch the results exactly once: topDocs() drains the collector's queue, so a second
      // call on the same collector would return no hits.
      ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
      for (ScoreDoc scoreDoc : scoreDocs) {
        System.out.println(scoreDoc.doc);
      }

      // Expected value first, so a failure message reports the right "expected" side.
      assertEquals("Bug in boolean query composed of span queries", 1, scoreDocs.length);
    }
  }
}
Example use of org.apache.lucene.search.spans.SpanTermQuery in the Apache lucene-solr project.
From the class EnwikiQueryMaker, method getPrebuiltQueries.
/**
 * Creates the fixed set of hand-built queries for the given field.
 *
 * <p>The returned array holds, in order: a span-first query for "ford" with end 5; an unordered
 * span-near query for "night" and "trading" with slop 4; an unordered span-near query, slop 10,
 * combining a span-first "ford" (end 10) with "credit"; and a wildcard query for "fo*" using
 * {@code MultiTermQuery.CONSTANT_SCORE_REWRITE}.
 *
 * @param field the field every term in the queries refers to
 * @return the four prebuilt queries described above
 */
private static Query[] getPrebuiltQueries(String field) {
  // be wary of unanalyzed text
  final SpanFirstQuery fordNearStart = new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5);

  final SpanQuery[] nightTradingClauses = {
    new SpanTermQuery(new Term(field, "night")),
    new SpanTermQuery(new Term(field, "trading"))
  };
  final SpanNearQuery nightTrading = new SpanNearQuery(nightTradingClauses, 4, false);

  final SpanQuery[] fordCreditClauses = {
    new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10),
    new SpanTermQuery(new Term(field, "credit"))
  };
  final SpanNearQuery fordCredit = new SpanNearQuery(fordCreditClauses, 10, false);

  final WildcardQuery prefixWildcard = new WildcardQuery(new Term(field, "fo*"));
  prefixWildcard.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);

  return new Query[] { fordNearStart, nightTrading, fordCredit, prefixWildcard };
}
Example use of org.apache.lucene.search.spans.SpanTermQuery in the Apache lucene-solr project.
From the class TestQueryRescorer, method testBasic.
/**
 * Indexes two documents, runs a plain boolean query over them, then checks that rescoring the
 * same hits with a sloppy {@code PhraseQuery} and with an ordered {@code SpanNearQuery} both
 * flip the ranking from (0, 1) to (1, 0).
 */
public void testBasic() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig());

  // Document 0: "wizard" and "oz" are far apart.
  Document document = new Document();
  document.add(newStringField("id", "0", Field.Store.YES));
  document.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
  writer.addDocument(document);

  // Document 1: one token longer, but "wizard" and "oz" are adjacent.
  document = new Document();
  document.add(newStringField("id", "1", Field.Store.YES));
  document.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  // First pass: a plain disjunction over the two terms.
  BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
  disjunction.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
  disjunction.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
  IndexSearcher searcher = getSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());

  TopDocs firstPass = searcher.search(disjunction.build(), 10);
  assertEquals(2, firstPass.totalHits);
  assertEquals("0", searcher.doc(firstPass.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(firstPass.scoreDocs[1].doc).get("id"));

  // Second pass: rescore with a phrase query; the order is expected to flip.
  PhraseQuery phrase = new PhraseQuery(5, "field", "wizard", "oz");
  TopDocs byPhrase = QueryRescorer.rescore(searcher, firstPass, phrase, 2.0, 10);
  assertEquals(2, byPhrase.totalHits);
  assertEquals("1", searcher.doc(byPhrase.scoreDocs[0].doc).get("id"));
  assertEquals("0", searcher.doc(byPhrase.scoreDocs[1].doc).get("id"));

  // Third pass: rescore with a span-near query; the same flipped order is expected.
  SpanTermQuery wizardSpan = new SpanTermQuery(new Term("field", "wizard"));
  SpanTermQuery ozSpan = new SpanTermQuery(new Term("field", "oz"));
  SpanNearQuery adjacent = new SpanNearQuery(new SpanQuery[] { wizardSpan, ozSpan }, 0, true);
  TopDocs bySpan = QueryRescorer.rescore(searcher, firstPass, adjacent, 2.0, 10);
  assertEquals(2, bySpan.totalHits);
  assertEquals("1", searcher.doc(bySpan.scoreDocs[0].doc).get("id"));
  assertEquals("0", searcher.doc(bySpan.scoreDocs[1].doc).get("id"));

  reader.close();
  directory.close();
}
Aggregations