Use of org.apache.lucene.queries.payloads.SpanPayloadCheckQuery in project lucene-solr by apache.
From the class PayloadCheckQParserPlugin, method createParser.
@Override
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
  return new QParser(qstr, localParams, params, req) {
    @Override
    public Query parse() throws SyntaxError {
      String field = localParams.get(QueryParsing.F);
      String value = localParams.get(QueryParsing.V);
      String p = localParams.get("payloads");
      if (field == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'f' not specified");
      }
      if (value == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "query string missing");
      }
      if (p == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'payloads' not specified");
      }
      FieldType ft = req.getCore().getLatestSchema().getFieldType(field);
      Analyzer analyzer = ft.getQueryAnalyzer();
      SpanQuery query = null;
      try {
        query = PayloadUtils.createSpanQuery(field, value, analyzer);
      } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
      }
      if (query == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "SpanQuery is null");
      }
      PayloadEncoder encoder = null;
      String e = PayloadUtils.getPayloadEncoder(ft);
      // TODO: centralize this string->PayloadEncoder logic (see DelimitedPayloadTokenFilterFactory)
      if ("float".equals(e)) {
        encoder = new FloatEncoder();
      } else if ("integer".equals(e)) {
        encoder = new IntegerEncoder();
      } else if ("identity".equals(e)) {
        encoder = new IdentityEncoder();
      }
      if (encoder == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "invalid encoder: " + e + " for field: " + field);
      }
      List<BytesRef> payloads = new ArrayList<>();
      // since payloads (most likely) came in whitespace delimited, just split
      String[] rawPayloads = p.split(" ");
      for (String rawPayload : rawPayloads) {
        if (rawPayload.length() > 0) {
          payloads.add(encoder.encode(rawPayload.toCharArray()));
        }
      }
      return new SpanPayloadCheckQuery(query, payloads);
    }
  };
}
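This parser is registered in Solr under the name payload_check, so a request like {!payload_check f=myField payloads='one two'}some text routes through the parse() method above. The TODO in the encoder branch notes that the string-to-PayloadEncoder mapping is duplicated from DelimitedPayloadTokenFilterFactory; below is a minimal sketch of what a shared helper could look like. The class name PayloadEncoders and method resolvePayloadEncoder are hypothetical, not part of lucene-solr; only the three encoder classes are real Lucene types.

import org.apache.lucene.analysis.payloads.FloatEncoder;
import org.apache.lucene.analysis.payloads.IdentityEncoder;
import org.apache.lucene.analysis.payloads.IntegerEncoder;
import org.apache.lucene.analysis.payloads.PayloadEncoder;

// Hypothetical helper centralizing the string->PayloadEncoder mapping from the TODO above.
public final class PayloadEncoders {
  private PayloadEncoders() {}

  /** Returns the encoder for a schema-declared encoder name, or null if the name is unknown. */
  public static PayloadEncoder resolvePayloadEncoder(String name) {
    if (name == null) {
      return null;
    }
    switch (name) {
      case "float":    return new FloatEncoder();    // "1.5" -> 4-byte float payload
      case "integer":  return new IntegerEncoder();  // "7"   -> 4-byte int payload
      case "identity": return new IdentityEncoder(); // raw bytes of the token text
      default:         return null;
    }
  }
}

With such a helper, the if/else chain in parse() would collapse to encoder = PayloadEncoders.resolvePayloadEncoder(e);, followed by the same null check.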
Use of org.apache.lucene.queries.payloads.SpanPayloadCheckQuery in project lucene-solr by apache.
From the class HighlighterTest, method payloadFilteringSpanQuery.
@Test
public void payloadFilteringSpanQuery() throws IOException {
  clearIndex();
  String FIELD_NAME = "payloadDelimited";
  assertU(adoc("id", "0", FIELD_NAME, "word|7 word|2"));
  assertU(commit());
  // We search at a lower level than typical Solr tests because there's no QParser for payloads
  // Create query matching this payload
  Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "word")),
      Collections.singletonList(new BytesRef(new byte[] {0, 0, 0, 7}))); // bytes for integer 7
  // invoke highlight component... the hard way
  final SearchComponent hlComp = h.getCore().getSearchComponent("highlight");
  SolrQueryRequest req = req("hl", "true", "hl.fl", FIELD_NAME, HighlightParams.USE_PHRASE_HIGHLIGHTER, "true");
  try {
    SolrQueryResponse resp = new SolrQueryResponse();
    ResponseBuilder rb = new ResponseBuilder(req, resp, Collections.singletonList(hlComp));
    rb.setHighlightQuery(query);
    rb.setResults(req.getSearcher().getDocListAndSet(query, (DocSet) null, null, 0, 1));
    // highlight:
    hlComp.prepare(rb);
    hlComp.process(rb);
    // inspect response
    final String[] snippets = (String[]) resp.getValues().findRecursive("highlighting", "0", FIELD_NAME);
    assertEquals("<em>word|7</em> word|2", snippets[0]);
  } finally {
    req.close();
  }
}
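The hand-written byte array {0, 0, 0, 7} is the big-endian encoding that IntegerEncoder produces for the token "7" (it delegates to PayloadHelper.encodeInt). A minimal standalone sketch, assuming only lucene-core and lucene-analyzers-common on the classpath, to confirm the equivalence:

import org.apache.lucene.analysis.payloads.IntegerEncoder;
import org.apache.lucene.util.BytesRef;

public class IntegerPayloadDemo {
  public static void main(String[] args) {
    // IntegerEncoder parses the chars as an int and writes it big-endian,
    // matching the literal byte array used in the test above.
    BytesRef encoded = new IntegerEncoder().encode("7".toCharArray());
    BytesRef literal = new BytesRef(new byte[] {0, 0, 0, 7});
    System.out.println(encoded.bytesEquals(literal)); // prints: true
  }
}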
Use of org.apache.lucene.queries.payloads.SpanPayloadCheckQuery in project lucene-solr by apache.
From the class SynonymTokenizer, method testPayloadQuery.
/** We can highlight based on payloads. It's supported both via term vectors and MemoryIndex since Lucene 5. */
public void testPayloadQuery() throws IOException, InvalidTokenOffsetsException {
  // "words" at positions 1 & 4
  final String text = "random words and words";
  // sets payload to "pos: X" (where X is position #)
  Analyzer analyzer = new MockPayloadAnalyzer();
  try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
    writer.deleteAll();
    Document doc = new Document();
    doc.add(new Field(FIELD_NAME, text, fieldType));
    writer.addDocument(doc);
    writer.commit();
  }
  try (IndexReader reader = DirectoryReader.open(dir)) {
    Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")),
        Collections.singletonList(new BytesRef("pos: 1"))); // just match the first "word" occurrence
    IndexSearcher searcher = newSearcher(reader);
    QueryScorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
    scorer.setUsePayloads(true);
    Highlighter h = new Highlighter(scorer);
    TopDocs hits = searcher.search(query, 10);
    assertEquals(1, hits.scoreDocs.length);
    TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), 0, FIELD_NAME, analyzer);
    if (random().nextBoolean()) {
      // conceals detection of TokenStreamFromTermVector
      stream = new CachingTokenFilter(stream);
    }
    String result = h.getBestFragment(stream, text);
    // only highlight first "word"
    assertEquals("random <B>words</B> and words", result);
  }
}
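Per the comments in the test, MockPayloadAnalyzer stamps each token with a "pos: X" payload and the two "words" tokens land at positions 1 and 4, so swapping the payload value should move the highlight to the second occurrence. A hedged sketch of a hypothetical companion method (not in lucene-solr), assuming the same document is indexed as above:

public void testPayloadQueryLastOccurrence() throws IOException, InvalidTokenOffsetsException {
  try (IndexReader reader = DirectoryReader.open(dir)) {
    // "pos: 4" selects the second "words" token (positions 1 & 4 per the test above)
    Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")),
        Collections.singletonList(new BytesRef("pos: 4")));
    IndexSearcher searcher = newSearcher(reader);
    QueryScorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
    scorer.setUsePayloads(true);
    Highlighter h = new Highlighter(scorer);
    TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), 0, FIELD_NAME,
        new MockPayloadAnalyzer());
    // expected: only the last "words" is wrapped
    assertEquals("random words and <B>words</B>",
        h.getBestFragment(stream, "random words and words"));
  }
}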