Use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.
From the class SynonymTokenizer, method testToChildBlockJoinQuery:
public void testToChildBlockJoinQuery() throws Exception {
  BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term(FIELD_NAME, "parent")));
  BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
  booleanQuery.add(new ToChildBlockJoinQuery(new TermQuery(new Term(FIELD_NAME, "parent")), parentFilter), Occur.MUST);
  booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "child")), Occur.MUST);
  query = booleanQuery.build();
  searcher = newSearcher(reader);
  hits = searcher.search(query, 100);
  int maxNumFragmentsRequired = 2;
  QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
  Highlighter highlighter = new Highlighter(this, scorer);
  for (int i = 0; i < hits.totalHits; i++) {
    String text = "parent document";
    final int docId = hits.scoreDocs[i].doc;
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
    highlighter.setTextFragmenter(new SimpleFragmenter(40));
    highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 1);
}
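For readers unfamiliar with the TokenStream consumed by getAnyTokenStream above, Lucene token streams follow a strict lifecycle: reset(), then incrementToken() in a loop, then end() and close(). A minimal sketch of that contract, using a StandardAnalyzer instead of the test's helper (the analyzer and field name here are illustrative assumptions):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamContract {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();
    // tokenStream() may reuse per-thread components, so the lifecycle below
    // (reset, incrementToken loop, end, close) must always be followed.
    try (TokenStream stream = analyzer.tokenStream("contents", "parent document")) {
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        System.out.println(termAtt.toString());
      }
      stream.end(); // records the final offset state
    }
  }
}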
Use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.
From the class SynonymTokenizer, method testDimensionalRangeQuery:
public void testDimensionalRangeQuery() throws Exception {
  // doesn't currently highlight, but make sure it doesn't cause an exception either
  query = IntPoint.newRangeQuery(NUMERIC_FIELD_NAME, 2, 6);
  searcher = newSearcher(reader);
  hits = searcher.search(query, 100);
  int maxNumFragmentsRequired = 2;
  QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
  Highlighter highlighter = new Highlighter(this, scorer);
  for (int i = 0; i < hits.totalHits; i++) {
    String text = searcher.doc(hits.scoreDocs[i].doc).getField(NUMERIC_FIELD_NAME).numericValue().toString();
    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
    highlighter.setTextFragmenter(new SimpleFragmenter(40));
    // String result =
    highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    // if (VERBOSE) System.out.println("\t" + result);
  }
}
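The query above is a points-based range query; point values are encoded outside the inverted index and carry no terms, which is why the highlighter has nothing to mark and the test only checks that no exception is thrown. A minimal sketch of how such a numeric field is typically indexed and queried (the field name is an illustrative assumption):

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.Query;

class IntPointExample {
  // Indexes a searchable point value plus a stored copy, mirroring the
  // test's numericValue() retrieval above.
  static void addDoc(IndexWriter writer, int value) throws IOException {
    Document doc = new Document();
    doc.add(new IntPoint("int-field", value));    // indexed for range/exact queries
    doc.add(new StoredField("int-field", value)); // stored so it can be read back
    writer.addDocument(doc);
  }

  // Matches documents whose value lies in [2, 6], like the test's query.
  static Query rangeQuery() {
    return IntPoint.newRangeQuery("int-field", 2, 6);
  }
}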
Use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.
From the class SynonymTokenizer, method testSimpleSpanFragmenter:
public void testSimpleSpanFragmenter() throws Exception {
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term(FIELD_NAME, "piece"), 0);
  builder.add(new Term(FIELD_NAME, "text"), 2);
  builder.add(new Term(FIELD_NAME, "very"), 5);
  builder.add(new Term(FIELD_NAME, "long"), 6);
  PhraseQuery phraseQuery = builder.build();
  doSearching(phraseQuery);
  int maxNumFragmentsRequired = 2;
  QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
  Highlighter highlighter = new Highlighter(this, scorer);
  for (int i = 0; i < hits.totalHits; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String text = doc.get(FIELD_NAME);
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    if (VERBOSE)
      System.out.println("\t" + result);
  }
  phraseQuery = new PhraseQuery(FIELD_NAME, "been", "shot");
  doSearching(phraseQuery);
  maxNumFragmentsRequired = 2;
  scorer = new QueryScorer(query, FIELD_NAME);
  highlighter = new Highlighter(this, scorer);
  for (int i = 0; i < hits.totalHits; i++) {
    String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    if (VERBOSE)
      System.out.println("\t" + result);
  }
}
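The notable piece here is SimpleSpanFragmenter, which sizes fragments around span matches rather than chopping text at a fixed length as SimpleFragmenter does. A standalone sketch of the same wiring outside the test fixture (the default formatter and the helper's shape are assumptions, not the test's exact setup):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;

class SpanHighlight {
  // Returns up to maxFragments highlighted fragments of `text` for `query`,
  // joined by "...", using span-aware fragment boundaries.
  static String highlight(Query query, Analyzer analyzer, String field, String text, int maxFragments)
      throws IOException, InvalidTokenOffsetsException {
    QueryScorer scorer = new QueryScorer(query, field);
    Highlighter highlighter = new Highlighter(scorer); // default <B>...</B> formatter
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
    TokenStream tokenStream = analyzer.tokenStream(field, text);
    return highlighter.getBestFragments(tokenStream, text, maxFragments, "...");
  }
}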
Use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.
From the class SynonymTokenizer, method testSimpleQueryScorerPhraseHighlighting:
public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term(FIELD_NAME, "very"), 0);
  builder.add(new Term(FIELD_NAME, "long"), 1);
  builder.add(new Term(FIELD_NAME, "contains"), 3);
  PhraseQuery phraseQuery = builder.build();
  doSearching(phraseQuery);
  int maxNumFragmentsRequired = 2;
  QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
  Highlighter highlighter = new Highlighter(this, scorer);
  for (int i = 0; i < hits.totalHits; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String text = doc.get(FIELD_NAME);
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
    highlighter.setTextFragmenter(new SimpleFragmenter(40));
    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    if (VERBOSE)
      System.out.println("\t" + result);
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 3);
  numHighlights = 0;
  builder = new PhraseQuery.Builder();
  builder.add(new Term(FIELD_NAME, "piece"), 1);
  builder.add(new Term(FIELD_NAME, "text"), 3);
  builder.add(new Term(FIELD_NAME, "refers"), 4);
  builder.add(new Term(FIELD_NAME, "kennedy"), 6);
  phraseQuery = builder.build();
  doSearching(phraseQuery);
  maxNumFragmentsRequired = 2;
  scorer = new QueryScorer(query, FIELD_NAME);
  highlighter = new Highlighter(this, scorer);
  for (int i = 0; i < hits.totalHits; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String text = doc.get(FIELD_NAME);
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
    highlighter.setTextFragmenter(new SimpleFragmenter(40));
    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    if (VERBOSE)
      System.out.println("\t" + result);
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
  numHighlights = 0;
  builder = new PhraseQuery.Builder();
  builder.add(new Term(FIELD_NAME, "lets"), 0);
  builder.add(new Term(FIELD_NAME, "lets"), 4);
  builder.add(new Term(FIELD_NAME, "lets"), 8);
  builder.add(new Term(FIELD_NAME, "lets"), 12);
  phraseQuery = builder.build();
  doSearching(phraseQuery);
  maxNumFragmentsRequired = 2;
  scorer = new QueryScorer(query, FIELD_NAME);
  highlighter = new Highlighter(this, scorer);
  for (int i = 0; i < hits.totalHits; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String text = doc.get(FIELD_NAME);
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
    highlighter.setTextFragmenter(new SimpleFragmenter(40));
    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    if (VERBOSE)
      System.out.println("\t" + result);
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
}
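Note that PhraseQuery.Builder takes explicit positions, so gaps between positions allow intervening tokens: with "very" at 0, "long" at 1, and "contains" at 3, one arbitrary token may sit between "long" and "contains". A minimal sketch contrasting the two construction styles used in these tests (the field name is an illustrative assumption):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;

class PhraseQueryShapes {
  static PhraseQuery withGaps() {
    // Explicit positions: "contains" sits two positions after "long",
    // so one arbitrary token may appear between them.
    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.add(new Term("contents", "very"), 0);
    builder.add(new Term("contents", "long"), 1);
    builder.add(new Term("contents", "contains"), 3);
    return builder.build();
  }

  static PhraseQuery exactAdjacency() {
    // Convenience constructor: terms at consecutive positions,
    // exact adjacency required.
    return new PhraseQuery("contents", "been", "shot");
  }
}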
Use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.
From the class MemoryIndex, method storeTerms:
private void storeTerms(Info info, TokenStream tokenStream, int positionIncrementGap, int offsetGap) {
  int pos = -1;
  int offset = 0;
  if (info.numTokens > 0) {
    // field already has values: continue positions/offsets after the configured gaps
    pos = info.lastPosition + positionIncrementGap;
    offset = info.lastOffset + offsetGap;
  }
  try (TokenStream stream = tokenStream) {
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
    PayloadAttribute payloadAtt = storePayloads ? stream.addAttribute(PayloadAttribute.class) : null;
    stream.reset();
    while (stream.incrementToken()) {
      // if (DEBUG) System.err.println("token='" + term + "'");
      info.numTokens++;
      final int posIncr = posIncrAttribute.getPositionIncrement();
      if (posIncr == 0) {
        info.numOverlapTokens++;
      }
      pos += posIncr;
      int ord = info.terms.add(termAtt.getBytesRef());
      if (ord < 0) {
        // term already seen: BytesRefHash returns -(ord + 1), so recover the
        // slot and append to the existing postings slice
        ord = (-ord) - 1;
        postingsWriter.reset(info.sliceArray.end[ord]);
      } else {
        // first occurrence of this term: start a new postings slice
        info.sliceArray.start[ord] = postingsWriter.startNewSlice();
      }
      info.sliceArray.freq[ord]++;
      info.sumTotalTermFreq++;
      postingsWriter.writeInt(pos);
      if (storeOffsets) {
        postingsWriter.writeInt(offsetAtt.startOffset() + offset);
        postingsWriter.writeInt(offsetAtt.endOffset() + offset);
      }
      if (storePayloads) {
        final BytesRef payload = payloadAtt.getPayload();
        final int pIndex;
        if (payload == null || payload.length == 0) {
          pIndex = -1;
        } else {
          pIndex = payloadsBytesRefs.append(payload);
        }
        postingsWriter.writeInt(pIndex);
      }
      info.sliceArray.end[ord] = postingsWriter.getCurrentOffset();
    }
    stream.end();
    if (info.numTokens > 0) {
      // remember where this field left off, for the next value's gap handling
      info.lastPosition = pos;
      info.lastOffset = offsetAtt.endOffset() + offset;
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
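storeTerms is a private building block; its effect is visible through MemoryIndex's public API, where addField analyzes text into the in-memory postings and search scores a query against the single implicit document. A short usage sketch (the analyzer, field name, and example text are assumptions):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

class MemoryIndexUsage {
  public static void main(String[] args) {
    MemoryIndex index = new MemoryIndex();
    // addField() tokenizes the text and feeds the TokenStream to storeTerms().
    index.addField("content", "readings about salmon fishing", new StandardAnalyzer());
    // search() scores the query against the single in-memory document;
    // a score > 0 means a match.
    float score = index.search(new TermQuery(new Term("content", "fishing")));
    System.out.println(score > 0 ? "match, score=" + score : "no match");
  }
}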