Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache: class MissesTest, method testBooleanQuery.
public void testBooleanQuery() throws IOException, InvalidTokenOffsetsException {
  try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
    final BooleanQuery.Builder query = new BooleanQuery.Builder();
    query.add(new TermQuery(new Term("test", "foo")), Occur.MUST);
    query.add(new TermQuery(new Term("test", "bar")), Occur.MUST);
    final Highlighter highlighter =
        new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query.build()));
    assertEquals("this is a <B>foo</B> <B>bar</B> example",
        highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
    assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
  }
}
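The test above drives the highlighter with MockAnalyzer, which lives in Lucene's test framework. As an illustration (not part of the lucene-solr sources), the same BooleanQuery highlighting flow can be run standalone with the production WhitespaceAnalyzer; the class name below is made up for the sketch.

// Illustrative sketch, not lucene-solr code: the same highlighting flow with
// WhitespaceAnalyzer standing in for the test-framework MockAnalyzer.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class BooleanHighlightSketch {
  public static void main(String[] args) throws Exception {
    try (Analyzer analyzer = new WhitespaceAnalyzer()) {
      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      builder.add(new TermQuery(new Term("test", "foo")), Occur.MUST);
      builder.add(new TermQuery(new Term("test", "bar")), Occur.MUST);
      Highlighter highlighter =
          new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(builder.build()));
      // Prints: this is a <B>foo</B> <B>bar</B> example
      System.out.println(highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
      // Prints: null (no query term occurs in the text, as the assertNull above checks)
      System.out.println(highlighter.getBestFragment(analyzer, "test", "this does not match"));
    }
  }
}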
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache: class MissesTest, method testSpanNearQuery.
public void testSpanNearQuery() throws IOException, InvalidTokenOffsetsException {
  try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
    final Query query = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term("test", "foo")),
        new SpanTermQuery(new Term("test", "bar")) }, 0, true);
    final Highlighter highlighter =
        new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    assertEquals("this is a <B>foo</B> <B>bar</B> example",
        highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
    assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
  }
}
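For reference, a minimal sketch (not from the test) of building the same ordered, zero-slop span query through SpanNearQuery.Builder, which newer code tends to prefer over the array constructor; the class and method names are illustrative.

// Illustrative sketch: the equivalent span query built with SpanNearQuery.Builder,
// using the field name and terms from the test above.
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

public class SpanNearSketch {
  static SpanQuery fooImmediatelyBeforeBar() {
    return new SpanNearQuery.Builder("test", true)  // true = clauses must appear in order
        .addClause(new SpanTermQuery(new Term("test", "foo")))
        .addClause(new SpanTermQuery(new Term("test", "bar")))
        .setSlop(0)                                  // no gap allowed between the terms
        .build();
  }
}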
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache: class MissesTest, method testPhraseQuery.
public void testPhraseQuery() throws IOException, InvalidTokenOffsetsException {
  try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
    final PhraseQuery query = new PhraseQuery("test", "foo", "bar");
    final Highlighter highlighter =
        new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    assertEquals("this is a <B>foo</B> <B>bar</B> example",
        highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
    assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
  }
}
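A small aside, assuming the same field and terms as the test: PhraseQuery also takes a slop, which relaxes the adjacency requirement. A minimal sketch (illustrative helper, not lucene-solr code):

// Illustrative sketch: the same phrase with a slop, so "foo" and "bar" may be
// separated by up to two other tokens and still match.
import org.apache.lucene.search.PhraseQuery;

public class PhraseSketch {
  static PhraseQuery fooNearBar() {
    // PhraseQuery(int slop, String field, String... terms)
    return new PhraseQuery(2, "test", "foo", "bar");
  }
}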
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache: class TokenSourcesTest, method testMaxStartOffsetConsistency.
public void testMaxStartOffsetConsistency() throws IOException {
  FieldType tvFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  tvFieldType.setStoreTermVectors(true);
  tvFieldType.setStoreTermVectorOffsets(true);
  tvFieldType.setStoreTermVectorPositions(true);
  Directory dir = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  // we don't necessarily consume the whole stream because of limiting by startOffset
  analyzer.setEnableChecks(false);
  Document doc = new Document();
  final String TEXT = " f gg h";
  doc.add(new Field("fld_tv", analyzer.tokenStream("fooFld", TEXT), tvFieldType));
  doc.add(new TextField("fld_notv", analyzer.tokenStream("barFld", TEXT)));
  IndexReader reader;
  try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
    writer.addDocument(doc);
    reader = writer.getReader();
  }
  try {
    Fields tvFields = reader.getTermVectors(0);
    for (int maxStartOffset = -1; maxStartOffset <= TEXT.length(); maxStartOffset++) {
      TokenStream tvStream =
          TokenSources.getTokenStream("fld_tv", tvFields, TEXT, analyzer, maxStartOffset);
      TokenStream anaStream =
          TokenSources.getTokenStream("fld_notv", tvFields, TEXT, analyzer, maxStartOffset);
      // assert have same tokens, none of which has a start offset > maxStartOffset
      final OffsetAttribute tvOffAtt = tvStream.addAttribute(OffsetAttribute.class);
      final OffsetAttribute anaOffAtt = anaStream.addAttribute(OffsetAttribute.class);
      tvStream.reset();
      anaStream.reset();
      while (tvStream.incrementToken()) {
        assertTrue(anaStream.incrementToken());
        assertEquals(tvOffAtt.startOffset(), anaOffAtt.startOffset());
        if (maxStartOffset >= 0) {
          assertTrue(tvOffAtt.startOffset() <= maxStartOffset);
        }
      }
      assertTrue(anaStream.incrementToken() == false);
      tvStream.end();
      anaStream.end();
      tvStream.close();
      anaStream.close();
    }
  } finally {
    reader.close();
  }
  dir.close();
}
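As a hedged sketch of the calling pattern this test exercises (the helper class, method name, and field name are illustrative, not from the lucene-solr sources): TokenSources.getTokenStream reuses term-vector tokens when they are available and otherwise re-analyzes the passed-in text, and the test above verifies that in both cases no returned token starts past maxStartOffset when it is non-negative.

// Illustrative sketch: consuming a TokenStream obtained via TokenSources.getTokenStream.
// tvFields may be null when the document stores no term vectors; the text is then
// re-analyzed with the given analyzer.
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.Fields;
import org.apache.lucene.search.highlight.TokenSources;

public class TokenSourcesSketch {
  static void printStartOffsets(Fields tvFields, String text, Analyzer analyzer, int maxStartOffset)
      throws IOException {
    TokenStream ts = TokenSources.getTokenStream("fld_tv", tvFields, text, analyzer, maxStartOffset);
    OffsetAttribute offAtt = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(offAtt.startOffset());
    }
    ts.end();
    ts.close();
  }
}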
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache: class SynonymTokenizer, method testMaxSizeEndHighlight.
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery query = new TermQuery(new Term("text", "searchterm"));
      String text = "this is a text with searchterm in it";
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      Highlighter hg = getHighlighter(query, "text", fm);
      hg.setTextFragmenter(new NullFragmenter());
      hg.setMaxDocCharsToAnalyze(36);
      String match = hg.getBestFragment(
          new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
      assertTrue("Matched text should contain remainder of text after highlighted query ",
          match.endsWith("in it"));
    }
  };
  helper.start();
}
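A minimal standalone sketch of the same Highlighter configuration (NullFragmenter plus setMaxDocCharsToAnalyze), using WhitespaceAnalyzer in place of the stop-word MockAnalyzer from the test; the class name and expected output comment are illustrative assumptions, not lucene-solr code.

// Illustrative sketch: highlighting with NullFragmenter and a cap on how many
// characters are run through the analyzer.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.NullFragmenter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class MaxCharsSketch {
  public static void main(String[] args) throws Exception {
    TermQuery query = new TermQuery(new Term("text", "searchterm"));
    Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    hg.setTextFragmenter(new NullFragmenter());      // keep the whole text as one fragment
    hg.setMaxDocCharsToAnalyze(36);                  // cap the number of characters analyzed
    try (Analyzer analyzer = new WhitespaceAnalyzer()) {
      String text = "this is a text with searchterm in it";
      // Expected output ends with "in it", mirroring the assertion in the test above.
      System.out.println(hg.getBestFragment(analyzer, "text", text));
    }
  }
}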