use of org.apache.lucene.document.FieldType in project elasticsearch by elastic.
the class BlendedTermQueryTests method testDismaxQuery.
public void testDismaxQuery() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
String[] username = new String[] { "foo fighters", "some cool fan", "cover band" };
String[] song = new String[] { "generator", "foo fighers - generator", "foo fighters generator" };
final boolean omitNorms = random().nextBoolean();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS);
ft.setOmitNorms(omitNorms);
ft.freeze();
FieldType ft1 = new FieldType(TextField.TYPE_NOT_STORED);
ft1.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS);
ft1.setOmitNorms(omitNorms);
ft1.freeze();
for (int i = 0; i < username.length; i++) {
Document d = new Document();
d.add(new TextField("id", Integer.toString(i), Field.Store.YES));
d.add(new Field("username", username[i], ft));
d.add(new Field("song", song[i], ft));
w.addDocument(d);
}
int iters = scaledRandomIntBetween(25, 100);
for (int j = 0; j < iters; j++) {
Document d = new Document();
d.add(new TextField("id", Integer.toString(username.length + j), Field.Store.YES));
d.add(new Field("username", "foo fighters", ft1));
d.add(new Field("song", "some bogus text to bump up IDF", ft1));
w.addDocument(d);
}
w.commit();
DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = setSimilarity(newSearcher(reader));
{
String[] fields = new String[] { "username", "song" };
BooleanQuery.Builder query = new BooleanQuery.Builder();
query.setDisableCoord(true);
query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 0.1f), BooleanClause.Occur.SHOULD);
query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "fighters"), 0.1f), BooleanClause.Occur.SHOULD);
query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "generator"), 0.1f), BooleanClause.Occur.SHOULD);
TopDocs search = searcher.search(query.build(), 10);
ScoreDoc[] scoreDocs = search.scoreDocs;
assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue());
}
{
BooleanQuery.Builder query = new BooleanQuery.Builder();
query.setDisableCoord(true);
DisjunctionMaxQuery uname = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "foo")), new TermQuery(new Term("song", "foo"))), 0.0f);
DisjunctionMaxQuery s = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "fighers")), new TermQuery(new Term("song", "fighers"))), 0.0f);
DisjunctionMaxQuery gen = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "generator")), new TermQuery(new Term("song", "generator"))), 0f);
query.add(uname, BooleanClause.Occur.SHOULD);
query.add(s, BooleanClause.Occur.SHOULD);
query.add(gen, BooleanClause.Occur.SHOULD);
TopDocs search = searcher.search(query.build(), 4);
ScoreDoc[] scoreDocs = search.scoreDocs;
assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue());
}
reader.close();
w.close();
dir.close();
}
use of org.apache.lucene.document.FieldType in project elasticsearch by elastic.
the class CustomPostingsHighlighterTests method testCustomPostingsHighlighter.
public void testCustomPostingsHighlighter() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
//good position but only one match
final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
Field body = new Field("body", "", offsetsType);
Document doc = new Document();
doc.add(body);
body.setStringValue(firstValue);
//two matches, not the best snippet due to its length though
final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
Field body2 = new Field("body", "", offsetsType);
doc.add(body2);
body2.setStringValue(secondValue);
//two matches and short, will be scored highest
final String thirdValue = "This is highlighting the third short highlighting value.";
Field body3 = new Field("body", "", offsetsType);
doc.add(body3);
body3.setStringValue(thirdValue);
//one match, same as first but at the end, will be scored lower due to its position
final String fourthValue = "Just a test4 highlighting from postings highlighter.";
Field body4 = new Field("body", "", offsetsType);
doc.add(body4);
body4.setStringValue(fourthValue);
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";
IndexSearcher searcher = newSearcher(ir);
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1));
int docId = topDocs.scoreDocs[0].doc;
String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
assertThat(snippets.length, equalTo(4));
assertThat(snippets[0].getText(), equalTo(firstHlValue));
assertThat(snippets[1].getText(), equalTo(secondHlValue));
assertThat(snippets[2].getText(), equalTo(thirdHlValue));
assertThat(snippets[3].getText(), equalTo(fourthHlValue));
ir.close();
dir.close();
}
use of org.apache.lucene.document.FieldType in project elasticsearch by elastic.
the class VectorHighlighterTests method testVectorHighlighter.
public void testVectorHighlighter() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
Document document = new Document();
document.add(new TextField("_id", "1", Field.Store.YES));
FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
vectorsType.setStoreTermVectors(true);
vectorsType.setStoreTermVectorPositions(true);
vectorsType.setStoreTermVectorOffsets(true);
document.add(new Field("content", "the big bad dog", vectorsType));
indexWriter.addDocument(document);
IndexReader reader = DirectoryReader.open(indexWriter);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
assertThat(topDocs.totalHits, equalTo(1));
FastVectorHighlighter highlighter = new FastVectorHighlighter();
String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader, topDocs.scoreDocs[0].doc, "content", 30);
assertThat(fragment, notNullValue());
assertThat(fragment, equalTo("the big <b>bad</b> dog"));
}
use of org.apache.lucene.document.FieldType in project elasticsearch by elastic.
the class SimpleAllTests method testMultipleTokensAllNoBoost.
public void testMultipleTokensAllNoBoost() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
FieldType allFt = getAllFieldType();
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
doc.add(new AllField("_all", "something moo", 1.0f, allFt));
doc.add(new AllField("_all", "else koo", 1.0f, allFt));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
doc.add(new AllField("_all", "else koo", 1.0f, allFt));
doc.add(new AllField("_all", "something moo", 1.0f, allFt));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
indexWriter.close();
}
use of org.apache.lucene.document.FieldType in project elasticsearch by elastic.
the class SimpleAllTests method testMultipleTokensAllWithBoost.
public void testMultipleTokensAllWithBoost() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
FieldType allFt = getAllFieldType();
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
doc.add(new AllField("_all", "something moo", 1.0f, allFt));
doc.add(new AllField("_all", "else koo", 1.0f, allFt));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
doc.add(new AllField("_all", "else koo", 2.0f, allFt));
doc.add(new AllField("_all", "something moo", 1.0f, allFt));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(1));
assertThat(docs.scoreDocs[1].doc, equalTo(0));
docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(1));
assertThat(docs.scoreDocs[1].doc, equalTo(0));
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
indexWriter.close();
}
Aggregations