Use of org.apache.lucene.document.StringField in the Apache lucene-solr project: class TestJoinUtil, method testMinMaxScore.
public void testMinMaxScore() throws Exception {
  String priceField = "price";
  Query priceQuery = numericDocValuesScoreQuery(priceField);
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));
  Map<String, Float> lowestScoresPerParent = new HashMap<>();
  Map<String, Float> highestScoresPerParent = new HashMap<>();
  int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
  for (int p = 0; p < numParents; p++) {
    String parentId = Integer.toString(p);
    Document parentDoc = new Document();
    parentDoc.add(new StringField("id", parentId, Field.Store.YES));
    parentDoc.add(new StringField("type", "to", Field.Store.NO));
    parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
    iw.addDocument(parentDoc);
    int numChildren = RandomNumbers.randomIntBetween(random(), 2, 16);
    int lowest = Integer.MAX_VALUE;
    int highest = Integer.MIN_VALUE;
    for (int c = 0; c < numChildren; c++) {
      String childId = Integer.toString(p + c);
      Document childDoc = new Document();
      childDoc.add(new StringField("id", childId, Field.Store.YES));
      childDoc.add(new StringField("type", "from", Field.Store.NO));
      childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
      int price = random().nextInt(1000);
      childDoc.add(new NumericDocValuesField(priceField, price));
      iw.addDocument(childDoc);
      lowest = Math.min(lowest, price);
      highest = Math.max(highest, price);
    }
    lowestScoresPerParent.put(parentId, (float) lowest);
    highestScoresPerParent.put(parentId, (float) highest);
  }
  iw.close();
  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
  SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
  for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
    values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
  }
  MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
  BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
  fromQuery.add(priceQuery, BooleanClause.Occur.MUST);
  Query toQuery = new TermQuery(new Term("type", "to"));
  Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Min, ordinalMap);
  TopDocs topDocs = searcher.search(joinQuery, numParents);
  assertEquals(numParents, topDocs.totalHits);
  for (int i = 0; i < topDocs.scoreDocs.length; i++) {
    ScoreDoc scoreDoc = topDocs.scoreDocs[i];
    String id = searcher.doc(scoreDoc.doc).get("id");
    assertEquals(lowestScoresPerParent.get(id), scoreDoc.score, 0f);
  }
  joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Max, ordinalMap);
  topDocs = searcher.search(joinQuery, numParents);
  assertEquals(numParents, topDocs.totalHits);
  for (int i = 0; i < topDocs.scoreDocs.length; i++) {
    ScoreDoc scoreDoc = topDocs.scoreDocs[i];
    String id = searcher.doc(scoreDoc.doc).get("id");
    assertEquals(highestScoresPerParent.get(id), scoreDoc.score, 0f);
  }
  searcher.getIndexReader().close();
  dir.close();
}
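The numericDocValuesScoreQuery(priceField) helper used at the top of this test is defined elsewhere in TestJoinUtil and is not shown on this page. A hedged stand-in, assuming the intent is simply to score every matching child document by its numeric doc value, could be built with the function-query module (org.apache.lucene.queries.function); the real helper may be implemented differently:

// Hypothetical sketch only, not the actual TestJoinUtil helper: score each document
// by the long stored in the given NumericDocValuesField ("price" in the test above).
private Query numericDocValuesScoreQuery(String field) {
  return new FunctionQuery(new LongFieldSource(field));
}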
Use of org.apache.lucene.document.StringField in the Apache lucene-solr project: class TestMemoryIndex, method testBuildFromDocument.
@Test
public void testBuildFromDocument() {
  Document doc = new Document();
  doc.add(new TextField("field1", "some text", Field.Store.NO));
  doc.add(new TextField("field1", "some more text", Field.Store.NO));
  doc.add(new StringField("field2", "untokenized text", Field.Store.NO));
  analyzer.setPositionIncrementGap(100);
  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  assertThat(mi.search(new TermQuery(new Term("field1", "text"))), not(0.0f));
  assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f));
  assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f));
  assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f));
  assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f));
  assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f));
}
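The last assertion passes because setPositionIncrementGap(100) keeps the two field1 values apart: "text" (end of the first value) and "some" (start of the second) end up roughly 100 positions from each other, so the cross-value phrase cannot match, while StringField's field2 value is indexed as the single untokenized term "untokenized text". As a purely illustrative, hypothetical follow-up (not part of the original test), appending these lines at the end of the method would show the cross-value phrase matching once the gap is removed, assuming a default gap of 0:

// Hypothetical follow-up: with no position increment gap, "text" and "some" become
// adjacent across the two field1 values, so the phrase now matches.
analyzer.setPositionIncrementGap(0);
MemoryIndex mi2 = MemoryIndex.fromDocument(doc, analyzer);
assertThat(mi2.search(new PhraseQuery("field1", "text", "some")), not(0.0f));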
Use of org.apache.lucene.document.StringField in the Apache lucene-solr project: class TestDocValuesNumbersQuery, method testApproximation.
public void testApproximation() throws IOException {
  final int iters = atLeast(2);
  for (int iter = 0; iter < iters; ++iter) {
    final List<Long> allNumbers = new ArrayList<>();
    final int numNumbers = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
    for (int i = 0; i < numNumbers; ++i) {
      allNumbers.add(random().nextLong());
    }
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    final int numDocs = atLeast(100);
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      final Long number = allNumbers.get(random().nextInt(allNumbers.size()));
      doc.add(new StringField("text", number.toString(), Store.NO));
      doc.add(new NumericDocValuesField("long", number));
      iw.addDocument(doc);
    }
    if (numNumbers > 1 && random().nextBoolean()) {
      iw.deleteDocuments(new TermQuery(new Term("text", allNumbers.get(0).toString())));
    }
    iw.commit();
    final IndexReader reader = iw.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    iw.close();
    if (reader.numDocs() == 0) {
      // may occasionally happen if all documents got the same term
      IOUtils.close(reader, dir);
      continue;
    }
    for (int i = 0; i < 100; ++i) {
      final float boost = random().nextFloat() * 10;
      final int numQueryNumbers = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
      Set<Long> queryNumbers = new HashSet<>();
      for (int j = 0; j < numQueryNumbers; ++j) {
        queryNumbers.add(allNumbers.get(random().nextInt(allNumbers.size())));
      }
      final BooleanQuery.Builder bq = new BooleanQuery.Builder();
      for (Long number : queryNumbers) {
        bq.add(new TermQuery(new Term("text", number.toString())), Occur.SHOULD);
      }
      Query q1 = new BoostQuery(new ConstantScoreQuery(bq.build()), boost);
      final Query q2 = new BoostQuery(new DocValuesNumbersQuery("long", queryNumbers), boost);
      BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
      bq1.add(q1, Occur.MUST);
      bq1.add(new TermQuery(new Term("text", allNumbers.get(0).toString())), Occur.FILTER);
      BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
      bq2.add(q2, Occur.MUST);
      bq2.add(new TermQuery(new Term("text", allNumbers.get(0).toString())), Occur.FILTER);
      assertSameMatches(searcher, bq1.build(), bq2.build(), true);
    }
    reader.close();
    dir.close();
  }
}
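assertSameMatches(...) is a private helper of TestDocValuesNumbersQuery that is not reproduced on this page. A simplified, hypothetical sketch of what such a helper can look like follows; the real implementation may differ, for example in how it orders hits before comparing:

// Hypothetical sketch only: run both queries over the whole index and check that they
// match the same documents and, when requested, assign them the same scores.
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores) throws IOException {
  final int maxDoc = searcher.getIndexReader().maxDoc();
  final TopDocs td1 = searcher.search(q1, maxDoc);
  final TopDocs td2 = searcher.search(q2, maxDoc);
  assertEquals(td1.totalHits, td2.totalHits);
  // compare per document so tie-break ordering between equal scores does not matter
  Map<Integer, Float> hits1 = new HashMap<>();
  for (ScoreDoc sd : td1.scoreDocs) {
    hits1.put(sd.doc, sd.score);
  }
  for (ScoreDoc sd : td2.scoreDocs) {
    Float expected = hits1.get(sd.doc);
    assertNotNull("doc " + sd.doc + " matched only the second query", expected);
    if (scores) {
      assertEquals(expected, sd.score, 1e-5f);
    }
  }
}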
Use of org.apache.lucene.document.StringField in the Apache lucene-solr project: class BasePostingsFormatTestCase, method testPostingsEnumDocsOnly.
public void testPostingsEnumDocsOnly() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(null);
  IndexWriter iw = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(new StringField("foo", "bar", Field.Store.NO));
  iw.addDocument(doc);
  DirectoryReader reader = DirectoryReader.open(iw);
  // sugar method (FREQS)
  PostingsEnum postings = getOnlyLeafReader(reader).postings(new Term("foo", "bar"));
  assertEquals(-1, postings.docID());
  assertEquals(0, postings.nextDoc());
  assertEquals(1, postings.freq());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  // termsenum reuse (FREQS)
  TermsEnum termsEnum = getOnlyLeafReader(reader).terms("foo").iterator();
  termsEnum.seekExact(new BytesRef("bar"));
  PostingsEnum postings2 = termsEnum.postings(postings);
  assertNotNull(postings2);
  assertReused("foo", postings, postings2);
  // and it had better work
  assertEquals(-1, postings.docID());
  assertEquals(0, postings.nextDoc());
  assertEquals(1, postings.freq());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  // asking for any flags: ok
  for (int flag : new int[] { NONE, FREQS, POSITIONS, PAYLOADS, OFFSETS, ALL }) {
    postings = termsEnum.postings(null, flag);
    assertEquals(-1, postings.docID());
    assertEquals(0, postings.nextDoc());
    assertEquals(1, postings.freq());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
    // reuse that too
    postings2 = termsEnum.postings(postings, flag);
    assertNotNull(postings2);
    assertReused("foo", postings, postings2);
    // and it had better work
    assertEquals(-1, postings2.docID());
    assertEquals(0, postings2.nextDoc());
    assertEquals(1, postings2.freq());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
  }
  iw.close();
  reader.close();
  dir.close();
}
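assertReused(...) is another BasePostingsFormatTestCase helper that this page does not show. A hypothetical, simplified sketch is below; the real helper also uses the field name to make allowances for postings formats that legitimately cannot hand back the enum passed in for reuse:

// Hypothetical sketch only: most postings formats return the PostingsEnum that was
// offered for reuse, so the two references should point at the same object. The field
// argument is what the real helper uses to special-case formats that cannot reuse.
protected static void assertReused(String field, PostingsEnum reuse, PostingsEnum returned) {
  assertSame(reuse, returned);
}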
Use of org.apache.lucene.document.StringField in the Apache lucene-solr project: class BasePostingsFormatTestCase, method testLevel2Ghosts.
// tests that level 2 ghost fields still work
public void testLevel2Ghosts() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(null);
  iwc.setCodec(getCodec());
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter iw = new IndexWriter(dir, iwc);
  Document document = new Document();
  document.add(new StringField("id", "0", Field.Store.NO));
  document.add(new StringField("suggest_field", "apples", Field.Store.NO));
  iw.addDocument(document);
  // need another document so whole segment isn't deleted
  iw.addDocument(new Document());
  iw.commit();
  document = new Document();
  document.add(new StringField("id", "1", Field.Store.NO));
  document.add(new StringField("suggest_field2", "apples", Field.Store.NO));
  iw.addDocument(document);
  iw.commit();
  iw.deleteDocuments(new Term("id", "0"));
  // first force merge creates a level 1 ghost field
  iw.forceMerge(1);
  // second force merge creates a level 2 ghost field, causing MultiFields to include
  // "suggest_field" in its iteration, yet a null Terms is returned (no documents have
  // this field anymore)
  iw.addDocument(new Document());
  iw.forceMerge(1);
  DirectoryReader reader = DirectoryReader.open(iw);
  IndexSearcher indexSearcher = new IndexSearcher(reader);
  assertEquals(1, indexSearcher.count(new TermQuery(new Term("id", "1"))));
  reader.close();
  iw.close();
  dir.close();
}