Example usage of org.apache.lucene.document.FloatDocValuesField in the lucene-solr project (Apache), taken from the method testFloatMissing of class TestSort.
/** Tests sorting on type float with a missing value */
public void testFloatMissing() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

    // First document deliberately carries no "value" field at all.
    iw.addDocument(new Document());

    Document negativeDoc = new Document();
    negativeDoc.add(new FloatDocValuesField("value", -1.3F));
    negativeDoc.add(newStringField("value", "-1.3", Field.Store.YES));
    iw.addDocument(negativeDoc);

    Document positiveDoc = new Document();
    positiveDoc.add(new FloatDocValuesField("value", 4.2F));
    positiveDoc.add(newStringField("value", "4.2", Field.Store.YES));
    iw.addDocument(positiveDoc);

    IndexReader reader = iw.getReader();
    iw.close();

    IndexSearcher searcher = newSearcher(reader);
    Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT));
    TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
    assertEquals(3, td.totalHits);

    // With no explicit missing value configured, the field-less document
    // sorts as 0, i.e. between -1.3 and 4.2.
    assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
    assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
    assertEquals("4.2", searcher.doc(td.scoreDocs[2].doc).get("value"));

    reader.close();
    dir.close();
}
Example usage of org.apache.lucene.document.FloatDocValuesField in the lucene-solr project (Apache), taken from the method testLTRScoringQuery of class TestLTRScoringQuery.
/**
 * Exercises {@link LTRScoringQuery} end to end: indexes two small documents,
 * runs a plain BooleanQuery, then verifies (1) feature extraction and
 * normalized feature values, (2) that features declared in mixed order keep
 * that order, (3) that a model with no features fails validation, and
 * (4) that a custom normalizer is applied to every feature value.
 */
@Test
public void testLTRScoringQuery() throws IOException, ModelException {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 1.0f));
    w.addDocument(doc);

    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 2.0f));
    w.addDocument(doc);

    final IndexReader r = w.getReader();
    w.close();

    // Do ordinary BooleanQuery:
    final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
    bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
    bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
    final IndexSearcher searcher = getSearcher(r);

    // first run the standard query
    final TopDocs hits = searcher.search(bqBuilder.build(), 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));

    // Model over features f0..f2; IdentityNormalizer leaves raw values untouched.
    List<Feature> features = makeFeatures(new int[] { 0, 1, 2 });
    final List<Feature> allFeatures = makeFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
    List<Normalizer> norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
    LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));

    LTRScoringQuery.ModelWeight modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(ltrScoringModel));
    assertEquals(3, modelWeight.getModelFeatureValuesNormalized().length);
    // Each feature fi is expected to evaluate to value i — assumes makeFeatures
    // builds constant-valued features; TODO confirm against the helper.
    for (int i = 0; i < 3; i++) {
      assertEquals(i, modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
    }

    // The non-null FeatureInfo entries must be exactly f0..f2, in order.
    int[] posVals = new int[] { 0, 1, 2 };
    int pos = 0;
    for (LTRScoringQuery.FeatureInfo fInfo : modelWeight.getFeaturesInfo()) {
      if (fInfo == null) {
        continue;
      }
      assertEquals(posVals[pos], fInfo.getValue(), 0.0001);
      assertEquals("f" + posVals[pos], fInfo.getName());
      pos++;
    }

    // Features declared in an arbitrary (mixed) order must keep that order.
    final int[] mixPositions = new int[] { 8, 2, 4, 9, 0 };
    features = makeFeatures(mixPositions);
    norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
    ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
    modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(ltrScoringModel));
    assertEquals(mixPositions.length, modelWeight.getModelFeatureWeights().length);
    for (int i = 0; i < mixPositions.length; i++) {
      assertEquals(mixPositions[i], modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
    }

    // A model with an empty feature list must be rejected at creation time.
    final ModelException expectedModelException = new ModelException("no features declared for model test");
    final int[] noPositions = new int[] {};
    features = makeFeatures(noPositions);
    norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
    try {
      ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
      fail("unexpectedly got here instead of catching " + expectedModelException);
      // NOTE(review): the original test performed a query and an assertion
      // here, but fail() throws AssertionError, so that code was unreachable
      // dead code and has been removed.
    } catch (ModelException actualModelException) {
      assertEquals(expectedModelException.toString(), actualModelException.toString());
    }

    // test normalizers: a constant normalizer must override every raw value.
    features = makeFilterFeatures(mixPositions);
    final Normalizer norm = new Normalizer() {

      @Override
      public float normalize(float value) {
        return 42.42f;
      }

      @Override
      public LinkedHashMap<String, Object> paramsToMap() {
        return null;
      }

      @Override
      protected void validate() throws NormalizerException {
      }
    };
    norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), norm));
    final LTRScoringModel normMeta = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));

    modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(normMeta));
    normMeta.normalizeFeaturesInPlace(modelWeight.getModelFeatureValuesNormalized());
    assertEquals(mixPositions.length, modelWeight.getModelFeatureWeights().length);
    for (int i = 0; i < mixPositions.length; i++) {
      assertEquals(42.42f, modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
    }

    r.close();
    dir.close();
}
Example usage of org.apache.lucene.document.FloatDocValuesField in the lucene-solr project (Apache), taken from the method testDifferentTopN of class TestLTRReRankingPipeline.
// Verifies that LTRRescorer.rescore honours the topN cutoff: only the first
// topN hits are reranked, and — since the model scores by the "final-score"
// docvalue, which increases with doc id — the reranked slice comes back in
// reverse id order with score == final-score. NOTE(review): test is @Ignore'd
// upstream; the expected ordering may not hold — confirm before re-enabling.
@Ignore
@Test
public void testDifferentTopN() throws IOException {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
// Five docs: relevance to the BooleanQuery decreases with id (fewer "oz"
// tokens), while "final-score" increases with id (1.0f .. 5.0f).
Document doc = new Document();
doc.add(newStringField("id", "0", Field.Store.YES));
doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 1.0f));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "1", Field.Store.YES));
doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 2.0f));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "2", Field.Store.YES));
doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 3.0f));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "3", Field.Store.YES));
doc.add(newTextField("field", "wizard oz oz the the the the ", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 4.0f));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "4", Field.Store.YES));
doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 5.0f));
w.addDocument(doc);
final IndexReader r = w.getReader();
w.close();
// Do ordinary BooleanQuery:
final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
final IndexSearcher searcher = getSearcher(r);
// first run the standard query
TopDocs hits = searcher.search(bqBuilder.build(), 10);
assertEquals(5, hits.totalHits);
// Baseline retrieval order: ids ascending (0 has the most "oz" tokens).
assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
// Linear model whose features read the "final-score" docvalue, so the
// rescored score of a doc equals its final-score value.
final List<Feature> features = makeFieldValueFeatures(new int[] { 0, 1, 2 }, "final-score");
final List<Normalizer> norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
final List<Feature> allFeatures = makeFieldValueFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, "final-score");
final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, null);
final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
// rerank @ 0 should not change the order
hits = rescorer.rescore(searcher, hits, 0);
assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
// For each topN, rerank only the first topN hits and expect them reversed:
// position j should hold id topN-1-j with score id+1 (its final-score).
for (int topN = 1; topN <= 5; topN++) {
log.info("rerank {} documents ", topN);
hits = searcher.search(bqBuilder.build(), 10);
final ScoreDoc[] slice = new ScoreDoc[topN];
System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
hits = new TopDocs(hits.totalHits, slice, hits.getMaxScore());
hits = rescorer.rescore(searcher, hits, topN);
for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc).get("id"), j);
assertEquals(i, Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
}
}
r.close();
dir.close();
}
Example usage of org.apache.lucene.document.FloatDocValuesField in the lucene-solr project (Apache), taken from the method testSort of class TestTopDocsMerge.
// Randomized distributed-search test: builds an index with string/int/float
// docvalues, searches it both as one whole-index IndexSearcher and as
// per-segment ShardSearchers, then checks that TopDocs.merge over the shard
// results reproduces the whole-index top hits exactly — with and without a
// from/size (pagination) window, for score, doc, and field sorts.
//
// @param useFrom when true, exercise the merge overloads that take a
//                from/size window instead of a plain numHits.
void testSort(boolean useFrom) throws Exception {
IndexReader reader = null;
Directory dir = null;
final int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
final String[] tokens = new String[] { "a", "b", "c", "d", "e" };
if (VERBOSE) {
System.out.println("TEST: make index");
}
{
dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
// w.setDoRandomForceMerge(false);
// w.w.getConfig().setMaxBufferedDocs(atLeast(100));
// Pre-generate a pool of random token strings to draw doc text from.
final String[] content = new String[atLeast(20)];
for (int contentIDX = 0; contentIDX < content.length; contentIDX++) {
final StringBuilder sb = new StringBuilder();
final int numTokens = TestUtil.nextInt(random(), 1, 10);
for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++) {
sb.append(tokens[random().nextInt(tokens.length)]).append(' ');
}
content[contentIDX] = sb.toString();
}
for (int docIDX = 0; docIDX < numDocs; docIDX++) {
final Document doc = new Document();
doc.add(new SortedDocValuesField("string", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
doc.add(newTextField("text", content[random().nextInt(content.length)], Field.Store.NO));
doc.add(new FloatDocValuesField("float", random().nextFloat()));
// Occasionally plant int extremes so sort boundary values get covered.
final int intValue;
if (random().nextInt(100) == 17) {
intValue = Integer.MIN_VALUE;
} else if (random().nextInt(100) == 17) {
intValue = Integer.MAX_VALUE;
} else {
intValue = random().nextInt();
}
doc.add(new NumericDocValuesField("int", intValue));
if (VERBOSE) {
System.out.println("  doc=" + doc);
}
w.addDocument(doc);
}
reader = w.getReader();
w.close();
}
// NOTE: sometimes reader has just one segment, which is
// important to test
final IndexSearcher searcher = newSearcher(reader);
final IndexReaderContext ctx = searcher.getTopReaderContext();
// Build one ShardSearcher per leaf (segment), recording each shard's
// docBase so merged shard-relative hits can be mapped back.
final ShardSearcher[] subSearchers;
final int[] docStarts;
if (ctx instanceof LeafReaderContext) {
subSearchers = new ShardSearcher[1];
docStarts = new int[1];
subSearchers[0] = new ShardSearcher((LeafReaderContext) ctx, ctx);
docStarts[0] = 0;
} else {
final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
final int size = compCTX.leaves().size();
subSearchers = new ShardSearcher[size];
docStarts = new int[size];
int docBase = 0;
for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
docStarts[searcherIDX] = docBase;
docBase += leave.reader().maxDoc();
}
}
// Candidate sort fields; each iteration samples 1-3 of these at random.
final List<SortField> sortFields = new ArrayList<>();
sortFields.add(new SortField("string", SortField.Type.STRING, true));
sortFields.add(new SortField("string", SortField.Type.STRING, false));
sortFields.add(new SortField("int", SortField.Type.INT, true));
sortFields.add(new SortField("int", SortField.Type.INT, false));
sortFields.add(new SortField("float", SortField.Type.FLOAT, true));
sortFields.add(new SortField("float", SortField.Type.FLOAT, false));
sortFields.add(new SortField(null, SortField.Type.SCORE, true));
sortFields.add(new SortField(null, SortField.Type.SCORE, false));
sortFields.add(new SortField(null, SortField.Type.DOC, true));
sortFields.add(new SortField(null, SortField.Type.DOC, false));
int numIters = atLeast(300);
for (int iter = 0; iter < numIters; iter++) {
// TODO: custom FieldComp...
final Query query = new TermQuery(new Term("text", tokens[random().nextInt(tokens.length)]));
final Sort sort;
if (random().nextInt(10) == 4) {
// Sort by score
sort = null;
} else {
final SortField[] randomSortFields = new SortField[TestUtil.nextInt(random(), 1, 3)];
for (int sortIDX = 0; sortIDX < randomSortFields.length; sortIDX++) {
randomSortFields[sortIDX] = sortFields.get(random().nextInt(sortFields.size()));
}
sort = new Sort(randomSortFields);
}
final int numHits = TestUtil.nextInt(random(), 1, numDocs + 5);
if (VERBOSE) {
System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
}
int from = -1;
int size = -1;
// First search on whole index:
final TopDocs topHits;
if (sort == null) {
if (useFrom) {
TopScoreDocCollector c = TopScoreDocCollector.create(numHits);
searcher.search(query, c);
from = TestUtil.nextInt(random(), 0, numHits - 1);
size = numHits - from;
TopDocs tempTopHits = c.topDocs();
if (from < tempTopHits.scoreDocs.length) {
// Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
// than TopDocs#merge currently has
ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
tempTopHits.scoreDocs = newScoreDocs;
topHits = tempTopHits;
} else {
// from is past the hit count: expected page is empty.
topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
}
} else {
topHits = searcher.search(query, numHits);
}
} else {
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
searcher.search(query, c);
if (useFrom) {
from = TestUtil.nextInt(random(), 0, numHits - 1);
size = numHits - from;
TopDocs tempTopHits = c.topDocs();
if (from < tempTopHits.scoreDocs.length) {
// Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
// than TopDocs#merge currently has
ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
tempTopHits.scoreDocs = newScoreDocs;
topHits = tempTopHits;
} else {
topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
}
} else {
topHits = c.topDocs(0, numHits);
}
}
if (VERBOSE) {
if (useFrom) {
System.out.println("from=" + from + " size=" + size);
}
System.out.println("  top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length + " maxScore=" + topHits.getMaxScore()));
if (topHits.scoreDocs != null) {
for (int hitIDX = 0; hitIDX < topHits.scoreDocs.length; hitIDX++) {
final ScoreDoc sd = topHits.scoreDocs[hitIDX];
System.out.println("    doc=" + sd.doc + " score=" + sd.score);
}
}
}
// ... then all shards:
final Weight w = searcher.createNormalizedWeight(query, true);
// Must allocate TopFieldDocs[] for field sorts so the cast below is valid.
final TopDocs[] shardHits;
if (sort == null) {
shardHits = new TopDocs[subSearchers.length];
} else {
shardHits = new TopFieldDocs[subSearchers.length];
}
for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
final TopDocs subHits;
final ShardSearcher subSearcher = subSearchers[shardIDX];
if (sort == null) {
subHits = subSearcher.search(w, numHits);
} else {
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
subSearcher.search(w, c);
subHits = c.topDocs(0, numHits);
}
shardHits[shardIDX] = subHits;
if (VERBOSE) {
System.out.println("  shard=" + shardIDX + " " + subHits.totalHits + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
if (subHits.scoreDocs != null) {
for (ScoreDoc sd : subHits.scoreDocs) {
System.out.println("    doc=" + sd.doc + " score=" + sd.score);
}
}
}
}
// Merge:
final TopDocs mergedHits;
if (useFrom) {
if (sort == null) {
mergedHits = TopDocs.merge(from, size, shardHits, true);
} else {
mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits, true);
}
} else {
if (sort == null) {
mergedHits = TopDocs.merge(numHits, shardHits);
} else {
mergedHits = TopDocs.merge(sort, numHits, (TopFieldDocs[]) shardHits);
}
}
if (mergedHits.scoreDocs != null) {
// Make sure the returned shards are correct:
for (int hitIDX = 0; hitIDX < mergedHits.scoreDocs.length; hitIDX++) {
final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
assertEquals("doc=" + sd.doc + " wrong shard", ReaderUtil.subIndex(sd.doc, docStarts), sd.shardIndex);
}
}
// Merged shard results must match the single whole-index search exactly.
TestUtil.assertEquals(topHits, mergedHits);
}
reader.close();
dir.close();
}
Example usage of org.apache.lucene.document.FloatDocValuesField in the lucene-solr project (Apache), taken from the method testFloatMissingLast of class TestSort.
/** Tests sorting on type float, specifying the missing value should be treated as Float.MAX_VALUE */
public void testFloatMissingLast() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

    // First document deliberately carries no "value" field at all.
    iw.addDocument(new Document());

    Document negativeDoc = new Document();
    negativeDoc.add(new FloatDocValuesField("value", -1.3F));
    negativeDoc.add(newStringField("value", "-1.3", Field.Store.YES));
    iw.addDocument(negativeDoc);

    Document positiveDoc = new Document();
    positiveDoc.add(new FloatDocValuesField("value", 4.2F));
    positiveDoc.add(newStringField("value", "4.2", Field.Store.YES));
    iw.addDocument(positiveDoc);

    IndexReader reader = iw.getReader();
    iw.close();

    IndexSearcher searcher = newSearcher(reader);
    SortField sortField = new SortField("value", SortField.Type.FLOAT);
    sortField.setMissingValue(Float.MAX_VALUE);
    TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, new Sort(sortField));
    assertEquals(3, td.totalHits);

    // Missing value is Float.MAX_VALUE, so the field-less document sorts last.
    assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
    assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value"));
    assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));

    reader.close();
    dir.close();
}
Aggregations