Search in sources :

Example 16 with FloatDocValuesField

use of org.apache.lucene.document.FloatDocValuesField in project lucene-solr by apache.

the class TestSelectiveWeightCreation method testScoringQueryWeightCreation.

@Test
public void testScoringQueryWeightCreation() throws IOException, ModelException {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newStringField("id", "10", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 1.0f));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "11", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 2.0f));
    w.addDocument(doc);
    final IndexReader r = w.getReader();
    w.close();
    // Do ordinary BooleanQuery:
    final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
    bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
    bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
    final IndexSearcher searcher = getSearcher(r);
    // first run the standard query
    final TopDocs hits = searcher.search(bqBuilder.build(), 10);
    assertEquals(2, hits.totalHits);
    assertEquals("10", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("11", searcher.doc(hits.scoreDocs[1].doc).get("id"));
    List<Feature> features = makeFeatures(new int[] { 0, 1, 2 });
    final List<Feature> allFeatures = makeFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
    final List<Normalizer> norms = new ArrayList<>();
    for (int k = 0; k < features.size(); ++k) {
        norms.add(IdentityNormalizer.INSTANCE);
    }
    // when features are NOT requested in the response, only the modelFeature weights should be created
    final LTRScoringModel ltrScoringModel1 = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
    LTRScoringQuery.ModelWeight modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, // features not requested in response
    new LTRScoringQuery(ltrScoringModel1, false));
    LTRScoringQuery.FeatureInfo[] featuresInfo = modelWeight.getFeaturesInfo();
    assertEquals(features.size(), modelWeight.getModelFeatureValuesNormalized().length);
    int validFeatures = 0;
    for (int i = 0; i < featuresInfo.length; ++i) {
        if (featuresInfo[i] != null && featuresInfo[i].isUsed()) {
            validFeatures += 1;
        }
    }
    assertEquals(validFeatures, features.size());
    // when features are requested in the response, weights should be created for all features
    final LTRScoringModel ltrScoringModel2 = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
    modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, // features requested in response
    new LTRScoringQuery(ltrScoringModel2, true));
    featuresInfo = modelWeight.getFeaturesInfo();
    assertEquals(features.size(), modelWeight.getModelFeatureValuesNormalized().length);
    assertEquals(allFeatures.size(), modelWeight.getExtractedFeatureWeights().length);
    validFeatures = 0;
    for (int i = 0; i < featuresInfo.length; ++i) {
        if (featuresInfo[i] != null && featuresInfo[i].isUsed()) {
            validFeatures += 1;
        }
    }
    assertEquals(validFeatures, allFeatures.size());
    assertU(delI("10"));
    assertU(delI("11"));
    r.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Normalizer(org.apache.solr.ltr.norm.Normalizer) IdentityNormalizer(org.apache.solr.ltr.norm.IdentityNormalizer) ArrayList(java.util.ArrayList) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) ValueFeature(org.apache.solr.ltr.feature.ValueFeature) Feature(org.apache.solr.ltr.feature.Feature) TopDocs(org.apache.lucene.search.TopDocs) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) LTRScoringModel(org.apache.solr.ltr.model.LTRScoringModel) Directory(org.apache.lucene.store.Directory) Test(org.junit.Test)

Example 17 with FloatDocValuesField

use of org.apache.lucene.document.FloatDocValuesField in project lucene-solr by apache.

the class TestLTRReRankingPipeline method testRescorer.

@Ignore
@Test
public void testRescorer() throws IOException {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 1.0f));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 2.0f));
    w.addDocument(doc);
    final IndexReader r = w.getReader();
    w.close();
    // Do ordinary BooleanQuery:
    final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
    bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
    bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
    final IndexSearcher searcher = getSearcher(r);
    // first run the standard query
    TopDocs hits = searcher.search(bqBuilder.build(), 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
    final List<Feature> features = makeFieldValueFeatures(new int[] { 0, 1, 2 }, "final-score");
    final List<Normalizer> norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
    final List<Feature> allFeatures = makeFieldValueFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, "final-score");
    final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, null);
    final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
    hits = rescorer.rescore(searcher, hits, 2);
    // rerank using the field final-score
    assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
    r.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Normalizer(org.apache.solr.ltr.norm.Normalizer) IdentityNormalizer(org.apache.solr.ltr.norm.IdentityNormalizer) ArrayList(java.util.ArrayList) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldValueFeature(org.apache.solr.ltr.feature.FieldValueFeature) Feature(org.apache.solr.ltr.feature.Feature) TopDocs(org.apache.lucene.search.TopDocs) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) LTRScoringModel(org.apache.solr.ltr.model.LTRScoringModel) Directory(org.apache.lucene.store.Directory) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 18 with FloatDocValuesField

use of org.apache.lucene.document.FloatDocValuesField in project lucene-solr by apache.

the class TestIndexSorting method testBasicFloat.

public void testBasicFloat() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
    iwc.setIndexSort(indexSort);
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new FloatDocValuesField("foo", 18.0f));
    w.addDocument(doc);
    // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
    w.commit();
    doc = new Document();
    doc.add(new FloatDocValuesField("foo", -1.0f));
    w.addDocument(doc);
    w.commit();
    doc = new Document();
    doc.add(new FloatDocValuesField("foo", 7.0f));
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    LeafReader leaf = getOnlyLeafReader(r);
    assertEquals(3, leaf.maxDoc());
    NumericDocValues values = leaf.getNumericDocValues("foo");
    assertEquals(0, values.nextDoc());
    assertEquals(-1.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
    assertEquals(1, values.nextDoc());
    assertEquals(7.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
    assertEquals(2, values.nextDoc());
    assertEquals(18.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
    r.close();
    w.close();
    dir.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Sort(org.apache.lucene.search.Sort) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 19 with FloatDocValuesField

use of org.apache.lucene.document.FloatDocValuesField in project lucene-solr by apache.

the class TestIndexSorting method testMissingFloatFirst.

public void testMissingFloatFirst() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    SortField sortField = new SortField("foo", SortField.Type.FLOAT);
    sortField.setMissingValue(Float.NEGATIVE_INFINITY);
    Sort indexSort = new Sort(sortField);
    iwc.setIndexSort(indexSort);
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new FloatDocValuesField("foo", 18.0f));
    w.addDocument(doc);
    // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
    w.commit();
    // missing
    w.addDocument(new Document());
    w.commit();
    doc = new Document();
    doc.add(new FloatDocValuesField("foo", 7.0f));
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    LeafReader leaf = getOnlyLeafReader(r);
    assertEquals(3, leaf.maxDoc());
    NumericDocValues values = leaf.getNumericDocValues("foo");
    assertEquals(1, values.nextDoc());
    assertEquals(7.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
    assertEquals(2, values.nextDoc());
    assertEquals(18.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
    r.close();
    w.close();
    dir.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Sort(org.apache.lucene.search.Sort) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 20 with FloatDocValuesField

use of org.apache.lucene.document.FloatDocValuesField in project lucene-solr by apache.

the class TestJoinUtil method addLinkFields.

private void addLinkFields(final Random random, Document document, final String fieldName, String linkValue, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) {
    document.add(newTextField(random, fieldName, linkValue, Field.Store.NO));
    final int linkInt = Integer.parseUnsignedInt(linkValue, 16);
    document.add(new IntPoint(fieldName + "INT", linkInt));
    document.add(new FloatPoint(fieldName + "FLOAT", linkInt));
    final long linkLong = linkInt << 32 | linkInt;
    document.add(new LongPoint(fieldName + "LONG", linkLong));
    document.add(new DoublePoint(fieldName + "DOUBLE", linkLong));
    if (multipleValuesPerDocument) {
        document.add(new SortedSetDocValuesField(fieldName, new BytesRef(linkValue)));
        document.add(new SortedNumericDocValuesField(fieldName + "INT", linkInt));
        document.add(new SortedNumericDocValuesField(fieldName + "FLOAT", Float.floatToRawIntBits(linkInt)));
        document.add(new SortedNumericDocValuesField(fieldName + "LONG", linkLong));
        document.add(new SortedNumericDocValuesField(fieldName + "DOUBLE", Double.doubleToRawLongBits(linkLong)));
    } else {
        document.add(new SortedDocValuesField(fieldName, new BytesRef(linkValue)));
        document.add(new NumericDocValuesField(fieldName + "INT", linkInt));
        document.add(new FloatDocValuesField(fieldName + "FLOAT", linkInt));
        document.add(new NumericDocValuesField(fieldName + "LONG", linkLong));
        document.add(new DoubleDocValuesField(fieldName + "DOUBLE", linkLong));
    }
    if (globalOrdinalJoin) {
        document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
    }
}
Also used : FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) IntPoint(org.apache.lucene.document.IntPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) FloatPoint(org.apache.lucene.document.FloatPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) DoublePoint(org.apache.lucene.document.DoublePoint) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

FloatDocValuesField (org.apache.lucene.document.FloatDocValuesField)23 Document (org.apache.lucene.document.Document)22 Directory (org.apache.lucene.store.Directory)16 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)15 IndexReader (org.apache.lucene.index.IndexReader)10 ArrayList (java.util.ArrayList)8 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)8 Term (org.apache.lucene.index.Term)7 IndexSearcher (org.apache.lucene.search.IndexSearcher)7 DoubleDocValuesField (org.apache.lucene.document.DoubleDocValuesField)6 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)6 TermQuery (org.apache.lucene.search.TermQuery)6 TopDocs (org.apache.lucene.search.TopDocs)6 BytesRef (org.apache.lucene.util.BytesRef)6 BooleanQuery (org.apache.lucene.search.BooleanQuery)5 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)4 Field (org.apache.lucene.document.Field)4 Sort (org.apache.lucene.search.Sort)4 SortField (org.apache.lucene.search.SortField)4 Feature (org.apache.solr.ltr.feature.Feature)4