Search in sources :

Example 76 with Explanation

Use of org.apache.lucene.search.Explanation in the project greplin-lucene-utils by Cue.

From the class PredicateQuery, the method createWeight:

/**
 * Returns a Weight that applies the predicate to the enclosed query's Weight.
 * This is accomplished by overriding the Scorer returned by the Weight.
 * @param searcher the searcher to create a weight for.
 * @return a Weight that applies the predicate to the query.
 * @throws IOException if IO issues occur.
 */
@Override
public Weight createWeight(final Searcher searcher) throws IOException {
    final Weight weight = this.query.createWeight(searcher);
    final Similarity similarity = this.query.getSimilarity(searcher);
    return new Weight() {

        // Cached normalized score value; set in normalize() from the inner weight.
        private float value;

        // Pass these methods through to the enclosed query's weight.
        @Override
        public float getValue() {
            return this.value;
        }

        @Override
        public boolean scoresDocsOutOfOrder() {
            return false;
        }

        @Override
        public float sumOfSquaredWeights() throws IOException {
            // Fold this query's boost into the query-norm computation.
            return weight.sumOfSquaredWeights() * getBoost() * getBoost();
        }

        @Override
        public void normalize(final float v) {
            // Incorporate boost, then mirror the inner weight's normalized value.
            weight.normalize(v * getBoost());
            this.value = weight.getValue();
        }

        @Override
        public Explanation explain(final IndexReader reader, final int i) throws IOException {
            Explanation inner = weight.explain(reader, i);
            Bits predicate = PredicateQuery.this.predicate.get(reader);
            if (predicate.get(i)) {
                return inner;
            } else {
                // Doc fails the predicate: report a zero score, but keep the
                // inner explanation attached as a detail for debugging.
                Explanation result = new Explanation(0.0f, "failure to match predicate: " + predicate);
                result.addDetail(inner);
                return result;
            }
        }

        @Override
        public Query getQuery() {
            return PredicateQuery.this;
        }

        @Override
        public Scorer scorer(final IndexReader reader, final boolean scoreDocsInOrder, final boolean topScorer) throws IOException {
            Bits predicate = PredicateQuery.this.predicate.get(reader);
            return PredicateQuery.getScorer(reader, similarity, weight, this, predicate);
        }
    };
}
Also used : Similarity(org.apache.lucene.search.Similarity) Explanation(org.apache.lucene.search.Explanation) IndexReader(org.apache.lucene.index.IndexReader) Bits(org.apache.lucene.util.Bits) Weight(org.apache.lucene.search.Weight)

Example 77 with Explanation

Use of org.apache.lucene.search.Explanation in the project ltr4l by LTR4L.

From the class FeaturesExtractor, the method call:

/**
 * Computes feature values for every (query, document) pair described by
 * {@code queryDataDescs} and writes them to {@code featuresFile} as a
 * JSON-like report of the form {@code { featuresSet: [...], queries: [...] }}.
 *
 * <p>Each external doc key is resolved to an internal Lucene doc id via a
 * TermQuery on {@code idField}; features are then computed per index segment
 * using the {@code featuresSpec} extractor factories.
 *
 * @return 100 on completion (NOTE(review): looks like a percent-done value —
 *         confirm against the progress-reporting caller).
 */
@Override
public Integer call() {
    PrintWriter pw = null;
    // Debug switch is hard-coded off; when true, per-feature Explanations are
    // collected and printed alongside each feature value.
    final boolean _debug = false;
    List<Explanation> _debugExpls = null;
    try {
        pw = new PrintWriter(featuresFile, "UTF-8");
        pw.println("{");
        // Header: the ordered list of feature names (comma handling via cntFE).
        pw.print("  featuresSet: [");
        int cntFE = 0;
        for (FieldFeatureExtractorFactory factory : featuresSpec) {
            if (cntFE > 0) {
                pw.printf(" ,\"%s\"", factory.getFeatureName());
            } else {
                pw.printf(" \"%s\"", factory.getFeatureName());
            }
            cntFE++;
        }
        pw.println("],");
        pw.println("  queries: [");
        IndexReaderContext context = req.getSearcher().getTopReaderContext();
        int cntQ = 0;
        for (TrainingDataReader.QueryDataDesc queryDataDesc : queryDataDescs) {
            if (cntQ > 0) {
                pw.println(",\n    {");
            } else {
                pw.println("    {");
            }
            final int qid = queryDataDesc.qid;
            final String qstr = queryDataDesc.queryStr;
            pw.printf("      qid: %d,\n", qid);
            pw.printf("      query: \"%s\",\n", qstr);
            pw.println("      docs: [");
            // Resolve each external doc key to its internal Lucene doc id;
            // keys that match no document are silently skipped.
            List<Integer> docIds = new ArrayList<Integer>();
            int cntD = 0;
            for (String key : queryDataDesc.docs) {
                TermQuery idQuery = new TermQuery(new Term(idField, key));
                TopDocs topDocs = req.getSearcher().search(idQuery, 1);
                if (topDocs.scoreDocs.length > 0) {
                    docIds.add(topDocs.scoreDocs[0].doc);
                }
            }
            // Sorting the ids lets us walk the index segments (leaves) once,
            // in order, re-initializing extractors only on segment changes.
            Collections.sort(docIds);
            List<LeafReaderContext> leaves = req.getSearcher().getIndexReader().leaves();
            int readerUpto = -1;
            int endDoc = 0;
            int docBase = 0;
            List<FieldFeatureExtractor[]> spec = null;
            Set<Integer> allDocs = null;
            for (int docId : docIds) {
                LeafReaderContext readerContext = null;
                // Advance to the segment whose doc-id range contains docId.
                // readerContext stays null when docId is in the current segment.
                while (docId >= endDoc) {
                    readerUpto++;
                    readerContext = leaves.get(readerUpto);
                    endDoc = readerContext.docBase + readerContext.reader().maxDoc();
                }
                if (readerContext != null) {
                    // We advanced to another segment:
                    // rebuild the per-segment extractors and the set of
                    // segment-local doc ids the extractors can match.
                    docBase = readerContext.docBase;
                    spec = new ArrayList<FieldFeatureExtractor[]>();
                    allDocs = new HashSet<Integer>();
                    for (FieldFeatureExtractorFactory factory : featuresSpec) {
                        String fieldName = factory.getFieldName();
                        FieldType fieldType = req.getSchema().getFieldType(fieldName);
                        Analyzer analyzer = fieldType.getQueryAnalyzer();
                        factory.init(context, FieldFeatureExtractorFactory.terms(fieldName, qstr, analyzer));
                        FieldFeatureExtractor[] extractors = factory.create(readerContext, allDocs);
                        spec.add(extractors);
                    }
                }
                if (allDocs.size() > 0) {
                    final List<Integer> aldocs = new ArrayList<Integer>(allDocs);
                    Collections.sort(aldocs);
                    // Minimal DocIdSetIterator over the sorted candidate docs of
                    // this segment; advance() relies on NO_MORE_DOCS being
                    // Integer.MAX_VALUE to terminate when the list is exhausted.
                    DocIdSetIterator disi = new DocIdSetIterator() {

                        // Position within aldocs; -1 before the first nextDoc().
                        int pos = -1;

                        int docId = -1;

                        @Override
                        public int docID() {
                            return docId;
                        }

                        @Override
                        public int nextDoc() throws IOException {
                            pos++;
                            docId = pos >= aldocs.size() ? NO_MORE_DOCS : aldocs.get(pos);
                            return docId;
                        }

                        @Override
                        public int advance(int target) throws IOException {
                            while (docId < target) {
                                nextDoc();
                            }
                            return docId;
                        }

                        @Override
                        public long cost() {
                            return 0;
                        }
                    };
                    // Compare in segment-local coordinates (global id - docBase).
                    int targetDoc = docId - docBase;
                    int actualDoc = disi.docID();
                    if (actualDoc < targetDoc) {
                        actualDoc = disi.advance(targetDoc);
                    }
                    if (actualDoc == targetDoc) {
                        if (cntD > 0) {
                            pw.println(",\n        {");
                        } else {
                            pw.println("        {");
                        }
                        Document luceneDoc = req.getSearcher().doc(docId);
                        String idValue = luceneDoc.get(idField);
                        pw.printf("          id: \"%s\",\n", idValue);
                        pw.print("          features: [");
                        int cntF = 0;
                        // One feature per factory: the sum of that factory's
                        // per-field extractor values for this doc.
                        for (FieldFeatureExtractor[] extractors : spec) {
                            float feature = 0;
                            if (_debug) {
                                _debugExpls = new ArrayList<Explanation>();
                            }
                            for (FieldFeatureExtractor extractor : extractors) {
                                feature += extractor.feature(targetDoc);
                                if (_debug) {
                                    _debugExpls.add(extractor.explain(targetDoc));
                                }
                            }
                            if (cntF > 0) {
                                pw.printf(", %f", feature);
                            } else {
                                pw.printf(" %f", feature);
                            }
                            if (_debug) {
                                pw.printf(": %s", Explanation.match(feature, "sum of ", _debugExpls));
                            }
                            cntF++;
                        }
                        pw.println("]");
                        // end of a doc
                        pw.print("        }");
                        cntD++;
                    } else {
                        // Query did not match this doc, no output
                        assert actualDoc > targetDoc;
                    }
                }
                incProgress();
            }
            // end of docs
            pw.println("\n      ]");
            // end of a query
            pw.print("    }");
            cntQ++;
        }
        pw.println("\n  ]");
        pw.println("}");
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeWhileHandlingException(pw);
    }
    // NOTE(review): req.close() is skipped when the catch above rethrows —
    // consider moving it into the finally block if the request must always close.
    req.close();
    return 100;
}
Also used : Explanation(org.apache.lucene.search.Explanation) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) PrintWriter(java.io.PrintWriter) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) FieldType(org.apache.solr.schema.FieldType) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Aggregations

Explanation (org.apache.lucene.search.Explanation)77 TermQuery (org.apache.lucene.search.TermQuery)16 ArrayList (java.util.ArrayList)13 Query (org.apache.lucene.search.Query)13 IndexSearcher (org.apache.lucene.search.IndexSearcher)12 Term (org.apache.lucene.index.Term)11 IOException (java.io.IOException)9 BooleanQuery (org.apache.lucene.search.BooleanQuery)8 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)8 Directory (org.apache.lucene.store.Directory)8 Document (org.apache.lucene.document.Document)7 TopDocs (org.apache.lucene.search.TopDocs)7 IndexReader (org.apache.lucene.index.IndexReader)6 IndexWriter (org.apache.lucene.index.IndexWriter)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)6 Collectors (java.util.stream.Collectors)5 Collections (java.util.Collections)4 DirectoryReader (org.apache.lucene.index.DirectoryReader)4 FunctionValues (org.apache.lucene.queries.function.FunctionValues)4 SearchResponse (org.elasticsearch.action.search.SearchResponse)4