Use of org.apache.lucene.search.Explanation in project greplin-lucene-utils by Cue:
the class PredicateQuery, method createWeight.
/**
 * Returns a Weight that applies the predicate to the enclosed query's Weight.
 * This is accomplished by overriding the Scorer returned by the Weight.
 * @param searcher the searcher to create a weight for.
 * @return a Weight that applies the predicate to the query.
 * @throws IOException if IO issues occur.
 */
@Override
public Weight createWeight(final Searcher searcher) throws IOException {
  final Weight weight = this.query.createWeight(searcher);
  final Similarity similarity = this.query.getSimilarity(searcher);
  return new Weight() {
    /** Normalized weight value, computed in {@link #normalize(float)}. */
    private float value;

    // Pass these methods through to the enclosed query's weight.

    @Override
    public float getValue() {
      return this.value;
    }

    @Override
    public boolean scoresDocsOutOfOrder() {
      return false;
    }

    // FIX: @Override was missing here — this overrides Weight.sumOfSquaredWeights()
    // just like every other member of this anonymous class.
    @Override
    public float sumOfSquaredWeights() throws IOException {
      // Fold this query's boost into the raw sum-of-squared-weights,
      // squared because the boost multiplies the weight itself.
      return weight.sumOfSquaredWeights() * getBoost() * getBoost();
    }

    @Override
    public void normalize(final float v) {
      // Incorporate boost into the normalization of the wrapped weight,
      // then cache the resulting value for getValue().
      weight.normalize(v * getBoost());
      this.value = weight.getValue();
    }

    @Override
    public Explanation explain(final IndexReader reader, final int i)
        throws IOException {
      Explanation inner = weight.explain(reader, i);
      Bits predicate = PredicateQuery.this.predicate.get(reader);
      if (predicate.get(i)) {
        // Document passes the predicate: the inner query's explanation stands.
        return inner;
      } else {
        // Document is filtered out: score is 0, but keep the inner
        // explanation as a detail so the user can see what it would have been.
        Explanation result = new Explanation(0.0f,
            "failure to match predicate: " + predicate.toString());
        result.addDetail(inner);
        return result;
      }
    }

    @Override
    public Query getQuery() {
      return PredicateQuery.this;
    }

    @Override
    public Scorer scorer(final IndexReader reader,
                         final boolean scoreDocsInOrder,
                         final boolean topScorer) throws IOException {
      // Resolve the predicate for this (per-segment) reader and let the
      // static helper wrap the inner scorer with it.
      Bits predicate = PredicateQuery.this.predicate.get(reader);
      return PredicateQuery.getScorer(reader, similarity, weight, this, predicate);
    }
  };
}
Use of org.apache.lucene.search.Explanation in project ltr4l by LTR4L:
the class FeaturesExtractor, method call.
/**
 * Extracts feature values for every (query, document) pair described by
 * {@code queryDataDescs} and writes them to {@code featuresFile} as a
 * JSON-like report ({ featuresSet: [...], queries: [ { qid, query, docs: [...] } ] }).
 *
 * NOTE(review): the emitted keys are unquoted, so the output is JSON-like
 * but not strict JSON — presumably consumed by a tolerant parser; verify
 * against the downstream reader.
 *
 * @return always 100 (presumably a "percent complete" sentinel — confirm with caller).
 */
@Override
public Integer call() {
  PrintWriter pw = null;
  // Debug flag is hard-coded off; when flipped on, per-extractor Explanations
  // are collected and appended next to each feature value.
  final boolean _debug = false;
  List<Explanation> _debugExpls = null;
  try {
    pw = new PrintWriter(featuresFile, "UTF-8");
    pw.println("{");
    // Header: the ordered list of feature names, comma-separated by hand.
    pw.print("  featuresSet: [");
    int cntFE = 0;
    for (FieldFeatureExtractorFactory factory : featuresSpec) {
      if (cntFE > 0) {
        pw.printf(" ,\"%s\"", factory.getFeatureName());
      } else {
        pw.printf(" \"%s\"", factory.getFeatureName());
      }
      cntFE++;
    }
    pw.println("],");
    pw.println("  queries: [");
    IndexReaderContext context = req.getSearcher().getTopReaderContext();
    int cntQ = 0;
    for (TrainingDataReader.QueryDataDesc queryDataDesc : queryDataDescs) {
      // Comma-separate query objects after the first one.
      if (cntQ > 0) {
        pw.println(",\n  {");
      } else {
        pw.println("  {");
      }
      final int qid = queryDataDesc.qid;
      final String qstr = queryDataDesc.queryStr;
      pw.printf("    qid: %d,\n", qid);
      pw.printf("    query: \"%s\",\n", qstr);
      pw.println("    docs: [");
      // Resolve each external doc key to an internal Lucene doc id via a
      // TermQuery on the id field; keys with no match are silently skipped.
      List<Integer> docIds = new ArrayList<Integer>();
      int cntD = 0;
      for (String key : queryDataDesc.docs) {
        TermQuery idQuery = new TermQuery(new Term(idField, key));
        TopDocs topDocs = req.getSearcher().search(idQuery, 1);
        if (topDocs.scoreDocs.length > 0) {
          docIds.add(topDocs.scoreDocs[0].doc);
        }
      }
      // Sorting lets us walk the segment leaves in a single forward pass.
      Collections.sort(docIds);
      List<LeafReaderContext> leaves = req.getSearcher().getIndexReader().leaves();
      int readerUpto = -1;
      int endDoc = 0;   // exclusive upper bound (global doc id) of the current segment
      int docBase = 0;  // global doc id of the current segment's first document
      List<FieldFeatureExtractor[]> spec = null;
      Set<Integer> allDocs = null;
      for (int docId : docIds) {
        LeafReaderContext readerContext = null;
        // Advance to the segment containing docId. readerContext stays null
        // when docId falls in the same segment as the previous iteration.
        while (docId >= endDoc) {
          readerUpto++;
          readerContext = leaves.get(readerUpto);
          endDoc = readerContext.docBase + readerContext.reader().maxDoc();
        }
        if (readerContext != null) {
          // We advanced to another segment:
          // rebuild the per-segment extractors and the set of all doc ids
          // (segment-local) that any extractor can match.
          docBase = readerContext.docBase;
          spec = new ArrayList<FieldFeatureExtractor[]>();
          allDocs = new HashSet<Integer>();
          for (FieldFeatureExtractorFactory factory : featuresSpec) {
            String fieldName = factory.getFieldName();
            FieldType fieldType = req.getSchema().getFieldType(fieldName);
            Analyzer analyzer = fieldType.getQueryAnalyzer();
            factory.init(context, FieldFeatureExtractorFactory.terms(fieldName, qstr, analyzer));
            FieldFeatureExtractor[] extractors = factory.create(readerContext, allDocs);
            spec.add(extractors);
          }
        }
        // allDocs is non-null here: the first iteration always enters the
        // while-loop above (endDoc starts at 0) and initializes it.
        if (allDocs.size() > 0) {
          final List<Integer> aldocs = new ArrayList<Integer>(allDocs);
          Collections.sort(aldocs);
          // Minimal DocIdSetIterator over the sorted matching doc ids,
          // used only to test whether targetDoc is among them.
          // (Rebuilt from scratch for every docId — correct but O(n) per doc.)
          DocIdSetIterator disi = new DocIdSetIterator() {
            int pos = -1;
            int docId = -1;
            @Override
            public int docID() {
              return docId;
            }
            @Override
            public int nextDoc() throws IOException {
              pos++;
              docId = pos >= aldocs.size() ? NO_MORE_DOCS : aldocs.get(pos);
              return docId;
            }
            @Override
            public int advance(int target) throws IOException {
              while (docId < target) {
                nextDoc();
              }
              return docId;
            }
            @Override
            public long cost() {
              return 0;
            }
          };
          // Convert the global doc id to a segment-local one.
          int targetDoc = docId - docBase;
          int actualDoc = disi.docID();
          if (actualDoc < targetDoc) {
            actualDoc = disi.advance(targetDoc);
          }
          if (actualDoc == targetDoc) {
            // The doc matched at least one extractor: emit its feature row.
            if (cntD > 0) {
              pw.println(",\n      {");
            } else {
              pw.println("      {");
            }
            Document luceneDoc = req.getSearcher().doc(docId);
            String idValue = luceneDoc.get(idField);
            pw.printf("        id: \"%s\",\n", idValue);
            pw.print("        features: [");
            int cntF = 0;
            // One feature per factory: sum of that factory's extractor outputs.
            for (FieldFeatureExtractor[] extractors : spec) {
              float feature = 0;
              if (_debug) {
                _debugExpls = new ArrayList<Explanation>();
              }
              for (FieldFeatureExtractor extractor : extractors) {
                feature += extractor.feature(targetDoc);
                if (_debug) {
                  _debugExpls.add(extractor.explain(targetDoc));
                }
              }
              if (cntF > 0) {
                pw.printf(", %f", feature);
              } else {
                pw.printf(" %f", feature);
              }
              if (_debug) {
                pw.printf(": %s", Explanation.match(feature, "sum of ", _debugExpls));
              }
              cntF++;
            }
            pw.println("]");
            // end of a doc
            pw.print("      }");
            cntD++;
          } else {
            // Query did not match this doc, no output
            assert actualDoc > targetDoc;
          }
        }
        incProgress();
      }
      // end of docs
      pw.println("\n    ]");
      // end of a query
      pw.print("  }");
      cntQ++;
    }
    pw.println("\n  ]");
    pw.println("}");
  } catch (IOException e) {
    throw new RuntimeException(e);
  } finally {
    // Close quietly; a failure during close must not mask an earlier exception.
    IOUtils.closeWhileHandlingException(pw);
  }
  req.close();
  return 100;
}
Aggregations