Use of org.opengrok.suggest.query.data.BitIntsHolder in project OpenGrok by OpenGrok.
Class CustomSloppyPhraseScorer, method phraseFreq.
/**
* Score a candidate doc for all slop-valid position-combinations (matches)
* encountered while traversing/hopping the PhrasePositions.
* <br> The score contribution of a match depends on the distance:
* <br> - highest score for distance=0 (exact match).
* <br> - score gets lower as distance gets higher.
* <br>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
* once for "a b" (distance=0), and once for "b a" (distance=2).
* <br>Possibly not all valid combinations are encountered, because for efficiency
* we always propagate the least PhrasePosition. This allows basing the traversal
* on a PriorityQueue and moving forward faster.
* As a result, for example, document "a b c b a"
* would score differently for queries "a b c"~4 and "c b a"~4, although
* they really are equivalent.
* Similarly, for doc "a b c b a f g", query "c b"~2
* would get same score as "g f"~2, although "c b"~2 could be matched twice.
* We may want to fix this in the future (currently not, for performance reasons).
* <br>Customization: in addition to counting matches, this method passes every
* counted match to {@code addPositions} and, when the resulting holder is not
* empty, stores it in {@code documentsToPositionsMap} under the current {@code docID()}.
*
* @return the number of matches counted for the current document
* ({@code 0.0f} when {@code initPhrasePositions()} reports failure)
* @throws IOException if one of the position helpers called here throws it
*/
private float phraseFreq() throws IOException {
// custom begins
// every visited position (position + offset) is flagged in allPositions
BitIntsHolder allPositions = new BitIntsHolder();
// holder handed to addPositions for each counted match; stored per document if non-empty
BitIntsHolder positions = new BitIntsHolder();
if (phrasePositions.length == 1) {
// special handling for one term
end = Integer.MIN_VALUE;
PhrasePositions pp = phrasePositions[0];
pp.firstPosition();
// end was just reset to Integer.MIN_VALUE, so it is raised to pp.position unless that is also MIN_VALUE
if (pp.position > end) {
end = pp.position;
}
int matchCount = 0;
// NOTE(review): if firstPosition() already loads the first position, that position is
// neither recorded nor counted here, because the loop body only runs after
// advancePP(pp) succeeds -- confirm against PhrasePositions/advancePP.
while (advancePP(pp)) {
allPositions.set(pp.position + pp.offset);
// a single term is treated as a match of length 0 at its own position
addPositions(positions, allPositions, pp.position + pp.offset, 0);
matchCount++;
}
if (!positions.isEmpty()) {
documentsToPositionsMap.put(docID(), positions);
}
return matchCount;
}
// nothing to count when the phrase positions cannot be initialized
if (!initPhrasePositions()) {
return 0.0f;
}
// custom begins
// flag the starting position of every PhrasePositions currently in the queue
for (PhrasePositions phrasePositions : this.pq) {
allPositions.set(phrasePositions.position + phrasePositions.offset);
}
// custom ends
int numMatches = 0;
// pp is the queue head being advanced; next is the position of the new queue top
PhrasePositions pp = pq.pop();
int matchLength = end - pp.position;
int next = pq.top().position;
// custom – remember last matched position
int lastEnd = this.end;
while (advancePP(pp)) {
if (hasRpts && !advanceRpts(pp)) {
// pps exhausted
break;
}
allPositions.set(pp.position + pp.offset);
if (pp.position > next) {
// done minimizing current match-length
if (matchLength <= slop) {
numMatches++;
// custom – match found, remember positions
addPositions(positions, allPositions, lastEnd, matchLength);
}
// pp moved past the queue top: re-queue it and continue with the new head
pq.add(pp);
pp = pq.pop();
next = pq.top().position;
matchLength = end - pp.position;
// custom – remember position of last match
lastEnd = this.end;
} else {
// pp has not passed the queue top yet: keep the smallest match length seen so far
int matchLength2 = end - pp.position;
if (matchLength2 < matchLength) {
matchLength = matchLength2;
}
// custom – remember position of last match
lastEnd = this.end;
}
}
// the loop ended with a candidate still pending; count it if it is within slop
if (matchLength <= slop) {
numMatches++;
// custom – match found, remember positions
addPositions(positions, allPositions, lastEnd, matchLength);
}
// custom begins – if some positions were found then store them
if (!positions.isEmpty()) {
documentsToPositionsMap.put(docID(), positions);
}
// custom ends
return numMatches;
}
Use of org.opengrok.suggest.query.data.BitIntsHolder in project OpenGrok by OpenGrok.
Class SuggesterSearcher, method getComplexQueryData.
/**
 * Runs {@code query} and gathers, for the given leaf only, the ids of the collected
 * documents together with a {@code PhraseScorer} if one is handed to the collector
 * (either directly or as a child of the scorer).
 *
 * @param query query to execute; {@code null} or a {@code SuggesterQuery} yields a result
 *              whose {@code documentIds} is a {@code BitIntsHolder} created with argument 0
 * @param leafReaderContext the only leaf whose documents and scorer are recorded
 * @return the collected data, or {@code null} when an {@code IOException} occurred while the
 *         current thread was interrupted (the {@code interrupted} flag is set in that case)
 */
private ComplexQueryData getComplexQueryData(final Query query, final LeafReaderContext leafReaderContext) {
    final ComplexQueryData result = new ComplexQueryData();
    final boolean nothingToSearch = query == null || query instanceof SuggesterQuery;
    if (nothingToSearch) {
        result.documentIds = new BitIntsHolder(0);
        return result;
    }

    final BitIntsHolder matchedDocs = new BitIntsHolder();
    try {
        search(query, new Collector() {
            @Override
            public LeafCollector getLeafCollector(final LeafReaderContext context) {
                // only the requested leaf contributes anything; decide that once per leaf
                final boolean targetLeaf = leafReaderContext == context;
                final int leafDocBase = context.docBase;
                return new LeafCollector() {
                    @Override
                    public void setScorer(final Scorable scorer) {
                        if (!targetLeaf) {
                            return;
                        }
                        if (scorer instanceof PhraseScorer) {
                            result.scorer = (PhraseScorer) scorer;
                            return;
                        }
                        try {
                            // in #setScorer but no better way was found
                            for (Scorer.ChildScorable candidate : scorer.getChildren()) {
                                if (candidate.child instanceof PhraseScorer) {
                                    result.scorer = (PhraseScorer) candidate.child;
                                }
                            }
                        } catch (Exception e) {
                            // ignore
                        }
                    }

                    @Override
                    public void collect(int doc) {
                        if (targetLeaf) {
                            // doc is relative to the leaf; shift it by the leaf's doc base
                            matchedDocs.set(leafDocBase + doc);
                        }
                    }
                };
            }

            @Override
            public ScoreMode scoreMode() {
                return ScoreMode.COMPLETE_NO_SCORES;
            }
        });
    } catch (IOException e) {
        if (Thread.currentThread().isInterrupted()) {
            // give up without a result when the search was interrupted
            interrupted = true;
            return null;
        }
        logger.log(Level.WARNING, "Could not get document ids for " + query, e);
    } catch (Exception e) {
        logger.log(Level.WARNING, "Could not get document ids for " + query, e);
    }

    result.documentIds = matchedDocs;
    return result;
}
Use of org.opengrok.suggest.query.data.BitIntsHolder in project OpenGrok by OpenGrok.
Class CustomExactPhraseScorer, method phraseFreq.
/**
 * Counts the exact phrase occurrences in the current document by lining up every
 * posting with the lead posting ({@code postings[0]}).
 * <br>Customization: for each occurrence, {@code phrasePos + offset} is set in a
 * {@code BitIntsHolder}; if at least one occurrence was found, that holder is stored
 * in {@code documentToPositionsMap} under the current {@code docID()}.
 *
 * @return the number of occurrences found
 * @throws IOException if reading the postings fails
 */
private int phraseFreq() throws IOException {
    final PostingsAndPosition[] all = this.postings;
    // rewind: load frequency and first position of every posting
    for (PostingsAndPosition p : all) {
        p.freq = p.postings.freq();
        p.pos = p.postings.nextPosition();
        p.upTo = 1;
    }

    final PostingsAndPosition lead = all[0];
    int matches = 0;
    // custom – created lazily on the first match
    BitIntsHolder found = null;

    advanceHead: while (true) {
        final int phrasePos = lead.pos - lead.offset;
        for (int i = 1; i < all.length; i++) {
            final PostingsAndPosition follower = all[i];
            final int wanted = phrasePos + follower.offset;
            // move the follower up to the position implied by the lead
            if (!advancePosition(follower, wanted)) {
                break advanceHead;
            }
            if (follower.pos == wanted) {
                continue;
            }
            // the follower overshot: pull the lead forward to match it and start over
            if (!advancePosition(lead, follower.pos - follower.offset + lead.offset)) {
                break advanceHead;
            }
            continue advanceHead;
        }

        matches++;
        // custom – remember where the match was found
        if (found == null) {
            found = new BitIntsHolder();
        }
        found.set(phrasePos + offset);

        if (lead.upTo == lead.freq) {
            break;
        }
        lead.pos = lead.postings.nextPosition();
        lead.upTo++;
    }

    // custom – store the positions only when there was at least one match
    if (found != null) {
        documentToPositionsMap.put(docID(), found);
    }
    return matches;
}
Use of org.opengrok.suggest.query.data.BitIntsHolder in project OpenGrok by OpenGrok.
Class CustomSloppyPhraseScorerTest, method test.
/**
 * Indexes one document with the text "zero one two ... ten" in field "test", runs a
 * {@code CustomPhraseQuery} built from the given slop, offset and terms against it, and
 * asserts that the positions reported by the scorer for the last matching document
 * equal {@code expectedPositions}.
 *
 * @param slop slop passed to the {@code CustomPhraseQuery} constructor
 * @param offset offset set on the query via {@code setOffset}
 * @param terms phrase terms to search for
 * @param expectedPositions positions the scorer is expected to report, in iteration order
 * @throws IOException if indexing, searching or closing the index resources fails
 */
public static void test(final int slop, final int offset, final String[] terms, final Integer[] expectedPositions) throws IOException {
    // the directory is a closeable resource too; holding it in try-with-resources
    // releases it even when an assertion below fails
    try (Directory dir = new ByteBuffersDirectory()) {
        try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig())) {
            Document doc = new Document();
            doc.add(new TextField("test", "zero one two three four five six seven eight nine ten", Field.Store.NO));
            iw.addDocument(doc);
        }

        CustomPhraseQuery query = new CustomPhraseQuery(slop, "test", terms);
        query.setOffset(offset);

        try (IndexReader ir = DirectoryReader.open(dir)) {
            IndexSearcher is = new IndexSearcher(ir);
            Weight w = query.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1);
            LeafReaderContext context = ir.getContext().leaves().get(0);
            Scorer scorer = w.scorer(context);
            TwoPhaseIterator it = scorer.twoPhaseIterator();

            // walk all candidates; matches() must be called so the scorer records positions
            int correctDoc = -1;
            int docId;
            while ((docId = it.approximation().nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                if (it.matches()) {
                    correctDoc = docId;
                }
            }

            BitIntsHolder bs = (BitIntsHolder) ((PhraseScorer) scorer).getPositions(correctDoc);
            assertThat(toSet(bs), contains(expectedPositions));
        }
    }
}
Aggregations