use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class TermsSliceQuery method build.
/**
 * Returns a per-segment {@link DocIdSet} containing the documents that match this slice.
 *
 * <p>Every term of the field returned by {@code getField()} is visited; when
 * {@code contains(int)} accepts the term's hash code, all postings of that term are
 * added to the result.
 *
 * @param reader the segment reader whose terms are scanned
 * @return the matching documents; an empty set when the segment has no terms for the field
 * @throws IOException if the terms or postings cannot be read
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    if (terms == null) {
        // The field is absent from this segment: nothing can match, so return the
        // empty set instead of dereferencing null below.
        return builder.build();
    }
    final TermsEnum te = terms.iterator();
    // Reused across terms so the codec can recycle the enum instance.
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        // NOTE(review): slice membership depends on BytesRef#hashCode(); confirm this hash is
        // stable across JVMs/nodes, otherwise replicas could assign a term to different slices.
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            // Only doc ids are needed, so no frequencies/positions are requested.
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class IndexFieldTerm method getPostings.
// Looks up the postings of identifier's term in the given segment reader.
// Returns null when the reader exposes no fields, the field has no terms, or the term
// is not present. When reader.getLiveDocs() is non-null, the returned enum only yields
// documents whose bit is set in it.
private PostingsEnum getPostings(int luceneFlags, LeafReader reader) throws IOException {
    assert identifier.field() != null;
    assert identifier.bytes() != null;

    final Fields segmentFields = reader.fields();
    if (segmentFields == null) {
        return null;
    }
    final Terms fieldTerms = segmentFields.terms(identifier.field());
    if (fieldTerms == null) {
        return null;
    }
    final TermsEnum cursor = fieldTerms.iterator();
    if (!cursor.seekExact(identifier.bytes())) {
        return null;
    }

    // The current postings instance is handed back for reuse.
    final PostingsEnum rawPostings = cursor.postings(postings, luceneFlags);
    final Bits liveDocs = reader.getLiveDocs();
    if (liveDocs == null) {
        return rawPostings;
    }

    return new FilterPostingsEnum(rawPostings) {
        // Starting at candidate, moves forward until a live document (or the end) is reached.
        private int firstLiveFrom(int candidate) throws IOException {
            int doc = candidate;
            while (doc != NO_MORE_DOCS) {
                if (liveDocs.get(doc)) {
                    return doc;
                }
                doc = super.nextDoc();
            }
            return NO_MORE_DOCS;
        }

        @Override
        public int nextDoc() throws IOException {
            return firstLiveFrom(super.nextDoc());
        }

        @Override
        public int advance(int target) throws IOException {
            return firstLiveFrom(super.advance(target));
        }
    };
}
use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class IndexFieldTerm method setReader.
// Invoked when the segment reader changes: the posting list of this term has to be
// resolved again for the new reader. If getPostings finds nothing for this segment,
// a stand-in backed by DocIdSetIterator.empty() is installed instead.
private void setReader(LeafReader reader) {
    try {
        PostingsEnum resolved = getPostings(convertToLuceneFlags(flags), reader);
        if (resolved == null) {
            // No term or field in this segment: substitute a fake postings enum.
            final DocIdSetIterator noDocs = DocIdSetIterator.empty();
            resolved = new PostingsEnum() {
                // Iteration is delegated to the empty iterator.
                @Override
                public int docID() {
                    return noDocs.docID();
                }

                @Override
                public int nextDoc() throws IOException {
                    return noDocs.nextDoc();
                }

                @Override
                public int advance(int target) throws IOException {
                    return noDocs.advance(target);
                }

                @Override
                public long cost() {
                    return noDocs.cost();
                }

                // Per-document data is answered with fixed placeholder values.
                @Override
                public int freq() throws IOException {
                    return 1;
                }

                @Override
                public int nextPosition() throws IOException {
                    return -1;
                }

                @Override
                public int startOffset() throws IOException {
                    return -1;
                }

                @Override
                public int endOffset() throws IOException {
                    return -1;
                }

                @Override
                public BytesRef getPayload() throws IOException {
                    return null;
                }
            };
        }
        postings = resolved;
    } catch (IOException e) {
        throw new ElasticsearchException("Unable to get postings for field " + fieldName + " and term " + term, e);
    }
}
use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class GetTermVectorsCheckDocFreqIT method checkWithoutFieldStatistics.
/**
 * Requests the term vectors of document {@code i} with payloads, offsets, positions and term
 * statistics enabled but field statistics disabled, then verifies the response.
 *
 * <p>Checks, in order: the field-level statistics are all {@code -1}; every term matches
 * {@code values} with the expected frequency, positions, offsets and payload; and the JSON
 * rendering (with the {@code took} entry stripped) equals a fixed expected string. That
 * expected string hard-codes {@code doc_freq} 15 and {@code ttf} 15/30.
 *
 * @param numDocs     expected document frequency of every term
 * @param values      expected terms, in the order the terms enum returns them
 * @param freq        expected in-document frequency per term
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to fetch
 * @throws IOException if reading the term vectors or building the JSON fails
 */
private void checkWithoutFieldStatistics(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(true).setFieldStatistics(false).setSelectedFields();
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // Field statistics were not requested, so each field-level statistic must be -1.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) -1));
    assertThat(terms.getDocCount(), Matchers.equalTo(-1));
    assertThat(terms.getSumDocFreq(), equalTo((long) -1));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        // Observed value first, expected value in the matcher, so failure messages read correctly.
        assertThat("expected " + string, next.utf8ToString(), equalTo(string));
        // "the" is the only term expected to occur twice per document.
        long expectedTtf = string.equals("the") ? numDocs * 2L : numDocs;
        assertThat("expected ttf of " + string, iterator.totalTermFreq(), equalTo(expectedTtf));
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(docsAndPositions.freq(), equalTo(freq[j]));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        // Guard the expectation arrays themselves before indexing into them below.
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    // The enum must be exhausted once all expected terms were consumed.
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    // NOTE(review): the sibling checkAllInfo passes ToXContent.EMPTY_PARAMS here; null is kept
    // to preserve the existing call - confirm toXContent never dereferences its params.
    response.toXContent(xBuilder, null);
    // "took" varies between runs, so it is removed before comparing.
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
use of org.apache.lucene.index.PostingsEnum in project elasticsearch by elastic.
the class GetTermVectorsCheckDocFreqIT method checkAllInfo.
/**
 * Requests the term vectors of document {@code i} with payloads, offsets, positions, field
 * statistics and term statistics all enabled, then verifies the response.
 *
 * <p>Checks, in order: the field-level statistics derived from {@code numDocs} and
 * {@code values}; every term matches {@code values} with the expected frequency, positions,
 * offsets and payload; and the JSON rendering (with the {@code took} entry stripped) equals a
 * fixed expected string. That expected string hard-codes {@code doc_count} 15,
 * {@code sum_doc_freq} 120 and {@code sum_ttf} 135.
 *
 * @param numDocs     expected document count and document frequency of every term
 * @param values      expected terms, in the order the terms enum returns them
 * @param freq        expected in-document frequency per term
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to fetch
 * @throws IOException if reading the term vectors or building the JSON fails
 */
private void checkAllInfo(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true).setPositions(true).setFieldStatistics(true).setTermStatistics(true).setSelectedFields();
    assertThat(resp.request().fieldStatistics(), equalTo(true));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // Field statistics were requested, so they are checked against values derived from numDocs.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        // Observed value first, expected value in the matcher, so failure messages read correctly.
        assertThat("expected " + string, next.utf8ToString(), equalTo(string));
        // "the" is the only term expected to occur twice per document.
        long expectedTtf = string.equals("the") ? numDocs * 2L : numDocs;
        assertThat("expected ttf of " + string, iterator.totalTermFreq(), equalTo(expectedTtf));
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(docsAndPositions.freq(), equalTo(freq[j]));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        // Guard the expectation arrays themselves before indexing into them below.
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    // The enum must be exhausted once all expected terms were consumed.
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    // "took" varies between runs, so it is removed before comparing.
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
Aggregations