Search in sources :

Example 1 with Terms

use of org.apache.lucene.index.Terms in project elasticsearch by elastic.

the class TermsSliceQuery method build.

/**
 * Returns a DocIdSet for one segment containing the docs that match the specified slice.
 *
 * <p>Walks every term of the field returned by {@code getField()} and adds the postings of
 * each term whose hash is accepted by {@code contains(int)}.
 *
 * @param reader the segment reader to scan
 * @return the matching documents; empty when the segment has no terms for the field
 * @throws IOException if reading terms or postings fails
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    if (terms == null) {
        // LeafReader#terms may return null when the field has no terms in this segment;
        // nothing can match, so return the empty set instead of failing with an NPE.
        return builder.build();
    }
    final TermsEnum te = terms.iterator();
    // Reused across terms so the codec can recycle the enum instead of allocating a new one.
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        // NOTE(review): BytesRef#hashCode appears to use a per-JVM seed in Lucene; confirm that
        // slice membership stays consistent when the same query runs on different nodes.
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            // Only doc ids are needed, so request no frequencies/positions.
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
Also used : Terms(org.apache.lucene.index.Terms) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 2 with Terms

use of org.apache.lucene.index.Terms in project elasticsearch by elastic.

the class IndexFieldTerm method getPostings.

/**
 * Looks up the postings of the term described by {@code identifier} in the given segment.
 *
 * <p>When the reader reports live docs, the returned enum is wrapped so that
 * {@code nextDoc()} and {@code advance(int)} only stop on documents whose live bit is set.
 *
 * @param luceneFlags flags passed through to {@code TermsEnum#postings}
 * @param reader      the segment reader to search
 * @return the postings for the term, or {@code null} when the reader has no fields, the field
 *         has no terms, or the term is not present
 * @throws IOException if the underlying index access fails
 */
private PostingsEnum getPostings(int luceneFlags, LeafReader reader) throws IOException {
    assert identifier.field() != null;
    assert identifier.bytes() != null;

    final Fields fields = reader.fields();
    if (fields == null) {
        return null;
    }
    final Terms terms = fields.terms(identifier.field());
    if (terms == null) {
        return null;
    }
    final TermsEnum termsEnum = terms.iterator();
    if (!termsEnum.seekExact(identifier.bytes())) {
        return null;
    }

    // Passes the previously held enum (the "postings" field) for reuse.
    final PostingsEnum rawPostings = termsEnum.postings(postings, luceneFlags);
    final Bits liveDocs = reader.getLiveDocs();
    if (liveDocs == null) {
        // No live-docs bitset reported: nothing to filter.
        return rawPostings;
    }

    return new FilterPostingsEnum(rawPostings) {

        /** Starting at {@code candidate}, moves forward until a doc with its live bit set is found. */
        private int firstLiveFrom(int candidate) throws IOException {
            int doc = candidate;
            while (doc != NO_MORE_DOCS) {
                if (liveDocs.get(doc)) {
                    return doc;
                }
                doc = super.nextDoc();
            }
            return NO_MORE_DOCS;
        }

        @Override
        public int nextDoc() throws IOException {
            return firstLiveFrom(super.nextDoc());
        }

        @Override
        public int advance(int target) throws IOException {
            return firstLiveFrom(super.advance(target));
        }
    };
}
Also used : Fields(org.apache.lucene.index.Fields) FilterPostingsEnum(org.apache.lucene.index.FilterLeafReader.FilterPostingsEnum) Terms(org.apache.lucene.index.Terms) Bits(org.apache.lucene.util.Bits) IOException(java.io.IOException) PostingsEnum(org.apache.lucene.index.PostingsEnum) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 3 with Terms

use of org.apache.lucene.index.Terms in project elasticsearch by elastic.

the class GetTermVectorsCheckDocFreqIT method checkWithoutFieldStatistics.

/**
 * Requests term vectors for document {@code i} with term statistics enabled and field statistics
 * disabled, then verifies the returned terms, postings and the JSON rendering of the response.
 *
 * @param numDocs     expected doc frequency of every term
 * @param values      expected terms, in the order the terms enum returns them
 * @param freq        expected in-document frequency per term
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to fetch
 * @throws IOException if reading the term vectors or building the JSON fails
 */
private void checkWithoutFieldStatistics(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(true).setFieldStatistics(false).setSelectedFields();
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // Field statistics were not requested, so every field-level statistic is reported as -1.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) -1));
    assertThat(terms.getDocCount(), Matchers.equalTo(-1));
    assertThat(terms.getSumDocFreq(), equalTo((long) -1));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        // Actual value first, expected value in the matcher, so failure messages read correctly.
        assertThat("expected " + string, next.utf8ToString(), equalTo(string));
        // "the" is the only term checked for two occurrences per document.
        int expectedTtf = string.equals("the") ? numDocs * 2 : numDocs;
        assertThat("expected ttf of " + string, (int) iterator.totalTermFreq(), equalTo(expectedTtf));
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(docsAndPositions.freq(), equalTo(freq[j]));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, null);
    // "took" varies from run to run, so strip it before comparing.
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    // NOTE(review): the literal below hard-codes doc_freq/ttf for 15 documents; presumably callers
    // always pass numDocs == 15 - confirm before reusing with another value.
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
Also used : Fields(org.apache.lucene.index.Fields) Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 4 with Terms

use of org.apache.lucene.index.Terms in project elasticsearch by elastic.

the class GetTermVectorsCheckDocFreqIT method checkAllInfo.

/**
 * Requests term vectors for document {@code i} with both field and term statistics enabled, then
 * verifies the returned statistics, terms, postings and the JSON rendering of the response.
 *
 * @param numDocs     expected doc count of the field and doc frequency of every term
 * @param values      expected terms, in the order the terms enum returns them
 * @param freq        expected in-document frequency per term
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to fetch
 * @throws IOException if reading the term vectors or building the JSON fails
 */
private void checkAllInfo(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true).setPositions(true).setFieldStatistics(true).setTermStatistics(true).setSelectedFields();
    assertThat(resp.request().fieldStatistics(), equalTo(true));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        // Actual value first, expected value in the matcher, so failure messages read correctly.
        assertThat("expected " + string, next.utf8ToString(), equalTo(string));
        // "the" is the only term checked for two occurrences per document.
        int expectedTtf = string.equals("the") ? numDocs * 2 : numDocs;
        assertThat("expected ttf of " + string, (int) iterator.totalTermFreq(), equalTo(expectedTtf));
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(docsAndPositions.freq(), equalTo(freq[j]));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    // "took" varies from run to run, so strip it before comparing.
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    // NOTE(review): the literal below hard-codes statistics for 15 documents (doc_count 15,
    // sum_doc_freq 120, sum_ttf 135); presumably callers always pass numDocs == 15 - confirm.
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
Also used : Fields(org.apache.lucene.index.Fields) Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 5 with Terms

use of org.apache.lucene.index.Terms in project elasticsearch by elastic.

the class GetTermVectorsCheckDocFreqIT method checkWithoutTermStatistics.

/**
 * Requests term vectors for document {@code i} with field statistics enabled and term statistics
 * disabled, then verifies the returned statistics, terms, postings and the JSON rendering.
 *
 * @param numDocs     expected doc count of the field
 * @param values      expected terms, in the order the terms enum returns them
 * @param freq        expected in-document frequency per term
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to fetch
 * @throws IOException if reading the term vectors or building the JSON fails
 */
private void checkWithoutTermStatistics(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(false).setFieldStatistics(true).setSelectedFields();
    assertThat(resp.request().termStatistics(), equalTo(false));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        // Actual value first, expected value in the matcher, so failure messages read correctly.
        assertThat("expected " + string, next.utf8ToString(), equalTo(string));
        // Term statistics were not requested, so ttf and doc freq are reported as -1.
        assertThat("expected ttf of " + string, (int) iterator.totalTermFreq(), equalTo(-1));
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(docsAndPositions.freq(), equalTo(freq[j]));
        assertThat(iterator.docFreq(), equalTo(-1));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, null);
    // "took" varies from run to run, so strip it before comparing.
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    // NOTE(review): the literal below hard-codes field statistics for 15 documents (doc_count 15,
    // sum_doc_freq 120, sum_ttf 135); presumably callers always pass numDocs == 15 - confirm.
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
Also used : Fields(org.apache.lucene.index.Fields) Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

Terms (org.apache.lucene.index.Terms)202 TermsEnum (org.apache.lucene.index.TermsEnum)143 BytesRef (org.apache.lucene.util.BytesRef)111 PostingsEnum (org.apache.lucene.index.PostingsEnum)53 Fields (org.apache.lucene.index.Fields)49 IndexReader (org.apache.lucene.index.IndexReader)49 IOException (java.io.IOException)41 Term (org.apache.lucene.index.Term)36 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)35 LeafReader (org.apache.lucene.index.LeafReader)34 ArrayList (java.util.ArrayList)26 Document (org.apache.lucene.document.Document)26 Directory (org.apache.lucene.store.Directory)23 MultiFields (org.apache.lucene.index.MultiFields)22 HashMap (java.util.HashMap)20 HashSet (java.util.HashSet)17 Test (org.junit.Test)16 MultiTerms (org.apache.lucene.index.MultiTerms)15 IndexSearcher (org.apache.lucene.search.IndexSearcher)14 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)13