Search in sources :

Example 1 with Fields

use of org.apache.lucene.index.Fields in project elasticsearch by elastic.

the class ThrowingLeafReaderWrapper method getTermVectors.

@Override
public Fields getTermVectors(int docID) throws IOException {
    Fields fields = super.getTermVectors(docID);
    thrower.maybeThrow(Flags.TermVectors);
    return fields == null ? null : new ThrowingFields(fields, thrower);
}
Also used : Fields(org.apache.lucene.index.Fields)

Example 2 with Fields

use of org.apache.lucene.index.Fields in project elasticsearch by elastic.

the class IndexFieldTerm method getPostings.

private PostingsEnum getPostings(int luceneFlags, LeafReader reader) throws IOException {
    assert identifier.field() != null;
    assert identifier.bytes() != null;
    final Fields fields = reader.fields();
    PostingsEnum newPostings = null;
    if (fields != null) {
        final Terms terms = fields.terms(identifier.field());
        if (terms != null) {
            TermsEnum termsEnum = terms.iterator();
            if (termsEnum.seekExact(identifier.bytes())) {
                newPostings = termsEnum.postings(postings, luceneFlags);
                final Bits liveDocs = reader.getLiveDocs();
                if (liveDocs != null) {
                    newPostings = new FilterPostingsEnum(newPostings) {

                        private int doNext(int d) throws IOException {
                            for (; d != NO_MORE_DOCS; d = super.nextDoc()) {
                                if (liveDocs.get(d)) {
                                    return d;
                                }
                            }
                            return NO_MORE_DOCS;
                        }

                        @Override
                        public int nextDoc() throws IOException {
                            return doNext(super.nextDoc());
                        }

                        @Override
                        public int advance(int target) throws IOException {
                            return doNext(super.advance(target));
                        }
                    };
                }
            }
        }
    }
    return newPostings;
}
Also used : Fields(org.apache.lucene.index.Fields) FilterPostingsEnum(org.apache.lucene.index.FilterLeafReader.FilterPostingsEnum) Terms(org.apache.lucene.index.Terms) Bits(org.apache.lucene.util.Bits) IOException(java.io.IOException) PostingsEnum(org.apache.lucene.index.PostingsEnum) FilterPostingsEnum(org.apache.lucene.index.FilterLeafReader.FilterPostingsEnum) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 3 with Fields

use of org.apache.lucene.index.Fields in project elasticsearch by elastic.

the class GetTermVectorsCheckDocFreqIT method checkWithoutFieldStatistics.

private void checkWithoutFieldStatistics(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(true).setFieldStatistics(false).setSelectedFields();
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) -1));
    assertThat(terms.getDocCount(), Matchers.equalTo(-1));
    assertThat(terms.getSumDocFreq(), equalTo((long) -1));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, Matchers.notNullValue());
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, null);
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    ;
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
Also used : Fields(org.apache.lucene.index.Fields) Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 4 with Fields

use of org.apache.lucene.index.Fields in project elasticsearch by elastic.

the class GetTermVectorsCheckDocFreqIT method checkAllInfo.

private void checkAllInfo(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true).setPositions(true).setFieldStatistics(true).setTermStatistics(true).setSelectedFields();
    assertThat(resp.request().fieldStatistics(), equalTo(true));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, Matchers.notNullValue());
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    ;
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
Also used : Fields(org.apache.lucene.index.Fields) Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 5 with Fields

use of org.apache.lucene.index.Fields in project elasticsearch by elastic.

the class GetTermVectorsCheckDocFreqIT method checkWithoutTermStatistics.

private void checkWithoutTermStatistics(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(false).setFieldStatistics(true).setSelectedFields();
    assertThat(resp.request().termStatistics(), equalTo(false));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, Matchers.notNullValue());
        assertThat("expected ttf of " + string, -1, equalTo((int) iterator.totalTermFreq()));
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(-1));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, null);
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    ;
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
Also used : Fields(org.apache.lucene.index.Fields) Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

Fields (org.apache.lucene.index.Fields)75 Terms (org.apache.lucene.index.Terms)49 TermsEnum (org.apache.lucene.index.TermsEnum)40 BytesRef (org.apache.lucene.util.BytesRef)33 MultiFields (org.apache.lucene.index.MultiFields)28 PostingsEnum (org.apache.lucene.index.PostingsEnum)19 IndexReader (org.apache.lucene.index.IndexReader)18 IOException (java.io.IOException)15 ArrayList (java.util.ArrayList)11 XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)9 HashSet (java.util.HashSet)8 LeafReader (org.apache.lucene.index.LeafReader)8 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)8 Term (org.apache.lucene.index.Term)7 TokenStream (org.apache.lucene.analysis.TokenStream)6 Document (org.apache.lucene.document.Document)6 FieldType (org.apache.lucene.document.FieldType)6 DirectoryReader (org.apache.lucene.index.DirectoryReader)6 File (java.io.File)5 HashMap (java.util.HashMap)5