Search in sources :

Example 21 with Term

use of org.apache.lucene.index.Term in project elasticsearch by elastic.

the class TermVectorsService method getTermVectors.

/**
 * Builds the {@link TermVectorsResponse} for one term vectors request against the given shard.
 * <p>
 * The document is looked up through a uid term built from the request's type and id. Term vectors
 * are then taken from one of two places:
 * <ul>
 *   <li>the artificial document carried by the request, when {@code request.doc()} is non-null;</li>
 *   <li>otherwise the term vectors stored for the document that was found, optionally extended
 *       with generated term vectors for the selected fields.</li>
 * </ul>
 * When neither applies the response is marked as non-existing and carries no fields.
 * <p>
 * Both the get result and the searcher are released on every exit path, including failures that
 * happen while expanding field wildcards or while acquiring the searcher.
 *
 * @param indexShard       shard the document is read from
 * @param request          the term vectors request; its selected fields may be rewritten by
 *                         {@code handleFieldWildcards}
 * @param nanoTimeSupplier time source used to compute the "took" time of the response
 * @return the populated response
 * @throws ElasticsearchException if any step performed while the searcher is held fails
 */
static TermVectorsResponse getTermVectors(IndexShard indexShard, TermVectorsRequest request, LongSupplier nanoTimeSupplier) {
    final long startTime = nanoTimeSupplier.getAsLong();
    final TermVectorsResponse termVectorsResponse = new TermVectorsResponse(indexShard.shardId().getIndex().getName(), request.type(), request.id());
    final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
    final Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm).version(request.version()).versionType(request.versionType()));
    // From here on the get result is held: everything below runs under a finally that releases it,
    // so a failure in wildcard handling or searcher acquisition can no longer leak it.
    try {
        Fields termVectorsByField = null;
        // Never assigned in this method; passed on as null to the filter and the response.
        AggregatedDfs dfs = null;
        TermVectorsFilter termVectorsFilter = null;
        /* handle potential wildcards in fields */
        if (request.selectedFields() != null) {
            handleFieldWildcards(indexShard, request);
        }
        final Engine.Searcher searcher = indexShard.acquireSearcher("term_vector");
        try {
            // Prefer the reader that belongs to the get result when it has one.
            Fields topLevelFields = MultiFields.getFields(get.searcher() != null ? get.searcher().reader() : searcher.reader());
            Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
            if (request.doc() != null) {
                /* from an artificial document */
                termVectorsByField = generateTermVectorsFromDoc(indexShard, request);
                // if no document indexed in shard, take the queried document itself for stats
                if (topLevelFields == null) {
                    topLevelFields = termVectorsByField;
                }
                termVectorsResponse.setArtificial(true);
                termVectorsResponse.setExists(true);
            } else if (docIdAndVersion != null) {
                /* or from an existing document */
                // fields with stored term vectors
                termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
                Set<String> selectedFields = request.selectedFields();
                // generate tvs for fields where analyzer is overridden
                if (selectedFields == null && request.perFieldAnalyzer() != null) {
                    selectedFields = getFieldsToGenerate(request.perFieldAnalyzer(), termVectorsByField);
                }
                // fields without term vectors
                if (selectedFields != null) {
                    termVectorsByField = addGeneratedTermVectors(indexShard, get, termVectorsByField, request, selectedFields);
                }
                termVectorsResponse.setDocVersion(docIdAndVersion.version);
                termVectorsResponse.setExists(true);
            } else {
                /* no term vectors generated or found */
                termVectorsResponse.setExists(false);
            }
            /* if there are term vectors, optional compute dfs and/or terms filtering */
            if (termVectorsByField != null) {
                if (request.filterSettings() != null) {
                    termVectorsFilter = new TermVectorsFilter(termVectorsByField, topLevelFields, request.selectedFields(), dfs);
                    termVectorsFilter.setSettings(request.filterSettings());
                    try {
                        termVectorsFilter.selectBestTerms();
                    } catch (IOException e) {
                        throw new ElasticsearchException("failed to select best terms", e);
                    }
                }
                // write term vectors
                termVectorsResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields, dfs, termVectorsFilter);
            }
            termVectorsResponse.setTookInMillis(TimeUnit.NANOSECONDS.toMillis(nanoTimeSupplier.getAsLong() - startTime));
        } catch (Exception ex) {
            throw new ElasticsearchException("failed to execute term vector request", ex);
        } finally {
            // Closed before the get result, same order as before.
            searcher.close();
        }
    } finally {
        // Runs even when closing the searcher throws.
        get.release();
    }
    return termVectorsResponse;
}
Also used : TermVectorsResponse(org.elasticsearch.action.termvectors.TermVectorsResponse) HashSet(java.util.HashSet) Set(java.util.Set) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) ElasticsearchException(org.elasticsearch.ElasticsearchException) ElasticsearchException(org.elasticsearch.ElasticsearchException) IOException(java.io.IOException) TermVectorsFilter(org.elasticsearch.action.termvectors.TermVectorsFilter) Fields(org.apache.lucene.index.Fields) MultiFields(org.apache.lucene.index.MultiFields) Versions(org.elasticsearch.common.lucene.uid.Versions) AggregatedDfs(org.elasticsearch.search.dfs.AggregatedDfs) Engine(org.elasticsearch.index.engine.Engine)

Example 22 with Term

use of org.apache.lucene.index.Term in project elasticsearch by elastic.

the class CustomUnifiedHighlighterTests method testMultiPhrasePrefixQuery.

/**
 * Highlights a single document with a {@link MultiPhrasePrefixQuery} built from the terms
 * "quick", "brown" and "fo" on the "text" field, and checks the result against the expected
 * passage using a sentence break iterator.
 */
public void testMultiPhrasePrefixQuery() throws Exception {
    final MultiPhrasePrefixQuery phrasePrefix = new MultiPhrasePrefixQuery();
    // Terms are added in phrase order.
    for (String token : new String[] { "quick", "brown", "fo" }) {
        phrasePrefix.add(new Term("text", token));
    }
    final String[] docValues = { "The quick brown fox." };
    final String[] expected = { "The <b>quick</b> <b>brown</b> <b>fox</b>." };
    final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.ROOT);
    assertHighlightOneDoc("text", docValues, new StandardAnalyzer(), phrasePrefix, Locale.ROOT, sentences, 0, expected);
}
Also used : StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) MultiPhrasePrefixQuery(org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery) Term(org.apache.lucene.index.Term)

Example 23 with Term

use of org.apache.lucene.index.Term in project elasticsearch by elastic.

the class CustomUnifiedHighlighterTests method testSentenceBoundedBreakIterator.

/**
 * Highlights a two-sentence document with a disjunction of the terms "quick", "brown" and "fox",
 * using the break iterator returned by {@code BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10)},
 * and checks the resulting passages.
 */
public void testSentenceBoundedBreakIterator() throws Exception {
    // One optional (SHOULD) term clause per word.
    final BooleanQuery.Builder anyOf = new BooleanQuery.Builder();
    for (String word : new String[] { "quick", "brown", "fox" }) {
        anyOf.add(new TermQuery(new Term("text", word)), BooleanClause.Occur.SHOULD);
    }
    final BooleanQuery query = anyOf.build();

    final String[] docValues = {
        "The quick brown fox in a long sentence with another quick brown fox. " + "Another sentence with brown fox."
    };
    final String[] expected = {
        "The <b>quick</b> <b>brown</b>",
        "<b>fox</b> in a long",
        "with another <b>quick</b>",
        "<b>brown</b> <b>fox</b>.",
        "sentence with <b>brown</b>",
        "<b>fox</b>."
    };
    assertHighlightOneDoc("text", docValues, new StandardAnalyzer(), query, Locale.ROOT, BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, expected);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) AllTermQuery(org.elasticsearch.common.lucene.all.AllTermQuery) TermQuery(org.apache.lucene.search.TermQuery) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Term(org.apache.lucene.index.Term)

Example 24 with Term

use of org.apache.lucene.index.Term in project elasticsearch by elastic.

the class CustomUnifiedHighlighterTests method testSimple.

/**
 * Highlights four input values with a single term query for "highlighting" on the "text" field,
 * using a sentence break iterator, and checks the returned passages.
 */
public void testSimple() throws Exception {
    final Term highlighted = new Term("text", "highlighting");
    final Query termQuery = new TermQuery(highlighted);

    final String[] docValues = {
        "This is a test. Just a test1 highlighting from unified highlighter.",
        "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.",
        "This is highlighting the third short highlighting value.",
        "Just a test4 highlighting from unified highlighter."
    };
    final String[] expected = {
        "Just a test1 <b>highlighting</b> from unified highlighter.",
        "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a" + " longer text that gets scored lower.",
        "This is <b>highlighting</b> the third short <b>highlighting</b> value.",
        "Just a test4 <b>highlighting</b> from unified highlighter."
    };
    final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.ROOT);
    assertHighlightOneDoc("text", docValues, new StandardAnalyzer(), termQuery, Locale.ROOT, sentences, 0, expected);
}
Also used : AllTermQuery(org.elasticsearch.common.lucene.all.AllTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) AllTermQuery(org.elasticsearch.common.lucene.all.AllTermQuery) MultiPhrasePrefixQuery(org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Term(org.apache.lucene.index.Term)

Example 25 with Term

use of org.apache.lucene.index.Term in project elasticsearch by elastic.

the class CustomUnifiedHighlighterTests method testRepeat.

/**
 * Highlights a document made of repeated "fun" tokens twice: first with a term query, then with a
 * two-term phrase query. Both runs use a fresh break iterator from
 * {@code BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10)} and are checked against the
 * same passages.
 */
public void testRepeat() throws Exception {
    final String[] docValues = { "Fun  fun fun  fun  fun  fun  fun  fun  fun  fun" };
    final String[] expected = {
        "<b>Fun</b>  <b>fun</b> <b>fun</b>",
        "<b>fun</b>  <b>fun</b>",
        "<b>fun</b>  <b>fun</b>  <b>fun</b>",
        "<b>fun</b>  <b>fun</b>"
    };

    // First pass: single term.
    final Query singleTerm = new TermQuery(new Term("text", "fun"));
    assertHighlightOneDoc("text", docValues, new StandardAnalyzer(), singleTerm, Locale.ROOT, BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, expected);

    // Second pass: the same term twice as a phrase.
    final PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.add(new Term("text", "fun"));
    phrase.add(new Term("text", "fun"));
    final Query repeatedPhrase = phrase.build();
    assertHighlightOneDoc("text", docValues, new StandardAnalyzer(), repeatedPhrase, Locale.ROOT, BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, expected);
}
Also used : AllTermQuery(org.elasticsearch.common.lucene.all.AllTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) AllTermQuery(org.elasticsearch.common.lucene.all.AllTermQuery) MultiPhrasePrefixQuery(org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Term(org.apache.lucene.index.Term)

Aggregations

Term (org.apache.lucene.index.Term): 1833, TermQuery (org.apache.lucene.search.TermQuery): 758, Document (org.apache.lucene.document.Document): 499, BooleanQuery (org.apache.lucene.search.BooleanQuery): 459, Query (org.apache.lucene.search.Query): 395, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 349, Directory (org.apache.lucene.store.Directory): 347, IndexReader (org.apache.lucene.index.IndexReader): 346, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 333, Test (org.junit.Test): 302, IndexSearcher (org.apache.lucene.search.IndexSearcher): 287, ArrayList (java.util.ArrayList): 226, TopDocs (org.apache.lucene.search.TopDocs): 209, PhraseQuery (org.apache.lucene.search.PhraseQuery): 200, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 192, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 178, SpanQuery (org.apache.lucene.search.spans.SpanQuery): 167, IndexWriter (org.apache.lucene.index.IndexWriter): 159, BytesRef (org.apache.lucene.util.BytesRef): 158, Field (org.apache.lucene.document.Field): 157