Examples with TextField - org.apache.lucene.document.TextField

Example 11 with TextField

use of org.apache.lucene.document.TextField in project elasticsearch by elastic.

the class SimpleLuceneTests method testOrdering.

/**
     * Here, we verify that the order that we add fields to a document counts, and not the lexi order
     * of the field. This means that heavily accessed fields that use field selector should be added
     * first (with load and break).
     */
public void testOrdering() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("#id", "1", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    final ArrayList<String> fieldsOrder = new ArrayList<>();
    searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            fieldsOrder.add(fieldInfo.name);
            return Status.YES;
        }
    });
    assertThat(fieldsOrder.size(), equalTo(2));
    assertThat(fieldsOrder.get(0), equalTo("_id"));
    assertThat(fieldsOrder.get(1), equalTo("#id"));
    indexWriter.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) TopDocs(org.apache.lucene.search.TopDocs) IndexWriter(org.apache.lucene.index.IndexWriter) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) FieldInfo(org.apache.lucene.index.FieldInfo) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 12 with TextField

use of org.apache.lucene.document.TextField in project elasticsearch by elastic.

the class VectorHighlighterTests method testVectorHighlighterNoTermVector.

public void testVectorHighlighterNoTermVector() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("content", "the big bad dog", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) FastVectorHighlighter(org.apache.lucene.search.vectorhighlight.FastVectorHighlighter) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) TopDocs(org.apache.lucene.search.TopDocs) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 13 with TextField

use of org.apache.lucene.document.TextField in project elasticsearch by elastic.

the class VectorHighlighterTests method testVectorHighlighter.

public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, notNullValue());
    assertThat(fragment, equalTo("the big <b>bad</b> dog"));
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) FastVectorHighlighter(org.apache.lucene.search.vectorhighlight.FastVectorHighlighter) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 14 with TextField

use of org.apache.lucene.document.TextField in project elasticsearch by elastic.

the class ShadowEngineTests method testDocumentWithTextField.

private ParseContext.Document testDocumentWithTextField() {
    ParseContext.Document document = testDocument();
    document.add(new TextField("value", "test", Field.Store.YES));
    return document;
}

Also used : ParseContext(org.elasticsearch.index.mapper.ParseContext) TextField(org.apache.lucene.document.TextField)

Example 15 with TextField

use of org.apache.lucene.document.TextField in project elasticsearch by elastic.

the class ShadowEngineTests method testSimpleOperations.

public void testSimpleOperations() throws Exception {
    Engine.Searcher searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(0));
    searchResult.close();
    // create a document
    ParseContext.Document document = testDocumentWithTextField();
    document.add(new Field(SourceFieldMapper.NAME, BytesReference.toBytes(B_1), SourceFieldMapper.Defaults.FIELD_TYPE));
    ParsedDocument doc = testParsedDocument("1", "test", null, document, B_1, null);
    primaryEngine.index(indexForDoc(doc));
    // its not there...
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    searchResult.close();
    // not on the replica either...
    searchResult = replicaEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    searchResult.close();
    // but, we can still get it (in realtime)
    Engine.GetResult getResult = primaryEngine.get(new Engine.Get(true, newUid(doc)));
    assertThat(getResult.exists(), equalTo(true));
    assertThat(getResult.docIdAndVersion(), notNullValue());
    getResult.release();
    // can't get it from the replica, because it's not in the translog for a shadow replica
    getResult = replicaEngine.get(new Engine.Get(true, newUid(doc)));
    assertThat(getResult.exists(), equalTo(false));
    getResult.release();
    // but, not there non realtime
    getResult = primaryEngine.get(new Engine.Get(false, newUid(doc)));
    assertThat(getResult.exists(), equalTo(true));
    getResult.release();
    // now its there...
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 1));
    searchResult.close();
    // also in non realtime
    getResult = primaryEngine.get(new Engine.Get(false, newUid(doc)));
    assertThat(getResult.exists(), equalTo(true));
    assertThat(getResult.docIdAndVersion(), notNullValue());
    getResult.release();
    // still not in the replica because no flush
    searchResult = replicaEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    searchResult.close();
    // now do an update
    document = testDocument();
    document.add(new TextField("value", "test1", Field.Store.YES));
    document.add(new Field(SourceFieldMapper.NAME, BytesReference.toBytes(B_2), SourceFieldMapper.Defaults.FIELD_TYPE));
    doc = testParsedDocument("1", "test", null, document, B_2, null);
    primaryEngine.index(indexForDoc(doc));
    // its not updated yet...
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 0));
    searchResult.close();
    // but, we can still get it (in realtime)
    getResult = primaryEngine.get(new Engine.Get(true, newUid(doc)));
    assertThat(getResult.exists(), equalTo(true));
    assertThat(getResult.docIdAndVersion(), notNullValue());
    getResult.release();
    // refresh and it should be updated
    primaryEngine.refresh("test");
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 1));
    searchResult.close();
    // flush, now shadow replica should have the files
    primaryEngine.flush();
    // still not in the replica because the replica hasn't refreshed
    searchResult = replicaEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    searchResult.close();
    replicaEngine.refresh("test");
    // the replica finally sees it because primary has flushed and replica refreshed
    searchResult = replicaEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 1));
    searchResult.close();
    // now delete
    primaryEngine.delete(new Engine.Delete("test", "1", newUid(doc)));
    // its not deleted yet
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 1));
    searchResult.close();
    // but, get should not see it (in realtime)
    getResult = primaryEngine.get(new Engine.Get(true, newUid(doc)));
    assertThat(getResult.exists(), equalTo(false));
    getResult.release();
    // refresh and it should be deleted
    primaryEngine.refresh("test");
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 0));
    searchResult.close();
    // add it back
    document = testDocumentWithTextField();
    document.add(new Field(SourceFieldMapper.NAME, BytesReference.toBytes(B_1), SourceFieldMapper.Defaults.FIELD_TYPE));
    doc = testParsedDocument("1", "test", null, document, B_1, null);
    primaryEngine.index(indexForDoc(doc));
    // its not there...
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 0));
    searchResult.close();
    // refresh and it should be there
    primaryEngine.refresh("test");
    // now its there...
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 0));
    searchResult.close();
    // now flush
    primaryEngine.flush();
    // and, verify get (in real time)
    getResult = primaryEngine.get(new Engine.Get(true, newUid(doc)));
    assertThat(getResult.exists(), equalTo(true));
    assertThat(getResult.docIdAndVersion(), notNullValue());
    getResult.release();
    // the replica should see it if we refresh too!
    replicaEngine.refresh("test");
    searchResult = replicaEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 0));
    searchResult.close();
    getResult = replicaEngine.get(new Engine.Get(true, newUid(doc)));
    assertThat(getResult.exists(), equalTo(true));
    assertThat(getResult.docIdAndVersion(), notNullValue());
    getResult.release();
    // make sure we can still work with the engine
    // now do an update
    document = testDocument();
    document.add(new TextField("value", "test1", Field.Store.YES));
    doc = testParsedDocument("1", "test", null, document, B_1, null);
    primaryEngine.index(indexForDoc(doc));
    // its not updated yet...
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 0));
    searchResult.close();
    // refresh and it should be updated
    primaryEngine.refresh("test");
    searchResult = primaryEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 1));
    searchResult.close();
    // Make visible to shadow replica
    primaryEngine.flush();
    replicaEngine.refresh("test");
    searchResult = replicaEngine.acquireSearcher("test");
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(1));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test")), 0));
    MatcherAssert.assertThat(searchResult, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(new TermQuery(new Term("value", "test1")), 1));
    searchResult.close();
}

Also used : TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) ParsedDocument(org.elasticsearch.index.mapper.ParsedDocument) ParseContext(org.elasticsearch.index.mapper.ParseContext) TextField(org.apache.lucene.document.TextField)

Aggregations

TextField (org.apache.lucene.document.TextField)192 Document (org.apache.lucene.document.Document)171 Directory (org.apache.lucene.store.Directory)99 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)61 Term (org.apache.lucene.index.Term)61 IndexWriter (org.apache.lucene.index.IndexWriter)58 IndexSearcher (org.apache.lucene.search.IndexSearcher)55 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)52 Field (org.apache.lucene.document.Field)50 StringField (org.apache.lucene.document.StringField)48 BytesRef (org.apache.lucene.util.BytesRef)48 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)44 IndexReader (org.apache.lucene.index.IndexReader)43 TermQuery (org.apache.lucene.search.TermQuery)41 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)31 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)30 TopDocs (org.apache.lucene.search.TopDocs)29 RAMDirectory (org.apache.lucene.store.RAMDirectory)29 FieldType (org.apache.lucene.document.FieldType)23 Query (org.apache.lucene.search.Query)23