Examples with StringField - org.apache.lucene.document.StringField

Example 66 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class TestJoinUtil method testMinMaxScore.

public void testMinMaxScore() throws Exception {
    String priceField = "price";
    Query priceQuery = numericDocValuesScoreQuery(priceField);
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));
    Map<String, Float> lowestScoresPerParent = new HashMap<>();
    Map<String, Float> highestScoresPerParent = new HashMap<>();
    int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
    for (int p = 0; p < numParents; p++) {
        String parentId = Integer.toString(p);
        Document parentDoc = new Document();
        parentDoc.add(new StringField("id", parentId, Field.Store.YES));
        parentDoc.add(new StringField("type", "to", Field.Store.NO));
        parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
        iw.addDocument(parentDoc);
        int numChildren = RandomNumbers.randomIntBetween(random(), 2, 16);
        int lowest = Integer.MAX_VALUE;
        int highest = Integer.MIN_VALUE;
        for (int c = 0; c < numChildren; c++) {
            String childId = Integer.toString(p + c);
            Document childDoc = new Document();
            childDoc.add(new StringField("id", childId, Field.Store.YES));
            childDoc.add(new StringField("type", "from", Field.Store.NO));
            childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
            int price = random().nextInt(1000);
            childDoc.add(new NumericDocValuesField(priceField, price));
            iw.addDocument(childDoc);
            lowest = Math.min(lowest, price);
            highest = Math.max(highest, price);
        }
        lowestScoresPerParent.put(parentId, (float) lowest);
        highestScoresPerParent.put(parentId, (float) highest);
    }
    iw.close();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
    for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
        values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
    }
    MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
    BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
    fromQuery.add(priceQuery, BooleanClause.Occur.MUST);
    Query toQuery = new TermQuery(new Term("type", "to"));
    Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Min, ordinalMap);
    TopDocs topDocs = searcher.search(joinQuery, numParents);
    assertEquals(numParents, topDocs.totalHits);
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = topDocs.scoreDocs[i];
        String id = searcher.doc(scoreDoc.doc).get("id");
        assertEquals(lowestScoresPerParent.get(id), scoreDoc.score, 0f);
    }
    joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Max, ordinalMap);
    topDocs = searcher.search(joinQuery, numParents);
    assertEquals(numParents, topDocs.totalHits);
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = topDocs.scoreDocs[i];
        String id = searcher.doc(scoreDoc.doc).get("id");
        assertEquals(highestScoresPerParent.get(id), scoreDoc.score, 0f);
    }
    searcher.getIndexReader().close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) HashMap(java.util.HashMap) Document(org.apache.lucene.document.Document) MultiDocValues(org.apache.lucene.index.MultiDocValues) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) StringField(org.apache.lucene.document.StringField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 67 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class TestMemoryIndex method testBuildFromDocument.

@Test
public void testBuildFromDocument() {
    Document doc = new Document();
    doc.add(new TextField("field1", "some text", Field.Store.NO));
    doc.add(new TextField("field1", "some more text", Field.Store.NO));
    doc.add(new StringField("field2", "untokenized text", Field.Store.NO));
    analyzer.setPositionIncrementGap(100);
    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
    assertThat(mi.search(new TermQuery(new Term("field1", "text"))), not(0.0f));
    assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f));
    assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f));
    assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f));
    assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f));
    assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f));
}

Also used : TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Test(org.junit.Test)

Example 68 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class TestDocValuesNumbersQuery method testApproximation.

public void testApproximation() throws IOException {
    final int iters = atLeast(2);
    for (int iter = 0; iter < iters; ++iter) {
        final List<Long> allNumbers = new ArrayList<>();
        final int numNumbers = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
        for (int i = 0; i < numNumbers; ++i) {
            allNumbers.add(random().nextLong());
        }
        Directory dir = newDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
        final int numDocs = atLeast(100);
        for (int i = 0; i < numDocs; ++i) {
            Document doc = new Document();
            final Long number = allNumbers.get(random().nextInt(allNumbers.size()));
            doc.add(new StringField("text", number.toString(), Store.NO));
            doc.add(new NumericDocValuesField("long", number));
            iw.addDocument(doc);
        }
        if (numNumbers > 1 && random().nextBoolean()) {
            iw.deleteDocuments(new TermQuery(new Term("text", allNumbers.get(0).toString())));
        }
        iw.commit();
        final IndexReader reader = iw.getReader();
        final IndexSearcher searcher = newSearcher(reader);
        iw.close();
        if (reader.numDocs() == 0) {
            // may occasionally happen if all documents got the same term
            IOUtils.close(reader, dir);
            continue;
        }
        for (int i = 0; i < 100; ++i) {
            final float boost = random().nextFloat() * 10;
            final int numQueryNumbers = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
            Set<Long> queryNumbers = new HashSet<>();
            for (int j = 0; j < numQueryNumbers; ++j) {
                queryNumbers.add(allNumbers.get(random().nextInt(allNumbers.size())));
            }
            final BooleanQuery.Builder bq = new BooleanQuery.Builder();
            for (Long number : queryNumbers) {
                bq.add(new TermQuery(new Term("text", number.toString())), Occur.SHOULD);
            }
            Query q1 = new BoostQuery(new ConstantScoreQuery(bq.build()), boost);
            final Query q2 = new BoostQuery(new DocValuesNumbersQuery("long", queryNumbers), boost);
            BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
            bq1.add(q1, Occur.MUST);
            bq1.add(new TermQuery(new Term("text", allNumbers.get(0).toString())), Occur.FILTER);
            BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
            bq2.add(q2, Occur.MUST);
            bq2.add(new TermQuery(new Term("text", allNumbers.get(0).toString())), Occur.FILTER);
            assertSameMatches(searcher, bq1.build(), bq2.build(), true);
        }
        reader.close();
        dir.close();
    }
}

Also used : ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 69 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class BasePostingsFormatTestCase method testPostingsEnumDocsOnly.

public void testPostingsEnumDocsOnly() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(iw);
    // sugar method (FREQS)
    PostingsEnum postings = getOnlyLeafReader(reader).postings(new Term("foo", "bar"));
    assertEquals(-1, postings.docID());
    assertEquals(0, postings.nextDoc());
    assertEquals(1, postings.freq());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
    // termsenum reuse (FREQS)
    TermsEnum termsEnum = getOnlyLeafReader(reader).terms("foo").iterator();
    termsEnum.seekExact(new BytesRef("bar"));
    PostingsEnum postings2 = termsEnum.postings(postings);
    assertNotNull(postings2);
    assertReused("foo", postings, postings2);
    // and it had better work
    assertEquals(-1, postings.docID());
    assertEquals(0, postings.nextDoc());
    assertEquals(1, postings.freq());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
    // asking for any flags: ok
    for (int flag : new int[] { NONE, FREQS, POSITIONS, PAYLOADS, OFFSETS, ALL }) {
        postings = termsEnum.postings(null, flag);
        assertEquals(-1, postings.docID());
        assertEquals(0, postings.nextDoc());
        assertEquals(1, postings.freq());
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
        // reuse that too
        postings2 = termsEnum.postings(postings, flag);
        assertNotNull(postings2);
        assertReused("foo", postings, postings2);
        // and it had better work
        assertEquals(-1, postings2.docID());
        assertEquals(0, postings2.nextDoc());
        assertEquals(1, postings2.freq());
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
    }
    iw.close();
    reader.close();
    dir.close();
}

Also used : StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 70 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class BasePostingsFormatTestCase method testLevel2Ghosts.

// tests that level 2 ghost fields still work
public void testLevel2Ghosts() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setCodec(getCodec());
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwc);
    Document document = new Document();
    document.add(new StringField("id", "0", Field.Store.NO));
    document.add(new StringField("suggest_field", "apples", Field.Store.NO));
    iw.addDocument(document);
    // need another document so whole segment isn't deleted
    iw.addDocument(new Document());
    iw.commit();
    document = new Document();
    document.add(new StringField("id", "1", Field.Store.NO));
    document.add(new StringField("suggest_field2", "apples", Field.Store.NO));
    iw.addDocument(document);
    iw.commit();
    iw.deleteDocuments(new Term("id", "0"));
    // first force merge creates a level 1 ghost field
    iw.forceMerge(1);
    // second force merge creates a level 2 ghost field, causing MultiFields to include "suggest_field" in its iteration, yet a null Terms is returned (no documents have
    // this field anymore)
    iw.addDocument(new Document());
    iw.forceMerge(1);
    DirectoryReader reader = DirectoryReader.open(iw);
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    assertEquals(1, indexSearcher.count(new TermQuery(new Term("id", "1"))));
    reader.close();
    iw.close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Aggregations

StringField (org.apache.lucene.document.StringField)328 Document (org.apache.lucene.document.Document)306 Directory (org.apache.lucene.store.Directory)227 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)129 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)94 Term (org.apache.lucene.index.Term)93 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)82 BytesRef (org.apache.lucene.util.BytesRef)74 IndexSearcher (org.apache.lucene.search.IndexSearcher)58 TextField (org.apache.lucene.document.TextField)57 DirectoryReader (org.apache.lucene.index.DirectoryReader)56 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)55 ArrayList (java.util.ArrayList)54 Field (org.apache.lucene.document.Field)51 IndexReader (org.apache.lucene.index.IndexReader)51 TermQuery (org.apache.lucene.search.TermQuery)50 IndexWriter (org.apache.lucene.index.IndexWriter)46 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)43 NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)43 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)41