Search in sources :

Example 91 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class TestIndexSearcher method testCount.

/**
 * Checks that {@code IndexSearcher.count(Query)} agrees with the {@code totalHits} of a
 * regular search for several query shapes, first on the freshly built index and then again
 * after the documents carrying the {@code delete:yes} marker have been deleted.
 */
public void testCount() throws IOException {
    final Directory directory = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // Each document independently may get foo:bar, foo:baz and (rarely) the delete marker.
    final int docCount = atLeast(100);
    for (int remaining = docCount; remaining > 0; remaining--) {
        final Document document = new Document();
        if (random().nextBoolean()) {
            document.add(new StringField("foo", "bar", Store.NO));
        }
        if (random().nextBoolean()) {
            document.add(new StringField("foo", "baz", Store.NO));
        }
        if (rarely()) {
            document.add(new StringField("delete", "yes", Store.NO));
        }
        writer.addDocument(document);
    }

    // Pass 0 runs against the index as written; pass 1 applies the deletes first.
    for (int pass = 0; pass < 2; pass++) {
        final boolean applyDeletes = pass == 1;
        if (applyDeletes) {
            writer.deleteDocuments(new Term("delete", "yes"));
        }
        final IndexReader reader = writer.getReader();
        final IndexSearcher searcher = newSearcher(reader);

        // Test multiple queries, some of them are optimized by IndexSearcher.count()
        final Query[] queries = {
            new MatchAllDocsQuery(),
            new MatchNoDocsQuery(),
            new TermQuery(new Term("foo", "bar")),
            new ConstantScoreQuery(new TermQuery(new Term("foo", "baz"))),
            new BooleanQuery.Builder()
                .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
                .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
                .build()
        };
        for (Query query : queries) {
            final int counted = searcher.count(query);
            assertEquals(counted, searcher.search(query, 1).totalHits);
        }
        reader.close();
    }

    writer.close();
    directory.close();
}
Also used : Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 92 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class TestFuzzyQuery method testRandom.

/**
 * Randomized check of {@code FuzzyQuery}: indexes a set of random terms, then for many random
 * query terms, prefix lengths and edit distances 0..2 compares the set of terms the query
 * matches against a set computed directly with {@code getDistance}.
 *
 * @throws AssertionError listing the unexpected and the missing terms when the sets differ
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public void testRandom() throws Exception {
    final int digits = TestUtil.nextInt(random(), 2, 3);
    // Underestimate of the number of unique terms that randomSimpleString
    // may generate; it assumes all terms have a length of 7.
    final int vocabularySize = digits << 7;
    final int termCount = Math.min(atLeast(100), vocabularySize);

    final Set<String> indexedTerms = new HashSet<>();
    while (indexedTerms.size() < termCount) {
        indexedTerms.add(randomSimpleString(digits));
    }

    // One document per unique term, stored so it can be read back from the hits.
    final Directory directory = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    for (String indexedTerm : indexedTerms) {
        final Document document = new Document();
        document.add(new StringField("field", indexedTerm, Field.Store.YES));
        writer.addDocument(document);
    }
    final DirectoryReader reader = writer.getReader();
    final IndexSearcher searcher = newSearcher(reader);

    final int iterations = atLeast(1000);
    for (int iteration = 0; iteration < iterations; iteration++) {
        final String queryTerm = randomSimpleString(digits);
        final int prefixLength = random().nextInt(queryTerm.length());
        final String queryPrefix = queryTerm.substring(0, prefixLength);

        // expectedByDistance[d] collects every indexed term that shares the prefix and whose
        // distance to the query term is at most d. Only the terms are compared further down,
        // not the scores.
        final List<TermAndScore>[] expectedByDistance = new List[3];
        for (int slot = 0; slot < expectedByDistance.length; slot++) {
            expectedByDistance[slot] = new ArrayList<>();
        }
        for (String candidate : indexedTerms) {
            if (candidate.startsWith(queryPrefix)) {
                final int distance = getDistance(candidate, queryTerm);
                final float score = 1f - (float) distance / (float) Math.min(queryTerm.length(), candidate.length());
                // A term at distance k is also an expected match for every larger allowed distance.
                for (int bucket = distance; bucket < 3; bucket++) {
                    expectedByDistance[bucket].add(new TermAndScore(candidate, score));
                }
            }
        }

        for (int maxEdits = 0; maxEdits < 3; maxEdits++) {
            final List<TermAndScore> expectedMatches = expectedByDistance[maxEdits];
            Collections.sort(expectedMatches);
            final int queueSize = TestUtil.nextInt(random(), 1, indexedTerms.size());

            final FuzzyQuery query = new FuzzyQuery(new Term("field", queryTerm), maxEdits, prefixLength, queueSize, true);
            final TopDocs hits = searcher.search(query, indexedTerms.size());
            final Set<String> actual = new HashSet<>();
            for (ScoreDoc hit : hits.scoreDocs) {
                actual.add(searcher.doc(hit.doc).get("field"));
            }

            // Only the first queueSize expected entries (in sorted order) are required to match.
            final int limit = Math.min(queueSize, expectedMatches.size());
            final Set<String> expectedTop = new HashSet<>();
            for (TermAndScore entry : expectedMatches.subList(0, limit)) {
                expectedTop.add(entry.term);
            }

            if (!actual.equals(expectedTop)) {
                final StringBuilder message = new StringBuilder();
                message.append("FAILED: query=").append(queryTerm)
                    .append(" ed=").append(maxEdits)
                    .append(" queueSize=").append(queueSize)
                    .append(" vs expected match size=").append(expectedMatches.size())
                    .append(" prefixLength=").append(prefixLength)
                    .append("\n");

                boolean headerPending = true;
                for (String term : actual) {
                    if (expectedTop.contains(term)) {
                        continue;
                    }
                    if (headerPending) {
                        message.append("  these matched but shouldn't:\n");
                        headerPending = false;
                    }
                    message.append("    ").append(term).append("\n");
                }

                headerPending = true;
                for (String term : expectedTop) {
                    if (actual.contains(term)) {
                        continue;
                    }
                    if (headerPending) {
                        message.append("  these did not match but should:\n");
                        headerPending = false;
                    }
                    message.append("    ").append(term).append("\n");
                }
                throw new AssertionError(message.toString());
            }
        }
    }
    IOUtils.close(reader, writer, directory);
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) StringField(org.apache.lucene.document.StringField) ArrayList(java.util.ArrayList) List(java.util.List) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) HashSet(java.util.HashSet) Directory(org.apache.lucene.store.Directory)

Example 93 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class TestFieldValueQuery method testApproximation.

/**
 * Verifies that a {@code FieldValueQuery} used as a FILTER clause next to a MUST term clause
 * selects the same documents as a term filter on {@code has_value}, a marker field that is
 * added only to the documents that also receive the {@code dv1}/{@code dv2} doc values.
 * Repeated over several freshly built random indexes.
 */
public void testApproximation() throws IOException {
    final int rounds = atLeast(10);
    for (int round = 0; round < rounds; round++) {
        final Directory directory = newDirectory();
        final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

        final int docCount = atLeast(100);
        for (int remaining = docCount; remaining > 0; remaining--) {
            final Document document = new Document();
            if (random().nextBoolean()) {
                // Doc values and the marker term are always added together.
                document.add(new NumericDocValuesField("dv1", 1));
                document.add(new SortedNumericDocValuesField("dv2", 1));
                document.add(new SortedNumericDocValuesField("dv2", 2));
                document.add(new StringField("has_value", "yes", Store.NO));
            }
            final String fValue = random().nextBoolean() ? "yes" : "no";
            document.add(new StringField("f", fValue, Store.NO));
            writer.addDocument(document);
        }

        // Sometimes remove every f:no document so deletions are exercised too.
        if (random().nextBoolean()) {
            writer.deleteDocuments(new TermQuery(new Term("f", "no")));
        }
        writer.commit();
        final IndexReader reader = writer.getReader();
        final IndexSearcher searcher = newSearcher(reader);
        writer.close();

        // Reference query: filter on the marker term instead of on doc values.
        final BooleanQuery.Builder reference = new BooleanQuery.Builder()
            .add(new TermQuery(new Term("f", "yes")), Occur.MUST)
            .add(new TermQuery(new Term("has_value", "yes")), Occur.FILTER);

        // Same shape, but filtering on the presence of each doc-values field in turn.
        for (String docValuesField : new String[] { "dv1", "dv2" }) {
            final BooleanQuery.Builder candidate = new BooleanQuery.Builder()
                .add(new TermQuery(new Term("f", "yes")), Occur.MUST)
                .add(new FieldValueQuery(docValuesField), Occur.FILTER);
            assertSameMatches(searcher, reference.build(), candidate.build(), true);
        }

        reader.close();
        directory.close();
    }
}
Also used : Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 94 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class TestKeywordAnalyzer method setUp.

/**
 * Prepares the fixture: creates a new directory and a {@code SimpleAnalyzer}, indexes a single
 * document with an un-analyzed {@code partnum} field and an analyzed {@code description}
 * field, and opens a reader on the result.
 *
 * @throws Exception if the superclass set-up, indexing, or opening the reader fails
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    analyzer = new SimpleAnalyzer();
    // try-with-resources closes the writer even when indexing throws, so a failure
    // here does not leave the writer (and its hold on the directory) open.
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
        Document doc = new Document();
        doc.add(new StringField("partnum", "Q36", Field.Store.YES));
        doc.add(new TextField("description", "Illidium Space Modulator", Field.Store.YES));
        writer.addDocument(doc);
    }
    // The writer is closed at this point, so the reader sees the added document.
    reader = DirectoryReader.open(directory);
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 95 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

the class TestAddIndexes method testExistingDeletes.

// LUCENE-2996: tests that addIndexes(IndexReader) applies existing deletes correctly.
// Two one-document indexes are built with the same id; the document is deleted from the
// first index, the second index is then added to it, and exactly one document must remain.
public void testExistingDeletes() throws Exception {
    final Directory[] directories = new Directory[2];
    for (int slot = 0; slot < directories.length; slot++) {
        final Directory directory = newDirectory();
        directories[slot] = directory;
        final IndexWriter seedWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
        final Document seedDoc = new Document();
        seedDoc.add(new StringField("id", "myid", Field.Store.NO));
        seedWriter.addDocument(seedDoc);
        seedWriter.close();
    }

    final IndexWriter target = new IndexWriter(directories[0], new IndexWriterConfig(new MockAnalyzer(random())));
    // Now delete the document
    target.deleteDocuments(new Term("id", "myid"));
    // Add the second index while the delete above has not been committed yet.
    try (DirectoryReader incoming = DirectoryReader.open(directories[1])) {
        TestUtil.addIndexesSlowly(target, incoming);
    }
    target.commit();
    assertEquals("Documents from the incoming index should not have been deleted", 1, target.numDocs());
    target.close();

    for (int slot = 0; slot < directories.length; slot++) {
        directories[slot].close();
    }
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory)

Aggregations

StringField (org.apache.lucene.document.StringField)323 Document (org.apache.lucene.document.Document)302 Directory (org.apache.lucene.store.Directory)227 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)129 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)94 Term (org.apache.lucene.index.Term)90 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)82 BytesRef (org.apache.lucene.util.BytesRef)73 IndexSearcher (org.apache.lucene.search.IndexSearcher)57 DirectoryReader (org.apache.lucene.index.DirectoryReader)56 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)55 ArrayList (java.util.ArrayList)54 TextField (org.apache.lucene.document.TextField)54 IndexReader (org.apache.lucene.index.IndexReader)51 Field (org.apache.lucene.document.Field)50 TermQuery (org.apache.lucene.search.TermQuery)50 IndexWriter (org.apache.lucene.index.IndexWriter)45 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)43 NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)43 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)40