Search in sources :

Example 96 with IndexReader

use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.

the class JoinDocFreqValueSource method getValues.

@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final BinaryDocValues terms = DocValues.getBinary(readerContext.reader(), field);
    final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
    Terms t = MultiFields.getTerms(top, qfield);
    final TermsEnum termsEnum = t == null ? TermsEnum.EMPTY : t.iterator();
    return new IntDocValues(this) {

        int lastDocID = -1;

        @Override
        public int intVal(int doc) throws IOException {
            if (doc < lastDocID) {
                throw new IllegalArgumentException("docs were sent out-of-order: lastDocID=" + lastDocID + " vs docID=" + doc);
            }
            lastDocID = doc;
            int curDocID = terms.docID();
            if (doc > curDocID) {
                curDocID = terms.advance(doc);
            }
            if (doc == curDocID) {
                BytesRef term = terms.binaryValue();
                if (termsEnum.seekExact(term)) {
                    return termsEnum.docFreq();
                }
            }
            return 0;
        }
    };
}
Also used : IndexReader(org.apache.lucene.index.IndexReader) Terms(org.apache.lucene.index.Terms) IntDocValues(org.apache.lucene.queries.function.docvalues.IntDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 97 with IndexReader

use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testRandom.

// TODO make a similar test for BT, w/ varied IDs:
public void testRandom() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
    int maxItemsInBlock = 2 * (minItemsInBlock - 1) + random().nextInt(50);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    //IndexWriter w = new IndexWriter(dir, iwc);
    int numDocs = atLeast(1000);
    Map<String, Long> idValues = new HashMap<String, Long>();
    int docUpto = 0;
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs);
    }
    IDSource ids = getRandomIDs();
    String idPrefix;
    if (random().nextBoolean()) {
        idPrefix = "";
    } else {
        idPrefix = TestUtil.randomSimpleString(random());
        if (VERBOSE) {
            System.out.println("TEST: use id prefix: " + idPrefix);
        }
    }
    boolean useMonotonicVersion = random().nextBoolean();
    if (VERBOSE) {
        System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
    }
    List<String> idsList = new ArrayList<>();
    long version = 0;
    while (docUpto < numDocs) {
        String idValue = idPrefix + ids.next();
        if (idValues.containsKey(idValue)) {
            continue;
        }
        if (useMonotonicVersion) {
            version += TestUtil.nextInt(random(), 1, 10);
        } else {
            version = random().nextLong() & 0x3fffffffffffffffL;
        }
        idValues.put(idValue, version);
        if (VERBOSE) {
            System.out.println("  " + idValue + " -> " + version);
        }
        Document doc = new Document();
        doc.add(makeIDField(idValue, version));
        w.addDocument(doc);
        idsList.add(idValue);
        if (idsList.size() > 0 && random().nextInt(7) == 5) {
            // Randomly delete or update a previous ID
            idValue = idsList.get(random().nextInt(idsList.size()));
            if (random().nextBoolean()) {
                if (useMonotonicVersion) {
                    version += TestUtil.nextInt(random(), 1, 10);
                } else {
                    version = random().nextLong() & 0x3fffffffffffffffL;
                }
                doc = new Document();
                doc.add(makeIDField(idValue, version));
                if (VERBOSE) {
                    System.out.println("  update " + idValue + " -> " + version);
                }
                w.updateDocument(new Term("id", idValue), doc);
                idValues.put(idValue, version);
            } else {
                if (VERBOSE) {
                    System.out.println("  delete " + idValue);
                }
                w.deleteDocuments(new Term("id", idValue));
                idValues.remove(idValue);
            }
        }
        docUpto++;
    }
    IndexReader r = w.getReader();
    //IndexReader r = DirectoryReader.open(w);
    PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
    List<Map.Entry<String, Long>> idValuesList = new ArrayList<>(idValues.entrySet());
    int iters = numDocs * 5;
    for (int iter = 0; iter < iters; iter++) {
        String idValue;
        if (random().nextBoolean()) {
            idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
        } else if (random().nextBoolean()) {
            idValue = ids.next();
        } else {
            idValue = idPrefix + TestUtil.randomSimpleString(random());
        }
        BytesRef idValueBytes = new BytesRef(idValue);
        Long expectedVersion = idValues.get(idValue);
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " id=" + idValue + " expectedVersion=" + expectedVersion);
        }
        if (expectedVersion == null) {
            assertEquals("term should not have been found (doesn't exist)", -1, lookup.lookup(idValueBytes));
        } else {
            if (random().nextBoolean()) {
                if (VERBOSE) {
                    System.out.println("  lookup exact version (should be found)");
                }
                assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
                assertEquals(expectedVersion.longValue(), lookup.getVersion());
            } else {
                if (VERBOSE) {
                    System.out.println("  lookup version+1 (should not be found)");
                }
                assertEquals("term should not have been found (version newer)", -1, lookup.lookup(idValueBytes, expectedVersion.longValue() + 1));
            }
        }
    }
    r.close();
    w.close();
    dir.close();
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) AtomicLong(java.util.concurrent.atomic.AtomicLong) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 98 with IndexReader

use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.

the class BaseGeoPointTestCase method testMultiValued.

public void testMultiValued() throws Exception {
    int numPoints = atLeast(10000);
    // Every doc has 2 points:
    double[] lats = new double[2 * numPoints];
    double[] lons = new double[2 * numPoints];
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // We rely on docID order:
    iwc.setMergePolicy(newLogMergePolicy());
    // and on seeds being able to reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    for (int id = 0; id < numPoints; id++) {
        Document doc = new Document();
        lats[2 * id] = quantizeLat(nextLatitude());
        lons[2 * id] = quantizeLon(nextLongitude());
        doc.add(newStringField("id", "" + id, Field.Store.YES));
        addPointToDoc(FIELD_NAME, doc, lats[2 * id], lons[2 * id]);
        lats[2 * id + 1] = quantizeLat(nextLatitude());
        lons[2 * id + 1] = quantizeLon(nextLongitude());
        addPointToDoc(FIELD_NAME, doc, lats[2 * id + 1], lons[2 * id + 1]);
        if (VERBOSE) {
            System.out.println("id=" + id);
            System.out.println("  lat=" + lats[2 * id] + " lon=" + lons[2 * id]);
            System.out.println("  lat=" + lats[2 * id + 1] + " lon=" + lons[2 * id + 1]);
        }
        w.addDocument(doc);
    }
    // TODO: share w/ verify; just need parallel array of the expected ids
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = newSearcher(r);
    int iters = atLeast(25);
    for (int iter = 0; iter < iters; iter++) {
        Rectangle rect = nextBox();
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " rect=" + rect);
        }
        Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
        final FixedBitSet hits = new FixedBitSet(r.maxDoc());
        s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }
        });
        boolean fail = false;
        for (int docID = 0; docID < lats.length / 2; docID++) {
            double latDoc1 = lats[2 * docID];
            double lonDoc1 = lons[2 * docID];
            double latDoc2 = lats[2 * docID + 1];
            double lonDoc2 = lons[2 * docID + 1];
            boolean result1 = rectContainsPoint(rect, latDoc1, lonDoc1);
            boolean result2 = rectContainsPoint(rect, latDoc2, lonDoc2);
            boolean expected = result1 || result2;
            if (hits.get(docID) != expected) {
                String id = s.doc(docID).get("id");
                if (expected) {
                    System.out.println("TEST: id=" + id + " docID=" + docID + " should match but did not");
                } else {
                    System.out.println("TEST: id=" + id + " docID=" + docID + " should not match but did");
                }
                System.out.println("  rect=" + rect);
                System.out.println("  lat=" + latDoc1 + " lon=" + lonDoc1 + "\n  lat=" + latDoc2 + " lon=" + lonDoc2);
                System.out.println("  result1=" + result1 + " result2=" + result2);
                fail = true;
            }
        }
        if (fail) {
            fail("some hits were wrong");
        }
    }
    r.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) SimpleCollector(org.apache.lucene.search.SimpleCollector) FixedBitSet(org.apache.lucene.util.FixedBitSet) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 99 with IndexReader

use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.

the class BaseGeoPointTestCase method searchSmallSet.

/** return topdocs over a small set of points in field "point" */
private TopDocs searchSmallSet(Query query, int size) throws Exception {
    // this is a simple systematic test, indexing these points
    // TODO: fragile: does not understand quantization in any way yet uses extremely high precision!
    double[][] pts = new double[][] { { 32.763420, -96.774 }, { 32.7559529921407, -96.7759895324707 }, { 32.77866942010977, -96.77701950073242 }, { 32.7756745755423, -96.7706036567688 }, { 27.703618681345585, -139.73458170890808 }, { 32.94823588839368, -96.4538113027811 }, { 33.06047141970814, -96.65084838867188 }, { 32.778650, -96.7772 }, { -88.56029371730983, -177.23537676036358 }, { 33.541429799076354, -26.779373834241003 }, { 26.774024500421728, -77.35379276106497 }, { -90.0, -14.796283808944777 }, { 32.94823588839368, -178.8538113027811 }, { 32.94823588839368, 178.8538113027811 }, { 40.720611, -73.998776 }, { -44.5, -179.5 } };
    Directory directory = newDirectory();
    // TODO: must these simple tests really rely on docid order?
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000));
    iwc.setMergePolicy(newLogMergePolicy());
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
    for (double[] p : pts) {
        Document doc = new Document();
        addPointToDoc("point", doc, p[0], p[1]);
        writer.addDocument(doc);
    }
    // add explicit multi-valued docs
    for (int i = 0; i < pts.length; i += 2) {
        Document doc = new Document();
        addPointToDoc("point", doc, pts[i][0], pts[i][1]);
        addPointToDoc("point", doc, pts[i + 1][0], pts[i + 1][1]);
        writer.addDocument(doc);
    }
    // index random string documents
    for (int i = 0; i < random().nextInt(10); ++i) {
        Document doc = new Document();
        doc.add(new StringField("string", Integer.toString(i), Field.Store.NO));
        writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    TopDocs topDocs = searcher.search(query, size);
    reader.close();
    directory.close();
    return topDocs;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 100 with IndexReader

use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.

the class BaseGeoPointTestCase method verifyRandomRectangles.

protected void verifyRandomRectangles(double[] lats, double[] lons) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < lats.length / 100) {
        iwc.setMaxBufferedDocs(lats.length / 100);
    }
    Directory dir;
    if (lats.length > 100000) {
        dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
    } else {
        dir = newDirectory();
    }
    Set<Integer> deleted = new HashSet<>();
    // RandomIndexWriter is too slow here:
    IndexWriter w = new IndexWriter(dir, iwc);
    for (int id = 0; id < lats.length; id++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + id, Field.Store.NO));
        doc.add(new NumericDocValuesField("id", id));
        if (Double.isNaN(lats[id]) == false) {
            addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
        }
        w.addDocument(doc);
        if (id > 0 && random().nextInt(100) == 42) {
            int idToDelete = random().nextInt(id);
            w.deleteDocuments(new Term("id", "" + idToDelete));
            deleted.add(idToDelete);
            if (VERBOSE) {
                System.out.println("  delete id=" + idToDelete);
            }
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    IndexSearcher s = newSearcher(r);
    int iters = atLeast(25);
    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
    int maxDoc = s.getIndexReader().maxDoc();
    for (int iter = 0; iter < iters; iter++) {
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " s=" + s);
        }
        Rectangle rect = nextBox();
        Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
        if (VERBOSE) {
            System.out.println("  query=" + query);
        }
        final FixedBitSet hits = new FixedBitSet(maxDoc);
        s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }
        });
        boolean fail = false;
        NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
        for (int docID = 0; docID < maxDoc; docID++) {
            assertEquals(docID, docIDToID.nextDoc());
            int id = (int) docIDToID.longValue();
            boolean expected;
            if (liveDocs != null && liveDocs.get(docID) == false) {
                // document is deleted
                expected = false;
            } else if (Double.isNaN(lats[id])) {
                expected = false;
            } else {
                expected = rectContainsPoint(rect, lats[id], lons[id]);
            }
            if (hits.get(docID) != expected) {
                StringBuilder b = new StringBuilder();
                b.append("docID=(" + docID + ")\n");
                if (expected) {
                    b.append("FAIL: id=" + id + " should match but did not\n");
                } else {
                    b.append("FAIL: id=" + id + " should not match but did\n");
                }
                b.append("  box=" + rect + "\n");
                b.append("  query=" + query + " docID=" + docID + "\n");
                b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
                b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
                if (true) {
                    fail("wrong hit (first of possibly more):\n\n" + b);
                } else {
                    System.out.println(b.toString());
                    fail = true;
                }
            }
        }
        if (fail) {
            fail("some hits were wrong");
        }
    }
    IOUtils.close(r, dir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) SimpleCollector(org.apache.lucene.search.SimpleCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) FixedBitSet(org.apache.lucene.util.FixedBitSet) IndexReader(org.apache.lucene.index.IndexReader) Bits(org.apache.lucene.util.Bits) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Aggregations

IndexReader (org.apache.lucene.index.IndexReader)962 Document (org.apache.lucene.document.Document)610 Directory (org.apache.lucene.store.Directory)603 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)549 IndexSearcher (org.apache.lucene.search.IndexSearcher)410 Term (org.apache.lucene.index.Term)332 TopDocs (org.apache.lucene.search.TopDocs)204 TermQuery (org.apache.lucene.search.TermQuery)160 Query (org.apache.lucene.search.Query)158 IndexWriter (org.apache.lucene.index.IndexWriter)150 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)143 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)142 Field (org.apache.lucene.document.Field)135 BytesRef (org.apache.lucene.util.BytesRef)134 IOException (java.io.IOException)133 BooleanQuery (org.apache.lucene.search.BooleanQuery)122 ArrayList (java.util.ArrayList)108 TextField (org.apache.lucene.document.TextField)81 Test (org.junit.Test)81