Search in sources :

Example 66 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class BaseGeoPointTestCase method verifyRandomRectangles.

protected void verifyRandomRectangles(double[] lats, double[] lons) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < lats.length / 100) {
        iwc.setMaxBufferedDocs(lats.length / 100);
    }
    Directory dir;
    if (lats.length > 100000) {
        dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
    } else {
        dir = newDirectory();
    }
    Set<Integer> deleted = new HashSet<>();
    // RandomIndexWriter is too slow here:
    IndexWriter w = new IndexWriter(dir, iwc);
    for (int id = 0; id < lats.length; id++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + id, Field.Store.NO));
        doc.add(new NumericDocValuesField("id", id));
        if (Double.isNaN(lats[id]) == false) {
            addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
        }
        w.addDocument(doc);
        if (id > 0 && random().nextInt(100) == 42) {
            int idToDelete = random().nextInt(id);
            w.deleteDocuments(new Term("id", "" + idToDelete));
            deleted.add(idToDelete);
            if (VERBOSE) {
                System.out.println("  delete id=" + idToDelete);
            }
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    IndexSearcher s = newSearcher(r);
    int iters = atLeast(25);
    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
    int maxDoc = s.getIndexReader().maxDoc();
    for (int iter = 0; iter < iters; iter++) {
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " s=" + s);
        }
        Rectangle rect = nextBox();
        Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
        if (VERBOSE) {
            System.out.println("  query=" + query);
        }
        final FixedBitSet hits = new FixedBitSet(maxDoc);
        s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }
        });
        boolean fail = false;
        NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
        for (int docID = 0; docID < maxDoc; docID++) {
            assertEquals(docID, docIDToID.nextDoc());
            int id = (int) docIDToID.longValue();
            boolean expected;
            if (liveDocs != null && liveDocs.get(docID) == false) {
                // document is deleted
                expected = false;
            } else if (Double.isNaN(lats[id])) {
                expected = false;
            } else {
                expected = rectContainsPoint(rect, lats[id], lons[id]);
            }
            if (hits.get(docID) != expected) {
                StringBuilder b = new StringBuilder();
                b.append("docID=(" + docID + ")\n");
                if (expected) {
                    b.append("FAIL: id=" + id + " should match but did not\n");
                } else {
                    b.append("FAIL: id=" + id + " should not match but did\n");
                }
                b.append("  box=" + rect + "\n");
                b.append("  query=" + query + " docID=" + docID + "\n");
                b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
                b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
                if (true) {
                    fail("wrong hit (first of possibly more):\n\n" + b);
                } else {
                    System.out.println(b.toString());
                    fail = true;
                }
            }
        }
        if (fail) {
            fail("some hits were wrong");
        }
    }
    IOUtils.close(r, dir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) SimpleCollector(org.apache.lucene.search.SimpleCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) FixedBitSet(org.apache.lucene.util.FixedBitSet) IndexReader(org.apache.lucene.index.IndexReader) Bits(org.apache.lucene.util.Bits) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 67 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class BaseGeoPointTestCase method doRandomDistanceTest.

private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    int pointsInLeaf = 2 + random().nextInt(4);
    iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {

        @Override
        public PointsFormat pointsFormat() {
            return new PointsFormat() {

                @Override
                public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
                    return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
                }

                @Override
                public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
                    return new Lucene60PointsReader(readState);
                }
            };
        }
    });
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    for (int i = 0; i < numDocs; i++) {
        double latRaw = nextLatitude();
        double lonRaw = nextLongitude();
        // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
        double lat = quantizeLat(latRaw);
        double lon = quantizeLon(lonRaw);
        Document doc = new Document();
        addPointToDoc("field", doc, lat, lon);
        doc.add(new StoredField("lat", lat));
        doc.add(new StoredField("lon", lon));
        writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    for (int i = 0; i < numQueries; i++) {
        double lat = nextLatitude();
        double lon = nextLongitude();
        double radius = 50000000D * random().nextDouble();
        BitSet expected = new BitSet();
        for (int doc = 0; doc < reader.maxDoc(); doc++) {
            double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
            double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
            double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
            if (distance <= radius) {
                expected.set(doc);
            }
        }
        TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
        BitSet actual = new BitSet();
        for (ScoreDoc doc : topDocs.scoreDocs) {
            actual.set(doc.doc);
        }
        try {
            assertEquals(expected, actual);
        } catch (AssertionError e) {
            System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
            for (int doc = 0; doc < reader.maxDoc(); doc++) {
                double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
                double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
                double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
                System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
            }
            throw e;
        }
    }
    reader.close();
    writer.close();
    dir.close();
}
Also used : Lucene60PointsWriter(org.apache.lucene.codecs.lucene60.Lucene60PointsWriter) PointsWriter(org.apache.lucene.codecs.PointsWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) Lucene60PointsReader(org.apache.lucene.codecs.lucene60.Lucene60PointsReader) SegmentReadState(org.apache.lucene.index.SegmentReadState) FixedBitSet(org.apache.lucene.util.FixedBitSet) BitSet(java.util.BitSet) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) FilterCodec(org.apache.lucene.codecs.FilterCodec) ScoreDoc(org.apache.lucene.search.ScoreDoc) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) PointsFormat(org.apache.lucene.codecs.PointsFormat) PointsReader(org.apache.lucene.codecs.PointsReader) Lucene60PointsReader(org.apache.lucene.codecs.lucene60.Lucene60PointsReader) IndexReader(org.apache.lucene.index.IndexReader) SegmentWriteState(org.apache.lucene.index.SegmentWriteState) Lucene60PointsWriter(org.apache.lucene.codecs.lucene60.Lucene60PointsWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 68 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class BaseGeoPointTestCase method verifyRandomDistances.

protected void verifyRandomDistances(double[] lats, double[] lons) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < lats.length / 100) {
        iwc.setMaxBufferedDocs(lats.length / 100);
    }
    Directory dir;
    if (lats.length > 100000) {
        dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
    } else {
        dir = newDirectory();
    }
    Set<Integer> deleted = new HashSet<>();
    // RandomIndexWriter is too slow here:
    IndexWriter w = new IndexWriter(dir, iwc);
    for (int id = 0; id < lats.length; id++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + id, Field.Store.NO));
        doc.add(new NumericDocValuesField("id", id));
        if (Double.isNaN(lats[id]) == false) {
            addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
        }
        w.addDocument(doc);
        if (id > 0 && random().nextInt(100) == 42) {
            int idToDelete = random().nextInt(id);
            w.deleteDocuments(new Term("id", "" + idToDelete));
            deleted.add(idToDelete);
            if (VERBOSE) {
                System.out.println("  delete id=" + idToDelete);
            }
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    IndexSearcher s = newSearcher(r);
    int iters = atLeast(25);
    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
    int maxDoc = s.getIndexReader().maxDoc();
    for (int iter = 0; iter < iters; iter++) {
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " s=" + s);
        }
        // Distance
        final double centerLat = nextLatitude();
        final double centerLon = nextLongitude();
        // So the query can cover at most 50% of the earth's surface:
        final double radiusMeters = random().nextDouble() * GeoUtils.EARTH_MEAN_RADIUS_METERS * Math.PI / 2.0 + 1.0;
        if (VERBOSE) {
            final DecimalFormat df = new DecimalFormat("#,###.00", DecimalFormatSymbols.getInstance(Locale.ENGLISH));
            System.out.println("  radiusMeters = " + df.format(radiusMeters));
        }
        Query query = newDistanceQuery(FIELD_NAME, centerLat, centerLon, radiusMeters);
        if (VERBOSE) {
            System.out.println("  query=" + query);
        }
        final FixedBitSet hits = new FixedBitSet(maxDoc);
        s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }
        });
        boolean fail = false;
        NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
        for (int docID = 0; docID < maxDoc; docID++) {
            assertEquals(docID, docIDToID.nextDoc());
            int id = (int) docIDToID.longValue();
            boolean expected;
            if (liveDocs != null && liveDocs.get(docID) == false) {
                // document is deleted
                expected = false;
            } else if (Double.isNaN(lats[id])) {
                expected = false;
            } else {
                expected = SloppyMath.haversinMeters(centerLat, centerLon, lats[id], lons[id]) <= radiusMeters;
            }
            if (hits.get(docID) != expected) {
                StringBuilder b = new StringBuilder();
                if (expected) {
                    b.append("FAIL: id=" + id + " should match but did not\n");
                } else {
                    b.append("FAIL: id=" + id + " should not match but did\n");
                }
                b.append("  query=" + query + " docID=" + docID + "\n");
                b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
                b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
                if (Double.isNaN(lats[id]) == false) {
                    double distanceMeters = SloppyMath.haversinMeters(centerLat, centerLon, lats[id], lons[id]);
                    b.append("  centerLat=" + centerLat + " centerLon=" + centerLon + " distanceMeters=" + distanceMeters + " vs radiusMeters=" + radiusMeters);
                }
                if (true) {
                    fail("wrong hit (first of possibly more):\n\n" + b);
                } else {
                    System.out.println(b.toString());
                    fail = true;
                }
            }
        }
        if (fail) {
            fail("some hits were wrong");
        }
    }
    IOUtils.close(r, dir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) DecimalFormat(java.text.DecimalFormat) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) SimpleCollector(org.apache.lucene.search.SimpleCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Bits(org.apache.lucene.util.Bits) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 69 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class BaseGeoPointTestCase method verifyRandomPolygons.

protected void verifyRandomPolygons(double[] lats, double[] lons) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < lats.length / 100) {
        iwc.setMaxBufferedDocs(lats.length / 100);
    }
    Directory dir;
    if (lats.length > 100000) {
        dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
    } else {
        dir = newDirectory();
    }
    Set<Integer> deleted = new HashSet<>();
    // RandomIndexWriter is too slow here:
    IndexWriter w = new IndexWriter(dir, iwc);
    for (int id = 0; id < lats.length; id++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + id, Field.Store.NO));
        doc.add(new NumericDocValuesField("id", id));
        if (Double.isNaN(lats[id]) == false) {
            addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
        }
        w.addDocument(doc);
        if (id > 0 && random().nextInt(100) == 42) {
            int idToDelete = random().nextInt(id);
            w.deleteDocuments(new Term("id", "" + idToDelete));
            deleted.add(idToDelete);
            if (VERBOSE) {
                System.out.println("  delete id=" + idToDelete);
            }
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    // We can't wrap with "exotic" readers because points needs to work:
    IndexSearcher s = newSearcher(r);
    final int iters = atLeast(75);
    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
    int maxDoc = s.getIndexReader().maxDoc();
    for (int iter = 0; iter < iters; iter++) {
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " s=" + s);
        }
        // Polygon
        Polygon polygon = nextPolygon();
        Query query = newPolygonQuery(FIELD_NAME, polygon);
        if (VERBOSE) {
            System.out.println("  query=" + query);
        }
        final FixedBitSet hits = new FixedBitSet(maxDoc);
        s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }
        });
        boolean fail = false;
        NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
        for (int docID = 0; docID < maxDoc; docID++) {
            assertEquals(docID, docIDToID.nextDoc());
            int id = (int) docIDToID.longValue();
            boolean expected;
            if (liveDocs != null && liveDocs.get(docID) == false) {
                // document is deleted
                expected = false;
            } else if (Double.isNaN(lats[id])) {
                expected = false;
            } else {
                expected = GeoTestUtil.containsSlowly(polygon, lats[id], lons[id]);
            }
            if (hits.get(docID) != expected) {
                StringBuilder b = new StringBuilder();
                if (expected) {
                    b.append("FAIL: id=" + id + " should match but did not\n");
                } else {
                    b.append("FAIL: id=" + id + " should not match but did\n");
                }
                b.append("  query=" + query + " docID=" + docID + "\n");
                b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
                b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
                b.append("  polygon=" + polygon);
                if (true) {
                    fail("wrong hit (first of possibly more):\n\n" + b);
                } else {
                    System.out.println(b.toString());
                    fail = true;
                }
            }
        }
        if (fail) {
            fail("some hits were wrong");
        }
    }
    IOUtils.close(r, dir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) SimpleCollector(org.apache.lucene.search.SimpleCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) FixedBitSet(org.apache.lucene.util.FixedBitSet) IndexReader(org.apache.lucene.index.IndexReader) Bits(org.apache.lucene.util.Bits) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 70 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class TestRAMDirectory method buildIndex.

private Path buildIndex() throws IOException {
    Path path = createTempDir("buildIndex");
    Directory dir = newFSDirectory(path);
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
    // add some documents
    Document doc = null;
    for (int i = 0; i < DOCS_TO_ADD; i++) {
        doc = new Document();
        doc.add(newStringField("content", English.intToEnglish(i).trim(), Field.Store.YES));
        writer.addDocument(doc);
    }
    assertEquals(DOCS_TO_ADD, writer.maxDoc());
    writer.close();
    dir.close();
    return path;
}
Also used : Path(java.nio.file.Path) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)513 IndexWriter (org.apache.lucene.index.IndexWriter)362 Document (org.apache.lucene.document.Document)311 Directory (org.apache.lucene.store.Directory)289 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)162 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)143 IndexReader (org.apache.lucene.index.IndexReader)140 Term (org.apache.lucene.index.Term)116 IndexSearcher (org.apache.lucene.search.IndexSearcher)106 TextField (org.apache.lucene.document.TextField)93 DirectoryReader (org.apache.lucene.index.DirectoryReader)92 RAMDirectory (org.apache.lucene.store.RAMDirectory)89 IOException (java.io.IOException)88 BytesRef (org.apache.lucene.util.BytesRef)80 Field (org.apache.lucene.document.Field)78 Analyzer (org.apache.lucene.analysis.Analyzer)74 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)61 Test (org.junit.Test)61 StringField (org.apache.lucene.document.StringField)59 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)49