Example 1 with FilterCodec

Use of org.apache.lucene.codecs.FilterCodec in the apache/lucene-solr project.

From class BaseGeoPointTestCase, method doRandomDistanceTest:

private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    int pointsInLeaf = 2 + random().nextInt(4);
    iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {

        @Override
        public PointsFormat pointsFormat() {
            return new PointsFormat() {

                @Override
                public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
                    return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
                }

                @Override
                public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
                    return new Lucene60PointsReader(readState);
                }
            };
        }
    });
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    for (int i = 0; i < numDocs; i++) {
        double latRaw = nextLatitude();
        double lonRaw = nextLongitude();
        // Pre-normalize up front so we can just use the quantized values for testing and do simple exact comparisons
        double lat = quantizeLat(latRaw);
        double lon = quantizeLon(lonRaw);
        Document doc = new Document();
        addPointToDoc("field", doc, lat, lon);
        doc.add(new StoredField("lat", lat));
        doc.add(new StoredField("lon", lon));
        writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    for (int i = 0; i < numQueries; i++) {
        double lat = nextLatitude();
        double lon = nextLongitude();
        double radius = 50000000D * random().nextDouble();
        BitSet expected = new BitSet();
        for (int doc = 0; doc < reader.maxDoc(); doc++) {
            double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
            double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
            double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
            if (distance <= radius) {
                expected.set(doc);
            }
        }
        TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
        BitSet actual = new BitSet();
        for (ScoreDoc doc : topDocs.scoreDocs) {
            actual.set(doc.doc);
        }
        try {
            assertEquals(expected, actual);
        } catch (AssertionError e) {
            System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
            for (int doc = 0; doc < reader.maxDoc(); doc++) {
                double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
                double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
                double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
                System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
            }
            throw e;
        }
    }
    reader.close();
    writer.close();
    dir.close();
}
Also used: Lucene60PointsWriter (org.apache.lucene.codecs.lucene60.Lucene60PointsWriter), PointsWriter (org.apache.lucene.codecs.PointsWriter), IndexSearcher (org.apache.lucene.search.IndexSearcher), Lucene60PointsReader (org.apache.lucene.codecs.lucene60.Lucene60PointsReader), SegmentReadState (org.apache.lucene.index.SegmentReadState), FixedBitSet (org.apache.lucene.util.FixedBitSet), BitSet (java.util.BitSet), IOException (java.io.IOException), Document (org.apache.lucene.document.Document), FilterCodec (org.apache.lucene.codecs.FilterCodec), ScoreDoc (org.apache.lucene.search.ScoreDoc), SerialMergeScheduler (org.apache.lucene.index.SerialMergeScheduler), TopDocs (org.apache.lucene.search.TopDocs), StoredField (org.apache.lucene.document.StoredField), PointsFormat (org.apache.lucene.codecs.PointsFormat), PointsReader (org.apache.lucene.codecs.PointsReader), IndexReader (org.apache.lucene.index.IndexReader), SegmentWriteState (org.apache.lucene.index.SegmentWriteState), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter), Directory (org.apache.lucene.store.Directory), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)
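
The heart of this example is a small, reusable pattern: subclass FilterCodec anonymously, delegate everything to the wrapped codec, and override only pointsFormat() so the BKD tree parameters can be varied under test. A minimal standalone sketch, assuming the same Lucene 6.x/7.x APIs the example itself uses (smallLeafCodec is a hypothetical helper name, not part of the original test):

import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.bkd.BKDWriter;

// Hypothetical helper: wraps `delegate`, overriding only the points format
// so each BKD leaf holds at most `pointsInLeaf` points.
static Codec smallLeafCodec(Codec delegate, int pointsInLeaf) {
    return new FilterCodec(delegate.getName(), delegate) {

        @Override
        public PointsFormat pointsFormat() {
            return new PointsFormat() {

                @Override
                public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
                    return new Lucene60PointsWriter(state, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
                }

                @Override
                public PointsReader fieldsReader(SegmentReadState state) throws IOException {
                    return new Lucene60PointsReader(state);
                }
            };
        }
    };
}

Reusing the delegate's name ("Lucene70" above) is what keeps the index readable: the segment records that name, the stock codec is resolved from it on read, and Lucene60PointsWriter emits exactly the file format that codec's points format expects.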

Example 2 with FilterCodec

Use of org.apache.lucene.codecs.FilterCodec in the apache/lucene-solr project.

From class TestGeo3DPoint, method getCodec:

private static Codec getCodec() {
    if (Codec.getDefault().getName().equals("Lucene70")) {
        int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
        double maxMBSortInHeap = 3.0 + (3 * random().nextDouble());
        if (VERBOSE) {
            System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
        }
        return new FilterCodec("Lucene70", Codec.getDefault()) {

            @Override
            public PointsFormat pointsFormat() {
                return new PointsFormat() {

                    @Override
                    public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
                        return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
                    }

                    @Override
                    public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
                        return new Lucene60PointsReader(readState);
                    }
                };
            }
        };
    } else {
        return Codec.getDefault();
    }
}
Also used: PointsFormat (org.apache.lucene.codecs.PointsFormat), Lucene60PointsReader (org.apache.lucene.codecs.lucene60.Lucene60PointsReader), SegmentReadState (org.apache.lucene.index.SegmentReadState), SegmentWriteState (org.apache.lucene.index.SegmentWriteState), Lucene60PointsWriter (org.apache.lucene.codecs.lucene60.Lucene60PointsWriter), GeoPoint (org.apache.lucene.spatial3d.geom.GeoPoint), FilterCodec (org.apache.lucene.codecs.FilterCodec)
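
A hedged sketch of how a helper like getCodec() is typically wired into a test (newDirectory, newIndexWriterConfig, and random come from Lucene's test framework, just as in the examples above):

Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// Install the wrapped codec; when the default codec's name is not
// "Lucene70", getCodec() returns the default unchanged.
iwc.setCodec(getCodec());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
// ... index and query as usual ...
writer.close();
dir.close();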

Example 3 with FilterCodec

Use of org.apache.lucene.codecs.FilterCodec in the apache/lucene-solr project.

From class BasePostingsFormatTestCase, method testInvertedWrite:

// LUCENE-5123: make sure we can visit postings twice
// during flush/merge
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();
    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();
    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()
    iwc.setCodec(new FilterCodec(getCodec().getName(), getCodec()) {

        @Override
        public PostingsFormat postingsFormat() {
            final PostingsFormat defaultPostingsFormat = delegate.postingsFormat();
            final Thread mainThread = Thread.currentThread();
            return new PostingsFormat(defaultPostingsFormat.getName()) {

                @Override
                public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
                    final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
                    return new FieldsConsumer() {

                        @Override
                        public void write(Fields fields) throws IOException {
                            fieldsConsumer.write(fields);
                            boolean isMerge = state.context.context == IOContext.Context.MERGE;
                            // in this test:
                            assert isMerge || Thread.currentThread() == mainThread;
                            // We iterate the provided TermsEnum
                            // twice, so we exercise this new freedom
                            // with the inverted API; if
                            // addOnSecondPass is true, we add up
                            // term stats on the 2nd iteration:
                            boolean addOnSecondPass = random().nextBoolean();
                            //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
                            // Gather our own stats:
                            Terms terms = fields.terms("body");
                            assert terms != null;
                            TermsEnum termsEnum = terms.iterator();
                            PostingsEnum docs = null;
                            while (termsEnum.next() != null) {
                                BytesRef term = termsEnum.term();
                                // TODO: also sometimes ask for payloads/offsets?
                                boolean noPositions = random().nextBoolean();
                                if (noPositions) {
                                    docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                } else {
                                    docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                }
                                int docFreq = 0;
                                long totalTermFreq = 0;
                                while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                    docFreq++;
                                    totalTermFreq += docs.freq();
                                    int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                    if (!noPositions) {
                                        for (int i = 0; i < limit; i++) {
                                            docs.nextPosition();
                                        }
                                    }
                                }
                                String termString = term.utf8ToString();
                                // During merge we should only see terms
                                // we had already seen during a
                                // previous flush:
                                assertTrue(isMerge == false || termFreqs.containsKey(termString));
                                if (isMerge == false) {
                                    if (addOnSecondPass == false) {
                                        TermFreqs tf = termFreqs.get(termString);
                                        if (tf == null) {
                                            tf = new TermFreqs();
                                            termFreqs.put(termString, tf);
                                        }
                                        tf.docFreq += docFreq;
                                        tf.totalTermFreq += totalTermFreq;
                                        sumDocFreq.addAndGet(docFreq);
                                        sumTotalTermFreq.addAndGet(totalTermFreq);
                                    } else if (termFreqs.containsKey(termString) == false) {
                                        // Add placeholder (2nd pass will
                                        // set its counts):
                                        termFreqs.put(termString, new TermFreqs());
                                    }
                                }
                            }
                            // Also test seeking the TermsEnum:
                            for (String term : termFreqs.keySet()) {
                                if (termsEnum.seekExact(new BytesRef(term))) {
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }
                                    if (isMerge == false && addOnSecondPass) {
                                        TermFreqs tf = termFreqs.get(term);
                                        assert tf != null;
                                        tf.docFreq += docFreq;
                                        tf.totalTermFreq += totalTermFreq;
                                        sumDocFreq.addAndGet(docFreq);
                                        sumTotalTermFreq.addAndGet(totalTermFreq);
                                    }
                                    //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                    assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                    assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                }
                            }
                            // Also test seekCeil
                            for (int iter = 0; iter < 10; iter++) {
                                BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
                                SeekStatus status = termsEnum.seekCeil(term);
                                if (status == SeekStatus.NOT_FOUND) {
                                    assertTrue(term.compareTo(termsEnum.term()) < 0);
                                }
                            }
                        }

                        @Override
                        public void close() throws IOException {
                            fieldsConsumer.close();
                        }
                    };
                }

                @Override
                public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                    return defaultPostingsFormat.fieldsProducer(state);
                }
            };
        }
    });
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        Document justBodyDoc = new Document();
        justBodyDoc.add(doc.getField("body"));
        w.addDocument(justBodyDoc);
        bytesIndexed += RamUsageTester.sizeOf(justBodyDoc);
    }
    IndexReader r = w.getReader();
    w.close();
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());
    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);
    r.close();
    dir.close();
}
Also used: FieldsConsumer (org.apache.lucene.codecs.FieldsConsumer), Document (org.apache.lucene.document.Document), FilterCodec (org.apache.lucene.codecs.FilterCodec), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), BytesRef (org.apache.lucene.util.BytesRef), Directory (org.apache.lucene.store.Directory), LineFileDocs (org.apache.lucene.util.LineFileDocs), FieldsProducer (org.apache.lucene.codecs.FieldsProducer), IOException (java.io.IOException), AtomicLong (java.util.concurrent.atomic.AtomicLong), PostingsFormat (org.apache.lucene.codecs.PostingsFormat), SeekStatus (org.apache.lucene.index.TermsEnum.SeekStatus)
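
Stripped of its bookkeeping and assertions, Example 3 reduces to a delegating PostingsFormat: writes are intercepted so the test can re-walk the postings, and reads pass straight through. A skeleton sketch under the same Lucene 6.x/7.x codec APIs the example uses (def is just a local name for the delegate's format):

iwc.setCodec(new FilterCodec(getCodec().getName(), getCodec()) {

    @Override
    public PostingsFormat postingsFormat() {
        final PostingsFormat def = delegate.postingsFormat();
        return new PostingsFormat(def.getName()) {

            @Override
            public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
                final FieldsConsumer in = def.fieldsConsumer(state);
                return new FieldsConsumer() {

                    @Override
                    public void write(Fields fields) throws IOException {
                        // Let the real format write first, then re-walk the
                        // postings for verification (the long body above).
                        in.write(fields);
                    }

                    @Override
                    public void close() throws IOException {
                        in.close();
                    }
                };
            }

            @Override
            public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                // Reads are delegated untouched.
                return def.fieldsProducer(state);
            }
        };
    }
});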

Example 4 with FilterCodec

Use of org.apache.lucene.codecs.FilterCodec in the apache/lucene-solr project.

From class TestPointQueries, method getCodec:

private static Codec getCodec() {
    if (Codec.getDefault().getName().equals("Lucene70")) {
        int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
        double maxMBSortInHeap = 5.0 + (3 * random().nextDouble());
        if (VERBOSE) {
            System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
        }
        return new FilterCodec("Lucene70", Codec.getDefault()) {

            @Override
            public PointsFormat pointsFormat() {
                return new PointsFormat() {

                    @Override
                    public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
                        return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
                    }

                    @Override
                    public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
                        return new Lucene60PointsReader(readState);
                    }
                };
            }
        };
    } else {
        return Codec.getDefault();
    }
}
Also used: PointsFormat (org.apache.lucene.codecs.PointsFormat), Lucene60PointsReader (org.apache.lucene.codecs.lucene60.Lucene60PointsReader), SegmentReadState (org.apache.lucene.index.SegmentReadState), SegmentWriteState (org.apache.lucene.index.SegmentWriteState), Lucene60PointsWriter (org.apache.lucene.codecs.lucene60.Lucene60PointsWriter), BinaryPoint (org.apache.lucene.document.BinaryPoint), DoublePoint (org.apache.lucene.document.DoublePoint), LongPoint (org.apache.lucene.document.LongPoint), IntPoint (org.apache.lucene.document.IntPoint), FloatPoint (org.apache.lucene.document.FloatPoint), FilterCodec (org.apache.lucene.codecs.FilterCodec)
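
Examples 2 and 4 differ only in the range over which they randomize maxMBSortInHeap. A hedged read-side sketch showing why the name trick used in all four examples is safe (dir is the Directory the index was written to; DirectoryReader is org.apache.lucene.index.DirectoryReader):

// The segment records the codec name "Lucene70". On reopen, Codec.forName
// resolves that name to the stock codec, whose points format reads the
// files Lucene60PointsWriter wrote.
IndexReader r = DirectoryReader.open(dir);
// ... search as usual ...
r.close();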

Aggregations

FilterCodec (org.apache.lucene.codecs.FilterCodec): 4
PointsFormat (org.apache.lucene.codecs.PointsFormat): 3
Lucene60PointsReader (org.apache.lucene.codecs.lucene60.Lucene60PointsReader): 3
Lucene60PointsWriter (org.apache.lucene.codecs.lucene60.Lucene60PointsWriter): 3
SegmentReadState (org.apache.lucene.index.SegmentReadState): 3
SegmentWriteState (org.apache.lucene.index.SegmentWriteState): 3
IOException (java.io.IOException): 2
Document (org.apache.lucene.document.Document): 2
Directory (org.apache.lucene.store.Directory): 2
BitSet (java.util.BitSet): 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1
AtomicLong (java.util.concurrent.atomic.AtomicLong): 1
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1
FieldsConsumer (org.apache.lucene.codecs.FieldsConsumer): 1
FieldsProducer (org.apache.lucene.codecs.FieldsProducer): 1
PointsReader (org.apache.lucene.codecs.PointsReader): 1
PointsWriter (org.apache.lucene.codecs.PointsWriter): 1
PostingsFormat (org.apache.lucene.codecs.PostingsFormat): 1
BinaryPoint (org.apache.lucene.document.BinaryPoint): 1
DoublePoint (org.apache.lucene.document.DoublePoint): 1