Search in sources:

Example 1 with BitSet

use of org.apache.lucene.util.BitSet in project elasticsearch by elastic.

the class LongValuesComparatorSource method newComparator.

/**
 * Creates a long-valued comparator whose per-segment values are produced by
 * applying {@code sortMode} to the multi-valued field data of each document.
 *
 * <p>The missing-value substitute is resolved once via
 * {@code missingObject(missingValue, reversed)} and handed to {@code select(...)},
 * which is why the comparator itself is constructed with {@code null} for its
 * field name and missing value.
 *
 * @param fieldname name of the sort field; asserted to equal the field data's name when field data is set
 * @param numHits   number of hits the comparator must be able to hold
 * @param sortPos   position of this sort in the overall sort (not used here)
 * @param reversed  whether the sort is reversed; only influences the missing-value substitute here
 * @return a {@code FieldComparator.LongComparator} backed by the selected values
 */
@Override
public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
    assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
    final Long missingSubstitute = (Long) missingObject(missingValue, reversed);
    // No docsWithField check is needed in the comparator: select() already swaps in the substitute.
    return new FieldComparator.LongComparator(numHits, null, null) {

        @Override
        protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
            final SortedNumericDocValues multiValues = indexFieldData.load(context).getLongValues();
            if (nested == null) {
                // Flat documents: reduce each document's values directly.
                return sortMode.select(multiValues, missingSubstitute);
            }
            // Nested documents: reduce the inner documents' values onto their root document.
            final BitSet parentDocs = nested.rootDocs(context);
            final DocIdSetIterator childDocs = nested.innerDocs(context);
            return sortMode.select(multiValues, missingSubstitute, parentDocs, childDocs, context.reader().maxDoc());
        }
    };
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) BitSet(org.apache.lucene.util.BitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 2 with BitSet

use of org.apache.lucene.util.BitSet in project elasticsearch by elastic.

the class GeoDistanceSortBuilder method build.

/**
 * Builds the sort field (and its doc-value format) for a geo-distance sort.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Validate every point, unless the index was created before 2.0 or the
 *       validation method is "ignore malformed".</li>
 *   <li>Normalize every point in place when the validation method is "coerce".</li>
 *   <li>Pick the multi-value mode: the explicit {@code sortMode} if set, otherwise
 *       MAX for descending order and MIN for ascending order.</li>
 *   <li>Resolve the field type, its geo-point field data and the nested scope.</li>
 *   <li>Use {@code LatLonDocValuesField.newDistanceSort} when all fast-path
 *       conditions hold; otherwise fall back to a custom comparator source that
 *       computes distances per segment.</li>
 * </ol>
 *
 * @param context shard context used to resolve the index version, the field mapper and field data
 * @return the sort field paired with {@code DocValueFormat.RAW}
 * @throws ElasticsearchParseException if validation is active and a point has an illegal latitude or longitude
 * @throws IllegalArgumentException    if no mapper exists for {@code fieldName}
 * @throws IOException                 declared by the overridden method
 */
@Override
public SortFieldAndFormat build(QueryShardContext context) throws IOException {
    // Validation did not exist before 2.x; to keep older percolation queries working,
    // points are only validated on indexes created on or after 2.0.
    final boolean createdOnOrAfterV2 = !context.indexVersionCreated().before(Version.V_2_0_0);
    final GeoPoint[] localPoints = points.toArray(new GeoPoint[points.size()]);

    if (createdOnOrAfterV2 && !GeoValidationMethod.isIgnoreMalformed(validation)) {
        for (int i = 0; i < localPoints.length; i++) {
            final GeoPoint candidate = localPoints[i];
            if (!GeoUtils.isValidLatitude(candidate.lat())) {
                throw new ElasticsearchParseException("illegal latitude value [{}] for [GeoDistanceSort] for field [{}].", candidate.lat(), fieldName);
            }
            if (!GeoUtils.isValidLongitude(candidate.lon())) {
                throw new ElasticsearchParseException("illegal longitude value [{}] for [GeoDistanceSort] for field [{}].", candidate.lon(), fieldName);
            }
        }
    }

    // Coercion is a separate pass so that no point is touched if validation rejects any of them.
    if (GeoValidationMethod.isCoerce(validation)) {
        for (int i = 0; i < localPoints.length; i++) {
            GeoUtils.normalizePoint(localPoints[i], true, true);
        }
    }

    final boolean reverse = order == SortOrder.DESC;
    final MultiValueMode effectiveMode;
    if (sortMode != null) {
        effectiveMode = MultiValueMode.fromString(sortMode.toString());
    } else if (reverse) {
        effectiveMode = MultiValueMode.MAX;
    } else {
        effectiveMode = MultiValueMode.MIN;
    }

    final MappedFieldType fieldType = context.fieldMapper(fieldName);
    if (fieldType == null) {
        throw new IllegalArgumentException("failed to find mapper for [" + fieldName + "] for geo distance based sort");
    }
    final IndexGeoPointFieldData geoIndexFieldData = context.getForField(fieldType);
    final Nested nestedScope = resolveNested(context, nestedPath, nestedFilter);

    // Fast path: only works with 5.x geo_point field data.
    final boolean latLonDocValues = geoIndexFieldData.getClass() == LatLonPointDVIndexFieldData.class;
    // LatLonDocValuesField internally picks the closest point, so the request must ask for
    // exactly that: MIN mode, meters, ascending order, one origin point.
    final boolean closestPointInMeters = effectiveMode == MultiValueMode.MIN
            && unit == DistanceUnit.METERS
            && !reverse
            && localPoints.length == 1;
    if (latLonDocValues && nestedScope == null && closestPointInMeters) {
        final GeoPoint origin = localPoints[0];
        return new SortFieldAndFormat(LatLonDocValuesField.newDistanceSort(fieldName, origin.lat(), origin.lon()), DocValueFormat.RAW);
    }

    // General path: compute the distances per segment and reduce them with the chosen mode.
    final IndexFieldData.XFieldComparatorSource distanceSource = new IndexFieldData.XFieldComparatorSource() {

        @Override
        public SortField.Type reducedType() {
            return SortField.Type.DOUBLE;
        }

        @Override
        public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
            return new FieldComparator.DoubleComparator(numHits, null, null) {

                @Override
                protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
                    final MultiGeoPointValues pointsPerDoc = geoIndexFieldData.load(context).getGeoPointValues();
                    final SortedNumericDoubleValues distances = GeoUtils.distanceValues(geoDistance, unit, pointsPerDoc, localPoints);
                    // Double.POSITIVE_INFINITY is the value passed to select() as the missing-value substitute.
                    final NumericDoubleValues reduced = nestedScope == null
                            ? effectiveMode.select(distances, Double.POSITIVE_INFINITY)
                            : effectiveMode.select(distances, Double.POSITIVE_INFINITY, nestedScope.rootDocs(context),
                                    nestedScope.innerDocs(context), context.reader().maxDoc());
                    return reduced.getRawDoubleValues();
                }
            };
        }
    };
    return new SortFieldAndFormat(new SortField(fieldName, distanceSource, reverse), DocValueFormat.RAW);
}
Also used : IndexGeoPointFieldData(org.elasticsearch.index.fielddata.IndexGeoPointFieldData) Nested(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested) BitSet(org.apache.lucene.util.BitSet) SortField(org.apache.lucene.search.SortField) NumericDoubleValues(org.elasticsearch.index.fielddata.NumericDoubleValues) SortedNumericDoubleValues(org.elasticsearch.index.fielddata.SortedNumericDoubleValues) MultiValueMode(org.elasticsearch.search.MultiValueMode) GeoPoint(org.elasticsearch.common.geo.GeoPoint) LatLonPointDVIndexFieldData(org.elasticsearch.index.fielddata.plain.AbstractLatLonPointDVIndexFieldData.LatLonPointDVIndexFieldData) ElasticsearchParseException(org.elasticsearch.ElasticsearchParseException) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) LatLonPointDVIndexFieldData(org.elasticsearch.index.fielddata.plain.AbstractLatLonPointDVIndexFieldData.LatLonPointDVIndexFieldData) IndexFieldData(org.elasticsearch.index.fielddata.IndexFieldData) SortedNumericDoubleValues(org.elasticsearch.index.fielddata.SortedNumericDoubleValues) MultiGeoPointValues(org.elasticsearch.index.fielddata.MultiGeoPointValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 3 with BitSet

use of org.apache.lucene.util.BitSet in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method doTestRandomAdvance.

/**
 * Indexes several chunks of documents with a different sparseness per chunk, then
 * repeatedly walks the doc-values iterator with a random mix of {@code nextDoc()}
 * and {@code advance(...)} calls, asserting that the iterator never lands on a
 * document that was indexed without the field.
 *
 * @param fieldCreator supplies the field added to non-missing documents and the iterator under test
 * @throws IOException if indexing or reading fails
 */
private void doTestRandomAdvance(FieldCreator fieldCreator) throws IOException {
    Analyzer analyzer = new MockAnalyzer(random());
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter w = new RandomIndexWriter(random(), directory, conf);

    // Ids of the documents that were indexed WITHOUT the doc-values field.
    Set<Integer> idsWithoutField = new HashSet<>();
    int nextId = 0;
    final int chunkCount = atLeast(10);
    for (int chunk = 0; chunk < chunkCount; chunk++) {
        // Every chunk gets its own sparseness.
        final double sparseChance = random().nextDouble();
        final int docsInChunk = atLeast(1000);
        for (int n = 0; n < docsInChunk; n++) {
            Document doc = new Document();
            doc.add(new StoredField("id", nextId));
            if (random().nextDouble() <= sparseChance) {
                idsWithoutField.add(nextId);
            } else {
                doc.add(fieldCreator.next());
            }
            nextId++;
            w.addDocument(doc);
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }

    // Map the stored ids back to doc ids so the missing documents can be looked up per docID.
    IndexReader r = w.getReader();
    final int maxDoc = r.maxDoc();
    BitSet missing = new FixedBitSet(maxDoc);
    for (int docID = 0; docID < maxDoc; docID++) {
        final Number storedId = r.document(docID).getField("id").numericValue();
        if (idsWithoutField.contains(storedId)) {
            missing.set(docID);
        }
    }

    for (int iter = 0; iter < 100; iter++) {
        DocIdSetIterator values = fieldCreator.iterator(r);
        assertEquals(-1, values.docID());
        while (true) {
            final int docID;
            if (random().nextBoolean()) {
                docID = values.nextDoc();
            } else {
                // One time in ten allow a jump up to the end of the index; otherwise stay within 25 docs.
                final int maxJump = random().nextInt(10) == 7 ? r.maxDoc() - values.docID() : 25;
                final int jump = TestUtil.nextInt(random(), 1, maxJump);
                docID = values.advance(values.docID() + jump);
            }
            if (docID == NO_MORE_DOCS) {
                break;
            }
            assertFalse(missing.get(docID));
        }
    }
    IOUtils.close(r, w, directory);
}
Also used : BitSet(org.apache.lucene.util.BitSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) FixedBitSet(org.apache.lucene.util.FixedBitSet) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 4 with BitSet

use of org.apache.lucene.util.BitSet in project lucene-solr by apache.

the class TestJoinUtil method executeRandomJoin.

/**
 * Runs randomized join queries against freshly created random indexes and checks
 * both the full set of matching documents and the top hits against independently
 * computed expectations.
 *
 * @param multipleValuesPerDocument whether documents may carry several join values
 * @param maxIndexIter              number of random indexes to create
 * @param maxSearchIter             number of random searches to run per index
 * @param numberOfDocumentsToIndex  number of documents passed to the index creation
 * @throws Exception if indexing, searching or an assertion helper fails
 */
private void executeRandomJoin(boolean multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) throws Exception {
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) {
        if (VERBOSE) {
            System.out.println("indexIter=" + indexIter);
        }
        IndexIterationContext context = createContext(numberOfDocumentsToIndex, multipleValuesPerDocument, false);
        IndexSearcher indexSearcher = context.searcher;
        for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) {
            if (VERBOSE) {
                System.out.println("searchIter=" + searchIter);
            }
            final int pick = random().nextInt(context.randomUniqueValues.length);
            final boolean from = context.randomFrom[pick];
            final String randomValue = context.randomUniqueValues[pick];
            BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);

            final Query actualQuery = new TermQuery(new Term("value", randomValue));
            if (VERBOSE) {
                System.out.println("actualQuery=" + actualQuery);
            }
            final ScoreMode[] allScoreModes = ScoreMode.values();
            final ScoreMode scoreMode = allScoreModes[random().nextInt(allScoreModes.length)];
            if (VERBOSE) {
                System.out.println("scoreMode=" + scoreMode);
            }

            // A single-valued field can also be handled by the multi-valued code path.
            final boolean muliValsQuery = multipleValuesPerDocument || random().nextBoolean();
            final String fromField;
            final String toField;
            if (from) {
                fromField = "from";
                toField = "to";
            } else {
                fromField = "to";
                toField = "from";
            }

            final Query joinQuery;
            final int surpriseMe = random().nextInt(2);
            if (surpriseMe == 0) {
                // Numeric join: choose the numeric type (and the matching field suffix) at random.
                final Class<? extends Number> numType;
                final String suffix;
                if (random().nextBoolean()) {
                    numType = Integer.class;
                    suffix = "INT";
                } else if (random().nextBoolean()) {
                    numType = Float.class;
                    suffix = "FLOAT";
                } else if (random().nextBoolean()) {
                    numType = Long.class;
                    suffix = "LONG";
                } else {
                    numType = Double.class;
                    suffix = "DOUBLE";
                }
                joinQuery = JoinUtil.createJoinQuery(fromField + suffix, muliValsQuery, toField + suffix, numType, actualQuery, indexSearcher, scoreMode);
            } else if (surpriseMe == 1) {
                // Join on the un-suffixed fields.
                joinQuery = JoinUtil.createJoinQuery(fromField, muliValsQuery, toField, actualQuery, indexSearcher, scoreMode);
            } else {
                throw new RuntimeException("unexpected value " + surpriseMe);
            }
            if (VERBOSE) {
                System.out.println("joinQuery=" + joinQuery);
            }

            // Every matching document is needed, which TopDocs alone does not provide (and relying
            // on it would also test TopDocsCollector), so a bit set is collected alongside the top hits.
            final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
            final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
            indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));

            // Check the complete match set first ...
            assertBitSet(expectedResult, actualResult, indexSearcher);

            // ... then the top hits.
            final TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
            final TopDocs actualTopDocs = topScoreDocCollector.topDocs();
            assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
        }
        context.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) BitSet(org.apache.lucene.util.BitSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) Term(org.apache.lucene.index.Term) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) TopDocs(org.apache.lucene.search.TopDocs) FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 5 with BitSet

use of org.apache.lucene.util.BitSet in project lucene-solr by apache.

the class TestQueryBitSetProducer method testSimple.

/**
 * Indexes a single empty document and checks {@code QueryBitSetProducer} against the
 * only leaf of the reader: a query matching nothing yields a {@code null} bit set, a
 * query matching everything yields a one-bit set with bit 0 set, and in both cases
 * the producer's cache holds exactly one entry afterwards.
 *
 * @throws Exception if indexing or reading fails
 */
public void testSimple() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    w.addDocument(new Document());
    DirectoryReader reader = w.getReader();

    // No matches: a null bit set is returned and one cache entry is recorded.
    QueryBitSetProducer matchNothing = new QueryBitSetProducer(new MatchNoDocsQuery());
    assertNull(matchNothing.getBitSet(reader.leaves().get(0)));
    assertEquals(1, matchNothing.cache.size());

    // All documents match: the bit set covers the single document.
    QueryBitSetProducer matchEverything = new QueryBitSetProducer(new MatchAllDocsQuery());
    BitSet allDocs = matchEverything.getBitSet(reader.leaves().get(0));
    assertEquals(1, allDocs.length());
    assertEquals(true, allDocs.get(0));
    assertEquals(1, matchEverything.cache.size());

    IOUtils.close(reader, w, dir);
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) FilterDirectoryReader(org.apache.lucene.index.FilterDirectoryReader) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) BitSet(org.apache.lucene.util.BitSet) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

BitSet (org.apache.lucene.util.BitSet): 26; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 14; DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 10; FixedBitSet (org.apache.lucene.util.FixedBitSet): 7; Scorer (org.apache.lucene.search.Scorer): 6; IntPoint (org.apache.lucene.document.IntPoint): 4; SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues): 4; IndexSearcher (org.apache.lucene.search.IndexSearcher): 4; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 4; MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery): 4; Query (org.apache.lucene.search.Query): 4; Weight (org.apache.lucene.search.Weight): 4; Document (org.apache.lucene.document.Document): 3; DoublePoint (org.apache.lucene.document.DoublePoint): 3; FloatPoint (org.apache.lucene.document.FloatPoint): 3; LongPoint (org.apache.lucene.document.LongPoint): 3; HashSet (java.util.HashSet): 2; Set (java.util.Set): 2; DirectoryReader (org.apache.lucene.index.DirectoryReader): 2; FilterDirectoryReader (org.apache.lucene.index.FilterDirectoryReader): 2