Search in sources :

Example 41 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class TestIndexSorting method assertNeedsIndexSortMerge.

private static void assertNeedsIndexSortMerge(SortField sortField, Consumer<Document> defaultValueConsumer, Consumer<Document> randomValueConsumer) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
    iwc.setCodec(codec);
    Sort indexSort = new Sort(sortField, new SortField("id", SortField.Type.INT));
    iwc.setIndexSort(indexSort);
    LogMergePolicy policy = newLogMergePolicy();
    // make sure that merge factor is always > 2
    if (policy.getMergeFactor() <= 2) {
        policy.setMergeFactor(3);
    }
    iwc.setMergePolicy(policy);
    // add already sorted documents
    codec.numCalls = 0;
    codec.needsIndexSort = false;
    IndexWriter w = new IndexWriter(dir, iwc);
    boolean withValues = random().nextBoolean();
    for (int i = 100; i < 200; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(i), Store.YES));
        doc.add(new NumericDocValuesField("id", i));
        doc.add(new IntPoint("point", random().nextInt()));
        if (withValues) {
            defaultValueConsumer.accept(doc);
        }
        w.addDocument(doc);
        if (i % 10 == 0) {
            w.commit();
        }
    }
    Set<Integer> deletedDocs = new HashSet<>();
    int num = random().nextInt(20);
    for (int i = 0; i < num; i++) {
        int nextDoc = random().nextInt(100);
        w.deleteDocuments(new Term("id", Integer.toString(nextDoc)));
        deletedDocs.add(nextDoc);
    }
    w.commit();
    w.waitForMerges();
    w.forceMerge(1);
    assertTrue(codec.numCalls > 0);
    // merge sort is needed
    codec.numCalls = 0;
    codec.needsIndexSort = true;
    for (int i = 10; i >= 0; i--) {
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(i), Store.YES));
        doc.add(new NumericDocValuesField("id", i));
        doc.add(new IntPoint("point", random().nextInt()));
        if (withValues) {
            defaultValueConsumer.accept(doc);
        }
        w.addDocument(doc);
        w.commit();
    }
    w.commit();
    w.waitForMerges();
    w.forceMerge(1);
    assertTrue(codec.numCalls > 0);
    // segment sort is needed
    codec.needsIndexSort = true;
    codec.numCalls = 0;
    for (int i = 201; i < 300; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(i), Store.YES));
        doc.add(new NumericDocValuesField("id", i));
        doc.add(new IntPoint("point", random().nextInt()));
        randomValueConsumer.accept(doc);
        w.addDocument(doc);
        if (i % 10 == 0) {
            w.commit();
        }
    }
    w.commit();
    w.waitForMerges();
    w.forceMerge(1);
    assertTrue(codec.numCalls > 0);
    w.close();
    dir.close();
}
Also used : SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IntPoint(org.apache.lucene.document.IntPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Sort(org.apache.lucene.search.Sort) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 42 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class JoinUtil method createJoinQuery.

/**
   * Method for query time joining for numeric fields. It supports multi- and single- values longs, ints, floats and longs.
   * All considerations from {@link JoinUtil#createJoinQuery(String, boolean, String, Query, IndexSearcher, ScoreMode)} are applicable here too,
   * though memory consumption might be higher.
   * <p>
   *
   * @param fromField                 The from field to join from
   * @param multipleValuesPerDocument Whether the from field has multiple terms per document
   *                                  when true fromField might be {@link DocValuesType#SORTED_NUMERIC},
   *                                  otherwise fromField should be {@link DocValuesType#NUMERIC}
   * @param toField                   The to field to join to, should be {@link IntPoint}, {@link LongPoint}, {@link FloatPoint}
   *                                  or {@link DoublePoint}.
   * @param numericType               either {@link java.lang.Integer}, {@link java.lang.Long}, {@link java.lang.Float}
   *                                  or {@link java.lang.Double} it should correspond to toField types
   * @param fromQuery                 The query to match documents on the from side
   * @param fromSearcher              The searcher that executed the specified fromQuery
   * @param scoreMode                 Instructs how scores from the fromQuery are mapped to the returned query
   * @return a {@link Query} instance that can be used to join documents based on the
   *         terms in the from and to field
   * @throws IOException If I/O related errors occur
   */
public static Query createJoinQuery(String fromField, boolean multipleValuesPerDocument, String toField, Class<? extends Number> numericType, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode) throws IOException {
    TreeSet<Long> joinValues = new TreeSet<>();
    Map<Long, Float> aggregatedScores = new HashMap<>();
    Map<Long, Integer> occurrences = new HashMap<>();
    boolean needsScore = scoreMode != ScoreMode.None;
    BiConsumer<Long, Float> scoreAggregator;
    if (scoreMode == ScoreMode.Max) {
        scoreAggregator = (key, score) -> {
            Float currentValue = aggregatedScores.putIfAbsent(key, score);
            if (currentValue != null) {
                aggregatedScores.put(key, Math.max(currentValue, score));
            }
        };
    } else if (scoreMode == ScoreMode.Min) {
        scoreAggregator = (key, score) -> {
            Float currentValue = aggregatedScores.putIfAbsent(key, score);
            if (currentValue != null) {
                aggregatedScores.put(key, Math.min(currentValue, score));
            }
        };
    } else if (scoreMode == ScoreMode.Total) {
        scoreAggregator = (key, score) -> {
            Float currentValue = aggregatedScores.putIfAbsent(key, score);
            if (currentValue != null) {
                aggregatedScores.put(key, currentValue + score);
            }
        };
    } else if (scoreMode == ScoreMode.Avg) {
        scoreAggregator = (key, score) -> {
            Float currentSore = aggregatedScores.putIfAbsent(key, score);
            if (currentSore != null) {
                aggregatedScores.put(key, currentSore + score);
            }
            Integer currentOccurrence = occurrences.putIfAbsent(key, 1);
            if (currentOccurrence != null) {
                occurrences.put(key, ++currentOccurrence);
            }
        };
    } else {
        scoreAggregator = (key, score) -> {
            throw new UnsupportedOperationException();
        };
    }
    LongFunction<Float> joinScorer;
    if (scoreMode == ScoreMode.Avg) {
        joinScorer = (joinValue) -> {
            Float aggregatedScore = aggregatedScores.get(joinValue);
            Integer occurrence = occurrences.get(joinValue);
            return aggregatedScore / occurrence;
        };
    } else {
        joinScorer = aggregatedScores::get;
    }
    Collector collector;
    if (multipleValuesPerDocument) {
        collector = new SimpleCollector() {

            SortedNumericDocValues sortedNumericDocValues;

            Scorer scorer;

            @Override
            public void collect(int doc) throws IOException {
                if (doc > sortedNumericDocValues.docID()) {
                    sortedNumericDocValues.advance(doc);
                }
                if (doc == sortedNumericDocValues.docID()) {
                    for (int i = 0; i < sortedNumericDocValues.docValueCount(); i++) {
                        long value = sortedNumericDocValues.nextValue();
                        joinValues.add(value);
                        if (needsScore) {
                            scoreAggregator.accept(value, scorer.score());
                        }
                    }
                }
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                sortedNumericDocValues = DocValues.getSortedNumeric(context.reader(), fromField);
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                this.scorer = scorer;
            }

            @Override
            public boolean needsScores() {
                return needsScore;
            }
        };
    } else {
        collector = new SimpleCollector() {

            NumericDocValues numericDocValues;

            Scorer scorer;

            private int lastDocID = -1;

            private boolean docsInOrder(int docID) {
                if (docID < lastDocID) {
                    throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
                }
                lastDocID = docID;
                return true;
            }

            @Override
            public void collect(int doc) throws IOException {
                assert docsInOrder(doc);
                int dvDocID = numericDocValues.docID();
                if (dvDocID < doc) {
                    dvDocID = numericDocValues.advance(doc);
                }
                long value;
                if (dvDocID == doc) {
                    value = numericDocValues.longValue();
                } else {
                    value = 0;
                }
                joinValues.add(value);
                if (needsScore) {
                    scoreAggregator.accept(value, scorer.score());
                }
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                numericDocValues = DocValues.getNumeric(context.reader(), fromField);
                lastDocID = -1;
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                this.scorer = scorer;
            }

            @Override
            public boolean needsScores() {
                return needsScore;
            }
        };
    }
    fromSearcher.search(fromQuery, collector);
    Iterator<Long> iterator = joinValues.iterator();
    final int bytesPerDim;
    final BytesRef encoded = new BytesRef();
    final PointInSetIncludingScoreQuery.Stream stream;
    if (Integer.class.equals(numericType)) {
        bytesPerDim = Integer.BYTES;
        stream = new PointInSetIncludingScoreQuery.Stream() {

            @Override
            public BytesRef next() {
                if (iterator.hasNext()) {
                    long value = iterator.next();
                    IntPoint.encodeDimension((int) value, encoded.bytes, 0);
                    if (needsScore) {
                        score = joinScorer.apply(value);
                    }
                    return encoded;
                } else {
                    return null;
                }
            }
        };
    } else if (Long.class.equals(numericType)) {
        bytesPerDim = Long.BYTES;
        stream = new PointInSetIncludingScoreQuery.Stream() {

            @Override
            public BytesRef next() {
                if (iterator.hasNext()) {
                    long value = iterator.next();
                    LongPoint.encodeDimension(value, encoded.bytes, 0);
                    if (needsScore) {
                        score = joinScorer.apply(value);
                    }
                    return encoded;
                } else {
                    return null;
                }
            }
        };
    } else if (Float.class.equals(numericType)) {
        bytesPerDim = Float.BYTES;
        stream = new PointInSetIncludingScoreQuery.Stream() {

            @Override
            public BytesRef next() {
                if (iterator.hasNext()) {
                    long value = iterator.next();
                    FloatPoint.encodeDimension(Float.intBitsToFloat((int) value), encoded.bytes, 0);
                    if (needsScore) {
                        score = joinScorer.apply(value);
                    }
                    return encoded;
                } else {
                    return null;
                }
            }
        };
    } else if (Double.class.equals(numericType)) {
        bytesPerDim = Double.BYTES;
        stream = new PointInSetIncludingScoreQuery.Stream() {

            @Override
            public BytesRef next() {
                if (iterator.hasNext()) {
                    long value = iterator.next();
                    DoublePoint.encodeDimension(Double.longBitsToDouble(value), encoded.bytes, 0);
                    if (needsScore) {
                        score = joinScorer.apply(value);
                    }
                    return encoded;
                } else {
                    return null;
                }
            }
        };
    } else {
        throw new IllegalArgumentException("unsupported numeric type, only Integer, Long, Float and Double are supported");
    }
    encoded.bytes = new byte[bytesPerDim];
    encoded.length = bytesPerDim;
    if (needsScore) {
        return new PointInSetIncludingScoreQuery(scoreMode, fromQuery, multipleValuesPerDocument, toField, bytesPerDim, stream) {

            @Override
            protected String toString(byte[] value) {
                return toString.apply(value, numericType);
            }
        };
    } else {
        return new PointInSetQuery(toField, 1, bytesPerDim, stream) {

            @Override
            protected String toString(byte[] value) {
                return PointInSetIncludingScoreQuery.toString.apply(value, numericType);
            }
        };
    }
}
Also used : Query(org.apache.lucene.search.Query) LongPoint(org.apache.lucene.document.LongPoint) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) NumericDocValues(org.apache.lucene.index.NumericDocValues) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) DoublePoint(org.apache.lucene.document.DoublePoint) PointInSetQuery(org.apache.lucene.search.PointInSetQuery) Locale(java.util.Locale) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IntPoint(org.apache.lucene.document.IntPoint) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SortedDocValues(org.apache.lucene.index.SortedDocValues) SimpleCollector(org.apache.lucene.search.SimpleCollector) Scorer(org.apache.lucene.search.Scorer) Iterator(java.util.Iterator) LongFunction(java.util.function.LongFunction) FloatPoint(org.apache.lucene.document.FloatPoint) MultiDocValues(org.apache.lucene.index.MultiDocValues) BytesRef(org.apache.lucene.util.BytesRef) IOException(java.io.IOException) Collector(org.apache.lucene.search.Collector) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Function(org.apache.lucene.search.join.DocValuesTermsCollector.Function) DocValues(org.apache.lucene.index.DocValues) DocValuesType(org.apache.lucene.index.DocValuesType) LeafReader(org.apache.lucene.index.LeafReader) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) HashMap(java.util.HashMap) Scorer(org.apache.lucene.search.Scorer) SimpleCollector(org.apache.lucene.search.SimpleCollector) TreeSet(java.util.TreeSet) SimpleCollector(org.apache.lucene.search.SimpleCollector) Collector(org.apache.lucene.search.Collector) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) PointInSetQuery(org.apache.lucene.search.PointInSetQuery) IOException(java.io.IOException) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint)

Example 43 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class TestJoinUtil method addLinkFields.

private void addLinkFields(final Random random, Document document, final String fieldName, String linkValue, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) {
    document.add(newTextField(random, fieldName, linkValue, Field.Store.NO));
    final int linkInt = Integer.parseUnsignedInt(linkValue, 16);
    document.add(new IntPoint(fieldName + "INT", linkInt));
    document.add(new FloatPoint(fieldName + "FLOAT", linkInt));
    final long linkLong = linkInt << 32 | linkInt;
    document.add(new LongPoint(fieldName + "LONG", linkLong));
    document.add(new DoublePoint(fieldName + "DOUBLE", linkLong));
    if (multipleValuesPerDocument) {
        document.add(new SortedSetDocValuesField(fieldName, new BytesRef(linkValue)));
        document.add(new SortedNumericDocValuesField(fieldName + "INT", linkInt));
        document.add(new SortedNumericDocValuesField(fieldName + "FLOAT", Float.floatToRawIntBits(linkInt)));
        document.add(new SortedNumericDocValuesField(fieldName + "LONG", linkLong));
        document.add(new SortedNumericDocValuesField(fieldName + "DOUBLE", Double.doubleToRawLongBits(linkLong)));
    } else {
        document.add(new SortedDocValuesField(fieldName, new BytesRef(linkValue)));
        document.add(new NumericDocValuesField(fieldName + "INT", linkInt));
        document.add(new FloatDocValuesField(fieldName + "FLOAT", linkInt));
        document.add(new NumericDocValuesField(fieldName + "LONG", linkLong));
        document.add(new DoubleDocValuesField(fieldName + "DOUBLE", linkLong));
    }
    if (globalOrdinalJoin) {
        document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
    }
}
Also used : FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) IntPoint(org.apache.lucene.document.IntPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) FloatPoint(org.apache.lucene.document.FloatPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) DoublePoint(org.apache.lucene.document.DoublePoint) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 44 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class TestBlockJoin method makeJob.

// ... has multiple jobs
private Document makeJob(String skill, int year) {
    Document job = new Document();
    job.add(newStringField("skill", skill, Field.Store.YES));
    job.add(new IntPoint("year", year));
    job.add(new StoredField("year", year));
    return job;
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) Document(org.apache.lucene.document.Document)

Example 45 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class TestManyPointsInOldIndex method main.

// To regenerate the back index zip:
//
// Compile:
//   1) temporarily remove 'extends LuceneTestCase' above (else java doesn't see our static void main)
//   2) ant compile-test
//
// Run:
//   1) java -cp ../build/backward-codecs/classes/test:../build/core/classes/java org.apache.lucene.index.TestManyPointsInOldIndex
//
//  cd manypointsindex
//  zip manypointsindex.zip *
public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(Paths.get("manypointsindex"));
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig());
    for (int i = 0; i < 1025; i++) {
        Document doc = new Document();
        doc.add(new IntPoint("intpoint", 1025 - i));
        w.addDocument(doc);
    }
    w.close();
    dir.close();
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) IndexWriter(org.apache.lucene.index.IndexWriter) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IntPoint (org.apache.lucene.document.IntPoint)69 Document (org.apache.lucene.document.Document)64 Directory (org.apache.lucene.store.Directory)47 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)25 DoublePoint (org.apache.lucene.document.DoublePoint)23 FloatPoint (org.apache.lucene.document.FloatPoint)23 LongPoint (org.apache.lucene.document.LongPoint)23 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)22 IndexReader (org.apache.lucene.index.IndexReader)22 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)20 IndexWriter (org.apache.lucene.index.IndexWriter)19 BinaryPoint (org.apache.lucene.document.BinaryPoint)17 StoredField (org.apache.lucene.document.StoredField)16 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)16 RAMDirectory (org.apache.lucene.store.RAMDirectory)15 BytesRef (org.apache.lucene.util.BytesRef)14 StringField (org.apache.lucene.document.StringField)13 FSDirectory (org.apache.lucene.store.FSDirectory)12 IndexSearcher (org.apache.lucene.search.IndexSearcher)11 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)9