Search in sources :

Example 31 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TestMemoryIndex method testDocValuesDoNotAffectBoostPositionsOrOffset.

/**
 * Indexes a text field and a binary doc-values field under the same name ("text") and asserts
 * that the positions and offsets reported for the text terms are the expected ones, and that
 * the binary doc value can still be read back.
 */
public void testDocValuesDoNotAffectBoostPositionsOrOffset() throws Exception {
    final Document document = new Document();
    document.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
    document.add(new TextField("text", "quick brown fox", Field.Store.NO));
    final MemoryIndex index = MemoryIndex.fromDocument(document, analyzer, true, true);
    final LeafReader leafReader = index.createSearcher().getIndexReader().leaves().get(0).reader();

    // Expectations, listed in the order the terms enum is asserted to return the terms.
    final String[] expectedTerms = { "brown", "fox", "quick" };
    final int[] expectedPositions = { 1, 2, 0 };
    final int[] expectedStartOffsets = { 6, 12, 0 };
    final int[] expectedEndOffsets = { 11, 15, 5 };

    final TermsEnum termsEnum = leafReader.terms("text").iterator();
    // Starts out null and is handed back to postings() for reuse on every later term.
    PostingsEnum postings = null;
    for (int i = 0; i < expectedTerms.length; i++) {
        assertEquals(expectedTerms[i], termsEnum.next().utf8ToString());
        postings = termsEnum.postings(postings, PostingsEnum.OFFSETS);
        // Each term occurs exactly once, in document 0.
        assertEquals(0, postings.nextDoc());
        assertEquals(1, postings.freq());
        assertEquals(expectedPositions[i], postings.nextPosition());
        assertEquals(expectedStartOffsets[i], postings.startOffset());
        assertEquals(expectedEndOffsets[i], postings.endOffset());
    }

    // The doc value stored under the same field name must be readable as well.
    final BinaryDocValues docValues = leafReader.getBinaryDocValues("text");
    assertEquals(0, docValues.nextDoc());
    assertEquals("quick brown fox", docValues.binaryValue().utf8ToString());
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) PostingsEnum(org.apache.lucene.index.PostingsEnum) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 32 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TestMemoryIndex method testPointValuesDoNotAffectPositionsOrOffset.

/**
 * Adds a text field and two binary points under the same name ("text") to a MemoryIndex and
 * asserts that the positions and offsets reported for the text terms are the expected ones,
 * and that exact point queries match only the values that were added.
 */
public void testPointValuesDoNotAffectPositionsOrOffset() throws Exception {
    final MemoryIndex index = new MemoryIndex(true, true);
    index.addField(new TextField("text", "quick brown fox", Field.Store.NO), analyzer);
    index.addField(new BinaryPoint("text", "quick".getBytes(StandardCharsets.UTF_8)), analyzer);
    index.addField(new BinaryPoint("text", "brown".getBytes(StandardCharsets.UTF_8)), analyzer);
    final LeafReader leafReader = index.createSearcher().getIndexReader().leaves().get(0).reader();

    // Expectations, listed in the order the terms enum is asserted to return the terms.
    final String[] expectedTerms = { "brown", "fox", "quick" };
    final int[] expectedPositions = { 1, 2, 0 };
    final int[] expectedStartOffsets = { 6, 12, 0 };
    final int[] expectedEndOffsets = { 11, 15, 5 };

    final TermsEnum termsEnum = leafReader.terms("text").iterator();
    // Starts out null and is handed back to postings() for reuse on every later term.
    PostingsEnum postings = null;
    for (int i = 0; i < expectedTerms.length; i++) {
        assertEquals(expectedTerms[i], termsEnum.next().utf8ToString());
        postings = termsEnum.postings(postings, PostingsEnum.OFFSETS);
        // Each term occurs exactly once, in document 0.
        assertEquals(0, postings.nextDoc());
        assertEquals(1, postings.freq());
        assertEquals(expectedPositions[i], postings.nextPosition());
        assertEquals(expectedStartOffsets[i], postings.startOffset());
        assertEquals(expectedEndOffsets[i], postings.endOffset());
    }

    // The two added point values match once each; a value that was never added matches nothing.
    final IndexSearcher searcher = index.createSearcher();
    final String[] pointValues = { "quick", "brown", "jumps" };
    final int[] expectedCounts = { 1, 1, 0 };
    for (int i = 0; i < pointValues.length; i++) {
        final byte[] encoded = pointValues[i].getBytes(StandardCharsets.UTF_8);
        assertEquals(expectedCounts[i], searcher.count(BinaryPoint.newExactQuery("text", encoded)));
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) TextField(org.apache.lucene.document.TextField) PostingsEnum(org.apache.lucene.index.PostingsEnum) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 33 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class LukeRequestHandler method getFirstLiveDoc.

/**
 * Returns the stored document behind the first live (non-deleted) posting found while scanning
 * the leading terms of {@code terms}.
 *
 * <p>Only the first posting of each term is inspected and at most 1000 terms are scanned, so
 * the lookup stays bounded even when the leading terms only occur in deleted documents.
 *
 * @param terms  the terms of the field to sample a document from
 * @param reader the segment reader that supplies the live-docs bits and the stored documents
 * @return the first live document found, or {@code null} if the terms are exhausted or the scan
 *         budget is used up without finding one
 * @throws IOException if reading terms, postings or the stored document fails
 */
private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
    // Upper bound on the number of terms examined before giving up.
    final int maxTermsToScan = 1000;
    // Looked up once instead of per term. A set bit marks a live document; null means the
    // reader has no deletions at all.
    final Bits liveDocs = reader.getLiveDocs();
    final TermsEnum termsEnum = terms.iterator();
    PostingsEnum postingsEnum = null;
    // Deal with the chance that the first bunch of terms are in deleted documents.
    for (int idx = 0; idx < maxTermsToScan; ++idx) {
        if (termsEnum.next() == null) {
            // Ran off the end of the terms enum without finding any live docs with that field in them.
            return null;
        }
        postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
        final int docId = postingsEnum.nextDoc();
        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
            // Term has no postings left; try the next term.
            continue;
        }
        if (liveDocs == null || liveDocs.get(docId)) {
            return reader.document(docId);
        }
        // First document for this term is deleted; move on to the next term.
    }
    return null;
}
Also used : Bits(org.apache.lucene.util.Bits) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 34 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TestBlockJoin method testRandom.

/**
 * Randomized block-join test.
 *
 * <p>Builds two indices from the same random data: a denormalized one (written through
 * {@code w}, one document per child with the parent's fields cloned into it) and a block-join
 * one (written through {@code joinW}, each block being the child documents followed by their
 * parent). It then runs random queries in both directions (child-to-parent via
 * {@link ToParentBlockJoinQuery}, parent-to-child via {@link ToChildBlockJoinQuery}) and
 * compares the hits from the two indices with {@code compareHits} and {@code compareChildHits}.
 */
public void testRandom() throws Exception {
    // We build two indices at once: one normalized (which
    // ToParentBlockJoinQuery/Collector,
    // ToChildBlockJoinQuery can query) and the other w/
    // the same docs, just fully denormalized:
    final Directory dir = newDirectory();
    final Directory joinDir = newDirectory();
    final int maxNumChildrenPerParent = 20;
    final int numParentDocs = TestUtil.nextInt(random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
    //final int numParentDocs = 30;
    // Values for parent fields:
    final String[][] parentFields = getRandomFields(numParentDocs / 2);
    // Values for child fields:
    final String[][] childFields = getRandomFields(numParentDocs);
    final boolean doDeletes = random().nextBoolean();
    // parentIDs whose documents will be deleted from both indices after indexing.
    final List<Integer> toDelete = new ArrayList<>();
    // TODO: parallel star join, nested join cases too!
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    final RandomIndexWriter joinW = new RandomIndexWriter(random(), joinDir);
    for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++) {
        // parentDoc is the template cloned into every denormalized child document;
        // parentJoinDoc is the parent document of the block-join index.
        Document parentDoc = new Document();
        Document parentJoinDoc = new Document();
        Field id = new StoredField("parentID", parentDocID);
        parentDoc.add(id);
        parentJoinDoc.add(id);
        parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
        id = new NumericDocValuesField("parentID", parentDocID);
        parentDoc.add(id);
        parentJoinDoc.add(id);
        // NOTE(review): "isParent" is added a second time here (it was already added a few
        // lines above) - looks redundant; confirm whether this is intentional.
        parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
        for (int field = 0; field < parentFields.length; field++) {
            if (random().nextDouble() < 0.9) {
                String s = parentFields[field][random().nextInt(parentFields[field].length)];
                Field f = newStringField("parent" + field, s, Field.Store.NO);
                parentDoc.add(f);
                parentJoinDoc.add(f);
                f = new SortedDocValuesField("parent" + field, new BytesRef(s));
                parentDoc.add(f);
                parentJoinDoc.add(f);
            }
        }
        if (doDeletes) {
            // blockID is the field the delete-by-query below matches on.
            parentDoc.add(new IntPoint("blockID", parentDocID));
            parentJoinDoc.add(new IntPoint("blockID", parentDocID));
        }
        // Documents of the current block for the join index (children first, parent appended last).
        final List<Document> joinDocs = new ArrayList<>();
        if (VERBOSE) {
            StringBuilder sb = new StringBuilder();
            sb.append("parentID=").append(parentDoc.get("parentID"));
            for (int fieldID = 0; fieldID < parentFields.length; fieldID++) {
                String s = parentDoc.get("parent" + fieldID);
                if (s != null) {
                    sb.append(" parent" + fieldID + "=" + s);
                }
            }
            System.out.println("  " + sb.toString());
        }
        final int numChildDocs = TestUtil.nextInt(random(), 1, maxNumChildrenPerParent);
        for (int childDocID = 0; childDocID < numChildDocs; childDocID++) {
            // Denormalize: copy all parent fields into child doc:
            Document childDoc = TestUtil.cloneDocument(parentDoc);
            Document joinChildDoc = new Document();
            joinDocs.add(joinChildDoc);
            Field childID = new StoredField("childID", childDocID);
            childDoc.add(childID);
            joinChildDoc.add(childID);
            childID = new NumericDocValuesField("childID", childDocID);
            childDoc.add(childID);
            joinChildDoc.add(childID);
            for (int childFieldID = 0; childFieldID < childFields.length; childFieldID++) {
                if (random().nextDouble() < 0.9) {
                    String s = childFields[childFieldID][random().nextInt(childFields[childFieldID].length)];
                    Field f = newStringField("child" + childFieldID, s, Field.Store.NO);
                    childDoc.add(f);
                    joinChildDoc.add(f);
                    f = new SortedDocValuesField("child" + childFieldID, new BytesRef(s));
                    childDoc.add(f);
                    joinChildDoc.add(f);
                }
            }
            if (VERBOSE) {
                StringBuilder sb = new StringBuilder();
                sb.append("childID=").append(joinChildDoc.get("childID"));
                for (int fieldID = 0; fieldID < childFields.length; fieldID++) {
                    String s = joinChildDoc.get("child" + fieldID);
                    if (s != null) {
                        sb.append(" child" + fieldID + "=" + s);
                    }
                }
                System.out.println("    " + sb.toString());
            }
            if (doDeletes) {
                joinChildDoc.add(new IntPoint("blockID", parentDocID));
            }
            // The denormalized index receives one document per child.
            w.addDocument(childDoc);
        }
        // Parent last:
        joinDocs.add(parentJoinDoc);
        joinW.addDocuments(joinDocs);
        if (doDeletes && random().nextInt(30) == 7) {
            toDelete.add(parentDocID);
        }
    }
    if (!toDelete.isEmpty()) {
        // Apply the same delete-by-query to both writers.
        Query query = IntPoint.newSetQuery("blockID", toDelete);
        w.deleteDocuments(query);
        joinW.deleteDocuments(query);
    }
    final IndexReader r = w.getReader();
    w.close();
    final IndexReader joinR = joinW.getReader();
    joinW.close();
    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
        System.out.println("TEST: joinReader=" + joinR);
        Bits liveDocs = MultiFields.getLiveDocs(joinR);
        for (int docIDX = 0; docIDX < joinR.maxDoc(); docIDX++) {
            System.out.println("  docID=" + docIDX + " doc=" + joinR.document(docIDX) + " deleted?=" + (liveDocs != null && liveDocs.get(docIDX) == false));
        }
        PostingsEnum parents = MultiFields.getTermDocsEnum(joinR, "isParent", new BytesRef("x"));
        System.out.println("parent docIDs:");
        while (parents.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
            System.out.println("  " + parents.docID());
        }
    }
    // s searches the denormalized index, joinS the block-join index.
    final IndexSearcher s = newSearcher(r, false);
    final IndexSearcher joinS = newSearcher(joinR);
    final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "x")));
    CheckJoinIndex.check(joinS.getIndexReader(), parentsFilter);
    final int iters = 200 * RANDOM_MULTIPLIER;
    for (int iter = 0; iter < iters; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + (1 + iter) + " of " + iters);
        }
        // Pick a random query over child fields: a single term, a 2-4 clause boolean,
        // or a two-clause boolean whose first clause is a MUST on field 0.
        Query childQuery;
        if (random().nextInt(3) == 2) {
            final int childFieldID = random().nextInt(childFields.length);
            childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)]));
        } else if (random().nextInt(3) == 2) {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            final int numClauses = TestUtil.nextInt(random(), 2, 4);
            boolean didMust = false;
            for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++) {
                Query clause;
                BooleanClause.Occur occur;
                if (!didMust && random().nextBoolean()) {
                    occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                    clause = new TermQuery(randomChildTerm(childFields[0]));
                    didMust = true;
                } else {
                    occur = BooleanClause.Occur.SHOULD;
                    final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1);
                    clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)]));
                }
                bq.add(clause, occur);
            }
            childQuery = bq.build();
        } else {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            bq.add(new TermQuery(randomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
            final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1);
            bq.add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)])), random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
            childQuery = bq.build();
        }
        if (random().nextBoolean()) {
            childQuery = new RandomApproximationQuery(childQuery, random());
        }
        final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
        final ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
        // To run against the block-join index:
        final Query parentJoinQuery;
        // Same query as parentJoinQuery, but to run against
        // the fully denormalized index (so we can compare
        // results):
        final Query parentQuery;
        if (random().nextBoolean()) {
            parentQuery = childQuery;
            parentJoinQuery = childJoinQuery;
        } else {
            // AND parent field w/ child field
            final BooleanQuery.Builder bq = new BooleanQuery.Builder();
            final Term parentTerm = randomParentTerm(parentFields[0]);
            if (random().nextBoolean()) {
                bq.add(childJoinQuery, BooleanClause.Occur.MUST);
                bq.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
            } else {
                bq.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                bq.add(childJoinQuery, BooleanClause.Occur.MUST);
            }
            final BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
            if (random().nextBoolean()) {
                bq2.add(childQuery, BooleanClause.Occur.MUST);
                bq2.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
            } else {
                bq2.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                bq2.add(childQuery, BooleanClause.Occur.MUST);
            }
            parentJoinQuery = bq.build();
            parentQuery = bq2.build();
        }
        final Sort parentSort = getRandomSort("parent", parentFields.length);
        final Sort childSort = getRandomSort("child", childFields.length);
        if (VERBOSE) {
            System.out.println("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
        }
        // Merge both sorts:
        final List<SortField> sortFields = new ArrayList<>(Arrays.asList(parentSort.getSort()));
        sortFields.addAll(Arrays.asList(childSort.getSort()));
        final Sort parentAndChildSort = new Sort(sortFields.toArray(new SortField[sortFields.size()]));
        final TopDocs results = s.search(parentQuery, r.numDocs(), parentAndChildSort);
        if (VERBOSE) {
            System.out.println("\nTEST: normal index gets " + results.totalHits + " hits; sort=" + parentAndChildSort);
            final ScoreDoc[] hits = results.scoreDocs;
            for (int hitIDX = 0; hitIDX < hits.length; hitIDX++) {
                final Document doc = s.doc(hits[hitIDX].doc);
                //System.out.println("  score=" + hits[hitIDX].score + " parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
                System.out.println("  parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
                FieldDoc fd = (FieldDoc) hits[hitIDX];
                if (fd.fields != null) {
                    System.out.print("    " + fd.fields.length + " sort values: ");
                    for (Object o : fd.fields) {
                        if (o instanceof BytesRef) {
                            System.out.print(((BytesRef) o).utf8ToString() + " ");
                        } else {
                            System.out.print(o + " ");
                        }
                    }
                    System.out.println();
                }
            }
        }
        // Run the join query, then fetch the matching children of every parent hit with a
        // per-parent query; results are keyed by the stored parentID.
        TopDocs joinedResults = joinS.search(parentJoinQuery, numParentDocs);
        SortedMap<Integer, TopDocs> joinResults = new TreeMap<>();
        for (ScoreDoc parentHit : joinedResults.scoreDocs) {
            ParentChildrenBlockJoinQuery childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery, parentHit.doc);
            TopDocs childTopDocs = joinS.search(childrenQuery, maxNumChildrenPerParent, childSort);
            final Document parentDoc = joinS.doc(parentHit.doc);
            joinResults.put(Integer.valueOf(parentDoc.get("parentID")), childTopDocs);
        }
        final int hitsPerGroup = TestUtil.nextInt(random(), 1, 20);
        if (VERBOSE) {
            // NOTE(review): joinResults was assigned a new TreeMap above, so the null checks in
            // this block can never be true. The "docID=" value printed per group is the map key
            // (the parentID), not a Lucene doc id.
            System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.size()) + " groups; hitsPerGroup=" + hitsPerGroup);
            if (joinResults != null) {
                for (Map.Entry<Integer, TopDocs> entry : joinResults.entrySet()) {
                    System.out.println("  group parentID=" + entry.getKey() + " (docID=" + entry.getKey() + ")");
                    for (ScoreDoc childHit : entry.getValue().scoreDocs) {
                        final Document doc = joinS.doc(childHit.doc);
                        //              System.out.println("    score=" + childHit.score + " childID=" + doc.get("childID") + " (docID=" + childHit.doc + ")");
                        System.out.println("    childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + childHit.doc + ")");
                    }
                }
            }
        }
        if (results.totalHits == 0) {
            assertEquals(0, joinResults.size());
        } else {
            compareHits(r, joinR, results, joinResults);
            // Also verify the explanation produced for the top parent hits of the join query.
            TopDocs b = joinS.search(childJoinQuery, 10);
            for (ScoreDoc hit : b.scoreDocs) {
                Explanation explanation = joinS.explain(childJoinQuery, hit.doc);
                // hit.doc - 1 is the document indexed immediately before the parent hit;
                // its childID is used below to derive the expected start of the child range.
                Document document = joinS.doc(hit.doc - 1);
                int childId = Integer.parseInt(document.get("childID"));
                //System.out.println("  hit docID=" + hit.doc + " childId=" + childId + " parentId=" + document.get("parentID"));
                assertTrue(explanation.isMatch());
                assertEquals(hit.score, explanation.getValue(), 0.0f);
                Matcher m = Pattern.compile("Score based on ([0-9]+) child docs in range from ([0-9]+) to ([0-9]+), best match:").matcher(explanation.getDescription());
                assertTrue("Block Join description not matches", m.matches());
                assertTrue("Matched children not positive", Integer.parseInt(m.group(1)) > 0);
                assertEquals("Wrong child range start", hit.doc - 1 - childId, Integer.parseInt(m.group(2)));
                assertEquals("Wrong child range end", hit.doc - 1, Integer.parseInt(m.group(3)));
                Explanation childWeightExplanation = explanation.getDetails()[0];
                // Step into the first detail when the explanation is wrapped in a "sum of:" node.
                if ("sum of:".equals(childWeightExplanation.getDescription())) {
                    childWeightExplanation = childWeightExplanation.getDetails()[0];
                }
                assertTrue("Wrong child weight description", childWeightExplanation.getDescription().startsWith("weight(child"));
            }
        }
        // Test joining in the opposite direction (parent to
        // child):
        // Get random query against parent documents:
        final Query parentQuery2;
        if (random().nextInt(3) == 2) {
            final int fieldID = random().nextInt(parentFields.length);
            parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)]));
        } else if (random().nextInt(3) == 2) {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            final int numClauses = TestUtil.nextInt(random(), 2, 4);
            boolean didMust = false;
            for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++) {
                Query clause;
                BooleanClause.Occur occur;
                if (!didMust && random().nextBoolean()) {
                    occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                    clause = new TermQuery(randomParentTerm(parentFields[0]));
                    didMust = true;
                } else {
                    occur = BooleanClause.Occur.SHOULD;
                    final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1);
                    clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)]));
                }
                bq.add(clause, occur);
            }
            parentQuery2 = bq.build();
        } else {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            bq.add(new TermQuery(randomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
            final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1);
            bq.add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)])), random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
            parentQuery2 = bq.build();
        }
        if (VERBOSE) {
            System.out.println("\nTEST: top down: parentQuery2=" + parentQuery2);
        }
        // Maps parent query to child docs:
        final ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter);
        // To run against the block-join index:
        Query childJoinQuery2;
        // Same query as parentJoinQuery, but to run against
        // the fully denormalized index (so we can compare
        // results):
        Query childQuery2;
        if (random().nextBoolean()) {
            childQuery2 = parentQuery2;
            childJoinQuery2 = parentJoinQuery2;
        } else {
            // Restrict both queries by the same random child term; each side independently
            // picks between a FILTER clause and a MUST clause for it.
            final Term childTerm = randomChildTerm(childFields[0]);
            if (random().nextBoolean()) {
                // filtered case
                childJoinQuery2 = parentJoinQuery2;
                childJoinQuery2 = new BooleanQuery.Builder().add(childJoinQuery2, Occur.MUST).add(new TermQuery(childTerm), Occur.FILTER).build();
            } else {
                // AND child field w/ parent query:
                final BooleanQuery.Builder bq = new BooleanQuery.Builder();
                if (random().nextBoolean()) {
                    bq.add(parentJoinQuery2, BooleanClause.Occur.MUST);
                    bq.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                } else {
                    bq.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                    bq.add(parentJoinQuery2, BooleanClause.Occur.MUST);
                }
                childJoinQuery2 = bq.build();
            }
            if (random().nextBoolean()) {
                // filtered case
                childQuery2 = parentQuery2;
                childQuery2 = new BooleanQuery.Builder().add(childQuery2, Occur.MUST).add(new TermQuery(childTerm), Occur.FILTER).build();
            } else {
                final BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
                if (random().nextBoolean()) {
                    bq2.add(parentQuery2, BooleanClause.Occur.MUST);
                    bq2.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                } else {
                    bq2.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                    bq2.add(parentQuery2, BooleanClause.Occur.MUST);
                }
                childQuery2 = bq2.build();
            }
        }
        final Sort childSort2 = getRandomSort("child", childFields.length);
        // Search denormalized index:
        if (VERBOSE) {
            System.out.println("TEST: run top down query=" + childQuery2 + " sort=" + childSort2);
        }
        final TopDocs results2 = s.search(childQuery2, r.numDocs(), childSort2);
        if (VERBOSE) {
            System.out.println("  " + results2.totalHits + " totalHits:");
            for (ScoreDoc sd : results2.scoreDocs) {
                final Document doc = s.doc(sd.doc);
                System.out.println("  childID=" + doc.get("childID") + " parentID=" + doc.get("parentID") + " docID=" + sd.doc);
            }
        }
        // Search join index:
        if (VERBOSE) {
            System.out.println("TEST: run top down join query=" + childJoinQuery2 + " sort=" + childSort2);
        }
        TopDocs joinResults2 = joinS.search(childJoinQuery2, joinR.numDocs(), childSort2);
        if (VERBOSE) {
            System.out.println("  " + joinResults2.totalHits + " totalHits:");
            for (ScoreDoc sd : joinResults2.scoreDocs) {
                final Document doc = joinS.doc(sd.doc);
                final Document parentDoc = getParentDoc(joinR, parentsFilter, sd.doc);
                System.out.println("  childID=" + doc.get("childID") + " parentID=" + parentDoc.get("parentID") + " docID=" + sd.doc);
            }
        }
        compareChildHits(r, joinR, results2, joinResults2);
    }
    r.close();
    joinR.close();
    dir.close();
    joinDir.close();
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) PostingsEnum(org.apache.lucene.index.PostingsEnum) Term(org.apache.lucene.index.Term) IntPoint(org.apache.lucene.document.IntPoint) Bits(org.apache.lucene.util.Bits) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Matcher(java.util.regex.Matcher) Occur(org.apache.lucene.search.BooleanClause.Occur) StringField(org.apache.lucene.document.StringField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TreeMap(java.util.TreeMap) IntPoint(org.apache.lucene.document.IntPoint) IndexReader(org.apache.lucene.index.IndexReader)

Example 35 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TermsIncludingScoreQuery method createWeight.

/**
 * Builds the weight for this query. When scores are not needed the work is handed to a
 * rewritten {@code TermsQuery}; otherwise an anonymous {@code Weight} is returned that explains
 * and scores documents from the per-term scores held by this query.
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        // Scores are not needed, so swap in the plain terms query.
        final TermsQuery nonScoring = new TermsQuery(toField, terms, fromField, fromQuery, topReaderContextId);
        return searcher.rewrite(nonScoring).createWeight(searcher, false, boost);
    }
    return new Weight(TermsIncludingScoreQuery.this) {

        @Override
        public void extractTerms(Set<Term> terms) {
            // Nothing is added to the supplied set.
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final Terms segmentTerms = context.reader().terms(toField);
            if (segmentTerms == null) {
                return Explanation.noMatch("Not a match");
            }
            final TermsEnum segmentTermsEnum = segmentTerms.iterator();
            final BytesRef scratch = new BytesRef();
            PostingsEnum reuse = null;
            // Walk the query's terms and report the first one whose postings contain doc.
            for (int i = 0; i < TermsIncludingScoreQuery.this.terms.size(); i++) {
                if (!segmentTermsEnum.seekExact(TermsIncludingScoreQuery.this.terms.get(ords[i], scratch))) {
                    continue;
                }
                reuse = segmentTermsEnum.postings(reuse, PostingsEnum.NONE);
                if (reuse.advance(doc) != doc) {
                    continue;
                }
                final float score = TermsIncludingScoreQuery.this.scores[ords[i]];
                return Explanation.match(score, "Score based on join value " + segmentTermsEnum.term().utf8ToString());
            }
            return Explanation.noMatch("Not a match");
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final Terms segmentTerms = context.reader().terms(toField);
            if (segmentTerms == null) {
                return null;
            }
            final int maxDoc = context.reader().maxDoc();
            // what is the runtime...seems ok?
            final long cost = maxDoc * segmentTerms.size();
            final TermsEnum segmentTermsEnum = segmentTerms.iterator();
            return multipleValuesPerDocument
                    ? new MVInOrderScorer(this, segmentTermsEnum, maxDoc, cost)
                    : new SVInOrderScorer(this, segmentTermsEnum, maxDoc, cost);
        }
    };
}
Also used : Set(java.util.Set) FixedBitSet(org.apache.lucene.util.FixedBitSet) Terms(org.apache.lucene.index.Terms) Weight(org.apache.lucene.search.Weight) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

PostingsEnum (org.apache.lucene.index.PostingsEnum)74 BytesRef (org.apache.lucene.util.BytesRef)55 TermsEnum (org.apache.lucene.index.TermsEnum)51 Terms (org.apache.lucene.index.Terms)43 Fields (org.apache.lucene.index.Fields)18 LeafReader (org.apache.lucene.index.LeafReader)17 Term (org.apache.lucene.index.Term)16 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)15 Document (org.apache.lucene.document.Document)13 ArrayList (java.util.ArrayList)11 IndexReader (org.apache.lucene.index.IndexReader)10 TextField (org.apache.lucene.document.TextField)9 Directory (org.apache.lucene.store.Directory)9 Bits (org.apache.lucene.util.Bits)9 IOException (java.io.IOException)8 DirectoryReader (org.apache.lucene.index.DirectoryReader)7 IndexWriter (org.apache.lucene.index.IndexWriter)6 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)6 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)5 XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)5