
Example 96 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From the class SpatialTestCase, method commit.

protected void commit() throws IOException {
    indexWriter.commit();
    DirectoryReader newReader = DirectoryReader.openIfChanged(indexReader);
    if (newReader != null) {
        IOUtils.close(indexReader);
        indexReader = newReader;
    }
    indexSearcher = newSearcher(indexReader);
}
Also used: DirectoryReader(org.apache.lucene.index.DirectoryReader)
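
The openIfChanged-and-swap idiom above is the standard way to refresh a DirectoryReader after a commit: openIfChanged returns null when the index is unchanged, and otherwise returns a new reader whose old sibling must be closed. A minimal self-contained sketch of the same pattern, with an illustrative temp-directory index and field values:

import java.io.IOException;
import java.nio.file.Files;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ReopenSketch {
    public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(Files.createTempDirectory("reopen"));
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        Document doc = new Document();
        doc.add(new StringField("id", "1", Field.Store.YES));
        writer.addDocument(doc);
        writer.commit();
        DirectoryReader reader = DirectoryReader.open(dir);

        // Index another document, then refresh the reader:
        doc = new Document();
        doc.add(new StringField("id", "2", Field.Store.YES));
        writer.addDocument(doc);
        writer.commit();

        // openIfChanged returns null when nothing changed; otherwise it
        // returns a new reader and the caller must close the old one:
        DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
        System.out.println("numDocs=" + reader.numDocs()); // prints 2
        reader.close();
        writer.close();
        dir.close();
    }
}

Any searcher built over the old reader must also be rebuilt against the new one, which is exactly what the commit() helper above does with newSearcher(indexReader).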

Example 97 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From the class MockDirectoryWrapper, method close.

@Override
public synchronized void close() throws IOException {
    if (isOpen) {
        isOpen = false;
    } else {
        // already closed by us: just forward the extra close() to the wrapped dir
        in.close();
        return;
    }
    boolean success = false;
    try {
        // Some files may have been left behind: we tried to delete them while
        // readers still held them open. All that matters is that we tried;
        // they will eventually go away.
        maybeYield();
        if (openFiles == null) {
            openFiles = new HashMap<>();
            openFilesDeleted = new HashSet<>();
        }
        if (openFiles.size() > 0) {
            // attach only the first stack trace as the cause, since printing all of them is very verbose
            Exception cause = null;
            Iterator<Exception> stacktraces = openFileHandles.values().iterator();
            if (stacktraces.hasNext()) {
                cause = stacktraces.next();
            }
            // RuntimeException instead of IOException because
            // super() does not throw IOException currently:
            throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still " + openFiles.size() + " open files: " + openFiles, cause);
        }
        if (openLocks.size() > 0) {
            Exception cause = null;
            Iterator<RuntimeException> stacktraces = openLocks.values().iterator();
            if (stacktraces.hasNext()) {
                cause = stacktraces.next();
            }
            throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open locks: " + openLocks, cause);
        }
        randomIOExceptionRate = 0.0;
        randomIOExceptionRateOnOpen = 0.0;
        if ((getCheckIndexOnClose() || assertNoUnreferencedFilesOnClose) && DirectoryReader.indexExists(this)) {
            if (getCheckIndexOnClose()) {
                if (LuceneTestCase.VERBOSE) {
                    System.out.println("\nNOTE: MockDirectoryWrapper: now crush");
                }
                // corrupt any unsynced files
                crash();
                if (LuceneTestCase.VERBOSE) {
                    System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
                }
                TestUtil.checkIndex(this, getCrossCheckTermVectorsOnClose(), true, null);
            }
            // TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles
            if (assertNoUnreferencedFilesOnClose) {
                System.out.println("MDW: now assert no unref'd files at close");
                // now look for unreferenced files: discount ones that we tried to delete but could not
                Set<String> allFiles = new HashSet<>(Arrays.asList(listAll()));
                String[] startFiles = allFiles.toArray(new String[0]);
                IndexWriterConfig iwc = new IndexWriterConfig(null);
                iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
                // We must do this before opening writer otherwise writer will be angry if there are pending deletions:
                TestUtil.disableVirusChecker(in);
                new IndexWriter(in, iwc).rollback();
                String[] endFiles = in.listAll();
                Set<String> startSet = new TreeSet<>(Arrays.asList(startFiles));
                Set<String> endSet = new TreeSet<>(Arrays.asList(endFiles));
                startFiles = startSet.toArray(new String[0]);
                endFiles = endSet.toArray(new String[0]);
                if (!Arrays.equals(startFiles, endFiles)) {
                    List<String> removed = new ArrayList<>();
                    for (String fileName : startFiles) {
                        if (!endSet.contains(fileName)) {
                            removed.add(fileName);
                        }
                    }
                    List<String> added = new ArrayList<>();
                    for (String fileName : endFiles) {
                        if (!startSet.contains(fileName)) {
                            added.add(fileName);
                        }
                    }
                    String extras;
                    if (removed.size() != 0) {
                        extras = "\n\nThese files were removed: " + removed;
                    } else {
                        extras = "";
                    }
                    if (added.size() != 0) {
                        extras += "\n\nThese files were added (waaaaaaaaaat!): " + added;
                    }
                    throw new RuntimeException("unreferenced files: before delete:\n    " + Arrays.toString(startFiles) + "\n  after delete:\n    " + Arrays.toString(endFiles) + extras);
                }
                DirectoryReader ir1 = DirectoryReader.open(this);
                int numDocs1 = ir1.numDocs();
                ir1.close();
                new IndexWriter(this, new IndexWriterConfig(null)).close();
                DirectoryReader ir2 = DirectoryReader.open(this);
                int numDocs2 = ir2.numDocs();
                ir2.close();
                assert numDocs1 == numDocs2 : "numDocs changed after opening/closing IW: before=" + numDocs1 + " after=" + numDocs2;
            }
        }
        success = true;
    } finally {
        if (success) {
            IOUtils.close(in);
        } else {
            IOUtils.closeWhileHandlingException(in);
        }
    }
}
Also used: DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) NoSuchFileException(java.nio.file.NoSuchFileException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) IndexWriter(org.apache.lucene.index.IndexWriter) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)
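
MockDirectoryWrapper lives in Lucene's test framework, and the close-time leak and CheckIndex checks above are what make tests fail loudly when files or locks leak. A hedged sketch of how a test typically reaches this code path; newDirectory() and the two setters are part of the test framework, though the exact surface varies across versions:

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;

public class TestCloseChecks extends LuceneTestCase {
    public void testCloseRunsChecks() throws Exception {
        // newDirectory() usually hands back a MockDirectoryWrapper around a real Directory.
        Directory dir = newDirectory();
        if (dir instanceof MockDirectoryWrapper) {
            MockDirectoryWrapper mdw = (MockDirectoryWrapper) dir;
            // Ask for CheckIndex on close and inject occasional fake IOExceptions:
            mdw.setCheckIndexOnClose(true);
            mdw.setRandomIOExceptionRate(0.1);
        }
        // Leaking an IndexInput, IndexOutput, or Lock here would make close()
        // throw "cannot close: there are still ... open files", as shown above.
        dir.close();
    }
}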

Example 98 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From the class SimpleFacetsExample, method drillSideways.

/** User drills down on 'Publish Date/2010', and we
   *  return facets for both 'Publish Date' and 'Author',
   *  using DrillSideways. */
private List<FacetResult> drillSideways() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(config);
    // Now user drills down on Publish Date/2010:
    q.add("Publish Date", "2010");
    DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
    DrillSidewaysResult result = ds.search(q, 10);
    // Retrieve results
    List<FacetResult> facets = result.facets.getAllDims(10);
    indexReader.close();
    taxoReader.close();
    return facets;
}
Also used: IndexSearcher(org.apache.lucene.search.IndexSearcher) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) DirectoryReader(org.apache.lucene.index.DirectoryReader) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) DrillSideways(org.apache.lucene.facet.DrillSideways) FacetResult(org.apache.lucene.facet.FacetResult)
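
Both SimpleFacetsExample methods assume a main index and a sidecar taxonomy index built with the same FacetsConfig. A minimal sketch of that indexing side, with an illustrative analyzer and field values (the real example class indexes more documents and dimensions):

import java.io.IOException;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

class FacetIndexSketch {
    static void index(Directory indexDir, Directory taxoDir, FacetsConfig config) throws IOException {
        IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()));
        // The taxonomy writer maintains the facet-label-to-ordinal map in its own directory:
        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

        Document doc = new Document();
        doc.add(new FacetField("Author", "Lisa"));
        doc.add(new FacetField("Publish Date", "2010"));
        // config.build() rewrites the FacetFields into the doc-values encoding
        // that FastTaxonomyFacetCounts reads at search time:
        indexWriter.addDocument(config.build(taxoWriter, doc));

        indexWriter.close();
        taxoWriter.close();
    }
}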

Example 99 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From the class SimpleFacetsExample, method drillDown.

/** User drills down on 'Publish Date/2010', and we
   *  return facets for 'Author' */
private FacetResult drillDown() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(config);
    // Now user drills down on Publish Date/2010:
    q.add("Publish Date", "2010");
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, q, 10, fc);
    // Retrieve results
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    FacetResult result = facets.getTopChildren(10, "Author");
    indexReader.close();
    taxoReader.close();
    return result;
}
Also used: IndexSearcher(org.apache.lucene.search.IndexSearcher) FastTaxonomyFacetCounts(org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts) Facets(org.apache.lucene.facet.Facets) DirectoryReader(org.apache.lucene.index.DirectoryReader) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) FacetResult(org.apache.lucene.facet.FacetResult) FacetsCollector(org.apache.lucene.facet.FacetsCollector)
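
The two methods differ in how they treat the drilled dimension: in the plain drill-down above, every matching document already has Publish Date 2010, so counting that dimension would be pointless and only 'Author' is returned; DrillSideways (Example 98) additionally counts the sibling values of 'Publish Date' as if the drill-down on it were removed. A hypothetical driver, assuming the private methods are exposed through public wrappers (the wrapper names here are illustrative, not confirmed by the source):

// Assumed public wrappers around the private methods shown above:
SimpleFacetsExample example = new SimpleFacetsExample();
example.index(); // builds the main index and the taxonomy, as sketched earlier

FacetResult authors = example.runDrillDown(); // 'Author' counts within 2010
System.out.println(authors);

for (FacetResult r : example.runDrillSideways()) {
    // 'Author' counts plus the sibling 'Publish Date' values:
    System.out.println(r);
}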

Example 100 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

From the class TestGrouping, method testRandom.

public void testRandom() throws Exception {
    int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
        }
        final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
        //final int numDocs = _TestUtil.nextInt(random, 5, 20);
        final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }
        final List<BytesRef> groups = new ArrayList<>();
        for (int i = 0; i < numGroups; i++) {
            String randomValue;
            do {
                // Because of the DV-based impl we can't tell an empty string
                // apart from a null value, so we never generate empty-string groups.
                randomValue = TestUtil.randomRealisticUnicodeString(random());
            //randomValue = TestUtil.randomSimpleString(random());
            } while ("".equals(randomValue));
            groups.add(new BytesRef(randomValue));
        }
        final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
        if (VERBOSE) {
            System.out.println("TEST: create fake content");
        }
        for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            sb.append("real").append(random().nextInt(3)).append(' ');
            final int fakeCount = random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE) {
                System.out.println("  content=" + sb.toString());
            }
        }
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field idvGroupField = new SortedDocValuesField("group", new BytesRef());
        doc.add(idvGroupField);
        docNoGroup.add(idvGroupField);
        Field group = newStringField("group", "", Field.Store.NO);
        doc.add(group);
        Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
        doc.add(sort1);
        docNoGroup.add(sort1);
        Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
        doc.add(sort2);
        docNoGroup.add(sort2);
        Field content = newTextField("content", "", Field.Store.NO);
        doc.add(content);
        docNoGroup.add(content);
        NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
        doc.add(idDV);
        docNoGroup.add(idDV);
        final GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++) {
            final BytesRef groupValue;
            if (random().nextInt(24) == 17) {
                // So we test the "doc doesn't have the group'd
                // field" case:
                groupValue = null;
            } else {
                groupValue = groups.get(random().nextInt(groups.size()));
            }
            final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), contentStrings[random().nextInt(contentStrings.length)]);
            if (VERBOSE) {
                System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
            }
            groupDocs[i] = groupDoc;
            if (groupDoc.group != null) {
                group.setStringValue(groupDoc.group.utf8ToString());
                idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
            } else {
                // TODO: not true
                // Must explicitly set empty string, else eg if
                // the segment has all docs missing the field then
                // we get null back instead of empty BytesRef:
                idvGroupField.setBytesValue(new BytesRef());
            }
            sort1.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort1));
            sort2.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort2));
            content.setStringValue(groupDoc.content);
            idDV.setLongValue(groupDoc.id);
            if (groupDoc.group == null) {
                w.addDocument(docNoGroup);
            } else {
                w.addDocument(doc);
            }
        }
        final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];
        System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);
        final DirectoryReader r = w.getReader();
        w.close();
        NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
        int[] docIDToID = new int[r.maxDoc()];
        for (int i = 0; i < r.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToID[i] = (int) values.longValue();
        }
        DirectoryReader rBlocks = null;
        Directory dirBlocks = null;
        final IndexSearcher s = newSearcher(r);
        if (VERBOSE) {
            System.out.println("\nTEST: searcher=" + s);
        }
        final ShardState shards = new ShardState(s);
        Set<Integer> seenIDs = new HashSet<>();
        for (int contentID = 0; contentID < 3; contentID++) {
            final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                int idValue = docIDToID[hit.doc];
                final GroupDoc gd = groupDocs[idValue];
                seenIDs.add(idValue);
                assertTrue(gd.score == 0.0);
                gd.score = hit.score;
                assertEquals(gd.id, idValue);
            }
        }
        // make sure all groups were seen across the hits
        assertEquals(groupDocs.length, seenIDs.size());
        for (GroupDoc gd : groupDocs) {
            assertTrue(Float.isFinite(gd.score));
            assertTrue(gd.score >= 0.0);
        }
        // Build 2nd index, where docs are added in blocks by
        // group, so we can use single pass collector
        dirBlocks = newDirectory();
        rBlocks = getDocBlockReader(dirBlocks, groupDocs);
        final Query lastDocInBlock = new TermQuery(new Term("groupend", "x"));
        final IndexSearcher sBlocks = newSearcher(rBlocks);
        final ShardState shardsBlocks = new ShardState(sBlocks);
        // ReaderBlocks only increases maxDoc() vs reader, which
        // means a monotonic shift in scores, so we can
        // reliably remap them w/ Map:
        final Map<String, Map<Float, Float>> scoreMap = new HashMap<>();
        values = MultiDocValues.getNumericValues(rBlocks, "id");
        assertNotNull(values);
        int[] docIDToIDBlocks = new int[rBlocks.maxDoc()];
        for (int i = 0; i < rBlocks.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToIDBlocks[i] = (int) values.longValue();
        }
        //System.out.println("fixup score2");
        for (int contentID = 0; contentID < 3; contentID++) {
            //System.out.println("  term=real" + contentID);
            final Map<Float, Float> termScoreMap = new HashMap<>();
            scoreMap.put("real" + contentID, termScoreMap);
            //System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
            //" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
            final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];
                assertTrue(gd.score2 == 0.0);
                gd.score2 = hit.score;
                assertEquals(gd.id, docIDToIDBlocks[hit.doc]);
                //System.out.println("    score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);
                termScoreMap.put(gd.score, gd.score2);
            }
        }
        for (int searchIter = 0; searchIter < 100; searchIter++) {
            if (VERBOSE) {
                System.out.println("\nTEST: searchIter=" + searchIter);
            }
            final String searchTerm = "real" + random().nextInt(3);
            final boolean fillFields = random().nextBoolean();
            boolean getScores = random().nextBoolean();
            final boolean getMaxScores = random().nextBoolean();
            final Sort groupSort = getRandomSort();
            //final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
            final Sort docSort = getRandomSort();
            getScores |= (groupSort.needsScores() || docSort.needsScores());
            final int topNGroups = TestUtil.nextInt(random(), 1, 30);
            //final int topNGroups = 10;
            final int docsPerGroup = TestUtil.nextInt(random(), 1, 50);
            final int groupOffset = TestUtil.nextInt(random(), 0, (topNGroups - 1) / 2);
            //final int groupOffset = 0;
            final int docOffset = TestUtil.nextInt(random(), 0, docsPerGroup - 1);
            //final int docOffset = 0;
            final boolean doCache = random().nextBoolean();
            final boolean doAllGroups = random().nextBoolean();
            if (VERBOSE) {
                System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) + " dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
            }
            String groupField = "group";
            if (VERBOSE) {
                System.out.println("  groupField=" + groupField);
            }
            final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups);
            final CachingCollector cCache;
            final Collector c;
            final AllGroupsCollector<?> allGroupsCollector;
            if (doAllGroups) {
                allGroupsCollector = createAllGroupsCollector(c1, groupField);
            } else {
                allGroupsCollector = null;
            }
            final boolean useWrappingCollector = random().nextBoolean();
            if (doCache) {
                final double maxCacheMB = random().nextDouble();
                if (VERBOSE) {
                    System.out.println("TEST: maxCacheMB=" + maxCacheMB);
                }
                if (useWrappingCollector) {
                    if (doAllGroups) {
                        cCache = CachingCollector.create(c1, true, maxCacheMB);
                        c = MultiCollector.wrap(cCache, allGroupsCollector);
                    } else {
                        c = cCache = CachingCollector.create(c1, true, maxCacheMB);
                    }
                } else {
                    // Collect only into cache, then replay multiple times:
                    c = cCache = CachingCollector.create(true, maxCacheMB);
                }
            } else {
                cCache = null;
                if (doAllGroups) {
                    c = MultiCollector.wrap(c1, allGroupsCollector);
                } else {
                    c = c1;
                }
            }
            // Search top reader:
            final Query query = new TermQuery(new Term("content", searchTerm));
            s.search(query, c);
            if (doCache && !useWrappingCollector) {
                if (cCache.isCached()) {
                    // Replay for first-pass grouping
                    cCache.replay(c1);
                    if (doAllGroups) {
                        // Replay for all groups:
                        cCache.replay(allGroupsCollector);
                    }
                } else {
                    // Replay by re-running search:
                    s.search(query, c1);
                    if (doAllGroups) {
                        s.search(query, allGroupsCollector);
                    }
                }
            }
            // Get 1st pass top groups
            final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(c1, groupOffset, fillFields);
            final TopGroups<BytesRef> groupsResult;
            if (VERBOSE) {
                System.out.println("TEST: first pass topGroups");
                if (topGroups == null) {
                    System.out.println("  null");
                } else {
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
                    }
                }
            }
            // Get 1st pass top groups using shards
            final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, true, true);
            final TopGroupsCollector<?> c2;
            if (topGroups != null) {
                if (VERBOSE) {
                    System.out.println("TEST: topGroups");
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
                    }
                }
                c2 = createSecondPassCollector(c1, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
                if (doCache) {
                    if (cCache.isCached()) {
                        if (VERBOSE) {
                            System.out.println("TEST: cache is intact");
                        }
                        cCache.replay(c2);
                    } else {
                        if (VERBOSE) {
                            System.out.println("TEST: cache was too large");
                        }
                        s.search(query, c2);
                    }
                } else {
                    s.search(query, c2);
                }
                if (doAllGroups) {
                    TopGroups<BytesRef> tempTopGroups = getTopGroups(c2, docOffset);
                    groupsResult = new TopGroups<>(tempTopGroups, allGroupsCollector.getGroupCount());
                } else {
                    groupsResult = getTopGroups(c2, docOffset);
                }
            } else {
                c2 = null;
                groupsResult = null;
                if (VERBOSE) {
                    System.out.println("TEST:   no results");
                }
            }
            final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
            if (VERBOSE) {
                if (expectedGroups == null) {
                    System.out.println("TEST: no expected groups");
                } else {
                    System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : expectedGroups.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + sd.doc + " score=" + sd.score);
                        }
                    }
                }
                if (groupsResult == null) {
                    System.out.println("TEST: no matched groups");
                } else {
                    System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : groupsResult.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                        }
                    }
                    if (searchIter == 14) {
                        for (int docIDX = 0; docIDX < s.getIndexReader().maxDoc(); docIDX++) {
                            System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));
                        }
                    }
                }
                if (topGroupsShards == null) {
                    System.out.println("TEST: no matched-merged groups");
                } else {
                    System.out.println("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : topGroupsShards.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                        }
                    }
                }
            }
            assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, true);
            // Confirm merged shards match:
            assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, true);
            if (topGroupsShards != null) {
                verifyShards(shards.docStarts, topGroupsShards);
            }
            final boolean needsScores = getScores || getMaxScores || docSort == null;
            final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset + topNGroups, needsScores, sBlocks.createNormalizedWeight(lastDocInBlock, false));
            final AllGroupsCollector<BytesRef> allGroupsCollector2;
            final Collector c4;
            if (doAllGroups) {
                // NOTE: must be "group" and not "group_dv"
                // (groupField) because we didn't index doc
                // values in the block index:
                allGroupsCollector2 = new AllGroupsCollector<>(new TermGroupSelector("group"));
                c4 = MultiCollector.wrap(c3, allGroupsCollector2);
            } else {
                allGroupsCollector2 = null;
                c4 = c3;
            }
            // Get block grouping result:
            sBlocks.search(query, c4);
            @SuppressWarnings({ "unchecked", "rawtypes" }) final TopGroups<BytesRef> tempTopGroupsBlocks = (TopGroups<BytesRef>) c3.getTopGroups(docSort, groupOffset, docOffset, docOffset + docsPerGroup, fillFields);
            final TopGroups<BytesRef> groupsResultBlocks;
            if (doAllGroups && tempTopGroupsBlocks != null) {
                assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
                groupsResultBlocks = new TopGroups<>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
            } else {
                groupsResultBlocks = tempTopGroupsBlocks;
            }
            if (VERBOSE) {
                if (groupsResultBlocks == null) {
                    System.out.println("TEST: no block groups");
                } else {
                    System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
                    boolean first = true;
                    for (GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);
                            if (first) {
                                System.out.println("explain: " + sBlocks.explain(query, sd.doc));
                                first = false;
                            }
                        }
                    }
                }
            }
            // Get shard'd block grouping result:
            final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false);
            if (expectedGroups != null) {
                // Fixup scores for reader2
                for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                    for (ScoreDoc hit : groupDocsHits.scoreDocs) {
                        final GroupDoc gd = groupDocsByID[hit.doc];
                        assertEquals(gd.id, hit.doc);
                        //System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
                        hit.score = gd.score2;
                    }
                }
                final SortField[] sortFields = groupSort.getSort();
                final Map<Float, Float> termScoreMap = scoreMap.get(searchTerm);
                for (int groupSortIDX = 0; groupSortIDX < sortFields.length; groupSortIDX++) {
                    if (sortFields[groupSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            if (groupDocsHits.groupSortValues != null) {
                                //System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
                                groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
                                assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);
                            }
                        }
                    }
                }
                final SortField[] docSortFields = docSort.getSort();
                for (int docSortIDX = 0; docSortIDX < docSortFields.length; docSortIDX++) {
                    if (docSortFields[docSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            for (ScoreDoc _hit : groupDocsHits.scoreDocs) {
                                FieldDoc hit = (FieldDoc) _hit;
                                if (hit.fields != null) {
                                    hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
                                    assertNotNull(hit.fields[docSortIDX]);
                                }
                            }
                        }
                    }
                }
            }
            assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false);
            assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
        }
        r.close();
        dir.close();
        rBlocks.close();
        dirBlocks.close();
    }
}
Also used: Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) FieldDoc(org.apache.lucene.search.FieldDoc) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) CachingCollector(org.apache.lucene.search.CachingCollector) HashSet(java.util.HashSet) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MultiCollector(org.apache.lucene.search.MultiCollector) Collector(org.apache.lucene.search.Collector) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)
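
The randomized test above exercises the two-pass grouping collectors (plus caching, sharding, and a single-pass block variant) against a brute-force oracle. A hedged sketch of the basic two-pass flow it is testing, using the GroupSelector-based surface seen in the test; constructor signatures vary across Lucene versions:

import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.FirstPassGroupingCollector;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TermGroupSelector;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.search.grouping.TopGroupsCollector;
import org.apache.lucene.util.BytesRef;

class TwoPassGroupingSketch {
    static TopGroups<BytesRef> group(IndexSearcher searcher, Query query) throws IOException {
        Sort groupSort = Sort.RELEVANCE;
        int topNGroups = 10, maxDocsPerGroup = 5;

        // Pass 1: find the top N groups matching the query.
        FirstPassGroupingCollector<BytesRef> firstPass =
            new FirstPassGroupingCollector<>(new TermGroupSelector("group"), groupSort, topNGroups);
        searcher.search(query, firstPass);
        Collection<SearchGroup<BytesRef>> topGroups = firstPass.getTopGroups(0, true);
        if (topGroups == null) {
            return null; // query matched nothing
        }

        // Pass 2: collect the top documents within each of those groups.
        TopGroupsCollector<BytesRef> secondPass = new TopGroupsCollector<>(
            new TermGroupSelector("group"), topGroups, groupSort, Sort.RELEVANCE,
            maxDocsPerGroup, true /*getScores*/, false /*getMaxScores*/, true /*fillSortFields*/);
        searcher.search(query, secondPass);
        return secondPass.getTopGroups(0);
    }
}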

Aggregations

DirectoryReader (org.apache.lucene.index.DirectoryReader): 362
Document (org.apache.lucene.document.Document): 228
Directory (org.apache.lucene.store.Directory): 206
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 149
IndexWriter (org.apache.lucene.index.IndexWriter): 139
Term (org.apache.lucene.index.Term): 134
IndexSearcher (org.apache.lucene.search.IndexSearcher): 101
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 98
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 96
Test (org.junit.Test): 64
StringField (org.apache.lucene.document.StringField): 61
Analyzer (org.apache.lucene.analysis.Analyzer): 54
BytesRef (org.apache.lucene.util.BytesRef): 51
LeafReader (org.apache.lucene.index.LeafReader): 49
ArrayList (java.util.ArrayList): 46
Field (org.apache.lucene.document.Field): 45
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 44
TermQuery (org.apache.lucene.search.TermQuery): 42
IOException (java.io.IOException): 37
TextField (org.apache.lucene.document.TextField): 36