Examples with DirectoryTaxonomyWriter - org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter

Example 1 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.

the class TestDrillSideways method testBasic.

public void testBasic() throws Exception {
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();
    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    config.setHierarchical("Publish Date", true);
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    writer.addDocument(config.build(taxoWriter, doc));
    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());
    //System.out.println("searcher=" + searcher);
    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    DrillSideways ds = getNewDrillSideways(searcher, config, taxoReader);
    //  case: drill-down on a single field; in this
    // case the drill-sideways + drill-down counts ==
    // drill-down of just the query:
    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    DrillSidewaysResult r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // Same simple case, but no baseQuery (pure browse):
    // drill-down on a single field; in this case the
    // drill-sideways + drill-down counts == drill-down of
    // just the query:
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // Another simple case: drill-down on single fields
    // but OR of two values
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Bob");
    r = ds.search(null, ddq, 10);
    assertEquals(3, r.hits.totalHits);
    // Publish Date is only drill-down: Lisa and Bob
    // (drill-down) published twice in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
    assertTrue(r.facets instanceof MultiFacets);
    List<FacetResult> allResults = r.facets.getAllDims(10);
    assertEquals(2, allResults.size());
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", allResults.get(0).toString());
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", allResults.get(1).toString());
    // More interesting case: drill-down on two fields
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Publish Date", "2010");
    r = ds.search(null, ddq, 10);
    assertEquals(1, r.hits.totalHits);
    // Publish Date is drill-sideways + drill-down: Lisa
    // (drill-down) published once in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down:
    // only Lisa & Bob published (once each) in 2010:
    assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // Even more interesting case: drill down on two fields,
    // but one of them is OR
    ddq = new DrillDownQuery(config);
    // Drill down on Lisa or Bob:
    ddq.add("Author", "Lisa");
    ddq.add("Publish Date", "2010");
    ddq.add("Author", "Bob");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is both drill-sideways + drill-down:
    // Lisa or Bob published twice in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down:
    // only Lisa & Bob published (once each) in 2010:
    assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // Test drilling down on invalid field:
    ddq = new DrillDownQuery(config);
    ddq.add("Foobar", "Baz");
    r = ds.search(null, ddq, 10);
    assertEquals(0, r.hits.totalHits);
    assertNull(r.facets.getTopChildren(10, "Publish Date"));
    assertNull(r.facets.getTopChildren(10, "Foobar"));
    // Test drilling down on valid term or'd with invalid term:
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Tom");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // LUCENE-4915: test drilling down on a dimension but
    // NOT facet counting it:
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Tom");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Test main query gets null scorer:
    ddq = new DrillDownQuery(config, new TermQuery(new Term("foobar", "baz")));
    ddq.add("Author", "Lisa");
    r = ds.search(null, ddq, 10);
    assertEquals(0, r.hits.totalHits);
    assertNull(r.facets.getTopChildren(10, "Publish Date"));
    assertNull(r.facets.getTopChildren(10, "Author"));
    writer.close();
    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)

Example 2 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.

the class TestDrillSideways method testSometimesInvalidDrillDown.

public void testSometimesInvalidDrillDown() throws Exception {
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    config.setHierarchical("Publish Date", true);
    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    writer.addDocument(config.build(taxoWriter, doc));
    writer.commit();
    // 2nd segment has no Author:
    doc = new Document();
    doc.add(new FacetField("Foobar", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    writer.addDocument(config.build(taxoWriter, doc));
    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());
    //System.out.println("searcher=" + searcher);
    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    DrillSidewaysResult r = getNewDrillSideways(searcher, config, taxoReader).search(null, ddq, 10);
    assertEquals(1, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=1 childCount=1\n  2010 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published once, and Bob
    // published once:
    assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
    writer.close();
    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
}

Also used : DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)

Example 3 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.

the class TestDrillSideways method testScorer.

public void testScorer() throws Exception {
    // LUCENE-6001 some scorers, eg ReqExlScorer, can hit NPE if cost is called after nextDoc
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();
    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newTextField("field", "foo bar", Field.Store.NO));
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("dim", "a"));
    writer.addDocument(config.build(taxoWriter, doc));
    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());
    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    DrillSideways ds = getNewDrillSideways(searcher, config, taxoReader);
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(new TermQuery(new Term("field", "foo")), BooleanClause.Occur.MUST);
    bq.add(new TermQuery(new Term("field", "bar")), BooleanClause.Occur.MUST_NOT);
    DrillDownQuery ddq = new DrillDownQuery(config, bq.build());
    ddq.add("field", "foo");
    ddq.add("author", bq.build());
    ddq.add("dim", bq.build());
    DrillSidewaysResult r = ds.search(null, ddq, 10);
    assertEquals(0, r.hits.totalHits);
    writer.close();
    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)

Example 4 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.

the class TestDrillSideways method testRandom.

public void testRandom() throws Exception {
    while (aChance == 0.0) {
        aChance = random().nextDouble();
    }
    while (bChance == 0.0) {
        bChance = random().nextDouble();
    }
    while (cChance == 0.0) {
        cChance = random().nextDouble();
    }
    //aChance = .01;
    //bChance = 0.5;
    //cChance = 1.0;
    double sum = aChance + bChance + cChance;
    aChance /= sum;
    bChance /= sum;
    cChance /= sum;
    int numDims = TestUtil.nextInt(random(), 2, 5);
    //int numDims = 3;
    int numDocs = atLeast(3000);
    //int numDocs = 20;
    if (VERBOSE) {
        System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance);
    }
    String[][] dimValues = new String[numDims][];
    int valueCount = 2;
    for (int dim = 0; dim < numDims; dim++) {
        Set<String> values = new HashSet<>();
        while (values.size() < valueCount) {
            String s = TestUtil.randomRealisticUnicodeString(random());
            //String s = _TestUtil.randomString(random());
            if (s.length() > 0) {
                values.add(s);
            }
        }
        dimValues[dim] = values.toArray(new String[values.size()]);
        valueCount *= 2;
    }
    List<Doc> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
        Doc doc = new Doc();
        doc.id = "" + i;
        doc.contentToken = randomContentToken(false);
        doc.dims = new int[numDims];
        doc.dims2 = new int[numDims];
        for (int dim = 0; dim < numDims; dim++) {
            if (random().nextInt(5) == 3) {
                // This doc is missing this dim:
                doc.dims[dim] = -1;
            } else if (dimValues[dim].length <= 4) {
                int dimUpto = 0;
                doc.dims[dim] = dimValues[dim].length - 1;
                while (dimUpto < dimValues[dim].length) {
                    if (random().nextBoolean()) {
                        doc.dims[dim] = dimUpto;
                        break;
                    }
                    dimUpto++;
                }
            } else {
                doc.dims[dim] = random().nextInt(dimValues[dim].length);
            }
            if (random().nextInt(5) == 3) {
                // 2nd value:
                doc.dims2[dim] = random().nextInt(dimValues[dim].length);
            } else {
                doc.dims2[dim] = -1;
            }
        }
        docs.add(doc);
    }
    Directory d = newDirectory();
    Directory td = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setInfoStream(InfoStream.NO_OUTPUT);
    RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
    DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    for (int i = 0; i < numDims; i++) {
        config.setMultiValued("dim" + i, true);
    }
    boolean doUseDV = random().nextBoolean();
    for (Doc rawDoc : docs) {
        Document doc = new Document();
        doc.add(newStringField("id", rawDoc.id, Field.Store.YES));
        doc.add(new SortedDocValuesField("id", new BytesRef(rawDoc.id)));
        doc.add(newStringField("content", rawDoc.contentToken, Field.Store.NO));
        if (VERBOSE) {
            System.out.println("  doc id=" + rawDoc.id + " token=" + rawDoc.contentToken);
        }
        for (int dim = 0; dim < numDims; dim++) {
            int dimValue = rawDoc.dims[dim];
            if (dimValue != -1) {
                if (doUseDV) {
                    doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue]));
                } else {
                    doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue]));
                }
                doc.add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
                if (VERBOSE) {
                    System.out.println("    dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue]));
                }
            }
            int dimValue2 = rawDoc.dims2[dim];
            if (dimValue2 != -1) {
                if (doUseDV) {
                    doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue2]));
                } else {
                    doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue2]));
                }
                doc.add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
                if (VERBOSE) {
                    System.out.println("      dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue2]));
                }
            }
        }
        w.addDocument(config.build(tw, doc));
    }
    if (random().nextBoolean()) {
        // Randomly delete a few docs:
        int numDel = TestUtil.nextInt(random(), 1, (int) (numDocs * 0.05));
        if (VERBOSE) {
            System.out.println("delete " + numDel);
        }
        int delCount = 0;
        while (delCount < numDel) {
            Doc doc = docs.get(random().nextInt(docs.size()));
            if (!doc.deleted) {
                if (VERBOSE) {
                    System.out.println("  delete id=" + doc.id);
                }
                doc.deleted = true;
                w.deleteDocuments(new Term("id", doc.id));
                delCount++;
            }
        }
    }
    if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("TEST: forceMerge(1)...");
        }
        w.forceMerge(1);
    }
    IndexReader r = w.getReader();
    final SortedSetDocValuesReaderState sortedSetDVState;
    IndexSearcher s = newSearcher(r);
    if (doUseDV) {
        sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.getIndexReader());
    } else {
        sortedSetDVState = null;
    }
    if (VERBOSE) {
        System.out.println("r.numDocs() = " + r.numDocs());
    }
    // NRT open
    TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
    int numIters = atLeast(10);
    for (int iter = 0; iter < numIters; iter++) {
        String contentToken = random().nextInt(30) == 17 ? null : randomContentToken(true);
        int numDrillDown = TestUtil.nextInt(random(), 1, Math.min(4, numDims));
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown + " useSortedSetDV=" + doUseDV);
        }
        String[][] drillDowns = new String[numDims][];
        int count = 0;
        boolean anyMultiValuedDrillDowns = false;
        while (count < numDrillDown) {
            int dim = random().nextInt(numDims);
            if (drillDowns[dim] == null) {
                if (random().nextBoolean()) {
                    // Drill down on one value:
                    drillDowns[dim] = new String[] { dimValues[dim][random().nextInt(dimValues[dim].length)] };
                } else {
                    int orCount = TestUtil.nextInt(random(), 1, Math.min(5, dimValues[dim].length));
                    drillDowns[dim] = new String[orCount];
                    anyMultiValuedDrillDowns |= orCount > 1;
                    for (int i = 0; i < orCount; i++) {
                        while (true) {
                            String value = dimValues[dim][random().nextInt(dimValues[dim].length)];
                            for (int j = 0; j < i; j++) {
                                if (value.equals(drillDowns[dim][j])) {
                                    value = null;
                                    break;
                                }
                            }
                            if (value != null) {
                                drillDowns[dim][i] = value;
                                break;
                            }
                        }
                    }
                }
                if (VERBOSE) {
                    BytesRef[] values = new BytesRef[drillDowns[dim].length];
                    for (int i = 0; i < values.length; i++) {
                        values[i] = new BytesRef(drillDowns[dim][i]);
                    }
                    System.out.println("  dim" + dim + "=" + Arrays.toString(values));
                }
                count++;
            }
        }
        Query baseQuery;
        if (contentToken == null) {
            baseQuery = new MatchAllDocsQuery();
        } else {
            baseQuery = new TermQuery(new Term("content", contentToken));
        }
        DrillDownQuery ddq = new DrillDownQuery(config, baseQuery);
        for (int dim = 0; dim < numDims; dim++) {
            if (drillDowns[dim] != null) {
                for (String value : drillDowns[dim]) {
                    ddq.add("dim" + dim, value);
                }
            }
        }
        Query filter;
        if (random().nextInt(7) == 6) {
            if (VERBOSE) {
                System.out.println("  only-even filter");
            }
            filter = new Query() {

                @Override
                public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
                    return new ConstantScoreWeight(this, boost) {

                        @Override
                        public Scorer scorer(LeafReaderContext context) throws IOException {
                            DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
                            return new ConstantScoreScorer(this, score(), new TwoPhaseIterator(approximation) {

                                @Override
                                public boolean matches() throws IOException {
                                    int docID = approximation.docID();
                                    return (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0;
                                }

                                @Override
                                public float matchCost() {
                                    return 1000f;
                                }
                            });
                        }
                    };
                }

                @Override
                public String toString(String field) {
                    return "drillSidewaysTestFilter";
                }

                @Override
                public boolean equals(Object o) {
                    return o == this;
                }

                @Override
                public int hashCode() {
                    return System.identityHashCode(this);
                }
            };
        } else {
            filter = null;
        }
        // Verify docs are always collected in order.  If we
        // had an AssertingScorer it could catch it when
        // Weight.scoresDocsOutOfOrder lies!:
        getNewDrillSideways(s, config, tr).search(ddq, new SimpleCollector() {

            int lastDocID;

            @Override
            public void collect(int doc) {
                assert doc > lastDocID;
                lastDocID = doc;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                lastDocID = -1;
            }

            @Override
            public boolean needsScores() {
                return false;
            }
        });
        // subScorers are on the same docID:
        if (!anyMultiValuedDrillDowns) {
            // Can only do this test when there are no OR'd
            // drill-down values, because in that case it's
            // easily possible for one of the DD terms to be on
            // a future docID:
            getNewDrillSidewaysScoreSubdocsAtOnce(s, config, tr).search(ddq, new AssertingSubDocsAtOnceCollector());
        }
        TestFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter);
        Sort sort = new Sort(new SortField("id", SortField.Type.STRING));
        DrillSideways ds;
        if (doUseDV) {
            ds = getNewDrillSideways(s, config, sortedSetDVState);
        } else {
            ds = getNewDrillSidewaysBuildFacetsResult(s, config, tr);
        }
        // Retrieve all facets:
        DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true);
        TopDocs hits = s.search(baseQuery, numDocs);
        Map<String, Float> scores = new HashMap<>();
        for (ScoreDoc sd : hits.scoreDocs) {
            scores.put(s.doc(sd.doc).get("id"), sd.score);
        }
        if (VERBOSE) {
            System.out.println("  verify all facets");
        }
        verifyEquals(dimValues, s, expected, actual, scores, doUseDV);
        // Make sure drill down doesn't change score:
        Query q = ddq;
        if (filter != null) {
            q = new BooleanQuery.Builder().add(q, Occur.MUST).add(filter, Occur.FILTER).build();
        }
        TopDocs ddqHits = s.search(q, numDocs);
        assertEquals(expected.hits.size(), ddqHits.totalHits);
        for (int i = 0; i < expected.hits.size(); i++) {
            // Score should be IDENTICAL:
            assertEquals(scores.get(expected.hits.get(i).id), ddqHits.scoreDocs[i].score, 0.0f);
        }
    }
    w.close();
    IOUtils.close(r, tr, tw, d, td);
}

Also used : Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) Scorer(org.apache.lucene.search.Scorer) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) SimpleCollector(org.apache.lucene.search.SimpleCollector) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) ScoreDoc(org.apache.lucene.search.ScoreDoc) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Sort(org.apache.lucene.search.Sort) HashSet(java.util.HashSet) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) SortedSetDocValuesReaderState(org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState) DefaultSortedSetDocValuesReaderState(org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState) TwoPhaseIterator(org.apache.lucene.search.TwoPhaseIterator) Term(org.apache.lucene.index.Term) Weight(org.apache.lucene.search.Weight) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) TopDocs(org.apache.lucene.search.TopDocs) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) DefaultSortedSetDocValuesReaderState(org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState) IndexReader(org.apache.lucene.index.IndexReader) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 5 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.

the class TestDrillSideways method testMultipleRequestsPerDim.

public void testMultipleRequestsPerDim() throws Exception {
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    config.setHierarchical("dim", true);
    Document doc = new Document();
    doc.add(new FacetField("dim", "a", "x"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("dim", "a", "y"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("dim", "a", "z"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("dim", "b"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("dim", "c"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("dim", "d"));
    writer.addDocument(config.build(taxoWriter, doc));
    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());
    //System.out.println("searcher=" + searcher);
    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("dim", "a");
    DrillSidewaysResult r = getNewDrillSideways(searcher, config, taxoReader).search(null, ddq, 10);
    assertEquals(3, r.hits.totalHits);
    assertEquals("dim=dim path=[] value=6 childCount=4\n  a (3)\n  b (1)\n  c (1)\n  d (1)\n", r.facets.getTopChildren(10, "dim").toString());
    assertEquals("dim=dim path=[a] value=3 childCount=3\n  x (1)\n  y (1)\n  z (1)\n", r.facets.getTopChildren(10, "dim", "a").toString());
    writer.close();
    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
}

Aggregations

DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)85 Directory (org.apache.lucene.store.Directory)73 DirectoryTaxonomyReader (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)52 Document (org.apache.lucene.document.Document)46 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 FacetsConfig (org.apache.lucene.facet.FacetsConfig)35 FacetField (org.apache.lucene.facet.FacetField)31 Test (org.junit.Test)28 IndexSearcher (org.apache.lucene.search.IndexSearcher)27 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)26 IndexWriter (org.apache.lucene.index.IndexWriter)25 Facets (org.apache.lucene.facet.Facets)22 SlowRAMDirectory (org.apache.lucene.facet.SlowRAMDirectory)21 FacetsCollector (org.apache.lucene.facet.FacetsCollector)17 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)15 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)15 FacetResult (org.apache.lucene.facet.FacetResult)14 TaxonomyReader (org.apache.lucene.facet.taxonomy.TaxonomyReader)13 DirectoryReader (org.apache.lucene.index.DirectoryReader)12 TaxonomyWriter (org.apache.lucene.facet.taxonomy.TaxonomyWriter)9