Search in sources :

Example 1 with LabelAndValue

Use of org.apache.lucene.facet.LabelAndValue in the Apache lucene-solr project.

From the class FloatTaxonomyFacets, method getTopChildren.

/**
 * Returns the highest-valued direct children of {@code dim}/{@code path}, ranked by
 * their aggregated float value.
 *
 * @param topN maximum number of children to return; must be positive
 * @param dim  dimension name, checked through {@code verifyDim}
 * @param path optional path components below the dimension
 * @return the facet result, or {@code null} when the dim/path has no ordinal in the
 *         taxonomy or when the summed child value is {@code 0}
 * @throws IllegalArgumentException if {@code topN <= 0}
 * @throws IOException declared by the signature (NOTE(review): presumably raised by
 *         the taxonomy reader lookups; confirm)
 */
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    if (topN <= 0) {
        throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
    }
    final DimConfig dimConfig = verifyDim(dim);
    final FacetLabel parentLabel = new FacetLabel(dim, path);
    final int parentOrd = taxoReader.getOrdinal(parentLabel);
    if (parentOrd == -1) {
        return null;
    }

    final TopOrdAndFloatQueue queue = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
    // Entry handed back by the queue on overflow; recycled to avoid re-allocating.
    TopOrdAndFloatQueue.OrdAndValue spare = null;
    // Smallest value currently held once the queue is full; candidates must beat it.
    float threshold = 0;
    float total = 0;
    int positiveChildren = 0;

    // Walk the sibling chain that starts at the first child of the parent ordinal.
    for (int childOrd = children[parentOrd];
            childOrd != TaxonomyReader.INVALID_ORDINAL;
            childOrd = siblings[childOrd]) {
        final float value = values[childOrd];
        // Written as a negated '>' so that NaN is skipped as well.
        if (!(value > 0)) {
            continue;
        }
        total += value;
        positiveChildren++;
        if (value > threshold) {
            if (spare == null) {
                spare = new TopOrdAndFloatQueue.OrdAndValue();
            }
            spare.ord = childOrd;
            spare.value = value;
            spare = queue.insertWithOverflow(spare);
            if (queue.size() == topN) {
                threshold = queue.top().value;
            }
        }
    }

    if (total == 0) {
        return null;
    }

    // For a single-valued dim the summed total is kept as is. For a multi-valued dim
    // the sum is not correct in general: use the value recorded on the dim ordinal
    // when it was requested, otherwise report -1.
    if (dimConfig.multiValued) {
        total = dimConfig.requireDimCount ? values[parentOrd] : -1;
    }

    // Entries are popped into the array from the last slot down to the first.
    final LabelAndValue[] labelValues = new LabelAndValue[queue.size()];
    int slot = labelValues.length;
    while (--slot >= 0) {
        final TopOrdAndFloatQueue.OrdAndValue entry = queue.pop();
        final FacetLabel childLabel = taxoReader.getPath(entry.ord);
        labelValues[slot] = new LabelAndValue(childLabel.components[parentLabel.length], entry.value);
    }
    return new FacetResult(dim, path, total, labelValues, positiveChildren);
}
Also used : TopOrdAndFloatQueue(org.apache.lucene.facet.TopOrdAndFloatQueue) FacetResult(org.apache.lucene.facet.FacetResult) LabelAndValue(org.apache.lucene.facet.LabelAndValue) DimConfig(org.apache.lucene.facet.FacetsConfig.DimConfig)

Example 2 with LabelAndValue

Use of org.apache.lucene.facet.LabelAndValue in the Apache lucene-solr project.

From the class IntTaxonomyFacets, method getTopChildren.

/**
 * Returns the highest-valued direct children of {@code dim}/{@code path}, ranked by
 * their aggregated int value.
 *
 * @param topN maximum number of children to return; must be positive
 * @param dim  dimension name, checked through {@code verifyDim}
 * @param path optional path components below the dimension
 * @return the facet result, or {@code null} when the dim/path has no ordinal in the
 *         taxonomy or when the summed child value is {@code 0}
 * @throws IllegalArgumentException if {@code topN <= 0}
 * @throws IOException declared by the signature (NOTE(review): presumably raised by
 *         the taxonomy reader lookups; confirm)
 */
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    if (topN <= 0) {
        throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
    }
    final DimConfig dimConfig = verifyDim(dim);
    final FacetLabel parentLabel = new FacetLabel(dim, path);
    final int parentOrd = taxoReader.getOrdinal(parentLabel);
    if (parentOrd == -1) {
        return null;
    }

    final TopOrdAndIntQueue queue = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
    // Entry handed back by the queue on overflow; recycled to avoid re-allocating.
    TopOrdAndIntQueue.OrdAndValue spare = null;
    // Smallest value currently held once the queue is full; candidates must beat it.
    int threshold = 0;
    int total = 0;
    int positiveChildren = 0;

    // Walk the sibling chain that starts at the first child of the parent ordinal.
    for (int childOrd = children[parentOrd];
            childOrd != TaxonomyReader.INVALID_ORDINAL;
            childOrd = siblings[childOrd]) {
        final int value = values[childOrd];
        if (value <= 0) {
            continue;
        }
        total += value;
        positiveChildren++;
        if (value <= threshold) {
            continue;
        }
        if (spare == null) {
            spare = new TopOrdAndIntQueue.OrdAndValue();
        }
        spare.ord = childOrd;
        spare.value = value;
        spare = queue.insertWithOverflow(spare);
        if (queue.size() == topN) {
            threshold = queue.top().value;
        }
    }

    if (total == 0) {
        return null;
    }

    // For a single-valued dim the summed total is kept as is. For a multi-valued dim
    // the sum is not correct in general: use the value recorded on the dim ordinal
    // when it was requested, otherwise report -1.
    if (dimConfig.multiValued) {
        total = dimConfig.requireDimCount ? values[parentOrd] : -1;
    }

    // Entries are popped into the array from the last slot down to the first.
    final LabelAndValue[] labelValues = new LabelAndValue[queue.size()];
    int slot = labelValues.length;
    while (--slot >= 0) {
        final TopOrdAndIntQueue.OrdAndValue entry = queue.pop();
        final FacetLabel childLabel = taxoReader.getPath(entry.ord);
        labelValues[slot] = new LabelAndValue(childLabel.components[parentLabel.length], entry.value);
    }
    return new FacetResult(dim, path, total, labelValues, positiveChildren);
}
Also used : TopOrdAndIntQueue(org.apache.lucene.facet.TopOrdAndIntQueue) FacetResult(org.apache.lucene.facet.FacetResult) LabelAndValue(org.apache.lucene.facet.LabelAndValue) DimConfig(org.apache.lucene.facet.FacetsConfig.DimConfig)

Example 3 with LabelAndValue

Use of org.apache.lucene.facet.LabelAndValue in the Apache lucene-solr project.

From the class SortedSetDocValuesFacetCounts, method getDim.

/**
 * Builds the facet result for one dimension by scanning the counts of every ordinal
 * in {@code ordRange} (both ends inclusive) and keeping the {@code topN} largest.
 *
 * @param dim      dimension name placed in the returned result
 * @param ordRange ordinal range belonging to the dimension
 * @param topN     capacity of the top-N queue
 * @return the facet result, or {@code null} when no ordinal in the range has a
 *         positive count
 * @throws IOException declared by the signature (NOTE(review): presumably raised by
 *         the doc-values ordinal lookup; confirm)
 */
private final FacetResult getDim(String dim, OrdRange ordRange, int topN) throws IOException {
    // Created lazily so the sparse case (no positive counts) allocates nothing.
    TopOrdAndIntQueue queue = null;
    // Entry handed back by the queue on overflow; recycled to avoid re-allocating.
    TopOrdAndIntQueue.OrdAndValue spare = null;
    // Smallest count currently held once the queue is full; candidates must beat it.
    int threshold = 0;
    int total = 0;
    int positiveChildren = 0;

    for (int ord = ordRange.start; ord <= ordRange.end; ord++) {
        final int count = counts[ord];
        if (count <= 0) {
            continue;
        }
        total += count;
        positiveChildren++;
        if (count <= threshold) {
            continue;
        }
        if (spare == null) {
            spare = new TopOrdAndIntQueue.OrdAndValue();
        }
        spare.ord = ord;
        spare.value = count;
        if (queue == null) {
            queue = new TopOrdAndIntQueue(topN);
        }
        spare = queue.insertWithOverflow(spare);
        if (queue.size() == topN) {
            threshold = queue.top().value;
        }
    }

    if (queue == null) {
        return null;
    }

    // Entries are popped into the array from the last slot down to the first.
    final LabelAndValue[] labelValues = new LabelAndValue[queue.size()];
    int slot = labelValues.length;
    while (--slot >= 0) {
        final TopOrdAndIntQueue.OrdAndValue entry = queue.pop();
        final BytesRef term = dv.lookupOrd(entry.ord);
        // The second path component of the decoded term is used as the label.
        final String[] pathParts = FacetsConfig.stringToPath(term.utf8ToString());
        labelValues[slot] = new LabelAndValue(pathParts[1], entry.value);
    }
    return new FacetResult(dim, new String[0], total, labelValues, positiveChildren);
}
Also used : TopOrdAndIntQueue(org.apache.lucene.facet.TopOrdAndIntQueue) FacetResult(org.apache.lucene.facet.FacetResult) LabelAndValue(org.apache.lucene.facet.LabelAndValue) BytesRef(org.apache.lucene.util.BytesRef)

Example 4 with LabelAndValue

Use of org.apache.lucene.facet.LabelAndValue in the Apache lucene-solr project.

From the class TestOrdinalMappingLeafReader, method verifyResults.

/**
 * Opens readers over the given index and taxonomy directories and asserts that the
 * "tag" and "id" facet counts, and the relationship between the "bdv" and "cbdv"
 * binary doc values, are as expected.
 *
 * <p>Both readers are managed by try-with-resources so they are closed even when an
 * assertion fails or a lookup throws; previously they were closed only on the
 * success path.
 *
 * @param indexDir directory holding the search index
 * @param taxoDir  directory holding the taxonomy index
 * @throws IOException if opening, reading or closing either reader fails
 */
private void verifyResults(Directory indexDir, Directory taxoDir) throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
        IndexSearcher searcher = newSearcher(indexReader);
        FacetsCollector collector = new FacetsCollector();
        FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, collector);
        // tag facets: every returned tag label must have a value of NUM_DOCS
        Facets tagFacets = new FastTaxonomyFacetCounts("$tags", taxoReader, facetConfig, collector);
        FacetResult result = tagFacets.getTopChildren(10, "tag");
        for (LabelAndValue lv : result.labelValues) {
            if (VERBOSE) {
                System.out.println(lv);
            }
            assertEquals(NUM_DOCS, lv.value.intValue());
        }
        // id facets
        Facets idFacets = new FastTaxonomyFacetCounts(taxoReader, facetConfig, collector);
        FacetResult idResult = idFacets.getTopChildren(10, "id");
        assertEquals(NUM_DOCS, idResult.childCount);
        // each "id" appears twice
        assertEquals(NUM_DOCS * 2, idResult.value);
        BinaryDocValues bdv = MultiDocValues.getBinaryValues(indexReader, "bdv");
        BinaryDocValues cbdv = MultiDocValues.getBinaryValues(indexReader, "cbdv");
        // Both iterators must visit every doc id in order; cbdv must parse to twice bdv.
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            assertEquals(i, bdv.nextDoc());
            assertEquals(i, cbdv.nextDoc());
            assertEquals(Integer.parseInt(cbdv.binaryValue().utf8ToString()), Integer.parseInt(bdv.binaryValue().utf8ToString()) * 2);
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Facets(org.apache.lucene.facet.Facets) DirectoryReader(org.apache.lucene.index.DirectoryReader) FacetResult(org.apache.lucene.facet.FacetResult) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) LabelAndValue(org.apache.lucene.facet.LabelAndValue) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) FacetsCollector(org.apache.lucene.facet.FacetsCollector)

Example 5 with LabelAndValue

Use of org.apache.lucene.facet.LabelAndValue in the Apache lucene-solr project.

From the class TestSortedSetDocValuesFacets, method testRandom.

/**
 * Randomized test: indexes random documents carrying sorted-set facet fields, then
 * for a number of random single-term queries compares the result of
 * {@code facets.getAllDims(10)} against counts recomputed by brute force over the
 * in-memory test documents.
 *
 * @throws Exception if indexing, searching or faceting fails
 */
public void testRandom() throws Exception {
    // NOTE: the order of the setup calls below is kept exactly as before so that a
    // given random seed reproduces the same run.
    final String[] tokens = getRandomTokens(10);
    final Directory indexDir = newDirectory();
    final Directory taxoDir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), indexDir);
    final FacetsConfig config = new FacetsConfig();
    final int numDocs = atLeast(1000);
    final int numDims = TestUtil.nextInt(random(), 1, 7);
    final List<TestDoc> testDocs = getRandomDocs(tokens, numDocs, numDims);

    for (TestDoc testDoc : testDocs) {
        final Document luceneDoc = new Document();
        luceneDoc.add(newStringField("content", testDoc.content, Field.Store.NO));
        for (int dimIdx = 0; dimIdx < numDims; dimIdx++) {
            final String dimValue = testDoc.dims[dimIdx];
            if (dimValue != null) {
                luceneDoc.add(new SortedSetDocValuesFacetField("dim" + dimIdx, dimValue));
            }
        }
        writer.addDocument(config.build(luceneDoc));
    }

    // NRT open
    final IndexSearcher searcher = newSearcher(writer.getReader());
    // Per-top-reader state:
    final SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
    final ExecutorService exec = randomExecutorServiceOrNull();

    final int iters = atLeast(100);
    for (int iter = 0; iter < iters; iter++) {
        final String searchToken = tokens[random().nextInt(tokens.length)];
        if (VERBOSE) {
            System.out.println("\nTEST: iter content=" + searchToken);
        }
        final FacetsCollector fc = new FacetsCollector();
        FacetsCollector.search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);

        // Use the concurrent implementation only when an executor was handed out.
        final Facets facets;
        if (exec == null) {
            facets = new SortedSetDocValuesFacetCounts(state, fc);
        } else {
            facets = new ConcurrentSortedSetDocValuesFacetCounts(state, fc, exec);
        }

        // Slow, yet hopefully bug-free, faceting: one label -> count map per dimension.
        @SuppressWarnings({ "rawtypes", "unchecked" }) Map<String, Integer>[] expectedCounts = new HashMap[numDims];
        for (int dimIdx = 0; dimIdx < numDims; dimIdx++) {
            expectedCounts[dimIdx] = new HashMap<>();
        }
        for (TestDoc candidate : testDocs) {
            if (!candidate.content.equals(searchToken)) {
                continue;
            }
            for (int dimIdx = 0; dimIdx < numDims; dimIdx++) {
                final String dimValue = candidate.dims[dimIdx];
                if (dimValue == null) {
                    continue;
                }
                final Map<String, Integer> dimCounts = expectedCounts[dimIdx];
                final Integer previous = dimCounts.get(dimValue);
                dimCounts.put(dimValue, previous == null ? 1 : previous.intValue() + 1);
            }
        }

        // Turn the per-dimension maps into FacetResults; dimensions with no hits are omitted.
        final List<FacetResult> expected = new ArrayList<>();
        for (int dimIdx = 0; dimIdx < numDims; dimIdx++) {
            final List<LabelAndValue> labelValues = new ArrayList<>();
            int totCount = 0;
            for (Map.Entry<String, Integer> entry : expectedCounts[dimIdx].entrySet()) {
                labelValues.add(new LabelAndValue(entry.getKey(), entry.getValue()));
                totCount += entry.getValue();
            }
            sortLabelValues(labelValues);
            if (totCount > 0) {
                final LabelAndValue[] labelArray = labelValues.toArray(new LabelAndValue[labelValues.size()]);
                expected.add(new FacetResult("dim" + dimIdx, new String[0], totCount, labelArray, labelValues.size()));
            }
        }

        // Put the expected results into the order defined by the sortFacetResults helper.
        sortFacetResults(expected);

        final List<FacetResult> actual = facets.getAllDims(10);
        assertEquals(expected, actual);
    }

    if (exec != null) {
        exec.shutdownNow();
    }
    writer.close();
    IOUtils.close(searcher.getIndexReader(), indexDir, taxoDir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Facets(org.apache.lucene.facet.Facets) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) LabelAndValue(org.apache.lucene.facet.LabelAndValue) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery) FacetsConfig(org.apache.lucene.facet.FacetsConfig) Term(org.apache.lucene.index.Term) FacetsCollector(org.apache.lucene.facet.FacetsCollector) ExecutorService(java.util.concurrent.ExecutorService) FacetResult(org.apache.lucene.facet.FacetResult) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Aggregations

LabelAndValue (org.apache.lucene.facet.LabelAndValue)29 FacetResult (org.apache.lucene.facet.FacetResult)28 Facets (org.apache.lucene.facet.Facets)23 FacetsCollector (org.apache.lucene.facet.FacetsCollector)22 IndexSearcher (org.apache.lucene.search.IndexSearcher)12 IOException (java.io.IOException)11 ArrayList (java.util.ArrayList)11 DefaultSortedSetDocValuesReaderState (org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState)11 SortedSetDocValuesFacetCounts (org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts)11 SortedSetDocValuesReaderState (org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState)11 DirectoryTaxonomyReader (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)9 Document (org.apache.lucene.document.Document)7 FacetsConfig (org.apache.lucene.facet.FacetsConfig)7 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)7 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)7 Directory (org.apache.lucene.store.Directory)7 DirectoryReader (org.apache.lucene.index.DirectoryReader)6 TermQuery (org.apache.lucene.search.TermQuery)5 HashMap (java.util.HashMap)4 Map (java.util.Map)4