Search in sources :

Example 26 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class TestAddTaxonomy method testConcurrency.

public void testConcurrency() throws Exception {
    // Verifies that addTaxonomy and addCategory can run side by side on the
    // same writer without producing duplicate categories.
    final int numCategories = atLeast(10000);

    // Step 1: build the source taxonomy holding "a/0" .. "a/(numCategories-1)".
    Directory src = newDirectory();
    DirectoryTaxonomyWriter srcWriter = new DirectoryTaxonomyWriter(src);
    int next = 0;
    while (next < numCategories) {
        srcWriter.addCategory(new FacetLabel("a", Integer.toString(next)));
        next++;
    }
    srcWriter.close();

    // Step 2: merge the source taxonomy into an empty destination while a
    // second thread adds the very same categories one by one.
    Directory dest = newDirectory();
    final DirectoryTaxonomyWriter destWriter = new DirectoryTaxonomyWriter(dest);
    Thread adder = new Thread() {

        @Override
        public void run() {
            try {
                for (int n = 0; n < numCategories; n++) {
                    destWriter.addCategory(new FacetLabel("a", Integer.toString(n)));
                }
            } catch (IOException e) {
                // Not expected; rethrow unchecked so the failure surfaces as an
                // uncaught exception in this thread.
                throw new RuntimeException(e);
            }
        }
    };
    adder.start();
    OrdinalMap ordinalMap = new MemoryOrdinalMap();
    destWriter.addTaxonomy(src, ordinalMap);
    adder.join();
    destWriter.close();

    // Step 3: validate the destination taxonomy.
    DirectoryTaxonomyReader reader = new DirectoryTaxonomyReader(dest);
    // The expected size is every "a/i" category plus the root and "a" itself.
    assertEquals(numCategories + 2, reader.getSize());
    HashSet<FacetLabel> seen = new HashSet<>();
    for (int ord = 1; ord < reader.getSize(); ord++) {
        FacetLabel path = reader.getPath(ord);
        // Set.add returns false when the path was already present, i.e. a duplicate.
        boolean firstOccurrence = seen.add(path);
        assertTrue("category " + path + " already existed", firstOccurrence);
    }
    reader.close();
    IOUtils.close(src, dest);
}
Also used : MemoryOrdinalMap(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) IOException(java.io.IOException) DiskOrdinalMap(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap) MemoryOrdinalMap(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap) OrdinalMap(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 27 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class FacetsConfig method processFacetFields.

/**
 * For every index field in {@code byField}, adds each facet field's category
 * to the taxonomy, indexes one drill-down term per path prefix, and finally
 * adds a single binary doc-values field carrying the collected ordinals.
 */
private void processFacetFields(TaxonomyWriter taxoWriter, Map<String, List<FacetField>> byField, Document doc) throws IOException {
    for (Map.Entry<String, List<FacetField>> entry : byField.entrySet()) {
        final String indexFieldName = entry.getKey();
        final IntsRefBuilder ords = new IntsRefBuilder();

        for (FacetField facetField : entry.getValue()) {
            final FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
            final int depth = facetField.path.length;
            if (!dimConfig.hierarchical && depth > 1) {
                throw new IllegalArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + depth + " components");
            }

            final FacetLabel label = new FacetLabel(facetField.dim, facetField.path);
            checkTaxoWriter(taxoWriter);
            final int ordinal = taxoWriter.addCategory(label);
            ords.append(ordinal);

            final boolean addParents = dimConfig.multiValued && (dimConfig.hierarchical || dimConfig.requireDimCount);
            if (addParents) {
                // Walk up the ancestor chain, recording every parent ordinal
                // until a non-positive ordinal is reached.
                for (int parent = taxoWriter.getParent(ordinal); parent > 0; parent = taxoWriter.getParent(parent)) {
                    ords.append(parent);
                }
                if (!dimConfig.requireDimCount) {
                    // Drop the last appended (dimension) ordinal again.
                    ords.setLength(ords.length() - 1);
                }
            }

            // Drill-down terms: one per prefix of the label, shortest first.
            for (int prefixLength = 1; prefixLength <= label.length; prefixLength++) {
                String term = pathToString(label.components, prefixLength);
                doc.add(new StringField(indexFieldName, term, Field.Store.NO));
            }
        }

        // Facet counts: the de-duplicated, encoded ordinals go into doc values
        // (doc values are considered stored fields).
        doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ords.get())));
    }
}
Also used : FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) AssociationFacetField(org.apache.lucene.facet.taxonomy.AssociationFacetField) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) StringField(org.apache.lucene.document.StringField) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 28 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class FacetsConfig method processAssocFacetFields.

/**
 * For every index field in {@code byField}, adds each association facet
 * field's category to the taxonomy and packs {@code [ordinal, association bytes]}
 * records back to back into one binary doc-values field; drill-down terms are
 * indexed for every prefix of each label.
 */
private void processAssocFacetFields(TaxonomyWriter taxoWriter, Map<String, List<AssociationFacetField>> byField, Document doc) throws IOException {
    for (Map.Entry<String, List<AssociationFacetField>> entry : byField.entrySet()) {
        final String indexFieldName = entry.getKey();
        byte[] buffer = new byte[16];
        int used = 0;

        for (AssociationFacetField assocField : entry.getValue()) {
            // NOTE: parents are not added for associations.
            checkTaxoWriter(taxoWriter);
            final FacetLabel label = new FacetLabel(assocField.dim, assocField.path);
            final int ordinal = taxoWriter.addCategory(label);

            // Make room for, then write, the 4-byte big-endian ordinal.
            final int afterOrdinal = used + 4;
            if (afterOrdinal > buffer.length) {
                buffer = ArrayUtil.grow(buffer, afterOrdinal);
            }
            buffer[used] = (byte) (ordinal >> 24);
            buffer[used + 1] = (byte) (ordinal >> 16);
            buffer[used + 2] = (byte) (ordinal >> 8);
            buffer[used + 3] = (byte) ordinal;
            used = afterOrdinal;

            // Make room for, then copy, the association payload.
            final int afterAssoc = used + assocField.assoc.length;
            if (afterAssoc > buffer.length) {
                buffer = ArrayUtil.grow(buffer, afterAssoc);
            }
            System.arraycopy(assocField.assoc.bytes, assocField.assoc.offset, buffer, used, assocField.assoc.length);
            used += assocField.assoc.length;

            // Drill-down terms: one per prefix of the label, shortest first.
            for (int prefixLength = 1; prefixLength <= label.length; prefixLength++) {
                String term = pathToString(label.components, prefixLength);
                doc.add(new StringField(indexFieldName, term, Field.Store.NO));
            }
        }

        // Only the bytes actually written are exposed to the doc-values field.
        doc.add(new BinaryDocValuesField(indexFieldName, new BytesRef(buffer, 0, used)));
    }
}
Also used : FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) StringField(org.apache.lucene.document.StringField) ArrayList(java.util.ArrayList) List(java.util.List) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) AssociationFacetField(org.apache.lucene.facet.taxonomy.AssociationFacetField) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 29 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class DirectoryTaxonomyWriter method internalAddCategory.

/**
 * Adds a new category to the index (and the cache) and returns the ordinal
 * assigned to it.
 * <p>
 * A parent must always be present in the taxonomy before any of its
 * children, so missing ancestors are added first by recursing on the parent
 * path.
 */
private int internalAddCategory(FacetLabel cp) throws IOException {
    // Resolve the parent ordinal first; it is passed on to
    // addCategoryDocument together with the category itself.
    final int parentOrdinal;
    if (cp.length < 1) {
        // An empty path has no parent at all.
        parentOrdinal = TaxonomyReader.INVALID_ORDINAL;
    } else if (cp.length == 1) {
        // Single-component paths hang directly off the root.
        parentOrdinal = TaxonomyReader.ROOT_ORDINAL;
    } else {
        final FacetLabel parentPath = cp.subpath(cp.length - 1);
        final int existing = findCategory(parentPath);
        // A negative lookup result means the parent is not there yet, so add
        // it (and, transitively, its own ancestors) now.
        parentOrdinal = existing < 0 ? internalAddCategory(parentPath) : existing;
    }
    return addCategoryDocument(cp, parentOrdinal);
}
Also used : FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel)

Example 30 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class DirectoryTaxonomyWriter method perhapsFillCache.

// Fills the category cache from the index, at most once, after enough cache
// misses have been recorded.
//
// We need to guarantee that if several threads call this concurrently, only
// one executes it, and after it returns, the cache is updated and is either
// complete or not. The method is synchronized for that reason.
//
// Outcome: cacheIsComplete is set to true when every term of the
// Consts.FULL field was put into the cache, and to false when the cache
// filled up before the iteration finished.
private synchronized void perhapsFillCache() throws IOException {
    // Not enough misses yet to justify filling the cache.
    if (cacheMisses.get() < cacheMissesUntilFill) {
        return;
    }
    if (!shouldFillCache) {
        // we already filled the cache once, there's no need to re-fill it
        return;
    }
    // Flip the flag before doing the work so that later calls return early
    // at the check above.
    shouldFillCache = false;
    initReaderManager();
    // Becomes true when the cache reports full before all terms were visited.
    boolean aborted = false;
    DirectoryReader reader = readerManager.acquire();
    try {
        // Passed back into termsEnum.postings() on every call and reassigned
        // from its result.
        PostingsEnum postingsEnum = null;
        for (LeafReaderContext ctx : reader.leaves()) {
            Terms terms = ctx.reader().terms(Consts.FULL);
            if (terms != null) {
                // cannot really happen, but be on the safe side
                // TODO: share per-segment TermsEnum here!
                TermsEnum termsEnum = terms.iterator();
                while (termsEnum.next() != null) {
                    if (!cache.isFull()) {
                        BytesRef t = termsEnum.term();
                        // Since we guarantee uniqueness of categories, each term has exactly
                        // one document. Also, since we do not allow removing categories (and
                        // hence documents), there are no deletions in the index. Therefore, it
                        // is sufficient to call next(), and then doc(), exactly once with no
                        // 'validation' checks.
                        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(t.utf8ToString()));
                        postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
                        // The cached value is the segment-local doc id shifted by the
                        // segment's docBase.
                        // NOTE(review): presumably this index-wide doc id doubles as the
                        // category ordinal - confirm against addCategoryDocument.
                        boolean res = cache.put(cp, postingsEnum.nextDoc() + ctx.docBase);
                        assert !res : "entries should not have been evicted from the cache";
                    } else {
                        // the cache is full and the next put() will evict entries from it, therefore abort the iteration.
                        aborted = true;
                        break;
                    }
                }
            }
            // The break above only leaves the inner while loop; stop visiting
            // further segments as well.
            if (aborted) {
                break;
            }
        }
    } finally {
        // Always hand the reader back to the manager, even if iteration threw.
        readerManager.release(reader);
    }
    cacheIsComplete = !aborted;
    if (cacheIsComplete) {
        // This method is already synchronized on 'this', so the block below
        // re-enters the monitor the calling thread already holds.
        synchronized (this) {
            // everything is in the cache, so no need to keep readerManager open.
            // this block is executed in a sync block so that it works well with
            // initReaderManager called in parallel.
            readerManager.close();
            readerManager = null;
            initializedReaderManager = false;
        }
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

FacetLabel (org.apache.lucene.facet.taxonomy.FacetLabel)43 Directory (org.apache.lucene.store.Directory)32 Test (org.junit.Test)25 RAMDirectory (org.apache.lucene.store.RAMDirectory)13 TaxonomyReader (org.apache.lucene.facet.taxonomy.TaxonomyReader)7 MemoryOrdinalMap (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap)7 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)6 IOException (java.io.IOException)5 Random (java.util.Random)5 DiskOrdinalMap (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap)5 OrdinalMap (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap)5 HashMap (java.util.HashMap)4 Map (java.util.Map)4 DirectoryReader (org.apache.lucene.index.DirectoryReader)4 IndexWriter (org.apache.lucene.index.IndexWriter)4 ArrayList (java.util.ArrayList)3 List (java.util.List)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)3 Document (org.apache.lucene.document.Document)3