Use of org.apache.lucene.facet.taxonomy.FacetLabel in the apache/lucene-solr project: class TestAddTaxonomy, method testConcurrency.
public void testConcurrency() throws Exception {
  // Verifies that addTaxonomy() and addCategory() may run in parallel:
  // the destination taxonomy must end up with no duplicate categories.
  final int numCategories = atLeast(10000);

  // Build the source taxonomy index with categories a/0 .. a/(n-1).
  Directory srcDir = newDirectory();
  DirectoryTaxonomyWriter srcWriter = new DirectoryTaxonomyWriter(srcDir);
  for (int i = 0; i < numCategories; i++) {
    srcWriter.addCategory(new FacetLabel("a", Integer.toString(i)));
  }
  srcWriter.close();

  // Copy the source taxonomy into an empty destination while a second
  // thread re-adds the very same categories concurrently.
  Directory destDir = newDirectory();
  final DirectoryTaxonomyWriter destWriter = new DirectoryTaxonomyWriter(destDir);
  Thread adder = new Thread(() -> {
    for (int i = 0; i < numCategories; i++) {
      try {
        destWriter.addCategory(new FacetLabel("a", Integer.toString(i)));
      } catch (IOException e) {
        // shouldn't happen - if it does, let the test fail on uncaught exception.
        throw new RuntimeException(e);
      }
    }
  });
  adder.start();
  destWriter.addTaxonomy(srcDir, new MemoryOrdinalMap());
  adder.join();
  destWriter.close();

  // Validate: each ordinal (skipping the root at 0) maps to a unique path.
  DirectoryTaxonomyReader reader = new DirectoryTaxonomyReader(destDir);
  // +2 to account for the root category + "a"
  assertEquals(numCategories + 2, reader.getSize());
  HashSet<FacetLabel> seen = new HashSet<>();
  for (int ord = 1; ord < reader.getSize(); ord++) {
    FacetLabel cat = reader.getPath(ord);
    assertTrue("category " + cat + " already existed", seen.add(cat));
  }
  reader.close();
  IOUtils.close(srcDir, destDir);
}
Use of org.apache.lucene.facet.taxonomy.FacetLabel in the apache/lucene-solr project: class FacetsConfig, method processFacetFields.
/**
 * Resolves each facet field to a taxonomy ordinal and indexes the result on
 * {@code doc}: one binary doc-values field per index field holding the
 * deduplicated, encoded ordinals, plus one drill-down term per path prefix.
 *
 * @param taxoWriter taxonomy writer used to resolve (and create) ordinals
 * @param byField facet fields grouped by their target index field name
 * @param doc the document under construction
 * @throws IOException if the taxonomy writer fails
 */
private void processFacetFields(TaxonomyWriter taxoWriter, Map<String, List<FacetField>> byField, Document doc) throws IOException {
for (Map.Entry<String, List<FacetField>> ent : byField.entrySet()) {
String indexFieldName = ent.getKey();
//System.out.println("  indexFieldName=" + indexFieldName + " fields=" + ent.getValue());
// Collects every ordinal (and, where configured, ancestor ordinals) to be
// encoded into this index field's doc-values entry.
IntsRefBuilder ordinals = new IntsRefBuilder();
for (FacetField facetField : ent.getValue()) {
FacetsConfig.DimConfig ft = getDimConfig(facetField.dim);
// A multi-component path is only legal for a hierarchical dimension.
if (facetField.path.length > 1 && ft.hierarchical == false) {
throw new IllegalArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.length + " components");
}
FacetLabel cp = new FacetLabel(facetField.dim, facetField.path);
checkTaxoWriter(taxoWriter);
int ordinal = taxoWriter.addCategory(cp);
ordinals.append(ordinal);
if (ft.multiValued && (ft.hierarchical || ft.requireDimCount)) {
//System.out.println("  add parents");
// Add all parents too:
int parent = taxoWriter.getParent(ordinal);
while (parent > 0) {
ordinals.append(parent);
parent = taxoWriter.getParent(parent);
}
if (ft.requireDimCount == false) {
// Remove last (dimension) ord:
// The loop above appended ancestors bottom-up, so the dimension's own
// ordinal is the most recently appended entry.
ordinals.setLength(ordinals.length() - 1);
}
}
// Drill down:
// index one term per path prefix (dim, dim/a, dim/a/b, ...).
for (int i = 1; i <= cp.length; i++) {
doc.add(new StringField(indexFieldName, pathToString(cp.components, i), Field.Store.NO));
}
}
// Facet counts:
// DocValues are considered stored fields:
doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals.get())));
}
}
Use of org.apache.lucene.facet.taxonomy.FacetLabel in the apache/lucene-solr project: class FacetsConfig, method processAssocFacetFields.
/**
 * Indexes association facet fields on {@code doc}: for every index field, a
 * binary doc-values entry containing, per facet, the 4-byte big-endian
 * ordinal followed by that facet's association payload, plus one drill-down
 * term per path prefix.
 *
 * @param taxoWriter taxonomy writer used to resolve (and create) ordinals
 * @param byField association facet fields grouped by index field name
 * @param doc the document under construction
 * @throws IOException if the taxonomy writer fails
 */
private void processAssocFacetFields(TaxonomyWriter taxoWriter, Map<String, List<AssociationFacetField>> byField, Document doc) throws IOException {
  for (Map.Entry<String, List<AssociationFacetField>> entry : byField.entrySet()) {
    String indexFieldName = entry.getKey();
    byte[] buf = new byte[16];
    int len = 0;
    for (AssociationFacetField assocField : entry.getValue()) {
      // NOTE: we don't add parents for associations
      checkTaxoWriter(taxoWriter);
      FacetLabel label = new FacetLabel(assocField.dim, assocField.path);
      int ordinal = taxoWriter.addCategory(label);
      // Ensure room for the 4-byte ordinal.
      if (len + 4 > buf.length) {
        buf = ArrayUtil.grow(buf, len + 4);
      }
      // big-endian:
      buf[len++] = (byte) (ordinal >> 24);
      buf[len++] = (byte) (ordinal >> 16);
      buf[len++] = (byte) (ordinal >> 8);
      buf[len++] = (byte) ordinal;
      // Ensure room for, then append, the association payload.
      if (len + assocField.assoc.length > buf.length) {
        buf = ArrayUtil.grow(buf, len + assocField.assoc.length);
      }
      System.arraycopy(assocField.assoc.bytes, assocField.assoc.offset, buf, len, assocField.assoc.length);
      len += assocField.assoc.length;
      // Drill down:
      for (int depth = 1; depth <= label.length; depth++) {
        doc.add(new StringField(indexFieldName, pathToString(label.components, depth), Field.Store.NO));
      }
    }
    doc.add(new BinaryDocValuesField(indexFieldName, new BytesRef(buf, 0, len)));
  }
}
Use of org.apache.lucene.facet.taxonomy.FacetLabel in the apache/lucene-solr project: class DirectoryTaxonomyWriter, method internalAddCategory.
/**
 * Adds {@code cp} — and, recursively, any of its ancestors that are not yet
 * indexed — to the taxonomy (index and cache), returning the new ordinal.
 * The recursion guarantees the invariant that a parent category is always
 * written before its child.
 */
private int internalAddCategory(FacetLabel cp) throws IOException {
  // Resolve the parent ordinal first; it is stored alongside the new
  // category document (as payloads rather than a stored field, since
  // payloads can be bulk-read more efficiently by the taxonomy reader).
  final int parent;
  if (cp.length > 1) {
    FacetLabel ancestor = cp.subpath(cp.length - 1);
    int found = findCategory(ancestor);
    // A negative result means the ancestor is missing: add it recursively.
    parent = found < 0 ? internalAddCategory(ancestor) : found;
  } else if (cp.length == 1) {
    // Top-level category: its parent is the taxonomy root.
    parent = TaxonomyReader.ROOT_ORDINAL;
  } else {
    // Empty path: this is the root itself, which has no parent.
    parent = TaxonomyReader.INVALID_ORDINAL;
  }
  return addCategoryDocument(cp, parent);
}
Use of org.apache.lucene.facet.taxonomy.FacetLabel in the apache/lucene-solr project: class DirectoryTaxonomyWriter, method perhapsFillCache.
// Fills the category cache from the index once enough cache misses have
// accumulated. We need to guarantee that if several threads call this
// concurrently, only one executes it, and after it returns, the cache is
// updated and is either complete or not (hence the method is synchronized).
private synchronized void perhapsFillCache() throws IOException {
if (cacheMisses.get() < cacheMissesUntilFill) {
// Not enough misses yet to justify a full scan of the index.
return;
}
if (!shouldFillCache) {
// we already filled the cache once, there's no need to re-fill it
return;
}
shouldFillCache = false;
initReaderManager();
// Set to true if the cache fills up before the whole index is consumed,
// in which case the cache is left incomplete.
boolean aborted = false;
DirectoryReader reader = readerManager.acquire();
try {
PostingsEnum postingsEnum = null;
for (LeafReaderContext ctx : reader.leaves()) {
Terms terms = ctx.reader().terms(Consts.FULL);
if (terms != null) {
// cannot really happen, but be on the safe side
// TODO: share per-segment TermsEnum here!
TermsEnum termsEnum = terms.iterator();
while (termsEnum.next() != null) {
if (!cache.isFull()) {
BytesRef t = termsEnum.term();
// Since we guarantee uniqueness of categories, each term has exactly
// one document. Also, since we do not allow removing categories (and
// hence documents), there are no deletions in the index. Therefore, it
// is sufficient to call next(), and then doc(), exactly once with no
// 'validation' checks.
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(t.utf8ToString()));
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
// docBase converts the segment-local doc id to a global ordinal.
boolean res = cache.put(cp, postingsEnum.nextDoc() + ctx.docBase);
assert !res : "entries should not have been evicted from the cache";
} else {
// the cache is full and the next put() will evict entries from it, therefore abort the iteration.
aborted = true;
break;
}
}
}
if (aborted) {
break;
}
}
} finally {
readerManager.release(reader);
}
// The cache is only complete if we consumed every term without aborting.
cacheIsComplete = !aborted;
if (cacheIsComplete) {
synchronized (this) {
// everything is in the cache, so no need to keep readerManager open.
// this block is executed in a sync block so that it works well with
// initReaderManager called in parallel.
readerManager.close();
readerManager = null;
initializedReaderManager = false;
}
}
}
Aggregations