use of org.apache.lucene.spatial.prefix.tree.CellIterator in project lucene-solr by apache.
the class TermQueryPrefixTreeStrategy method makeQuery.
@Override
public Query makeQuery(SpatialArgs args) {
final SpatialOperation op = args.getOperation();
if (op != SpatialOperation.Intersects)
throw new UnsupportedSpatialOperation(op);
Shape shape = args.getShape();
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
//--get a List of BytesRef for each term we want (no parents, no leaf bytes))
final int GUESS_NUM_TERMS;
if (shape instanceof Point)
//perfect guess
GUESS_NUM_TERMS = detailLevel;
else
//should this be a method on SpatialPrefixTree?
GUESS_NUM_TERMS = 4096;
//shared byte array for all terms
BytesRefBuilder masterBytes = new BytesRefBuilder();
List<BytesRef> terms = new ArrayList<>(GUESS_NUM_TERMS);
CellIterator cells = grid.getTreeCellIterator(shape, detailLevel);
while (cells.hasNext()) {
Cell cell = cells.next();
if (!cell.isLeaf())
continue;
//null because we want a new BytesRef
BytesRef term = cell.getTokenBytesNoLeaf(null);
//We copy out the bytes because it may be re-used across the iteration. This also gives us the opportunity
// to use one contiguous block of memory for the bytes of all terms we need.
masterBytes.grow(masterBytes.length() + term.length);
masterBytes.append(term);
//don't need; will reset later
term.bytes = null;
term.offset = masterBytes.length() - term.length;
terms.add(term);
}
//doing this now because if we did earlier, it's possible the bytes needed to grow()
for (BytesRef byteRef : terms) {
byteRef.bytes = masterBytes.bytes();
}
//TODO an automatonQuery might be faster?
return new TermInSetQuery(getFieldName(), terms);
}
use of org.apache.lucene.spatial.prefix.tree.CellIterator in project lucene-solr by apache.
the class WithinPrefixTreeQuery method getDocIdSet.
@Override
protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
return new VisitorTemplate(context) {
private FixedBitSet inside;
private FixedBitSet outside;
@Override
protected void start() {
inside = new FixedBitSet(maxDoc);
outside = new FixedBitSet(maxDoc);
}
@Override
protected DocIdSet finish() {
inside.andNot(outside);
return new BitDocIdSet(inside);
}
@Override
protected CellIterator findSubCellsToVisit(Cell cell) {
//use buffered query shape instead of orig. Works with null too.
return cell.getNextLevelCells(bufferedQueryShape);
}
@Override
protected boolean visitPrefix(Cell cell) throws IOException {
//cell.relate is based on the bufferedQueryShape; we need to examine what
// the relation is against the queryShape
SpatialRelation visitRelation = cell.getShape().relate(queryShape);
if (cell.getLevel() == detailLevel) {
collectDocs(visitRelation.intersects() ? inside : outside);
return false;
} else if (visitRelation == SpatialRelation.WITHIN) {
collectDocs(inside);
return false;
} else if (visitRelation == SpatialRelation.DISJOINT) {
collectDocs(outside);
return false;
}
return true;
}
@Override
protected void visitLeaf(Cell cell) throws IOException {
if (allCellsIntersectQuery(cell))
collectDocs(inside);
else
collectDocs(outside);
}
/** Returns true if the provided cell, and all its sub-cells down to
* detailLevel all intersect the queryShape.
*/
private boolean allCellsIntersectQuery(Cell cell) {
SpatialRelation relate = cell.getShape().relate(queryShape);
if (cell.getLevel() == detailLevel)
return relate.intersects();
if (relate == SpatialRelation.WITHIN)
return true;
if (relate == SpatialRelation.DISJOINT)
return false;
// Note: Generating all these cells just to determine intersection is not ideal.
// The real solution is LUCENE-4869.
CellIterator subCells = cell.getNextLevelCells(null);
while (subCells.hasNext()) {
Cell subCell = subCells.next();
if (//recursion
!allCellsIntersectQuery(subCell))
return false;
}
return true;
}
@Override
protected void visitScanned(Cell cell) throws IOException {
//collects as we want, even if not a leaf
visitLeaf(cell);
// if (cell.isLeaf()) {
// visitLeaf(cell);
// } else {
// visitPrefix(cell);
// }
}
}.getDocIdSet();
}
use of org.apache.lucene.spatial.prefix.tree.CellIterator in project lucene-solr by apache.
the class HeatmapFacetCounter method calcFacets.
/**
* Calculates spatial 2D facets (aggregated counts) in a grid, sometimes called a heatmap.
* Facet computation is implemented by navigating the underlying indexed terms efficiently. If you don't know exactly
* what facetLevel to go to for a given input box but you have some sense of how many cells there should be relative
* to the size of the shape, then consider using the logic that {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}
* uses when approximating what level to go to when indexing a shape given a distErrPct.
*
* @param context the IndexReader's context
* @param topAcceptDocs a Bits to limit counted docs. If null, live docs are counted.
* @param inputShape the shape to gather grid squares for; typically a {@link Rectangle}.
* The <em>actual</em> heatmap area will usually be larger since the cells on the edge that overlap
* are returned. We always return a rectangle of integers even if the inputShape isn't a rectangle
* -- the non-intersecting cells will all be 0.
* If null is given, the entire world is assumed.
* @param facetLevel the target depth (detail) of cells.
* @param maxCells the maximum number of cells to return. If the cells exceed this count, an
*/
public static Heatmap calcFacets(PrefixTreeStrategy strategy, IndexReaderContext context, Bits topAcceptDocs, Shape inputShape, final int facetLevel, int maxCells) throws IOException {
if (maxCells > (MAX_ROWS_OR_COLUMNS * MAX_ROWS_OR_COLUMNS)) {
throw new IllegalArgumentException("maxCells (" + maxCells + ") should be <= " + MAX_ROWS_OR_COLUMNS);
}
if (inputShape == null) {
inputShape = strategy.getSpatialContext().getWorldBounds();
}
final Rectangle inputRect = inputShape.getBoundingBox();
//First get the rect of the cell at the bottom-left at depth facetLevel
final SpatialPrefixTree grid = strategy.getGrid();
final SpatialContext ctx = grid.getSpatialContext();
final Point cornerPt = ctx.makePoint(inputRect.getMinX(), inputRect.getMinY());
final CellIterator cellIterator = grid.getTreeCellIterator(cornerPt, facetLevel);
Cell cornerCell = null;
while (cellIterator.hasNext()) {
cornerCell = cellIterator.next();
}
assert cornerCell != null && cornerCell.getLevel() == facetLevel : "Cell not at target level: " + cornerCell;
final Rectangle cornerRect = (Rectangle) cornerCell.getShape();
assert cornerRect.hasArea();
//Now calculate the number of columns and rows necessary to cover the inputRect
//note: we might change this below...
double heatMinX = cornerRect.getMinX();
final double cellWidth = cornerRect.getWidth();
final Rectangle worldRect = ctx.getWorldBounds();
final int columns = calcRowsOrCols(cellWidth, heatMinX, inputRect.getWidth(), inputRect.getMinX(), worldRect.getWidth());
final double heatMinY = cornerRect.getMinY();
final double cellHeight = cornerRect.getHeight();
final int rows = calcRowsOrCols(cellHeight, heatMinY, inputRect.getHeight(), inputRect.getMinY(), worldRect.getHeight());
assert rows > 0 && columns > 0;
if (columns > MAX_ROWS_OR_COLUMNS || rows > MAX_ROWS_OR_COLUMNS || columns * rows > maxCells) {
throw new IllegalArgumentException("Too many cells (" + columns + " x " + rows + ") for level " + facetLevel + " shape " + inputRect);
}
//Create resulting heatmap bounding rectangle & Heatmap object.
final double halfCellWidth = cellWidth / 2.0;
// if X world-wraps, use world bounds' range
if (columns * cellWidth + halfCellWidth > worldRect.getWidth()) {
heatMinX = worldRect.getMinX();
}
double heatMaxX = heatMinX + columns * cellWidth;
if (Math.abs(heatMaxX - worldRect.getMaxX()) < halfCellWidth) {
//numeric conditioning issue
heatMaxX = worldRect.getMaxX();
} else if (heatMaxX > worldRect.getMaxX()) {
//wraps dateline (won't happen if !geo)
heatMaxX = heatMaxX - worldRect.getMaxX() + worldRect.getMinX();
}
final double halfCellHeight = cellHeight / 2.0;
double heatMaxY = heatMinY + rows * cellHeight;
if (Math.abs(heatMaxY - worldRect.getMaxY()) < halfCellHeight) {
//numeric conditioning issue
heatMaxY = worldRect.getMaxY();
}
final Heatmap heatmap = new Heatmap(columns, rows, ctx.makeRectangle(heatMinX, heatMaxX, heatMinY, heatMaxY));
if (topAcceptDocs instanceof Bits.MatchNoBits) {
// short-circuit
return heatmap;
}
//All ancestor cell counts (of facetLevel) will be captured during facet visiting and applied later. If the data is
// just points then there won't be any ancestors.
//Facet count of ancestors covering all of the heatmap:
// single-element array so it can be accumulated in the inner class
int[] allCellsAncestorCount = new int[1];
//All other ancestors:
Map<Rectangle, Integer> ancestors = new HashMap<>();
//Now lets count some facets!
PrefixTreeFacetCounter.compute(strategy, context, topAcceptDocs, inputShape, facetLevel, new PrefixTreeFacetCounter.FacetVisitor() {
@Override
public void visit(Cell cell, int count) {
final double heatMinX = heatmap.region.getMinX();
final Rectangle rect = (Rectangle) cell.getShape();
if (cell.getLevel() == facetLevel) {
//heatmap level; count it directly
//convert to col & row
int column;
if (rect.getMinX() >= heatMinX) {
column = (int) Math.round((rect.getMinX() - heatMinX) / cellWidth);
} else {
// due to dateline wrap
column = (int) Math.round((rect.getMinX() + 360 - heatMinX) / cellWidth);
}
int row = (int) Math.round((rect.getMinY() - heatMinY) / cellHeight);
// allows adjacent cells to overlap on the seam), so we need to skip them
if (column < 0 || column >= heatmap.columns || row < 0 || row >= heatmap.rows) {
return;
}
// increment
heatmap.counts[column * heatmap.rows + row] += count;
} else if (rect.relate(heatmap.region) == SpatialRelation.CONTAINS) {
//containing ancestor
allCellsAncestorCount[0] += count;
} else {
// ancestor
// note: not particularly efficient (possible put twice, and Integer wrapper); oh well
Integer existingCount = ancestors.put(rect, count);
if (existingCount != null) {
ancestors.put(rect, count + existingCount);
}
}
}
});
// Apply allCellsAncestorCount
if (allCellsAncestorCount[0] > 0) {
for (int i = 0; i < heatmap.counts.length; i++) {
heatmap.counts[i] += allCellsAncestorCount[0];
}
}
// Apply ancestors
// note: This approach isn't optimized for a ton of ancestor cells. We'll potentially increment the same cells
// multiple times in separate passes if any ancestors overlap. IF this poses a problem, we could optimize it
// with additional complication by keeping track of intervals in a sorted tree structure (possible TreeMap/Set)
// and iterate them cleverly such that we just make one pass at this stage.
//output of intersectInterval
int[] pair = new int[2];
for (Map.Entry<Rectangle, Integer> entry : ancestors.entrySet()) {
// from a cell (thus doesn't cross DL)
Rectangle rect = entry.getKey();
final int count = entry.getValue();
//note: we approach this in a way that eliminates int overflow/underflow (think huge cell, tiny heatmap)
intersectInterval(heatMinY, heatMaxY, cellHeight, rows, rect.getMinY(), rect.getMaxY(), pair);
final int startRow = pair[0];
final int endRow = pair[1];
if (!heatmap.region.getCrossesDateLine()) {
intersectInterval(heatMinX, heatMaxX, cellWidth, columns, rect.getMinX(), rect.getMaxX(), pair);
final int startCol = pair[0];
final int endCol = pair[1];
incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
} else {
// note: the cell rect might intersect 2 disjoint parts of the heatmap, so we do the left & right separately
final int leftColumns = (int) Math.round((180 - heatMinX) / cellWidth);
final int rightColumns = heatmap.columns - leftColumns;
//left half of dateline:
if (rect.getMaxX() > heatMinX) {
intersectInterval(heatMinX, 180, cellWidth, leftColumns, rect.getMinX(), rect.getMaxX(), pair);
final int startCol = pair[0];
final int endCol = pair[1];
incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
}
//right half of dateline
if (rect.getMinX() < heatMaxX) {
intersectInterval(-180, heatMaxX, cellWidth, rightColumns, rect.getMinX(), rect.getMaxX(), pair);
final int startCol = pair[0] + leftColumns;
final int endCol = pair[1] + leftColumns;
incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
}
}
}
return heatmap;
}
use of org.apache.lucene.spatial.prefix.tree.CellIterator in project lucene-solr by apache.
the class NumberRangeFacetsTest method test.
@Repeat(iterations = 20)
@Test
public void test() throws IOException {
//generate test data
List<Shape> indexedShapes = new ArrayList<>();
final int numIndexedShapes = random().nextInt(15);
for (int i = 0; i < numIndexedShapes; i++) {
indexedShapes.add(randomShape());
}
//Main index loop:
for (int i = 0; i < indexedShapes.size(); i++) {
Shape shape = indexedShapes.get(i);
adoc("" + i, shape);
if (random().nextInt(10) == 0)
//intermediate commit, produces extra segments
commit();
}
//delete some documents randomly
for (int id = 0; id < indexedShapes.size(); id++) {
if (random().nextInt(10) == 0) {
deleteDoc("" + id);
indexedShapes.set(id, null);
}
}
commit();
//Main query loop:
for (int queryIdx = 0; queryIdx < 10; queryIdx++) {
preQueryHavoc();
// We need to have a facet range window to do the facets between (a start time & end time). We randomly
// pick a date, decide the level we want to facet on, and then pick a right end time that is up to 2 thousand
// values later.
int calFieldFacet = randomCalWindowField - 1;
if (calFieldFacet > 1 && rarely()) {
calFieldFacet--;
}
final Calendar leftCal = randomCalendar();
leftCal.add(calFieldFacet, -1 * randomInt(1000));
Calendar rightCal = (Calendar) leftCal.clone();
rightCal.add(calFieldFacet, randomInt(2000));
// Pick facet detail level based on cal field.
int detailLevel = tree.getTreeLevelForCalendarField(calFieldFacet);
if (detailLevel < 0) {
//no exact match
detailLevel = -1 * detailLevel;
}
//Randomly pick a filter/acceptDocs
Bits topAcceptDocs = null;
List<Integer> acceptFieldIds = new ArrayList<>();
if (usually()) {
// replace the list.
for (int i = 0; i < indexedShapes.size(); i++) {
if (indexedShapes.get(i) == null) {
// we deleted this one
continue;
}
acceptFieldIds.add(i);
}
Collections.shuffle(acceptFieldIds, random());
acceptFieldIds = acceptFieldIds.subList(0, randomInt(acceptFieldIds.size()));
if (!acceptFieldIds.isEmpty()) {
List<BytesRef> terms = new ArrayList<>();
for (Integer acceptDocId : acceptFieldIds) {
terms.add(new BytesRef(acceptDocId.toString()));
}
topAcceptDocs = searchForDocBits(new TermInSetQuery("id", terms));
}
}
//Lets do it!
NumberRangePrefixTree.NRShape facetRange = tree.toRangeShape(tree.toShape(leftCal), tree.toShape(rightCal));
Facets facets = ((NumberRangePrefixTreeStrategy) strategy).calcFacets(indexSearcher.getTopReaderContext(), topAcceptDocs, facetRange, detailLevel);
//System.out.println("Q: " + queryIdx + " " + facets);
//Verify results. We do it by looping over indexed shapes and reducing the facet counts.
Shape facetShapeRounded = facetRange.roundToLevel(detailLevel);
for (int indexedShapeId = 0; indexedShapeId < indexedShapes.size(); indexedShapeId++) {
if (topAcceptDocs != null && !acceptFieldIds.contains(indexedShapeId)) {
// this doc was filtered out via acceptDocs
continue;
}
Shape indexedShape = indexedShapes.get(indexedShapeId);
if (indexedShape == null) {
//was deleted
continue;
}
Shape indexedShapeRounded = ((NumberRangePrefixTree.NRShape) indexedShape).roundToLevel(detailLevel);
if (!indexedShapeRounded.relate(facetShapeRounded).intersects()) {
// no intersection at all
continue;
}
// walk the cells
final CellIterator cellIterator = tree.getTreeCellIterator(indexedShape, detailLevel);
while (cellIterator.hasNext()) {
Cell cell = cellIterator.next();
if (!cell.getShape().relate(facetShapeRounded).intersects()) {
//no intersection; prune
cellIterator.remove();
continue;
}
assert cell.getLevel() <= detailLevel;
if (cell.getLevel() == detailLevel) {
//count it
UnitNRShape shape = (UnitNRShape) cell.getShape();
//get parent
final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1);
final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
assertNotNull(facetParentVal);
int index = shape.getValAtLevel(shape.getLevel());
assertNotNull(facetParentVal.childCounts);
assert facetParentVal.childCounts[index] > 0;
facetParentVal.childCounts[index]--;
} else if (cell.isLeaf()) {
//count it, and remove/prune.
if (cell.getLevel() < detailLevel - 1) {
assert facets.topLeaves > 0;
facets.topLeaves--;
} else {
UnitNRShape shape = (UnitNRShape) cell.getShape();
//get parent
final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1);
final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
assertNotNull(facetParentVal);
assert facetParentVal.parentLeaves > 0;
facetParentVal.parentLeaves--;
}
cellIterator.remove();
}
}
}
// At this point; all counts should be down to zero.
assertTrue(facets.topLeaves == 0);
for (Facets.FacetParentVal facetParentVal : facets.parents.values()) {
assertTrue(facetParentVal.parentLeaves == 0);
if (facetParentVal.childCounts != null) {
for (int childCount : facetParentVal.childCounts) {
assertTrue(childCount == 0);
}
}
}
}
}
use of org.apache.lucene.spatial.prefix.tree.CellIterator in project lucene-solr by apache.
the class RandomSpatialOpFuzzyPrefixTreeTest method gridSnap.
// private Rectangle inset(Rectangle r) {
// //typically inset by 1 (whole numbers are easy to read)
// double d = Math.min(1.0, grid.getDistanceForLevel(grid.getMaxLevels()) / 4);
// return ctx.makeRectangle(r.getMinX() + d, r.getMaxX() - d, r.getMinY() + d, r.getMaxY() - d);
// }
protected Shape gridSnap(Shape snapMe) {
if (snapMe == null)
return null;
if (snapMe instanceof ShapePair) {
ShapePair me = (ShapePair) snapMe;
return new ShapePair(gridSnap(me.shape1), gridSnap(me.shape2), me.biasContainsThenWithin);
}
if (snapMe instanceof Point) {
snapMe = snapMe.getBoundingBox();
}
//The next 4 lines mimic PrefixTreeStrategy.createIndexableFields()
double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct();
double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx);
int detailLevel = grid.getLevelForDistance(distErr);
CellIterator cells = grid.getTreeCellIterator(snapMe, detailLevel);
//calc bounding box of cells.
List<Shape> cellShapes = new ArrayList<>(1024);
while (cells.hasNext()) {
Cell cell = cells.next();
if (!cell.isLeaf())
continue;
cellShapes.add(cell.getShape());
}
return new ShapeCollection<>(cellShapes, ctx).getBoundingBox();
}
Aggregations