use of org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree in project lucene-solr by apache.
the class SpatialDocMaker method makeRPTStrategy.
protected RecursivePrefixTreeStrategy makeRPTStrategy(String spatialField, Config config, Map<String, String> configMap, SpatialContext ctx) {
//A factory for the prefix tree grid
SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx);
RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, spatialField);
strategy.setPointsOnly(config.get("spatial.docPointsOnly", false));
final boolean pruneLeafyBranches = config.get("spatial.pruneLeafyBranches", true);
if (grid instanceof PackedQuadPrefixTree) {
((PackedQuadPrefixTree) grid).setPruneLeafyBranches(pruneLeafyBranches);
//always leave it to packed grid, even though it isn't the same
strategy.setPruneLeafyBranches(false);
} else {
strategy.setPruneLeafyBranches(pruneLeafyBranches);
}
int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4);
if (prefixGridScanLevel < 0)
prefixGridScanLevel = grid.getMaxLevels() + prefixGridScanLevel;
strategy.setPrefixGridScanLevel(prefixGridScanLevel);
//doc & query; a default
double distErrPct = config.get("spatial.distErrPct", .025);
strategy.setDistErrPct(distErrPct);
return strategy;
}
use of org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree in project lucene-solr by apache.
the class SpatialExample method init.
protected void init() {
//Typical geospatial context
// These can also be constructed from SpatialContextFactory
this.ctx = SpatialContext.GEO;
//results in sub-meter precision for geohash
int maxLevels = 11;
//TODO demo lookup by detail distance
// This can also be constructed from SpatialPrefixTreeFactory
SpatialPrefixTree grid = new GeohashPrefixTree(ctx, maxLevels);
this.strategy = new RecursivePrefixTreeStrategy(grid, "myGeoField");
this.directory = new RAMDirectory();
}
use of org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree in project lucene-solr by apache.
the class DistanceStrategyTest method parameters.
@ParametersFactory(argumentFormatting = "strategy=%s")
public static Iterable<Object[]> parameters() {
List<Object[]> ctorArgs = new ArrayList<>();
SpatialContext ctx = SpatialContext.GEO;
SpatialPrefixTree grid;
SpatialStrategy strategy;
grid = new QuadPrefixTree(ctx, 25);
strategy = new RecursivePrefixTreeStrategy(grid, "recursive_quad");
ctorArgs.add(new Object[] { strategy.getFieldName(), strategy });
grid = new GeohashPrefixTree(ctx, 12);
strategy = new TermQueryPrefixTreeStrategy(grid, "termquery_geohash");
ctorArgs.add(new Object[] { strategy.getFieldName(), strategy });
grid = new PackedQuadPrefixTree(ctx, 25);
strategy = new RecursivePrefixTreeStrategy(grid, "recursive_packedquad");
ctorArgs.add(new Object[] { strategy.getFieldName(), strategy });
strategy = PointVectorStrategy.newInstance(ctx, "pointvector");
ctorArgs.add(new Object[] { strategy.getFieldName(), strategy });
// Can't test this without un-inverting since PVS legacy config didn't have docValues.
// However, note that Solr's tests use UninvertingReader and thus test this.
// strategy = PointVectorStrategy.newLegacyInstance(ctx, "pointvector_legacy");
// ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});
strategy = BBoxStrategy.newInstance(ctx, "bbox");
ctorArgs.add(new Object[] { strategy.getFieldName(), strategy });
strategy = new SerializedDVStrategy(ctx, "serialized");
ctorArgs.add(new Object[] { strategy.getFieldName(), strategy });
return ctorArgs;
}
use of org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree in project lucene-solr by apache.
the class PrefixTreeFacetCounter method compute.
/** Lower-level per-leaf segment method. */
public static void compute(final PrefixTreeStrategy strategy, final LeafReaderContext context, final Bits acceptDocs, final Shape queryShape, final int facetLevel, final FacetVisitor facetVisitor) throws IOException {
if (acceptDocs != null && acceptDocs.length() != context.reader().maxDoc()) {
throw new IllegalArgumentException("acceptDocs bits length " + acceptDocs.length() + " != leaf maxdoc " + context.reader().maxDoc());
}
final SpatialPrefixTree tree = strategy.getGrid();
//scanLevel is an optimization knob of AbstractVisitingPrefixTreeFilter. It's unlikely
// another scanLevel would be much faster and it tends to be a risky knob (can help a little, can hurt a ton).
// TODO use RPT's configured scan level? Do we know better here? Hard to say.
final int scanLevel = tree.getMaxLevels();
//AbstractVisitingPrefixTreeFilter is a Lucene Filter. We don't need a filter; we use it for its great prefix-tree
// traversal code. TODO consider refactoring if/when it makes sense (more use cases than this)
new AbstractVisitingPrefixTreeQuery(queryShape, strategy.getFieldName(), tree, facetLevel, scanLevel) {
@Override
public String toString(String field) {
//un-used
return "anonPrefixTreeQuery";
}
@Override
public DocIdSet getDocIdSet(LeafReaderContext contexts) throws IOException {
//same thing, FYI. (constant)
assert facetLevel == super.detailLevel;
return new VisitorTemplate(context) {
@Override
protected void start() throws IOException {
facetVisitor.startOfSegment();
}
@Override
protected DocIdSet finish() throws IOException {
//unused;
return null;
}
@Override
protected boolean visitPrefix(Cell cell) throws IOException {
// At facetLevel...
if (cell.getLevel() == facetLevel) {
// Count docs
//we're not a leaf but we treat it as such at facet level
visitLeaf(cell);
//don't descend further; this is enough detail
return false;
}
//TODO this opt should move to VisitorTemplate (which contains an optimization TODO to this effect)
if (cell.getLevel() == facetLevel - 1 || termsEnum.docFreq() == 1) {
if (!hasDocsAtThisTerm()) {
return false;
}
}
return true;
}
@Override
protected void visitLeaf(Cell cell) throws IOException {
final int count = countDocsAtThisTerm();
if (count > 0) {
facetVisitor.visit(cell, count);
}
}
private int countDocsAtThisTerm() throws IOException {
if (acceptDocs == null) {
return termsEnum.docFreq();
}
int count = 0;
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
if (acceptDocs.get(postingsEnum.docID()) == false) {
continue;
}
count++;
}
return count;
}
private boolean hasDocsAtThisTerm() throws IOException {
if (acceptDocs == null) {
return true;
}
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
int nextDoc = postingsEnum.nextDoc();
while (nextDoc != DocIdSetIterator.NO_MORE_DOCS && acceptDocs.get(nextDoc) == false) {
nextDoc = postingsEnum.nextDoc();
}
return nextDoc != DocIdSetIterator.NO_MORE_DOCS;
}
}.getDocIdSet();
}
}.getDocIdSet(context);
}
use of org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree in project lucene-solr by apache.
the class HeatmapFacetCounter method calcFacets.
/**
* Calculates spatial 2D facets (aggregated counts) in a grid, sometimes called a heatmap.
* Facet computation is implemented by navigating the underlying indexed terms efficiently. If you don't know exactly
* what facetLevel to go to for a given input box but you have some sense of how many cells there should be relative
* to the size of the shape, then consider using the logic that {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}
* uses when approximating what level to go to when indexing a shape given a distErrPct.
*
* @param context the IndexReader's context
* @param topAcceptDocs a Bits to limit counted docs. If null, live docs are counted.
* @param inputShape the shape to gather grid squares for; typically a {@link Rectangle}.
* The <em>actual</em> heatmap area will usually be larger since the cells on the edge that overlap
* are returned. We always return a rectangle of integers even if the inputShape isn't a rectangle
* -- the non-intersecting cells will all be 0.
* If null is given, the entire world is assumed.
* @param facetLevel the target depth (detail) of cells.
* @param maxCells the maximum number of cells to return. If the cells exceed this count, an
*/
public static Heatmap calcFacets(PrefixTreeStrategy strategy, IndexReaderContext context, Bits topAcceptDocs, Shape inputShape, final int facetLevel, int maxCells) throws IOException {
if (maxCells > (MAX_ROWS_OR_COLUMNS * MAX_ROWS_OR_COLUMNS)) {
throw new IllegalArgumentException("maxCells (" + maxCells + ") should be <= " + MAX_ROWS_OR_COLUMNS);
}
if (inputShape == null) {
inputShape = strategy.getSpatialContext().getWorldBounds();
}
final Rectangle inputRect = inputShape.getBoundingBox();
//First get the rect of the cell at the bottom-left at depth facetLevel
final SpatialPrefixTree grid = strategy.getGrid();
final SpatialContext ctx = grid.getSpatialContext();
final Point cornerPt = ctx.makePoint(inputRect.getMinX(), inputRect.getMinY());
final CellIterator cellIterator = grid.getTreeCellIterator(cornerPt, facetLevel);
Cell cornerCell = null;
while (cellIterator.hasNext()) {
cornerCell = cellIterator.next();
}
assert cornerCell != null && cornerCell.getLevel() == facetLevel : "Cell not at target level: " + cornerCell;
final Rectangle cornerRect = (Rectangle) cornerCell.getShape();
assert cornerRect.hasArea();
//Now calculate the number of columns and rows necessary to cover the inputRect
//note: we might change this below...
double heatMinX = cornerRect.getMinX();
final double cellWidth = cornerRect.getWidth();
final Rectangle worldRect = ctx.getWorldBounds();
final int columns = calcRowsOrCols(cellWidth, heatMinX, inputRect.getWidth(), inputRect.getMinX(), worldRect.getWidth());
final double heatMinY = cornerRect.getMinY();
final double cellHeight = cornerRect.getHeight();
final int rows = calcRowsOrCols(cellHeight, heatMinY, inputRect.getHeight(), inputRect.getMinY(), worldRect.getHeight());
assert rows > 0 && columns > 0;
if (columns > MAX_ROWS_OR_COLUMNS || rows > MAX_ROWS_OR_COLUMNS || columns * rows > maxCells) {
throw new IllegalArgumentException("Too many cells (" + columns + " x " + rows + ") for level " + facetLevel + " shape " + inputRect);
}
//Create resulting heatmap bounding rectangle & Heatmap object.
final double halfCellWidth = cellWidth / 2.0;
// if X world-wraps, use world bounds' range
if (columns * cellWidth + halfCellWidth > worldRect.getWidth()) {
heatMinX = worldRect.getMinX();
}
double heatMaxX = heatMinX + columns * cellWidth;
if (Math.abs(heatMaxX - worldRect.getMaxX()) < halfCellWidth) {
//numeric conditioning issue
heatMaxX = worldRect.getMaxX();
} else if (heatMaxX > worldRect.getMaxX()) {
//wraps dateline (won't happen if !geo)
heatMaxX = heatMaxX - worldRect.getMaxX() + worldRect.getMinX();
}
final double halfCellHeight = cellHeight / 2.0;
double heatMaxY = heatMinY + rows * cellHeight;
if (Math.abs(heatMaxY - worldRect.getMaxY()) < halfCellHeight) {
//numeric conditioning issue
heatMaxY = worldRect.getMaxY();
}
final Heatmap heatmap = new Heatmap(columns, rows, ctx.makeRectangle(heatMinX, heatMaxX, heatMinY, heatMaxY));
if (topAcceptDocs instanceof Bits.MatchNoBits) {
// short-circuit
return heatmap;
}
//All ancestor cell counts (of facetLevel) will be captured during facet visiting and applied later. If the data is
// just points then there won't be any ancestors.
//Facet count of ancestors covering all of the heatmap:
// single-element array so it can be accumulated in the inner class
int[] allCellsAncestorCount = new int[1];
//All other ancestors:
Map<Rectangle, Integer> ancestors = new HashMap<>();
//Now lets count some facets!
PrefixTreeFacetCounter.compute(strategy, context, topAcceptDocs, inputShape, facetLevel, new PrefixTreeFacetCounter.FacetVisitor() {
@Override
public void visit(Cell cell, int count) {
final double heatMinX = heatmap.region.getMinX();
final Rectangle rect = (Rectangle) cell.getShape();
if (cell.getLevel() == facetLevel) {
//heatmap level; count it directly
//convert to col & row
int column;
if (rect.getMinX() >= heatMinX) {
column = (int) Math.round((rect.getMinX() - heatMinX) / cellWidth);
} else {
// due to dateline wrap
column = (int) Math.round((rect.getMinX() + 360 - heatMinX) / cellWidth);
}
int row = (int) Math.round((rect.getMinY() - heatMinY) / cellHeight);
// allows adjacent cells to overlap on the seam), so we need to skip them
if (column < 0 || column >= heatmap.columns || row < 0 || row >= heatmap.rows) {
return;
}
// increment
heatmap.counts[column * heatmap.rows + row] += count;
} else if (rect.relate(heatmap.region) == SpatialRelation.CONTAINS) {
//containing ancestor
allCellsAncestorCount[0] += count;
} else {
// ancestor
// note: not particularly efficient (possible put twice, and Integer wrapper); oh well
Integer existingCount = ancestors.put(rect, count);
if (existingCount != null) {
ancestors.put(rect, count + existingCount);
}
}
}
});
// Apply allCellsAncestorCount
if (allCellsAncestorCount[0] > 0) {
for (int i = 0; i < heatmap.counts.length; i++) {
heatmap.counts[i] += allCellsAncestorCount[0];
}
}
// Apply ancestors
// note: This approach isn't optimized for a ton of ancestor cells. We'll potentially increment the same cells
// multiple times in separate passes if any ancestors overlap. IF this poses a problem, we could optimize it
// with additional complication by keeping track of intervals in a sorted tree structure (possible TreeMap/Set)
// and iterate them cleverly such that we just make one pass at this stage.
//output of intersectInterval
int[] pair = new int[2];
for (Map.Entry<Rectangle, Integer> entry : ancestors.entrySet()) {
// from a cell (thus doesn't cross DL)
Rectangle rect = entry.getKey();
final int count = entry.getValue();
//note: we approach this in a way that eliminates int overflow/underflow (think huge cell, tiny heatmap)
intersectInterval(heatMinY, heatMaxY, cellHeight, rows, rect.getMinY(), rect.getMaxY(), pair);
final int startRow = pair[0];
final int endRow = pair[1];
if (!heatmap.region.getCrossesDateLine()) {
intersectInterval(heatMinX, heatMaxX, cellWidth, columns, rect.getMinX(), rect.getMaxX(), pair);
final int startCol = pair[0];
final int endCol = pair[1];
incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
} else {
// note: the cell rect might intersect 2 disjoint parts of the heatmap, so we do the left & right separately
final int leftColumns = (int) Math.round((180 - heatMinX) / cellWidth);
final int rightColumns = heatmap.columns - leftColumns;
//left half of dateline:
if (rect.getMaxX() > heatMinX) {
intersectInterval(heatMinX, 180, cellWidth, leftColumns, rect.getMinX(), rect.getMaxX(), pair);
final int startCol = pair[0];
final int endCol = pair[1];
incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
}
//right half of dateline
if (rect.getMinX() < heatMaxX) {
intersectInterval(-180, heatMaxX, cellWidth, rightColumns, rect.getMinX(), rect.getMaxX(), pair);
final int startCol = pair[0] + leftColumns;
final int endCol = pair[1] + leftColumns;
incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
}
}
}
return heatmap;
}
Aggregations