Use of org.apache.phoenix.schema.stats.GuidePostsInfo in the Apache Phoenix project.
From class TestUtil, method getGuidePostsList:
/**
 * Returns the guide post statistics registered for the table targeted by a
 * {@code SELECT COUNT(*)} query over {@code tableName}.
 * <p>
 * The query is executed only so that a query plan is available; the table is taken from
 * that plan's table reference and its statistics are looked up through the connection's
 * query services, keyed by the table name and the table's empty column family.
 *
 * @param conn connection used to prepare the query; must unwrap to a {@code PhoenixConnection}
 * @param tableName name of the table to query
 * @param pkCol primary key column the optional range bounds apply to
 * @param lowerRange inclusive lower bound ({@code pkCol >= ?}), or {@code null} for none
 * @param upperRange exclusive upper bound ({@code pkCol < ?}), or {@code null} for none
 * @param whereClauseSuffix additional predicate, or {@code null} for none; it is ANDed with
 *        the range predicate when both are present
 * @return a singleton collection holding whatever {@code getTableStats} returned for the table
 * @throws SQLException if preparing, binding or executing the query fails
 */
public static Collection<GuidePostsInfo> getGuidePostsList(Connection conn, String tableName, String pkCol, byte[] lowerRange, byte[] upperRange, String whereClauseSuffix) throws SQLException {
    // Range predicate on the primary key column; each non-null bound adds one bind parameter.
    String rangePredicate = "";
    if (lowerRange != null) {
        rangePredicate += pkCol + " >= ? " + (upperRange != null ? " AND " : "");
    }
    if (upperRange != null) {
        rangePredicate += pkCol + " < ?";
    }
    String whereClause;
    if (rangePredicate.isEmpty()) {
        whereClause = whereClauseSuffix == null ? "" : " WHERE " + whereClauseSuffix;
    } else {
        // Keep the range predicate when a suffix is also given. Previously the suffix replaced
        // it, yielding "... FROM T AND <suffix>" with no WHERE and no bind parameters.
        whereClause = " WHERE " + rangePredicate + (whereClauseSuffix == null ? "" : " AND " + whereClauseSuffix);
    }
    String query = "SELECT /*+ NO_INDEX */ COUNT(*) FROM " + tableName + whereClause;
    PhoenixPreparedStatement pstmt = conn.prepareStatement(query).unwrap(PhoenixPreparedStatement.class);
    try {
        int paramIndex = 1;
        if (lowerRange != null) {
            pstmt.setBytes(paramIndex++, lowerRange);
        }
        if (upperRange != null) {
            pstmt.setBytes(paramIndex, upperRange);
        }
        pstmt.execute();
        // The plan is read after execute() and before the statement is closed.
        TableRef tableRef = pstmt.getQueryPlan().getTableRef();
        PhoenixConnection pconn = conn.unwrap(PhoenixConnection.class);
        PTable table = tableRef.getTable();
        GuidePostsInfo info = pconn.getQueryServices().getTableStats(new GuidePostsKey(table.getName().getBytes(), SchemaUtil.getEmptyColumnFamily(table)));
        return Collections.singletonList(info);
    } finally {
        // Release the statement (and the result set opened by execute()).
        pstmt.close();
    }
}
Use of org.apache.phoenix.schema.stats.GuidePostsInfo in the Apache Phoenix project.
From class SkipScanBigFilterTest, method testIntersect:
private void testIntersect(byte[][] regionBoundaries, byte[][] guidePosts) throws Exception {
String ddl = "create table PERF.BIG_OLAP_DOC (\n" + "client_id integer not null\n" + ",customer_id integer\n" + ",time_id integer not null\n" + ",conversion_type_id integer not null\n" + ",device_type varchar(16)\n" + ",keyword_id bigint not null\n" + ",creative_id bigint not null\n" + ",placement_id bigint not null\n" + ",product_target_id bigint not null\n" + ",network varchar(7)\n" + ",impressions decimal(18, 4)\n" + ",publisher_clicks decimal(18, 4)\n" + ",publisher_cost decimal(18, 4)\n" + ",conversions decimal(18, 4)\n" + ",revenue decimal(18, 4)\n" + " constraint perf_fact_pk primary key (client_id, time_id, conversion_type_id, device_type, keyword_id, creative_id, placement_id, product_target_id))SALT_BUCKETS=10";
Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
Connection conn = DriverManager.getConnection(getUrl(), props);
StringBuilder ddlBuf = new StringBuilder(ddl + " SPLIT ON (");
for (int i = 0; i < regionBoundaries.length; i++) {
ddlBuf.append("?,");
}
ddlBuf.setCharAt(ddlBuf.length() - 1, ')');
;
PreparedStatement stmt = conn.prepareStatement(ddlBuf.toString());
int i = 1;
for (byte[] boundary : regionBoundaries) {
stmt.setBytes(i++, boundary);
}
stmt.execute();
final PTable table = conn.unwrap(PhoenixConnection.class).getTable(new PTableKey(null, "PERF.BIG_OLAP_DOC"));
GuidePostsInfoBuilder gpWriter = new GuidePostsInfoBuilder();
for (byte[] gp : guidePosts) {
gpWriter.trackGuidePost(new ImmutableBytesWritable(gp), 1000, 0, 0);
}
GuidePostsInfo info = gpWriter.build();
PhoenixConnection pConn = conn.unwrap(PhoenixConnection.class);
pConn.addTable(table, System.currentTimeMillis());
((ConnectionlessQueryServicesImpl) pConn.getQueryServices()).addTableStats(new GuidePostsKey(table.getName().getBytes(), QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES), info);
String query = "SELECT count(1) cnt,\n" + " coalesce(SUM(impressions), 0.0) AS \"impressions\",\n" + " coalesce(SUM(publisher_clicks), 0.0) AS \"pub_clicks\",\n" + " coalesce(SUM(publisher_cost), 0.0) AS \"pub_cost\",\n" + " coalesce(SUM(conversions), 0.0) AS \"conversions\",\n" + " coalesce(SUM(revenue), 0.0) AS \"revenue\" \n" + " FROM perf.big_olap_doc\n" + " WHERE time_id between 3000 and 3700\n" + " AND network in ('SEARCH')\n" + " AND conversion_type_id = 1\n" + " AND client_id = 10724\n" + " AND device_type in ('MOBILE','DESKTOP','OTHER','TABLET')\n" + " AND keyword_id in (\n" + "613214369, 613217307, 613247509, 613248897, 613250382, 613250387, 613252322, 613260252, 613261753, 613261754, 613261759, \n" + "613261770, 613261873, 613261884, 613261885, 613261888, 613261889, 613261892, 613261897, 613261913, 613261919, 613261927, \n" + "614496021, 843606367, 843606967, 843607021, 843607033, 843607089, 1038731600, 1038731672, 1038731673, 1038731675, \n" + "1038731684, 1038731693, 1046990487, 1046990488, 1046990499, 1046990505, 1046990506, 1049724722, 1051109548, 1051311275, \n" + "1051311904, 1060574377, 1060574395, 1060574506, 1060574562, 1115915938, 1115915939, 1115915941, 1116310571, 1367495544, \n" + "1367495545, 1367497297, 1367497298, 1367497299, 1367497300, 1367497303, 1367497313, 1367497813, 1367497816, 1367497818, \n" + "1367497821, 1367497822, 1367497823, 1624976423, 1624976451, 1624976457, 3275636061, 3275640505, 3275645765, 3275645807, \n" + "3275649138, 3275651456, 3275651460, 3275651478, 3275651479, 3275654566, 3275654568, 3275654570, 3275654575, 3275659612, \n" + "3275659616, 3275659620, 3275668880, 3275669693, 3275675627, 3275675634, 3275677479, 3275677504, 3275678855, 3275679524, \n" + "3275679532, 3275680014, 3275682307, 3275682308, 3275682309, 3275682310, 3275682420, 3275682423, 3275682436, 3275682448, \n" + "3275682460, 3275682462, 3275682474, 3275684831, 3275688903, 3275694023, 3275694025, 3275694027, 3275695054, 3275695056,\n" + "3275695062, 
3275699512, 3275699514, 3275699518, 3275701682, 3275701683, 3275701685, 3275701688, 3275703633, 3275703634, \n" + "3275703635, 3275703636, 3275703638, 3275703639, 3275704860, 3275704861, 3275764577, 3275797149, 3275798566, 3275798567, \n" + "3275798568, 3275798592, 3275931147, 3275942728, 3275945337, 3275945338, 3275945339, 3275945340, 3275945342, 3275945344, \n" + "3275946319, 3275946322, 3275946324, 3275946643, 3275949495, 3275949498, 3275949500, 3275950250, 3275955128, 3275955129, \n" + "3275955130, 3427017435, 3427017450, 3438304254, 3438304257, 3447068169, 3505227849, 3505227890, 3505556908, 3506351285, \n" + "3506351389, 3506351398, 3506351468, 3510037138, 3510038610, 3545590644, 3545594378, 3545595073, 3545595318, 3545595506, \n" + "3545597841, 3545598818, 3545599658, 3545599663, 3545601215, 3556080898, 3556080980, 3556080999, 3556081323, 3565122663, \n" + "3565122679, 3565122801, 3565122858, 3565122908, 3565122929, 3565122952, 3565122984, 3565123028, 3565123047, 3565123048, \n" + "3565123203, 3565123230, 3949988054, 3949988056, 3949988070, 3972992248, 3972992252, 3972992254, 3972992257, 3972992263, \n" + "3972992267, 3972992268, 3972992269, 3972992270, 3972992274, 3972992275, 3972992277, 3972992281, 3972992293, 3972992298, \n" + "3972992299, 3972992305, 3972992307, 3972992313, 3972992316, 3972992322, 3972992338, 3978471261, 3978471272, 4266318185, \n" + "4298107404, 4308853119, 4308853123, 4308853500, 4451174646, 4451174656, 4451174701, 4569827278, 4569827284, 4569827287, \n" + "4569827379, 4569827523, 4569827524, 4896589676, 4979049725, 5054587609, 5136433884, 5362640372, 5393109964, 5393405364, \n" + "5393405365, 5393405620, 5393405625, 5393405675, 5393405677, 5393405858, 5393405970)";
QueryPlan plan = conn.createStatement().unwrap(PhoenixStatement.class).compileQuery(query);
plan.iterator();
}
Use of org.apache.phoenix.schema.stats.GuidePostsInfo in the Apache Phoenix project.
From class BaseResultIterators, method getParallelScans:
/**
* Compute the list of parallel scans to run for a given query. The inner scans
* may be concatenated together directly, while the other ones may need to be
* merge sorted, depending on the query.
* <p>
* Also computes an estimated bytes scanned, rows scanned, and last update time
* of statistics, and stores them in {@code estimatedSize}, {@code estimatedRows}
* and {@code estimateInfoTimestamp}. All three are set to null when the scan is
* not a point lookup and no guideposts are available. The {@code hasGuidePosts}
* field is assigned as well.
* To compute correctly, we need to handle a couple of edge cases:
* 1) If a guidepost is equal to the start key of the scan.
* 2) If a guidepost is equal to the end region key.
* In both cases, we set a flag (delayAddingEst) which indicates that the previous
* gp should be used in our stats calculation. The normal case is that a gp is
* encountered which is in the scan range in which case it is simply added to
* our calculation.
* <p>
* For the last update time, we use the min timestamp of the gp that are in
* range of the scans that will be issued. If we find no gp in the range, we use
* the gp in the first or last region of the scan. If we encounter a region with
* no gp, then we return a null value as an indication that we don't know with
* certainty when the stats were updated last. This handles the case of a split
* occurring for a large ingest with stats never having been calculated for the
* new region.
* <p>
* Before returning, the computed scans are passed to {@code sampleScans} together
* with the statement's table sampling rate.
*
* @param startKey key at which scanning starts; used as the initial current key and,
*        when non-empty, to locate the first region to visit
* @param stopKey key at which scanning stops; used as the end key of the last region
*        visited and, when non-empty, to locate that region
* @return list of parallel scans to run for a given query.
* @throws SQLException declared by the signature; presumably propagated from the
*         region and statistics lookups this method performs (confirm against those helpers)
*/
private List<List<Scan>> getParallelScans(byte[] startKey, byte[] stopKey) throws SQLException {
List<HRegionLocation> regionLocations = getRegionBoundaries(scanGrouper);
List<byte[]> regionBoundaries = toBoundaries(regionLocations);
ScanRanges scanRanges = context.getScanRanges();
PTable table = getTable();
boolean isSalted = table.getBucketNum() != null;
boolean isLocalIndex = table.getIndexType() == IndexType.LOCAL;
GuidePostsInfo gps = getGuidePosts();
// False when stats weren't collected (gps is the NO_GUIDEPOST sentinel)
hasGuidePosts = gps != GuidePostsInfo.NO_GUIDEPOST;
// Case when stats collection did run but there possibly wasn't enough data. In such a
// case we generate an empty guide post with the byte estimate being set as guide post
// width.
boolean emptyGuidePost = gps.isEmptyGuidePost();
// Keys used only to pick the first/last region to visit; they start out equal to
// the scan keys but may be replaced below.
byte[] startRegionBoundaryKey = startKey;
byte[] stopRegionBoundaryKey = stopKey;
int columnsInCommon = 0;
ScanRanges prefixScanRanges = ScanRanges.EVERYTHING;
boolean traverseAllRegions = isSalted || isLocalIndex;
if (isLocalIndex) {
// Sanity check: only derive prefix scan ranges when a data plan is present and its
// table is not itself an index. We should always have a data plan when a local
// index is being used.
if (dataPlan != null && dataPlan.getTableRef().getTable().getType() != PTableType.INDEX) {
// Note that columnsInCommon is assigned as a side effect of evaluating the argument.
prefixScanRanges = computePrefixScanRanges(dataPlan.getContext().getScanRanges(), columnsInCommon = computeColumnsInCommon());
KeyRange prefixRange = prefixScanRanges.getScanRange();
if (!prefixRange.lowerUnbound()) {
startRegionBoundaryKey = prefixRange.getLowerRange();
}
if (!prefixRange.upperUnbound()) {
stopRegionBoundaryKey = prefixRange.getUpperRange();
}
}
} else if (!traverseAllRegions) {
// Neither salted nor a local index: tighten the start/stop keys using the scan's own
// start/stop rows.
byte[] scanStartRow = scan.getStartRow();
if (scanStartRow.length != 0 && Bytes.compareTo(scanStartRow, startKey) > 0) {
startRegionBoundaryKey = startKey = scanStartRow;
}
byte[] scanStopRow = scan.getStopRow();
if (stopKey.length == 0 || (scanStopRow.length != 0 && Bytes.compareTo(scanStopRow, stopKey) < 0)) {
stopRegionBoundaryKey = stopKey = scanStopRow;
}
}
// Work out the indexes of the first and last regions to visit. With empty boundary keys
// every region is visited (index 0 through regionBoundaries.size()).
int regionIndex = 0;
int startRegionIndex = 0;
int stopIndex = regionBoundaries.size();
if (startRegionBoundaryKey.length > 0) {
startRegionIndex = regionIndex = getIndexContainingInclusive(regionBoundaries, startRegionBoundaryKey);
}
if (stopRegionBoundaryKey.length > 0) {
// The search runs over the sub-list starting at regionIndex, hence the added offset.
stopIndex = Math.min(stopIndex, regionIndex + getIndexContainingExclusive(regionBoundaries.subList(regionIndex, stopIndex), stopRegionBoundaryKey));
if (isLocalIndex) {
// For a local index the stop key becomes the end key of the last region visited.
stopKey = regionLocations.get(stopIndex).getRegionInfo().getEndKey();
}
}
List<List<Scan>> parallelScans = Lists.newArrayListWithExpectedSize(stopIndex - regionIndex + 1);
ImmutableBytesWritable currentKey = new ImmutableBytesWritable(startKey);
int gpsSize = gps.getGuidePostsCount();
// Sizing hint for the per-region scan list only.
int estGuidepostsPerRegion = gpsSize == 0 ? 1 : gpsSize / regionLocations.size() + 1;
int keyOffset = 0;
ImmutableBytesWritable currentGuidePost = ByteUtil.EMPTY_IMMUTABLE_BYTE_ARRAY;
List<Scan> scans = Lists.newArrayListWithExpectedSize(estGuidepostsPerRegion);
ImmutableBytesWritable guidePosts = gps.getGuidePosts();
ByteArrayInputStream stream = null;
DataInput input = null;
PrefixByteDecoder decoder = null;
// Index of the guidepost most recently decoded into currentGuidePost.
int guideIndex = 0;
GuidePostEstimate estimates = new GuidePostEstimate();
boolean gpsForFirstRegion = false;
boolean intersectWithGuidePosts = true;
// Maintain min ts for gps in first or last region outside of
// gps that are in the scan range. We'll use this if we find
// no gps in range.
long fallbackTs = Long.MAX_VALUE;
// Determination of whether or not we found a guidepost in
// every region between the start and stop key. If not, then
// we cannot definitively say at what time the guideposts
// were collected.
boolean gpsAvailableForAllRegions = true;
try {
boolean delayAddingEst = false;
ImmutableBytesWritable firstRegionStartKey = null;
if (gpsSize > 0) {
// Guideposts are stored encoded in a single byte range; decode them sequentially.
stream = new ByteArrayInputStream(guidePosts.get(), guidePosts.getOffset(), guidePosts.getLength());
input = new DataInputStream(stream);
decoder = new PrefixByteDecoder(gps.getMaxLength());
firstRegionStartKey = new ImmutableBytesWritable(regionLocations.get(regionIndex).getRegionInfo().getStartKey());
try {
int c;
// Continue walking guideposts until we get past the currentKey
while ((c = currentKey.compareTo(currentGuidePost = PrefixByteCodec.decode(decoder, input))) >= 0) {
// Detect whether we have seen a guidepost at or after the start key of
// the first region, i.e. one that may lie in the first region.
if (!gpsForFirstRegion && firstRegionStartKey.compareTo(currentGuidePost) <= 0) {
gpsForFirstRegion = true;
}
// While we have guideposts for the first region, track
// the min ts as a fallback for the time at which stats were calculated.
if (gpsForFirstRegion) {
fallbackTs = Math.min(fallbackTs, gps.getGuidePostTimestamps()[guideIndex]);
}
// Special case for gp == startKey in which case we want to
// count this gp (if it's in range) though we go past it.
delayAddingEst = (c == 0);
guideIndex++;
}
} catch (EOFException e) {
// expected. Thrown when we have decoded all guide posts.
intersectWithGuidePosts = false;
}
}
// End key of the last region that will be visited.
byte[] endRegionKey = regionLocations.get(stopIndex).getRegionInfo().getEndKey();
byte[] currentKeyBytes = currentKey.copyBytes();
// Nothing to intersect with if every guidepost was consumed above (also the case
// when there are no guideposts at all).
intersectWithGuidePosts &= guideIndex < gpsSize;
// Merge bisect with guideposts, one region at a time, from regionIndex through
// stopIndex inclusive.
while (regionIndex <= stopIndex) {
HRegionLocation regionLocation = regionLocations.get(regionIndex);
HRegionInfo regionInfo = regionLocation.getRegionInfo();
byte[] currentGuidePostBytes = currentGuidePost.copyBytes();
// The last region ends at the stop key; every other region ends at its boundary.
byte[] endKey;
if (regionIndex == stopIndex) {
endKey = stopKey;
} else {
endKey = regionBoundaries.get(regionIndex);
}
if (isLocalIndex) {
// For a local index, prune regions that do not intersect the prefix scan ranges
// and compute the row key offset based on the region's start/stop key.
if (columnsInCommon > 0 && prefixScanRanges.useSkipScanFilter()) {
byte[] regionStartKey = regionInfo.getStartKey();
ImmutableBytesWritable ptr = context.getTempPtr();
clipKeyRangeBytes(prefixScanRanges.getSchema(), 0, columnsInCommon, regionStartKey, ptr, false);
regionStartKey = ByteUtil.copyKeyBytesIfNecessary(ptr);
// Prune this region if there's no intersection
if (!prefixScanRanges.intersectRegion(regionStartKey, regionInfo.getEndKey(), false)) {
currentKeyBytes = endKey;
regionIndex++;
continue;
}
}
keyOffset = ScanUtil.getRowKeyOffset(regionInfo.getStartKey(), regionInfo.getEndKey());
}
// Remember where this region started so we can tell below whether the guidepost
// loop advanced currentKeyBytes.
byte[] initialKeyBytes = currentKeyBytes;
int gpsComparedToEndKey = -1;
boolean everNotDelayed = false;
// Create one scan per guidepost for as long as the guidepost is at or before this
// region's end key (no upper limit is applied when the end key is empty).
while (intersectWithGuidePosts && (endKey.length == 0 || (gpsComparedToEndKey = currentGuidePost.compareTo(endKey)) <= 0)) {
Scan newScan = scanRanges.intersectScan(scan, currentKeyBytes, currentGuidePostBytes, keyOffset, false);
if (newScan != null) {
ScanUtil.setLocalIndexAttributes(newScan, keyOffset, regionInfo.getStartKey(), regionInfo.getEndKey(), newScan.getStartRow(), newScan.getStopRow());
// If we were delaying the previous gp, add the previous
// gp estimates now that we know they are in range.
if (delayAddingEst) {
updateEstimates(gps, guideIndex - 1, estimates);
}
// If this gp equals the region end key, delay adding it; otherwise add the
// current gp estimates.
if (!(delayAddingEst = gpsComparedToEndKey == 0)) {
updateEstimates(gps, guideIndex, estimates);
}
} else {
// No intersection with the scan ranges: drop any pending delayed estimate.
delayAddingEst = false;
}
everNotDelayed |= !delayAddingEst;
scans = addNewScan(parallelScans, scans, newScan, currentGuidePostBytes, false, regionLocation);
currentKeyBytes = currentGuidePostBytes;
try {
currentGuidePost = PrefixByteCodec.decode(decoder, input);
currentGuidePostBytes = currentGuidePost.copyBytes();
guideIndex++;
} catch (EOFException e) {
// We have read all guide posts
intersectWithGuidePosts = false;
}
}
// Deliberate reference comparison: currentKeyBytes is only reassigned inside the
// loop above, so a different reference indicates the loop body ran for this region.
boolean gpsInThisRegion = initialKeyBytes != currentKeyBytes;
if (!useStatsForParallelization) {
/*
* If we are not using stats for generating parallel scans, we need to reset the
* currentKey back to what it was at the beginning of the loop.
*/
currentKeyBytes = initialKeyBytes;
}
// Final scan for this region: from the current key up to the region's end key.
Scan newScan = scanRanges.intersectScan(scan, currentKeyBytes, endKey, keyOffset, true);
if (newScan != null) {
ScanUtil.setLocalIndexAttributes(newScan, keyOffset, regionInfo.getStartKey(), regionInfo.getEndKey(), newScan.getStartRow(), newScan.getStopRow());
// Boundary case of no GP in region after delaying adding of estimates
if (!gpsInThisRegion && delayAddingEst) {
updateEstimates(gps, guideIndex - 1, estimates);
gpsInThisRegion = true;
delayAddingEst = false;
}
} else if (!gpsInThisRegion) {
delayAddingEst = false;
}
scans = addNewScan(parallelScans, scans, newScan, endKey, true, regionLocation);
currentKeyBytes = endKey;
// We have a guide post in the region if the above loop was entered
// or if the current key is less than the region end key (since the loop
// may not have been entered if our scan end key is smaller than the
// first guide post in that region).
boolean gpsAfterStopKey = false;
gpsAvailableForAllRegions &= // GP in this region
(gpsInThisRegion && everNotDelayed) || // GP in first region (before start key)
(regionIndex == startRegionIndex && gpsForFirstRegion) || (gpsAfterStopKey = (// GP in last region (after stop key)
regionIndex == stopIndex && intersectWithGuidePosts && (// then check if gp is in the region
endRegionKey.length == 0 || currentGuidePost.compareTo(endRegionKey) < 0)));
if (gpsAfterStopKey) {
// If gp after stop key, but still in last region, track min ts as fallback
fallbackTs = Math.min(fallbackTs, gps.getGuidePostTimestamps()[guideIndex]);
}
regionIndex++;
}
// Publish the estimates. Point lookups take precedence over guidepost-based numbers.
if (scanRanges.isPointLookup()) {
// Rows come from the point lookup count; size is rows times the estimated row size.
this.estimatedRows = Long.valueOf(scanRanges.getPointLookupCount());
this.estimatedSize = this.estimatedRows * SchemaUtil.estimateRowSize(table);
this.estimateInfoTimestamp = computeMinTimestamp(gpsAvailableForAllRegions, estimates, fallbackTs);
} else if (emptyGuidePost) {
// In case of an empty guide post, we estimate the number of rows scanned by
// using the estimated row size
this.estimatedRows = (gps.getByteCounts()[0] / SchemaUtil.estimateRowSize(table));
this.estimatedSize = gps.getByteCounts()[0];
this.estimateInfoTimestamp = gps.getGuidePostTimestamps()[0];
} else if (hasGuidePosts) {
// Use the totals accumulated from the guideposts found in range.
this.estimatedRows = estimates.rowsEstimate;
this.estimatedSize = estimates.bytesEstimate;
this.estimateInfoTimestamp = computeMinTimestamp(gpsAvailableForAllRegions, estimates, fallbackTs);
} else {
// No statistics available: report the estimates as unknown.
this.estimatedRows = null;
this.estimatedSize = null;
this.estimateInfoTimestamp = null;
}
if (!scans.isEmpty()) {
// Add any remaining scans
parallelScans.add(scans);
}
} finally {
// The stream is only opened when there are guideposts to decode.
if (stream != null)
Closeables.closeQuietly(stream);
}
// Apply the statement's table sampling rate to the computed scans before returning them.
sampleScans(parallelScans, this.plan.getStatement().getTableSamplingRate());
return parallelScans;
}
Aggregations