Use of org.apache.phoenix.util.PrefixByteDecoder in the Apache Phoenix project.
Class StatisticsWriter, method addStats.
/**
 * Appends to {@code mutations} the statistics changes for the guideposts that {@code tracker}
 * collected for one column family. Nothing is written here; the caller commits the batch.
 * <p>
 * When no client timestamp was supplied, the tracker's max timestamp is used and a
 * "last stats updated time" put is added first. If the tracker holds encoded guidepost keys, one
 * guidepost mutation is added per decoded key, followed by a delete of the empty-key guidepost
 * row. Otherwise a single empty-key guidepost carrying {@code guidePostDepth} as its byte count
 * and zero rows is added.
 *
 * @param tracker
 *            the statistics tracker; when {@code null} the method returns without adding anything
 * @param cfKey
 *            the column family for which the stats are being collected
 * @param mutations
 *            the list that accumulates all the mutations to commit in a batch
 * @throws IOException
 *             if reading the encoded guideposts or building a mutation fails; a failure stops
 *             processing of the remaining guideposts
 */
@SuppressWarnings("deprecation")
public void addStats(StatisticsCollector tracker, ImmutableBytesPtr cfKey, List<Mutation> mutations) throws IOException {
    if (tracker == null) {
        return;
    }

    final long statsTs;
    if (clientTimeStamp == DefaultStatisticsCollector.NO_TIMESTAMP) {
        // No client timestamp: use the tracker's max timestamp, and record the update time
        // here because this is the first point at which that timestamp is known.
        statsTs = tracker.getMaxTimeStamp();
        mutations.add(getLastStatsUpdatedTimePut(statsTs));
    } else {
        statsTs = clientTimeStamp;
    }

    final GuidePostsInfo info = tracker.getGuidePosts(cfKey);
    if (info == null) {
        return;
    }

    final long[] bytesPerGuidePost = info.getByteCounts();
    final long[] rowsPerGuidePost = info.getRowCounts();
    final ImmutableBytesWritable encodedKeys = info.getGuidePosts();

    if (encodedKeys.getLength() <= 0) {
        /*
         * Not enough data in the region: emit one guidepost with an empty key whose byte count is
         * the guidepost width. The row count is left at zero because neither the PTable nor its
         * schema is available here to estimate a row size; the client derives it when reading
         * the guideposts back from SYSTEM.STATS (StatisticsUtil#readStatistics).
         */
        addGuidepost(cfKey, mutations, ByteUtil.EMPTY_IMMUTABLE_BYTE_ARRAY, guidePostDepth, 0, statsTs);
        return;
    }

    try (ByteArrayInputStream encoded =
            new ByteArrayInputStream(encodedKeys.get(), encodedKeys.getOffset(), encodedKeys.getLength())) {
        final DataInput keyInput = new DataInputStream(encoded);
        final PrefixByteDecoder keyDecoder = new PrefixByteDecoder(info.getMaxLength());
        // Deliberately unbounded: the decoder signals the end of the encoded keys with an
        // EOFException, which is the only way out of this loop.
        for (int index = 0; ; index++) {
            ImmutableBytesWritable guidePostKey = keyDecoder.decode(keyInput);
            addGuidepost(cfKey, mutations, guidePostKey, bytesPerGuidePost[index], rowsPerGuidePost[index], statsTs);
        }
    } catch (EOFException endOfGuidePosts) {
        // Expected: every encoded guidepost has been consumed.
    }

    // Real guideposts were written, so remove the empty-guidepost indicator row that other
    // regions may have written for this column family.
    byte[] emptyGuidePostRow = StatisticsUtil.getRowKey(tableName, cfKey, ByteUtil.EMPTY_IMMUTABLE_BYTE_ARRAY);
    mutations.add(new Delete(emptyGuidePostRow, statsTs));
}
Use of org.apache.phoenix.util.PrefixByteDecoder in the Apache Phoenix project.
Class BaseResultIterators, method getParallelScans.
/**
* Compute the list of parallel scans to run for a given query. The inner scans
* may be concatenated together directly, while the other ones may need to be
* merge sorted, depending on the query.
* Also computes an estimated bytes scanned, rows scanned, and last update time
* of statistics. To compute correctly, we need to handle a couple of edge cases:
* 1) if a guidepost is equal to the start key of the scan.
* 2) If a guidepost is equal to the end region key.
* In both cases, we set a flag (delayAddingEst) which indicates that the previous
* gp should be used in our stats calculation. The normal case is that a gp is
* encountered which is in the scan range in which case it is simply added to
* our calculation.
* For the last update time, we use the min timestamp of the gp that are in
* range of the scans that will be issued. If we find no gp in the range, we use
* the gp in the first or last region of the scan. If we encounter a region with
* no gp, then we return a null value as an indication that we don't know with
* certainty when the stats were updated last. This handles the case of a split
* occurring for a large ingest with stats never having been calculated for the
* new region.
* As side effects, this method assigns the fields hasGuidePosts, estimatedRows,
* estimatedSize and estimateInfoTimestamp.
* @param startKey lower bound key; it seeds the key from which scans are generated and,
* for tables that are neither salted nor a local index, is replaced by the scan's start
* row when that row is non-empty and greater
* @param stopKey upper bound key; for tables that are neither salted nor a local index it
* is replaced by the scan's stop row when stopKey is empty or the stop row is non-empty
* and smaller; for a local index with a bounded stop region key it is replaced by the end
* key of the stop region
* @return list of parallel scans to run for a given query.
* @throws SQLException presumably propagated from the helper calls made here; the exact
* sources are not visible in this method
*/
private List<List<Scan>> getParallelScans(byte[] startKey, byte[] stopKey) throws SQLException {
// Region locations and the boundary keys derived from them.
List<HRegionLocation> regionLocations = getRegionBoundaries(scanGrouper);
List<byte[]> regionBoundaries = toBoundaries(regionLocations);
ScanRanges scanRanges = context.getScanRanges();
PTable table = getTable();
boolean isSalted = table.getBucketNum() != null;
boolean isLocalIndex = table.getIndexType() == IndexType.LOCAL;
GuidePostsInfo gps = getGuidePosts();
// case when stats wasn't collected
hasGuidePosts = gps != GuidePostsInfo.NO_GUIDEPOST;
// Case when stats collection did run but there possibly wasn't enough data. In such a
// case we generate an empty guide post with the byte estimate being set as guide post
// width.
boolean emptyGuidePost = gps.isEmptyGuidePost();
// Keys used only to pick the first and last region to visit; they may diverge from
// startKey/stopKey below.
byte[] startRegionBoundaryKey = startKey;
byte[] stopRegionBoundaryKey = stopKey;
int columnsInCommon = 0;
ScanRanges prefixScanRanges = ScanRanges.EVERYTHING;
boolean traverseAllRegions = isSalted || isLocalIndex;
if (isLocalIndex) {
// A data plan is expected whenever a local index is being used; the null/type test
// below is only a sanity check.
if (dataPlan != null && dataPlan.getTableRef().getTable().getType() != PTableType.INDEX) {
// Sanity check
// Note: columnsInCommon is assigned inline as the second argument.
prefixScanRanges = computePrefixScanRanges(dataPlan.getContext().getScanRanges(), columnsInCommon = computeColumnsInCommon());
KeyRange prefixRange = prefixScanRanges.getScanRange();
// Use the bounded ends of the prefix range to select the regions to visit.
if (!prefixRange.lowerUnbound()) {
startRegionBoundaryKey = prefixRange.getLowerRange();
}
if (!prefixRange.upperUnbound()) {
stopRegionBoundaryKey = prefixRange.getUpperRange();
}
}
} else if (!traverseAllRegions) {
// Neither salted nor local index: narrow the start/stop keys to the scan's own rows.
byte[] scanStartRow = scan.getStartRow();
if (scanStartRow.length != 0 && Bytes.compareTo(scanStartRow, startKey) > 0) {
startRegionBoundaryKey = startKey = scanStartRow;
}
byte[] scanStopRow = scan.getStopRow();
if (stopKey.length == 0 || (scanStopRow.length != 0 && Bytes.compareTo(scanStopRow, stopKey) < 0)) {
stopRegionBoundaryKey = stopKey = scanStopRow;
}
}
// Index range of the regions to visit: [regionIndex, stopIndex], both inclusive (see the
// region loop below).
int regionIndex = 0;
int startRegionIndex = 0;
int stopIndex = regionBoundaries.size();
if (startRegionBoundaryKey.length > 0) {
startRegionIndex = regionIndex = getIndexContainingInclusive(regionBoundaries, startRegionBoundaryKey);
}
if (stopRegionBoundaryKey.length > 0) {
stopIndex = Math.min(stopIndex, regionIndex + getIndexContainingExclusive(regionBoundaries.subList(regionIndex, stopIndex), stopRegionBoundaryKey));
if (isLocalIndex) {
// For a local index the stop key becomes the end key of the stop region.
stopKey = regionLocations.get(stopIndex).getRegionInfo().getEndKey();
}
}
List<List<Scan>> parallelScans = Lists.newArrayListWithExpectedSize(stopIndex - regionIndex + 1);
ImmutableBytesWritable currentKey = new ImmutableBytesWritable(startKey);
int gpsSize = gps.getGuidePostsCount();
// Sizing hint only: average guideposts per region, plus one.
int estGuidepostsPerRegion = gpsSize == 0 ? 1 : gpsSize / regionLocations.size() + 1;
int keyOffset = 0;
ImmutableBytesWritable currentGuidePost = ByteUtil.EMPTY_IMMUTABLE_BYTE_ARRAY;
List<Scan> scans = Lists.newArrayListWithExpectedSize(estGuidepostsPerRegion);
ImmutableBytesWritable guidePosts = gps.getGuidePosts();
// Decoding state; only initialised when there is at least one guidepost.
ByteArrayInputStream stream = null;
DataInput input = null;
PrefixByteDecoder decoder = null;
// Index of the guidepost currently held in currentGuidePost.
int guideIndex = 0;
GuidePostEstimate estimates = new GuidePostEstimate();
boolean gpsForFirstRegion = false;
// Becomes false once the guideposts are exhausted.
boolean intersectWithGuidePosts = true;
// Maintain min ts for gps in first or last region outside of
// gps that are in the scan range. We'll use this if we find
// no gps in range.
long fallbackTs = Long.MAX_VALUE;
// Determination of whether or not we found a guidepost in
// every region between the start and stop key. If not, then
// we cannot definitively say at what time the guideposts
// were collected.
boolean gpsAvailableForAllRegions = true;
try {
boolean delayAddingEst = false;
ImmutableBytesWritable firstRegionStartKey = null;
if (gpsSize > 0) {
stream = new ByteArrayInputStream(guidePosts.get(), guidePosts.getOffset(), guidePosts.getLength());
input = new DataInputStream(stream);
decoder = new PrefixByteDecoder(gps.getMaxLength());
firstRegionStartKey = new ImmutableBytesWritable(regionLocations.get(regionIndex).getRegionInfo().getStartKey());
try {
int c;
// Continue walking guideposts until we get past the currentKey
while ((c = currentKey.compareTo(currentGuidePost = PrefixByteCodec.decode(decoder, input))) >= 0) {
// Remember that a guidepost at or after the first region's start key was seen,
// i.e. one that is not past currentKey but may still be in the first region.
if (!gpsForFirstRegion && firstRegionStartKey.compareTo(currentGuidePost) <= 0) {
gpsForFirstRegion = true;
}
// Once such a guidepost has been seen, track the min ts as a fallback for the
// time at which stats were calculated.
if (gpsForFirstRegion) {
fallbackTs = Math.min(fallbackTs, gps.getGuidePostTimestamps()[guideIndex]);
}
// Special case for gp == startKey in which case we want to
// count this gp (if it's in range) though we go past it.
delayAddingEst = (c == 0);
guideIndex++;
}
} catch (EOFException e) {
// expected. Thrown when we have decoded all guide posts.
intersectWithGuidePosts = false;
}
}
byte[] endRegionKey = regionLocations.get(stopIndex).getRegionInfo().getEndKey();
byte[] currentKeyBytes = currentKey.copyBytes();
// Only intersect with guideposts if some remain past the current key.
intersectWithGuidePosts &= guideIndex < gpsSize;
// Merge bisect with guideposts for all but the last region
while (regionIndex <= stopIndex) {
HRegionLocation regionLocation = regionLocations.get(regionIndex);
HRegionInfo regionInfo = regionLocation.getRegionInfo();
byte[] currentGuidePostBytes = currentGuidePost.copyBytes();
// The last visited region ends at stopKey; every other one ends at its region boundary.
byte[] endKey;
if (regionIndex == stopIndex) {
endKey = stopKey;
} else {
endKey = regionBoundaries.get(regionIndex);
}
if (isLocalIndex) {
// Further pruning is attempted only when there are columns in common and the
// prefix range uses a skip scan.
if (columnsInCommon > 0 && prefixScanRanges.useSkipScanFilter()) {
byte[] regionStartKey = regionInfo.getStartKey();
ImmutableBytesWritable ptr = context.getTempPtr();
clipKeyRangeBytes(prefixScanRanges.getSchema(), 0, columnsInCommon, regionStartKey, ptr, false);
regionStartKey = ByteUtil.copyKeyBytesIfNecessary(ptr);
// Prune this region if there's no intersection
if (!prefixScanRanges.intersectRegion(regionStartKey, regionInfo.getEndKey(), false)) {
currentKeyBytes = endKey;
regionIndex++;
continue;
}
}
keyOffset = ScanUtil.getRowKeyOffset(regionInfo.getStartKey(), regionInfo.getEndKey());
}
// Snapshot of the key at the start of this region; compared by reference further down.
byte[] initialKeyBytes = currentKeyBytes;
int gpsComparedToEndKey = -1;
boolean everNotDelayed = false;
// One scan per guidepost while guideposts remain and the current one is not past endKey
// (an empty endKey is treated as unbounded).
while (intersectWithGuidePosts && (endKey.length == 0 || (gpsComparedToEndKey = currentGuidePost.compareTo(endKey)) <= 0)) {
Scan newScan = scanRanges.intersectScan(scan, currentKeyBytes, currentGuidePostBytes, keyOffset, false);
if (newScan != null) {
ScanUtil.setLocalIndexAttributes(newScan, keyOffset, regionInfo.getStartKey(), regionInfo.getEndKey(), newScan.getStartRow(), newScan.getStopRow());
// If adding estimates was delayed, add the previous
// gp estimates now that we know they are in range.
if (delayAddingEst) {
updateEstimates(gps, guideIndex - 1, estimates);
}
// Add the current gp estimates unless this gp equals the end key, in which case
// adding is delayed (delayAddingEst is assigned inside the condition).
if (!(delayAddingEst = gpsComparedToEndKey == 0)) {
updateEstimates(gps, guideIndex, estimates);
}
} else {
// No intersecting scan: drop any pending delayed estimate.
delayAddingEst = false;
}
everNotDelayed |= !delayAddingEst;
scans = addNewScan(parallelScans, scans, newScan, currentGuidePostBytes, false, regionLocation);
currentKeyBytes = currentGuidePostBytes;
try {
currentGuidePost = PrefixByteCodec.decode(decoder, input);
currentGuidePostBytes = currentGuidePost.copyBytes();
guideIndex++;
} catch (EOFException e) {
// We have read all guide posts
intersectWithGuidePosts = false;
}
}
// Reference (not content) comparison: the arrays differ once the loop above has
// reassigned currentKeyBytes.
boolean gpsInThisRegion = initialKeyBytes != currentKeyBytes;
if (!useStatsForParallelization) {
/*
* If we are not using stats for generating parallel scans, we need to reset the
* currentKey back to what it was at the beginning of the loop.
*/
currentKeyBytes = initialKeyBytes;
}
// Scan covering the remainder of this region, from the current key to endKey.
Scan newScan = scanRanges.intersectScan(scan, currentKeyBytes, endKey, keyOffset, true);
if (newScan != null) {
ScanUtil.setLocalIndexAttributes(newScan, keyOffset, regionInfo.getStartKey(), regionInfo.getEndKey(), newScan.getStartRow(), newScan.getStopRow());
// Boundary case of no GP in region after delaying adding of estimates
if (!gpsInThisRegion && delayAddingEst) {
updateEstimates(gps, guideIndex - 1, estimates);
gpsInThisRegion = true;
delayAddingEst = false;
}
} else if (!gpsInThisRegion) {
delayAddingEst = false;
}
scans = addNewScan(parallelScans, scans, newScan, endKey, true, regionLocation);
currentKeyBytes = endKey;
// We have a guide post in the region if the above loop was entered
// or if the current key is less than the region end key (since the loop
// may not have been entered if our scan end key is smaller than the
// first guide post in that region).
boolean gpsAfterStopKey = false;
gpsAvailableForAllRegions &= // GP in this region
(gpsInThisRegion && everNotDelayed) || // GP in first region (before start key)
(regionIndex == startRegionIndex && gpsForFirstRegion) || (gpsAfterStopKey = (// GP in last region (after stop key)
regionIndex == stopIndex && intersectWithGuidePosts && (// then check if gp is in the region
endRegionKey.length == 0 || currentGuidePost.compareTo(endRegionKey) < 0)));
if (gpsAfterStopKey) {
// If gp after stop key, but still in last region, track min ts as fallback
fallbackTs = Math.min(fallbackTs, gps.getGuidePostTimestamps()[guideIndex]);
}
regionIndex++;
}
// Publish the estimates; which source is used depends on the kind of scan and on the
// guidepost state.
if (scanRanges.isPointLookup()) {
// Point lookup: rows come from the lookup count, size from the estimated row size.
this.estimatedRows = Long.valueOf(scanRanges.getPointLookupCount());
this.estimatedSize = this.estimatedRows * SchemaUtil.estimateRowSize(table);
this.estimateInfoTimestamp = computeMinTimestamp(gpsAvailableForAllRegions, estimates, fallbackTs);
} else if (emptyGuidePost) {
// In case of an empty guide post, we estimate the number of rows scanned by
// using the estimated row size
this.estimatedRows = (gps.getByteCounts()[0] / SchemaUtil.estimateRowSize(table));
this.estimatedSize = gps.getByteCounts()[0];
this.estimateInfoTimestamp = gps.getGuidePostTimestamps()[0];
} else if (hasGuidePosts) {
// Use the estimates accumulated from the guideposts visited above.
this.estimatedRows = estimates.rowsEstimate;
this.estimatedSize = estimates.bytesEstimate;
this.estimateInfoTimestamp = computeMinTimestamp(gpsAvailableForAllRegions, estimates, fallbackTs);
} else {
// No stats collected: the estimates are unknown.
this.estimatedRows = null;
this.estimatedSize = null;
this.estimateInfoTimestamp = null;
}
if (!scans.isEmpty()) {
// Add any remaining scans
parallelScans.add(scans);
}
} finally {
// The stream is only non-null when guideposts were decoded.
if (stream != null)
Closeables.closeQuietly(stream);
}
// Apply the statement's table sampling rate to the generated scans.
sampleScans(parallelScans, this.plan.getStatement().getTableSamplingRate());
return parallelScans;
}
Aggregations