Use of com.mapr.db.scan.ScanRange in project drill by apache.
In class JsonTableGroupScan, method getRegionsToScan:
protected NavigableMap<TabletFragmentInfo, String> getRegionsToScan(int scanRangeSizeMB) {
  // If regionsToScan already computed, just return.
  double estimatedRowCount = ROWCOUNT_UNKNOWN;
  if (doNotAccessRegionsToScan == null) {
    final Table t = this.formatPlugin.getJsonTableCache().getTable(
        scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName());
    final MetaTable metaTable = t.getMetaTable();
    QueryCondition scanSpecCondition = scanSpec.getCondition();
    List<ScanRange> scanRanges = (scanSpecCondition == null)
        ? metaTable.getScanRanges(scanRangeSizeMB)
        : metaTable.getScanRanges(scanSpecCondition, scanRangeSizeMB);
    logger.debug("getRegionsToScan() with scanSpec {}: table={}, index={}, condition={}, sizeMB={}, #ScanRanges={}",
        System.identityHashCode(scanSpec), scanSpec.getTableName(), scanSpec.getIndexName(),
        scanSpec.getCondition() == null ? "null" : scanSpec.getCondition(),
        scanRangeSizeMB, scanRanges == null ? "null" : scanRanges.size());
    final TreeMap<TabletFragmentInfo, String> regionsToScan = new TreeMap<>();
    if (isIndexScan()) {
      String idxIdentifier = stats.buildUniqueIndexIdentifier(
          scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName());
      if (stats.isStatsAvailable()) {
        estimatedRowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier);
      }
    } else {
      if (stats.isStatsAvailable()) {
        estimatedRowCount = stats.getRowCount(scanSpec.getCondition(), null);
      }
    }
    // If limit pushdown has occurred - factor it in the rowcount
    if (this.maxRecordsToRead > 0) {
      estimatedRowCount = Math.min(estimatedRowCount, this.maxRecordsToRead);
    }
    // If the estimated row count > 0 then scan ranges must be > 0
    Preconditions.checkState(
        estimatedRowCount == ROWCOUNT_UNKNOWN || estimatedRowCount == 0
            || (scanRanges != null && scanRanges.size() > 0),
        String.format("#Scan ranges should be greater than 0 since estimated rowcount=[%f]", estimatedRowCount));
    if (scanRanges != null && scanRanges.size() > 0) {
      // set the start-row of the scanspec as the start-row of the first scan range
      ScanRange firstRange = scanRanges.get(0);
      QueryCondition firstCondition = firstRange.getCondition();
      byte[] firstStartRow = ((ConditionImpl) firstCondition).getRowkeyRanges().get(0).getStartRow();
      scanSpec.setStartRow(firstStartRow);
      // set the stop-row of ScanSpec as the stop-row of the last scan range
      ScanRange lastRange = scanRanges.get(scanRanges.size() - 1);
      QueryCondition lastCondition = lastRange.getCondition();
      List<RowkeyRange> rowkeyRanges = ((ConditionImpl) lastCondition).getRowkeyRanges();
      byte[] lastStopRow = rowkeyRanges.get(rowkeyRanges.size() - 1).getStopRow();
      scanSpec.setStopRow(lastStopRow);
      for (ScanRange range : scanRanges) {
        TabletInfoImpl tabletInfoImpl = (TabletInfoImpl) range;
        regionsToScan.put(new TabletFragmentInfo(tabletInfoImpl), range.getLocations()[0]);
      }
    }
    setRegionsToScan(regionsToScan);
  }
  return doNotAccessRegionsToScan;
}
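For readers unfamiliar with the MapR-DB client calls above, here is a minimal, self-contained sketch of the same pattern in isolation: fetch the ScanRange list from a table's MetaTable, read each range's row-key boundaries through its QueryCondition, and note the preferred host for each range. This is an illustration only, not Drill code; it assumes MapRDB.getTable for obtaining the table handle and that ConditionImpl and ConditionNode.RowkeyRange are importable from com.mapr.db.impl, matching the imports used by the Drill classes shown here.

import java.util.List;

import org.apache.hadoop.hbase.util.Bytes;
import org.ojai.store.QueryCondition;

import com.mapr.db.MapRDB;
import com.mapr.db.MetaTable;
import com.mapr.db.Table;
import com.mapr.db.impl.ConditionImpl;
import com.mapr.db.impl.ConditionNode.RowkeyRange;
import com.mapr.db.scan.ScanRange;

public class ScanRangeExample {
  // Print the row-key boundaries and preferred host of every scan range of a JSON table.
  public static void printScanRanges(String tablePath, int scanRangeSizeMB) {
    Table table = MapRDB.getTable(tablePath);      // table handle (assumed entry point)
    MetaTable metaTable = table.getMetaTable();    // tablet metadata for the table
    List<ScanRange> ranges = metaTable.getScanRanges(scanRangeSizeMB);
    for (ScanRange range : ranges) {
      // Each scan range carries a QueryCondition; the concrete ConditionImpl exposes its row-key ranges.
      QueryCondition condition = range.getCondition();
      List<RowkeyRange> rowkeyRanges = ((ConditionImpl) condition).getRowkeyRanges();
      byte[] startRow = rowkeyRanges.get(0).getStartRow();
      byte[] stopRow = rowkeyRanges.get(rowkeyRanges.size() - 1).getStopRow();
      // getLocations() lists the hosts serving this range; Drill uses the first one for affinity.
      String preferredHost = range.getLocations().length > 0 ? range.getLocations()[0] : "unknown";
      System.out.printf("[%s, %s) -> %s%n",
          Bytes.toStringBinary(startRow), Bytes.toStringBinary(stopRow), preferredHost);
    }
  }
}

The cast to ConditionImpl mirrors what both Drill methods on this page do: ScanRange exposes only the OJAI QueryCondition interface, so the per-range row-key boundaries are reached through the implementation class.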
Use of com.mapr.db.scan.ScanRange in project drill by apache.
In class JsonTableRangePartitionFunction, method initialize:
public void initialize(MapRDBFormatPlugin plugin) {
  // get the table handle from the table cache
  Table table = plugin.getJsonTableCache().getTable(tableName, userName);
  // Get all scan ranges for the primary table.
  // The reason is the row keys could typically belong to any one of the tablets of the table, so
  // there is no use trying to get only a limited set of scan ranges.
  // NOTE: here we use the restrictedScanRangeSizeMB because the range partitioning should be parallelized
  // based on the number of scan ranges on the RestrictedJsonTableGroupScan.
  List<ScanRange> ranges = table.getMetaTable().getScanRanges(plugin.getRestrictedScanRangeSizeMB());
  this.startKeys = Lists.newArrayList();
  this.stopKeys = Lists.newArrayList();
  logger.debug("Num scan ranges for table {} = {}", table.getName(), ranges.size());
  int count = 0;
  for (ScanRange r : ranges) {
    QueryCondition condition = r.getCondition();
    List<RowkeyRange> rowkeyRanges = ((ConditionImpl) condition).getRowkeyRanges();
    byte[] start = rowkeyRanges.get(0).getStartRow();
    byte[] stop = rowkeyRanges.get(rowkeyRanges.size() - 1).getStopRow();
    Preconditions.checkNotNull(start,
        String.format("Encountered a null start key at position %d for scan range condition %s.", count, condition.toString()));
    Preconditions.checkNotNull(stop,
        String.format("Encountered a null stop key at position %d for scan range condition %s.", count, condition.toString()));
    if (count > 0) {
      // after the first start key, the rest should be non-empty
      Preconditions.checkState(!(Bytes.equals(start, MapRConstants.EMPTY_BYTE_ARRAY)),
          String.format("Encountered an empty start key at position %d", count));
    }
    if (count < ranges.size() - 1) {
      // except for the last stop key, the rest should be non-empty
      Preconditions.checkState(!(Bytes.equals(stop, MapRConstants.EMPTY_BYTE_ARRAY)),
          String.format("Encountered an empty stop key at position %d", count));
    }
    startKeys.add(start);
    stopKeys.add(stop);
    count++;
  }
  // check validity; only need to check one of the lists since they are populated together
  Preconditions.checkArgument(startKeys.size() > 0, "Found empty list of start/stopKeys.");
  Preconditions.checkState(startKeys.size() == ranges.size(),
      String.format("Mismatch between the lengths: num start keys = %d, num scan ranges = %d", startKeys.size(), ranges.size()));
  Preconditions.checkState(stopKeys.size() == ranges.size(),
      String.format("Mismatch between the lengths: num stop keys = %d, num scan ranges = %d", stopKeys.size(), ranges.size()));
}
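The startKeys/stopKeys gathered here define one [start, stop) row-key interval per tablet, in tablet order, and the range partition function later routes each row to the tablet whose interval contains its row key. As a rough illustration of that lookup (this is not the actual eval logic of JsonTableRangePartitionFunction; the class and method below are hypothetical), a row key can be mapped to an interval index using the same HBase Bytes comparisons seen above:

import java.util.List;

import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical helper, for illustration only: given the per-tablet start/stop keys collected
// above, return the index of the tablet range that contains rowKey.
public class RowKeyPartitioner {
  private final List<byte[]> startKeys;
  private final List<byte[]> stopKeys;

  public RowKeyPartitioner(List<byte[]> startKeys, List<byte[]> stopKeys) {
    this.startKeys = startKeys;
    this.stopKeys = stopKeys;
  }

  public int partitionFor(byte[] rowKey) {
    for (int i = 0; i < startKeys.size(); i++) {
      byte[] stop = stopKeys.get(i);
      // An empty stop key marks the last, open-ended tablet range.
      boolean beforeStop = stop.length == 0 || Bytes.compareTo(rowKey, stop) < 0;
      if (beforeStop && Bytes.compareTo(rowKey, startKeys.get(i)) >= 0) {
        return i;
      }
    }
    return startKeys.size() - 1; // fall back to the last range
  }
}

A linear scan is used here for clarity; because the intervals are sorted and non-overlapping, a binary search over startKeys would serve equally well.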