use of com.mapr.db.Table in project drill by apache.
the class JsonTableGroupScan method getRegionsToScan.
/**
 * Returns the tablet-fragment-to-location map for this scan, building it on first use.
 *
 * <p>When {@code doNotAccessRegionsToScan} is already set it is returned untouched. Otherwise the scan
 * ranges are fetched from the table's {@link MetaTable} (restricted by the scan spec condition when one
 * is present), the scan spec's start row and stop row are set from the first and last range, and the
 * resulting map is handed to {@code setRegionsToScan} (which is expected to populate
 * {@code doNotAccessRegionsToScan} - confirm against its definition).
 *
 * @param scanRangeSizeMB scan range size, in MB, forwarded to {@code MetaTable.getScanRanges}
 * @return the value of {@code doNotAccessRegionsToScan} after the computation above
 */
protected NavigableMap<TabletFragmentInfo, String> getRegionsToScan(int scanRangeSizeMB) {
  // Already computed on an earlier call: nothing left to do.
  if (doNotAccessRegionsToScan != null) {
    return doNotAccessRegionsToScan;
  }

  final Table table = this.formatPlugin.getJsonTableCache()
      .getTable(scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName());
  final MetaTable metaTable = table.getMetaTable();
  final QueryCondition condition = scanSpec.getCondition();

  final List<ScanRange> scanRanges;
  if (condition == null) {
    scanRanges = metaTable.getScanRanges(scanRangeSizeMB);
  } else {
    scanRanges = metaTable.getScanRanges(condition, scanRangeSizeMB);
  }
  logger.debug("getRegionsToScan() with scanSpec {}: table={}, index={}, condition={}, sizeMB={}, #ScanRanges={}",
      System.identityHashCode(scanSpec),
      scanSpec.getTableName(),
      scanSpec.getIndexName(),
      condition == null ? "null" : condition,
      scanRangeSizeMB,
      scanRanges == null ? "null" : scanRanges.size());

  // Row-count statistics are looked up by the unique index identifier for an index scan,
  // and with a null identifier for a scan of the primary table.
  String statsIdentifier = null;
  if (isIndexScan()) {
    statsIdentifier = stats.buildUniqueIndexIdentifier(
        scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName());
  }
  double estimatedRowCount = ROWCOUNT_UNKNOWN;
  if (stats.isStatsAvailable()) {
    estimatedRowCount = stats.getRowCount(condition, statsIdentifier);
  }

  // A pushed-down limit caps the number of rows that can come back.
  if (this.maxRecordsToRead > 0) {
    estimatedRowCount = Math.min(estimatedRowCount, this.maxRecordsToRead);
  }

  // A known, non-zero row estimate is only consistent with at least one scan range.
  final boolean hasScanRanges = scanRanges != null && !scanRanges.isEmpty();
  final boolean rowsExpected = !(estimatedRowCount == ROWCOUNT_UNKNOWN || estimatedRowCount == 0);
  Preconditions.checkState(!rowsExpected || hasScanRanges,
      String.format("#Scan ranges should be greater than 0 since estimated rowcount=[%f]", estimatedRowCount));

  final TreeMap<TabletFragmentInfo, String> fragmentLocations = new TreeMap<>();
  if (hasScanRanges) {
    // The scan spec starts where the first scan range starts ...
    final ConditionImpl headCondition = (ConditionImpl) scanRanges.get(0).getCondition();
    scanSpec.setStartRow(headCondition.getRowkeyRanges().get(0).getStartRow());

    // ... and stops where the last rowkey range of the last scan range stops.
    final ConditionImpl tailCondition = (ConditionImpl) scanRanges.get(scanRanges.size() - 1).getCondition();
    final List<RowkeyRange> tailRowkeyRanges = tailCondition.getRowkeyRanges();
    scanSpec.setStopRow(tailRowkeyRanges.get(tailRowkeyRanges.size() - 1).getStopRow());

    // Each scan range is mapped to the first of its reported locations.
    for (ScanRange scanRange : scanRanges) {
      final TabletFragmentInfo fragment = new TabletFragmentInfo((TabletInfoImpl) scanRange);
      fragmentLocations.put(fragment, scanRange.getLocations()[0]);
    }
  }
  setRegionsToScan(fragmentLocations);
  return doNotAccessRegionsToScan;
}
use of com.mapr.db.Table in project drill by apache.
the class JsonTableGroupScan method getFirstKeyEstimatedStatsInternal.
/**
 * Get the estimated statistics after applying the {@link QueryCondition} condition.
 *
 * <p>The estimates come from the DB client's scan stats for the primary table or, when {@code index}
 * is given, for that index. When a condition is supplied and it actually prunes the table, the row
 * count is multiplied by a scaling factor that reflects how much the tablet-level estimates can be
 * trusted.
 *
 * @param condition filter to apply; {@code null} requests the stats of the whole table
 * @param index to use for generating the estimate; {@code null} means the primary table
 * @param scanRel the current scan rel, used to read the planner settings
 * @return {@link MapRDBStatisticsPayload} statistics; every field is the corresponding UNKNOWN constant
 *         when no table handle could be obtained
 * @throws UnsupportedOperationException if no index is given while the scan spec is for a secondary index
 */
private MapRDBStatisticsPayload getFirstKeyEstimatedStatsInternal(QueryCondition condition, IndexDesc index, RelNode scanRel) {
  // Without an index the estimate has to come from the primary table.
  if (index == null && scanSpec.isSecondaryIndex()) {
    throw new UnsupportedOperationException("getFirstKeyEstimatedStats should be invoked on primary table");
  }
  // Get the index table or primary table and use the DB API to get the estimated number of rows. For size estimates,
  // we assume that all the columns would be read from the disk.
  final Table table = this.formatPlugin.getJsonTableCache().getTable(scanSpec.getTableName(), index, getUserName());
  if (table == null) {
    logger.info("index_plan_info: getEstimatedRowCount: {} indexName: {}, indexInfo: {}, " + "condition: {} rowCount: UNKNOWN, avgRowSize: UNKNOWN", this, (index == null ? "null" : index.getIndexName()), (index == null ? "null" : index.getIndexInfo()), (condition == null ? "null" : condition.toString()));
    return new MapRDBStatisticsPayload(ROWCOUNT_UNKNOWN, ROWCOUNT_UNKNOWN, AVG_ROWSIZE_UNKNOWN);
  }

  final MetaTable metaTable = table.getMetaTable();
  // The unconditioned stats are needed for the full-scan check, the small-table check and the log line,
  // so they are fetched once here instead of being requested again at each of those places.
  final com.mapr.db.scan.ScanStats fullTableStats = metaTable.getScanStats();
  final com.mapr.db.scan.ScanStats stats = (condition == null) ? fullTableStats : metaTable.getScanStats(condition);

  // Given table condition might not be on leading column. Check if the rowcount matches full table rows.
  // In that case no leading key present or does not prune enough. Treat it like so.
  final boolean isFullScan = index == null && condition != null
      && stats.getEstimatedNumRows() == fullTableStats.getEstimatedNumRows();

  // Factor reflecting confidence in the DB estimates. If a table has few tablets, the tablet-level stats
  // might be off. The decay scalingFactor will reduce estimates when one tablet represents a significant percentage
  // of the entire table.
  double scalingFactor = 1.0;
  // When there is no condition, or the condition does not prune the table, all rows
  // should be selected. So the scalingFactor should not reduce the returned rows
  if (condition != null && !isFullScan) {
    double forcedScalingFactor = PrelUtil.getSettings(scanRel.getCluster()).getIndexStatsRowCountScalingFactor();
    // For 2 or less matching tablets, the error is assumed to be 50%. The Sqrt gives the decaying scalingFactor
    if (stats.getTabletCount() > 2) {
      double accuracy = 1.0 - (2.0 / stats.getTabletCount());
      scalingFactor = Math.min(1.0, 1.0 / Math.sqrt(1.0 / accuracy));
    } else {
      scalingFactor = 0.5;
    }
    if (forcedScalingFactor < 1.0 && fullTableStats.getTabletCount() < PluginConstants.JSON_TABLE_NUM_TABLETS_PER_INDEX_DEFAULT) {
      // User forced confidence scalingFactor for small tables (assumed as less than 32 tablets (~512 MB))
      scalingFactor = forcedScalingFactor;
    }
  }

  // One floating-point average feeds both the log line and the payload, so the two always agree
  // (the log previously used integer division and could differ from the returned value).
  final double avgRowSize = (stats.getEstimatedNumRows() == 0)
      ? 0
      : (double) stats.getEstimatedSize() / stats.getEstimatedNumRows();
  final double scaledRowCount = scalingFactor * stats.getEstimatedNumRows();

  logger.info("index_plan_info: getEstimatedRowCount obtained from DB Client for {}: indexName: {}, indexInfo: {}, " + "condition: {} rowCount: {}, avgRowSize: {}, estimatedSize {}, tabletCount {}, totalTabletCount {}, " + "scalingFactor {}", this, (index == null ? "null" : index.getIndexName()), (index == null ? "null" : index.getIndexInfo()), (condition == null ? "null" : condition.toString()), stats.getEstimatedNumRows(), avgRowSize, stats.getEstimatedSize(), stats.getTabletCount(), fullTableStats.getTabletCount(), scalingFactor);
  return new MapRDBStatisticsPayload(scaledRowCount, scaledRowCount, avgRowSize);
}
use of com.mapr.db.Table in project drill by apache.
the class JsonTableGroupScan method getAverageRowSizeStats.
/**
 * Get the estimated average rowsize. DO NOT call this API directly.
 * Call the stats API instead which modifies the counts based on preference options.
 *
 * @param index index to use for generating the estimate; {@code null} means the primary table
 * @return a payload whose row counts are {@code ROWCOUNT_UNKNOWN} and whose average row size is the
 *         value reported by the table's {@link MetaTable}, or {@code AVG_ROWSIZE_UNKNOWN} when the
 *         table or its meta table is not available
 * @throws UnsupportedOperationException if no index is given while the scan spec is for a secondary index
 */
public MapRDBStatisticsPayload getAverageRowSizeStats(IndexDescriptor index) {
  // Unwrap the underlying index description, if an index was supplied at all.
  final IndexDesc originalDesc = (index == null)
      ? null
      : (IndexDesc) ((MapRDBIndexDescriptor) index).getOriginalDesc();

  // Without an index the estimate has to come from the primary table.
  if (originalDesc == null && scanSpec.isSecondaryIndex()) {
    throw new UnsupportedOperationException("getAverageRowSizeStats should be invoked on primary table");
  }

  // Get the index table or primary table and ask the DB API for the average row size.
  final Table table = this.formatPlugin.getJsonTableCache().getTable(scanSpec.getTableName(), originalDesc, getUserName());
  final MetaTable metaTable = (table == null) ? null : table.getMetaTable();

  double avgRowSize = AVG_ROWSIZE_UNKNOWN;
  if (metaTable != null) {
    avgRowSize = metaTable.getAverageRowSize();
  }

  logger.debug("index_plan_info: getEstimatedRowCount obtained from DB Client for {}: indexName: {}, indexInfo: {}, " + "avgRowSize: {}, estimatedSize {}",
      this,
      (originalDesc == null ? "null" : originalDesc.getIndexName()),
      (originalDesc == null ? "null" : originalDesc.getIndexInfo()),
      avgRowSize,
      fullTableEstimatedSize);
  return new MapRDBStatisticsPayload(ROWCOUNT_UNKNOWN, ROWCOUNT_UNKNOWN, avgRowSize);
}
use of com.mapr.db.Table in project drill by apache.
the class JsonTableRangePartitionFunction method initialize.
/**
 * Initializes the start/stop key lists of this partition function from the scan ranges of the table.
 *
 * <p>For every scan range the start row of its first rowkey range and the stop row of its last rowkey
 * range are recorded, in scan-range order, into {@code startKeys} and {@code stopKeys}.
 *
 * @param plugin format plugin that supplies the table cache and the restricted scan range size
 * @throws NullPointerException if the table handle, the scan range list, or any start/stop key is null
 * @throws IllegalStateException if a start key after the first, or a stop key before the last, is empty
 * @throws IllegalArgumentException if the table reports no scan ranges
 */
public void initialize(MapRDBFormatPlugin plugin) {
  // get the table handle from the table cache
  Table table = plugin.getJsonTableCache().getTable(tableName, userName);
  // Other users of the table cache treat a missing handle as possible; fail with a clear message here.
  Preconditions.checkNotNull(table, "Could not obtain a table handle for table %s.", tableName);
  // Get all scan ranges for the primary table.
  // The reason is the row keys could typically belong to any one of the tablets of the table, so
  // there is no use trying to get only limited set of scan ranges.
  // NOTE: here we use the restrictedScanRangeSizeMB because the range partitioning should be parallelized
  // based on the number of scan ranges on the RestrictedJsonTableGroupScan.
  List<ScanRange> ranges = table.getMetaTable().getScanRanges(plugin.getRestrictedScanRangeSizeMB());
  Preconditions.checkNotNull(ranges, "No scan ranges were returned for table %s.", tableName);
  this.startKeys = Lists.newArrayList();
  this.stopKeys = Lists.newArrayList();
  logger.debug("Num scan ranges for table {} = {}", table.getName(), ranges.size());

  // The checks below use Preconditions' lazy %s templates: the message (including the condition's
  // string form) is only built when a check actually fails, not once per scan range.
  int count = 0;
  for (ScanRange r : ranges) {
    QueryCondition condition = r.getCondition();
    List<RowkeyRange> rowkeyRanges = ((ConditionImpl) condition).getRowkeyRanges();
    byte[] start = rowkeyRanges.get(0).getStartRow();
    byte[] stop = rowkeyRanges.get(rowkeyRanges.size() - 1).getStopRow();
    Preconditions.checkNotNull(start, "Encountered a null start key at position %s for scan range condition %s.", count, condition);
    Preconditions.checkNotNull(stop, "Encountered a null stop key at position %s for scan range condition %s.", count, condition);
    if (count > 0) {
      // after the first start key, rest should be non-empty
      Preconditions.checkState(!(Bytes.equals(start, MapRConstants.EMPTY_BYTE_ARRAY)), "Encountered an empty start key at position %s", count);
    }
    if (count < ranges.size() - 1) {
      // except for the last stop key, rest should be non-empty
      Preconditions.checkState(!(Bytes.equals(stop, MapRConstants.EMPTY_BYTE_ARRAY)), "Encountered an empty stop key at position %s", count);
    }
    startKeys.add(start);
    stopKeys.add(stop);
    count++;
  }
  // check validity; only need to check one of the lists since they are populated together
  Preconditions.checkArgument(startKeys.size() > 0, "Found empty list of start/stopKeys.");
  Preconditions.checkState(startKeys.size() == ranges.size(), "Mismatch between the lengths: num start keys = %s, num scan ranges = %s", startKeys.size(), ranges.size());
  Preconditions.checkState(stopKeys.size() == ranges.size(), "Mismatch between the lengths: num stop keys = %s, num scan ranges = %s", stopKeys.size(), ranges.size());
}
use of com.mapr.db.Table in project drill by apache.
the class TestEncodedFieldPaths method setup_TestEncodedFieldPaths.
/**
 * One-time fixture for this test class: creates (or replaces) the test table, creates its index,
 * loads the JSON resource into the table and waits for the index to sync.
 *
 * <p>Regardless of whether the steps above succeed, the session option
 * {@code planner.disable_full_table_scan} is set to {@code true} afterwards.
 *
 * @throws Exception if any of the setup steps fails
 */
@BeforeClass
public static void setup_TestEncodedFieldPaths() throws Exception {
  try (Table table = DBTests.createOrReplaceTable(TABLE_NAME, ImmutableMap.of("codes", "codes"))) {
    tableCreated = true;
    tablePath = table.getPath().toUri().getPath();

    // NOTE(review): presumably the first array lists the indexed fields and the second the
    // additionally included fields - confirm against DBTests.createIndex.
    final String[] firstFieldGroup = { "age" };
    final String[] secondFieldGroup = { "name.last", "data.salary" };
    DBTests.createIndex(TABLE_NAME, INDEX_NAME, firstFieldGroup, secondFieldGroup);
    DBTests.admin().getTableIndexes(table.getPath(), true);

    // Load every document of the JSON resource into the table and flush it.
    try (final InputStream jsonInput = TestEncodedFieldPaths.class.getResourceAsStream(JSON_FILE_URL);
         final DocumentStream documents = Json.newDocumentStream(jsonInput)) {
      table.insertOrReplace(documents);
      table.flush();
    }

    // wait for the indexes to sync
    DBTests.waitForRowCount(table.getPath(), 5, INDEX_FLUSH_TIMEOUT);
    DBTests.waitForIndexFlush(table.getPath(), INDEX_FLUSH_TIMEOUT);
  } finally {
    test("ALTER SESSION SET `planner.disable_full_table_scan` = true");
  }
}
Aggregations