Search in sources :

Example 11 with DbGroupScan

use of org.apache.drill.exec.physical.base.DbGroupScan in project drill by apache.

Method convertRowType of the class AbstractIndexPlanGenerator.

/**
 * Produces a row type that contains the row key column of the original scan.
 *
 * <p>If {@code getRowKeyIndex} reports a non-negative index for {@code origRowType},
 * the row key is already present and the incoming row type is returned unchanged.
 * Otherwise a new record type is built from all original fields followed by one
 * extra field, named after the group scan's row key and typed {@code ANY}.
 *
 * @param origRowType the row type to extend
 * @param typeFactory factory used to create the type of the appended row key field
 * @return {@code origRowType} itself, or a new record type with the row key appended last
 */
protected RelDataType convertRowType(RelDataType origRowType, RelDataTypeFactory typeFactory) {
    final boolean rowKeyPresent = getRowKeyIndex(origRowType, origScan) >= 0;
    if (rowKeyPresent) {
        return origRowType;
    }

    // Copy the existing fields so the row key can be appended at the end.
    final List<RelDataTypeField> withRowKey = new ArrayList<>(origRowType.getFieldList());

    final DbGroupScan dbScan = (DbGroupScan) IndexPlanUtils.getGroupScan(origScan);
    final String rowKeyName = dbScan.getRowKeyName();
    // The new field's ordinal is the next free position in the list.
    final int rowKeyOrdinal = withRowKey.size();
    final RelDataType rowKeyType = typeFactory.createSqlType(SqlTypeName.ANY);

    withRowKey.add(new RelDataTypeFieldImpl(rowKeyName, rowKeyOrdinal, rowKeyType));
    return new RelRecordType(withRowKey);
}
Also used : RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) ArrayList(java.util.ArrayList) RelDataTypeFieldImpl(org.apache.calcite.rel.type.RelDataTypeFieldImpl) RelRecordType(org.apache.calcite.rel.type.RelRecordType)

Example 12 with DbGroupScan

use of org.apache.drill.exec.physical.base.DbGroupScan in project drill by apache.

Method initialize of the class MapRDBStatistics.

/**
 * Initializes statistics for the given filter condition over a scan.
 *
 * <p>The row-key join-back I/O factor is always refreshed from the planner settings.
 * Statistics are populated only when the underlying group scan is a {@link DbGroupScan}
 * and {@code statsCache} holds no entry for the string form of the condition.
 *
 * @param condition the filter condition to gather statistics for
 * @param scanRel   the scan relational node the condition applies to
 * @param context   the index call context, forwarded to {@code populateStats}
 * @return {@code true} if {@code populateStats} was invoked by this call, {@code false} otherwise
 */
public boolean initialize(RexNode condition, DrillScanRelBase scanRel, IndexCallContext context) {
    final GroupScan groupScan = IndexPlanUtils.getGroupScan(scanRel);
    final PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(scanRel.getCluster().getPlanner());
    rowKeyJoinBackIOFactor = plannerSettings.getIndexRowKeyJoinCostFactor();

    // Only DB-backed group scans expose secondary indexes.
    if (!(groupScan instanceof DbGroupScan)) {
        return false;
    }

    // Skip the work when this condition already has a cache entry.
    final String conditionKey = convertRexToString(condition, scanRel.getRowType());
    if (statsCache.get(conditionKey) != null) {
        return false;
    }

    final DbGroupScan dbScan = (DbGroupScan) groupScan;
    final IndexCollection secondaryIndexes = dbScan.getSecondaryIndexCollection(scanRel);
    populateStats(condition, secondaryIndexes, scanRel, context);
    logger.info("index_plan_info: initialize: scanRel #{} and groupScan {} got fulltable {}, statsCache: {}, fiStatsCache: {}",
        scanRel.getId(), System.identityHashCode(groupScan), fullTableScanPayload, statsCache, fIStatsCache);
    return true;
}
Also used : DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan)

Example 13 with DbGroupScan

use of org.apache.drill.exec.physical.base.DbGroupScan in project drill by apache.

Method getCost of the class MapRDBIndexDescriptor.

/**
 * Estimates the cost of using this index for the given index properties.
 *
 * <p>The index itself is costed as a sequential read of the blocks holding the leading
 * rows. For a non-covering index, the cost of random block reads for the join-back
 * to the primary table is added on top. CPU cost is charged only when a remainder
 * filter exists; network cost is currently always zero.
 *
 * @param indexProps            properties of the index (row counts, selectivity, row size, covering flag)
 * @param planner               planner supplying the {@code DrillCostFactory}
 * @param numProjectedFields    not used by this implementation
 * @param primaryTableGroupScan group scan of the primary table; must be a {@link DbGroupScan}
 * @return the estimated cost (rows, cpu, disk, network)
 * @throws IllegalArgumentException if {@code primaryTableGroupScan} is not a {@link DbGroupScan}
 */
@Override
public RelOptCost getCost(IndexProperties indexProps, RelOptPlanner planner, int numProjectedFields, GroupScan primaryTableGroupScan) {
    Preconditions.checkArgument(primaryTableGroupScan instanceof DbGroupScan);
    final DbGroupScan dbGroupScan = (DbGroupScan) primaryTableGroupScan;
    final DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();

    final double totalRows = indexProps.getTotalRows();
    final double leadRowCount = indexProps.getLeadingSelectivity() * totalRows;
    final double avgRowSize = indexProps.getAvgRowSize();
    final boolean covering = indexProps.isCovering();

    // Disk I/O on the index: all index columns of the leading rows are read sequentially.
    final double indexBlocks = Math.ceil((leadRowCount * avgRowSize) / pluginCost.getBlockSize(primaryTableGroupScan));
    final double indexDiskCost = indexBlocks * pluginCost.getSequentialBlockReadCost(primaryTableGroupScan);

    double diskCost = indexDiskCost;
    if (!covering) {
        // For the primary table join-back each row may belong to a different block, so in
        // general num_blocks = num_rows; however, num_blocks cannot exceed the total number
        // of blocks of the table.
        final double primaryTotalBlocks = Math.ceil((dbGroupScan.getColumns().size() * pluginCost.getAverageColumnSize(primaryTableGroupScan) * totalRows) / pluginCost.getBlockSize(primaryTableGroupScan));
        final double primaryBlocksRead = Math.min(primaryTotalBlocks, leadRowCount);
        final double primaryDiskCost = primaryBlocksRead * pluginCost.getRandomBlockReadCost(primaryTableGroupScan);
        diskCost = indexDiskCost + primaryDiskCost;
    }

    // CPU cost is the cost of evaluating the remainder condition over the selected rows.
    final double cpuCost = indexProps.getTotalRemainderFilter() != null
        ? leadRowCount * DrillCostBase.COMPARE_CPU_COST
        : 0.0;

    // TODO: add network cost once full table scan also considers network cost
    final double networkCost = 0.0;

    return costFactory.makeCost(leadRowCount, cpuCost, diskCost, networkCost);
}
Also used : DrillCostFactory(org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan)

Example 14 with DbGroupScan

use of org.apache.drill.exec.physical.base.DbGroupScan in project drill by apache.

Method convertRowTypeForIndexScan of the class FunctionalIndexHelper.

/**
 * Builds the row type for an index scan in the non-covering case and updates the
 * columns of the index group scan to match.
 *
 * <p>The resulting row type starts with the row key of the primary table, followed by
 * one field per indexed path collected from {@code idxMarker}: plain {@link SchemaPath}
 * expressions are taken as they are, and {@link CastExpression}s are mapped through
 * {@code functionInfo.getNewPathFromExpr} (skipped when that returns {@code null}).
 * All fields are typed {@code ANY}. The row type only carries the root segment of each
 * path, not the whole path.
 *
 * <p>Side effect: {@code idxScan.setColumns} is called with the paths parsed from the
 * field names.
 *
 * @param origScan     the scan on the primary table; its group scan is cast to {@link DbGroupScan}
 * @param idxMarker    the IndexableExprMarker that has analyzed the original index condition on top of the index scan
 * @param idxScan      the index group scan whose columns are replaced
 * @param functionInfo functional index info used to resolve cast expressions to schema paths
 * @return the new record type for the index scan
 */
public static RelDataType convertRowTypeForIndexScan(DrillScanRelBase origScan, IndexableExprMarker idxMarker, IndexGroupScan idxScan, FunctionalIndexInfo functionInfo) {
    final RelDataTypeFactory typeFactory = origScan.getCluster().getTypeFactory();
    final List<RelDataTypeField> indexFields = new ArrayList<>();
    final Set<SchemaPath> indexedPaths = new LinkedHashSet<>();
    final DbGroupScan primaryScan = (DbGroupScan) IndexPlanUtils.getGroupScan(origScan);

    // The row key of the primary table always comes first.
    indexFields.add(new RelDataTypeFieldImpl(primaryScan.getRowKeyName(), indexFields.size(), typeFactory.createSqlType(SqlTypeName.ANY)));

    // Collect the distinct indexed paths, preserving encounter order.
    for (LogicalExpression expr : idxMarker.getIndexableExpression().values()) {
        if (expr instanceof SchemaPath) {
            indexedPaths.add((SchemaPath) expr);
            continue;
        }
        if (expr instanceof CastExpression) {
            final SchemaPath castPath = functionInfo.getNewPathFromExpr(expr);
            if (castPath != null) {
                indexedPaths.add(castPath);
            }
        }
    }

    for (SchemaPath indexedPath : indexedPaths) {
        indexFields.add(new RelDataTypeFieldImpl(indexedPath.getRootSegmentPath(), indexFields.size(), typeFactory.createSqlType(SqlTypeName.ANY)));
    }

    // Derive both the group scan columns and the final row type fields from the field names.
    final Set<RelDataTypeField> rowTypeFields = new LinkedHashSet<>();
    final List<SchemaPath> scanColumns = new ArrayList<>();
    for (RelDataTypeField field : indexFields) {
        final SchemaPath column = SchemaPath.parseFromString(field.getName());
        // The row type does not take the whole path, only the root segment of the SchemaPath.
        rowTypeFields.add(new RelDataTypeFieldImpl(column.getRootSegmentPath(), rowTypeFields.size(), typeFactory.createSqlType(SqlTypeName.ANY)));
        scanColumns.add(column);
    }
    idxScan.setColumns(scanColumns);

    return new RelRecordType(new ArrayList<>(rowTypeFields));
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) RelRecordType(org.apache.calcite.rel.type.RelRecordType) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) SchemaPath(org.apache.drill.common.expression.SchemaPath) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) RelDataTypeFieldImpl(org.apache.calcite.rel.type.RelDataTypeFieldImpl) CastExpression(org.apache.drill.common.expression.CastExpression) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

DbGroupScan (org.apache.drill.exec.physical.base.DbGroupScan)14 RexNode (org.apache.calcite.rex.RexNode)7 RelNode (org.apache.calcite.rel.RelNode)6 ArrayList (java.util.ArrayList)5 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)5 SchemaPath (org.apache.drill.common.expression.SchemaPath)5 ScanPrel (org.apache.drill.exec.planner.physical.ScanPrel)4 RelTraitSet (org.apache.calcite.plan.RelTraitSet)3 RelDataType (org.apache.calcite.rel.type.RelDataType)3 RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory)3 GroupScan (org.apache.drill.exec.physical.base.GroupScan)3 IndexDescriptor (org.apache.drill.exec.planner.index.IndexDescriptor)3 DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel)3 DrillDistributionTrait (org.apache.drill.exec.planner.physical.DrillDistributionTrait)3 PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)3 ProjectPrel (org.apache.drill.exec.planner.physical.ProjectPrel)3 RelCollation (org.apache.calcite.rel.RelCollation)2 RelDataTypeFieldImpl (org.apache.calcite.rel.type.RelDataTypeFieldImpl)2 RelRecordType (org.apache.calcite.rel.type.RelRecordType)2 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)2