
Example 6 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache: class MapOperator, method cloneConfsForColPruning.

/**
 * For each source table, combine the nested column pruning information from all its
 * table scan descriptors and set it in a copy of the configuration. This is necessary
 * because the column projection properties are set per table, so a single shared
 * configuration cannot serve all the tables.
 */
private Map<String, Configuration> cloneConfsForColPruning(Configuration hconf) {
    Map<String, Configuration> tableNameToConf = new HashMap<>();
    for (Map.Entry<Path, List<String>> e : conf.getPathToAliases().entrySet()) {
        List<String> aliases = e.getValue();
        if (aliases == null || aliases.isEmpty()) {
            continue;
        }
        String tableName = conf.getPathToPartitionInfo().get(e.getKey()).getTableName();
        if (tableNameToConf.containsKey(tableName)) {
            continue;
        }
        for (String alias : aliases) {
            Operator<?> rootOp = conf.getAliasToWork().get(alias);
            if (!(rootOp instanceof TableScanOperator)) {
                continue;
            }
            TableScanDesc tableScanDesc = ((TableScanOperator) rootOp).getConf();
            List<String> nestedColumnPaths = tableScanDesc.getNeededNestedColumnPaths();
            if (nestedColumnPaths == null || nestedColumnPaths.isEmpty()) {
                continue;
            }
            if (!tableNameToConf.containsKey(tableName)) {
                Configuration clonedConf = new Configuration(hconf);
                clonedConf.unset(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
                clonedConf.unset(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
                clonedConf.unset(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
                tableNameToConf.put(tableName, clonedConf);
            }
            Configuration newConf = tableNameToConf.get(tableName);
            ColumnProjectionUtils.appendReadColumns(newConf, tableScanDesc.getNeededColumnIDs(),
                tableScanDesc.getOutputColumnNames(), tableScanDesc.getNeededNestedColumnPaths());
        }
    }
    // Assign tables without nested column pruning info to the default conf
    for (PartitionDesc pd : conf.getPathToPartitionInfo().values()) {
        if (!tableNameToConf.containsKey(pd.getTableName())) {
            tableNameToConf.put(pd.getTableName(), hconf);
        }
    }
    for (PartitionDesc pd : conf.getAliasToPartnInfo().values()) {
        if (!tableNameToConf.containsKey(pd.getTableName())) {
            tableNameToConf.put(pd.getTableName(), hconf);
        }
    }
    return tableNameToConf;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ArrayList(java.util.ArrayList) List(java.util.List) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) Map(java.util.Map)
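
The per-table cloning above relies on ordinary Hadoop Configuration behavior: the copy constructor seeds each clone with the shared conf's properties, and unsetting the projection keys gives that table a clean slate before its own columns are appended. A minimal, self-contained sketch of those two operations, using a made-up property name rather than the real ColumnProjectionUtils keys:

import org.apache.hadoop.conf.Configuration;

public class PerTableConfSketch {
    public static void main(String[] args) {
        // Shared conf with a projection-style property already set.
        Configuration shared = new Configuration(false);
        shared.set("example.read.column.names", "a,b,c");
        // Copy constructor: the clone starts from the shared properties...
        Configuration perTable = new Configuration(shared);
        // ...but clearing and re-setting the key only affects the clone.
        perTable.unset("example.read.column.names");
        perTable.set("example.read.column.names", "x,y");
        // Prints a,b,c then x,y: the shared conf is untouched.
        System.out.println(shared.get("example.read.column.names"));
        System.out.println(perTable.get("example.read.column.names"));
    }
}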

Example 7 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache: class SparkPlanGenerator, method generateMapInput.

@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
    JobConf jobConf = cloneJobConf(mapWork);
    Class ifClass = getInputFormat(jobConf, mapWork);
    sc.sc().setCallSite(CallSite.apply(mapWork.getName(), ""));
    JavaPairRDD<WritableComparable, Writable> hadoopRDD;
    if (mapWork.getNumMapTasks() != null) {
        jobConf.setNumMapTasks(mapWork.getNumMapTasks());
        hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class, mapWork.getNumMapTasks());
    } else {
        hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
    }
    boolean toCache = false;
    String tables = mapWork.getAllRootOperators().stream()
        .filter(op -> op instanceof TableScanOperator)
        .map(ts -> ((TableScanDesc) ts.getConf()).getAlias())
        .collect(Collectors.joining(", "));
    String rddName = mapWork.getName() + " (" + tables + ", " + hadoopRDD.getNumPartitions() + (toCache ? ", cached)" : ")");
    // Caching is disabled for MapInput due to HIVE-8920
    MapInput result = new MapInput(sparkPlan, hadoopRDD, toCache, rddName, mapWork);
    return result;
}
Also used : StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) FileSystem(org.apache.hadoop.fs.FileSystem) CallSite(org.apache.spark.util.CallSite) LoggerFactory(org.slf4j.LoggerFactory) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) WritableComparable(org.apache.hadoop.io.WritableComparable) HashMap(java.util.HashMap) Writable(org.apache.hadoop.io.Writable) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) ExecReducer(org.apache.hadoop.hive.ql.exec.mr.ExecReducer) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) Logger(org.slf4j.Logger) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) SessionState(org.apache.hadoop.hive.ql.session.SessionState) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) JavaUtils(org.apache.hadoop.hive.common.JavaUtils) MergeFileWork(org.apache.hadoop.hive.ql.io.merge.MergeFileWork) Operator(org.apache.hadoop.hive.ql.exec.Operator) ExecMapper(org.apache.hadoop.hive.ql.exec.mr.ExecMapper) JobConf(org.apache.hadoop.mapred.JobConf) StatsFactory(org.apache.hadoop.hive.ql.stats.StatsFactory) List(java.util.List) SparkEdgeProperty(org.apache.hadoop.hive.ql.plan.SparkEdgeProperty) MergeFileOutputFormat(org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) Preconditions(com.google.common.base.Preconditions) FileOutputFormat(org.apache.hadoop.mapred.FileOutputFormat) MergeFileMapper(org.apache.hadoop.hive.ql.io.merge.MergeFileMapper) ErrorMsg(org.apache.hadoop.hive.ql.ErrorMsg) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
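
The RDD name above is built by streaming over the map work's root operators, keeping only the table scans, and joining their aliases. The naming pattern itself is plain Java; a small stand-alone sketch, with hypothetical aliases and work name in place of the real MapWork and TableScanDesc objects:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class RddNameSketch {
    public static void main(String[] args) {
        // Stand-ins for the aliases that would come from each TableScanDesc.
        List<String> aliases = Arrays.asList("orders", "customers");
        // Caching stays off, as in generateMapInput; the partition count would
        // normally come from hadoopRDD.getNumPartitions().
        boolean toCache = false;
        int numPartitions = 4;
        String tables = aliases.stream().collect(Collectors.joining(", "));
        String rddName = "Map 1" + " (" + tables + ", " + numPartitions
                + (toCache ? ", cached)" : ")");
        // Prints: Map 1 (orders, customers, 4)
        System.out.println(rddName);
    }
}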

Example 8 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache: class TestGenTezWork, method setUp.

/**
 * @throws java.lang.Exception
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
    // Init conf
    final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
    SessionState.start(conf);
    // Init parse context
    final ParseContext pctx = new ParseContext();
    pctx.setContext(new Context(conf));
    ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST, new ArrayList<Task<?>>(), Collections.EMPTY_SET, Collections.EMPTY_SET);
    proc = new GenTezWork(new GenTezUtils() {

        @Override
        protected void setupMapWork(MapWork mapWork, GenTezProcContext context, PrunedPartitionList partitions, TableScanOperator root, String alias) throws SemanticException {
            LinkedHashMap<String, Operator<? extends OperatorDesc>> map = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
            map.put("foo", root);
            mapWork.setAliasToWork(map);
            return;
        }
    });
    CompilationOpContext cCtx = new CompilationOpContext();
    fs = new FileSinkOperator(cCtx);
    fs.setConf(new FileSinkDesc());
    rs = new ReduceSinkOperator(cCtx);
    rs.setConf(new ReduceSinkDesc());
    TableDesc tableDesc = new TableDesc();
    tableDesc.setProperties(new Properties());
    rs.getConf().setKeySerializeInfo(tableDesc);
    ts = new TableScanOperator(cCtx);
    ts.setConf(new TableScanDesc(null));
    ts.getChildOperators().add(rs);
    rs.getParentOperators().add(ts);
    rs.getChildOperators().add(fs);
    fs.getParentOperators().add(rs);
    ctx.preceedingWork = null;
    ctx.currentRootOperator = ts;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Before(org.junit.Before)
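
The fixture wires a minimal TableScanOperator -> ReduceSinkOperator -> FileSinkOperator pipeline by hand and points ctx.currentRootOperator at the table scan, so GenTezWork has a root from which to build a MapWork. The overridden setupMapWork simply registers that root under the alias "foo", which presumably keeps the test independent of partition pruning and the rest of the real map-work setup.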

Example 9 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache: class TestSharedWorkOptimizer, method getTsOp.

private TableScanOperator getTsOp() {
    Table tblMetadata = new Table("db", "table");
    TableScanDesc desc = new TableScanDesc("alias_" + cCtx.nextOperatorId(), tblMetadata);
    Operator<TableScanDesc> ts = OperatorFactory.get(cCtx, desc);
    return (TableScanOperator) ts;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc)
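
The helper returns a fresh TableScanOperator over the same in-memory "db"/"table" metadata on every call, with a unique alias drawn from the test's CompilationOpContext (the cCtx field, which is not part of this snippet); presumably the shared-work optimizer tests call it repeatedly to obtain structurally identical scans and then check whether the optimizer merges them.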

Example 10 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache: class TestOperatorSignature, method getTsOp.

private Operator<TableScanDesc> getTsOp(int i) {
    Table tblMetadata = new Table("db", "table");
    TableScanDesc desc = new TableScanDesc("alias_" + cCtx.nextOperatorId(), tblMetadata);
    List<ExprNodeDesc> as = Lists.newArrayList(
        new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(i)),
        new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c1", "aa", false));
    ExprNodeGenericFuncDesc f1 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, udf, as);
    desc.setFilterExpr(f1);
    Operator<TableScanDesc> ts = OperatorFactory.get(cCtx, desc);
    return ts;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) Table(org.apache.hadoop.hive.ql.metadata.Table) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
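
Compared with the previous helper, this one also attaches a filter expression to the scan: a generic function (the udf field of the test class, not shown here) applied to the integer constant i and the column c1. Parameterizing on i presumably lets the operator-signature tests build table scans that differ only in that constant, so they can verify that the filter expression is reflected in the computed signature.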

Aggregations

TableScanDesc (org.apache.hadoop.hive.ql.plan.TableScanDesc) 28
ArrayList (java.util.ArrayList) 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 12
Table (org.apache.hadoop.hive.ql.metadata.Table) 8
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 7
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) 7
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 6
HashMap (java.util.HashMap) 5
LinkedHashMap (java.util.LinkedHashMap) 4
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema) 4
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) 4
Serializable (java.io.Serializable) 3
List (java.util.List) 3
Map (java.util.Map) 3
Path (org.apache.hadoop.fs.Path) 3
HiveConf (org.apache.hadoop.hive.conf.HiveConf) 3
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 3
Operator (org.apache.hadoop.hive.ql.exec.Operator) 3
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 3