use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class MapOperator method cloneConfsForColPruning.
/**
 * For each source table, combine the nested column pruning information from all its
 * table scan descriptors and set it in a configuration copy. This is necessary since
 * the configuration properties are set on a per-table basis, so we can't just use a
 * single configuration for all the tables.
 */
private Map<String, Configuration> cloneConfsForColPruning(Configuration hconf) {
  Map<String, Configuration> tableNameToConf = new HashMap<>();
  for (Map.Entry<Path, List<String>> e : conf.getPathToAliases().entrySet()) {
    List<String> aliases = e.getValue();
    if (aliases == null || aliases.isEmpty()) {
      continue;
    }
    String tableName = conf.getPathToPartitionInfo().get(e.getKey()).getTableName();
    if (tableNameToConf.containsKey(tableName)) {
      continue;
    }
    for (String alias : aliases) {
      Operator<?> rootOp = conf.getAliasToWork().get(alias);
      if (!(rootOp instanceof TableScanOperator)) {
        continue;
      }
      TableScanDesc tableScanDesc = ((TableScanOperator) rootOp).getConf();
      List<String> nestedColumnPaths = tableScanDesc.getNeededNestedColumnPaths();
      if (nestedColumnPaths == null || nestedColumnPaths.isEmpty()) {
        continue;
      }
      if (!tableNameToConf.containsKey(tableName)) {
        Configuration clonedConf = new Configuration(hconf);
        clonedConf.unset(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
        clonedConf.unset(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
        clonedConf.unset(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
        tableNameToConf.put(tableName, clonedConf);
      }
      Configuration newConf = tableNameToConf.get(tableName);
      ColumnProjectionUtils.appendReadColumns(newConf, tableScanDesc.getNeededColumnIDs(),
          tableScanDesc.getOutputColumnNames(), tableScanDesc.getNeededNestedColumnPaths());
    }
  }
  // Assign tables without nested column pruning info to the default conf
  for (PartitionDesc pd : conf.getPathToPartitionInfo().values()) {
    if (!tableNameToConf.containsKey(pd.getTableName())) {
      tableNameToConf.put(pd.getTableName(), hconf);
    }
  }
  for (PartitionDesc pd : conf.getAliasToPartnInfo().values()) {
    if (!tableNameToConf.containsKey(pd.getTableName())) {
      tableNameToConf.put(pd.getTableName(), hconf);
    }
  }
  return tableNameToConf;
}
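A minimal standalone sketch of the same per-table idea, using only the public ColumnProjectionUtils API from hive-serde (the table names and column ids below are invented for illustration): each table gets its own Configuration copy with stale projection state cleared, so appending read columns for one table cannot leak into another's readers.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class PerTableConfDemo {
  public static void main(String[] args) {
    Configuration hconf = new Configuration();
    Map<String, Configuration> tableNameToConf = new HashMap<>();
    // Clone the base conf per table and clear inherited projection state,
    // mirroring the unset() calls in cloneConfsForColPruning above.
    for (String tableName : Arrays.asList("db.orders", "db.customers")) {
      Configuration cloned = new Configuration(hconf);
      cloned.unset(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
      cloned.unset(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
      tableNameToConf.put(tableName, cloned);
    }
    // Appending read columns for one table leaves the other untouched.
    ColumnProjectionUtils.appendReadColumns(
        tableNameToConf.get("db.orders"), Arrays.asList(0, 2));
    System.out.println(ColumnProjectionUtils.getReadColumnIDs(
        tableNameToConf.get("db.orders")));     // [0, 2]
    System.out.println(ColumnProjectionUtils.getReadColumnIDs(
        tableNameToConf.get("db.customers")));  // []
  }
}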
use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class SparkPlanGenerator method generateMapInput.
@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
  JobConf jobConf = cloneJobConf(mapWork);
  Class ifClass = getInputFormat(jobConf, mapWork);
  sc.sc().setCallSite(CallSite.apply(mapWork.getName(), ""));
  JavaPairRDD<WritableComparable, Writable> hadoopRDD;
  if (mapWork.getNumMapTasks() != null) {
    jobConf.setNumMapTasks(mapWork.getNumMapTasks());
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class,
        mapWork.getNumMapTasks());
  } else {
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
  }
  boolean toCache = false;
  String tables = mapWork.getAllRootOperators().stream()
      .filter(op -> op instanceof TableScanOperator)
      .map(ts -> ((TableScanDesc) ts.getConf()).getAlias())
      .collect(Collectors.joining(", "));
  String rddName = mapWork.getName() + " (" + tables + ", " + hadoopRDD.getNumPartitions()
      + (toCache ? ", cached)" : ")");
  // Caching is disabled for MapInput due to HIVE-8920
  MapInput result = new MapInput(sparkPlan, hadoopRDD, toCache, rddName, mapWork);
  return result;
}
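For context, a self-contained illustration of how the RDD name above is assembled (the work name and aliases are invented; in generateMapInput they come from the MapWork and its TableScanDesc instances, and caching stays off because of HIVE-8920):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class RddNameDemo {
  public static void main(String[] args) {
    // Stand-ins for the aliases pulled from each TableScanDesc.
    List<String> aliases = Arrays.asList("orders", "lineitem");
    String tables = aliases.stream().collect(Collectors.joining(", "));
    boolean toCache = false; // caching disabled for MapInput (HIVE-8920)
    int numPartitions = 4;   // would come from hadoopRDD.getNumPartitions()
    String rddName = "Map 1 (" + tables + ", " + numPartitions
        + (toCache ? ", cached)" : ")");
    System.out.println(rddName); // prints: Map 1 (orders, lineitem, 4)
  }
}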
use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class TestGenTezWork method setUp.
/**
 * @throws java.lang.Exception
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  // Init conf
  final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
  SessionState.start(conf);
  // Init parse context
  final ParseContext pctx = new ParseContext();
  pctx.setContext(new Context(conf));
  ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST, new ArrayList<Task<?>>(),
      Collections.EMPTY_SET, Collections.EMPTY_SET);
  proc = new GenTezWork(new GenTezUtils() {

    @Override
    protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
        PrunedPartitionList partitions, TableScanOperator root, String alias)
        throws SemanticException {
      LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      map.put("foo", root);
      mapWork.setAliasToWork(map);
    }
  });
  CompilationOpContext cCtx = new CompilationOpContext();
  fs = new FileSinkOperator(cCtx);
  fs.setConf(new FileSinkDesc());
  rs = new ReduceSinkOperator(cCtx);
  rs.setConf(new ReduceSinkDesc());
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  rs.getConf().setKeySerializeInfo(tableDesc);
  ts = new TableScanOperator(cCtx);
  ts.setConf(new TableScanDesc(null));
  ts.getChildOperators().add(rs);
  rs.getParentOperators().add(ts);
  rs.getChildOperators().add(fs);
  fs.getParentOperators().add(rs);
  ctx.preceedingWork = null;
  ctx.currentRootOperator = ts;
}
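A hedged sketch of how a test might drive this fixture (the method body is an approximation, not the verbatim test): invoking the GenTezWork rule on the reduce sink should create the map-side work rooted at ts and register it on the current task.

@Test
public void testCreateMap() throws SemanticException {
  // GenTezWork follows the NodeProcessor-style process(...) contract.
  proc.process(rs, null, ctx, (Object[]) null);
  assertNotNull(ctx.currentTask);
  assertTrue(ctx.rootTasks.contains(ctx.currentTask));
}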
use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class TestSharedWorkOptimizer method getTsOp.
private TableScanOperator getTsOp() {
  Table tblMetadata = new Table("db", "table");
  TableScanDesc desc = new TableScanDesc("alias_" + cCtx.nextOperatorId(), tblMetadata);
  Operator<TableScanDesc> ts = OperatorFactory.get(cCtx, desc);
  return (TableScanOperator) ts;
}
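A hedged sketch of the kind of check this helper enables (the test method below is an illustration, not the actual suite): two operators built this way describe the same underlying table and differ only in their generated alias, which is exactly the shape of plan the shared work optimizer tries to merge.

@Test
public void testTwoScansOverSameTable() {
  TableScanOperator ts1 = getTsOp();
  TableScanOperator ts2 = getTsOp();
  // Same table metadata, different "alias_<n>" aliases.
  assertEquals(ts1.getConf().getTableMetadata().getTableName(),
      ts2.getConf().getTableMetadata().getTableName());
  assertNotEquals(ts1.getConf().getAlias(), ts2.getConf().getAlias());
}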
use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class TestOperatorSignature method getTsOp.
private Operator<TableScanDesc> getTsOp(int i) {
  Table tblMetadata = new Table("db", "table");
  TableScanDesc desc = new TableScanDesc("alias_" + cCtx.nextOperatorId(), tblMetadata);
  List<ExprNodeDesc> as = Lists.newArrayList(
      new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(i)),
      new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c1", "aa", false));
  ExprNodeGenericFuncDesc f1 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, udf, as);
  desc.setFilterExpr(f1);
  Operator<TableScanDesc> ts = OperatorFactory.get(cCtx, desc);
  return ts;
}
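A hedged sketch of how the signature tests can use this helper (checkEquals and checkNotEquals stand for the test class's signature-comparison helpers; treat the exact names as assumptions): scans whose filter expressions embed the same constant should produce equal signatures, while different constants must not.

@Test
public void testTableScanSignatures() {
  // Same filter constant => same operator signature.
  checkEquals(getTsOp(3), getTsOp(3));
  // Different filter constants => different signatures.
  checkNotEquals(getTsOp(3), getTsOp(4));
}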