Use of org.apache.hadoop.hive.ql.metadata.Table in project hive by apache.
The class TextMetaDataFormatter, method showTableStatus.
@Override
public void showTableStatus(DataOutputStream outStream, Hive db, HiveConf conf, List<Table> tbls, Map<String, String> part, Partition par) throws HiveException {
  try {
    Iterator<Table> iterTables = tbls.iterator();
    while (iterTables.hasNext()) {
      // create a row per table name
      Table tbl = iterTables.next();
      String tableName = tbl.getTableName();
      String tblLoc = null;
      String inputFormattCls = null;
      String outputFormattCls = null;
      if (part != null) {
        if (par != null) {
          if (par.getLocation() != null) {
            tblLoc = par.getDataLocation().toString();
          }
          inputFormattCls = par.getInputFormatClass().getName();
          outputFormattCls = par.getOutputFormatClass().getName();
        }
      } else {
        if (tbl.getPath() != null) {
          tblLoc = tbl.getDataLocation().toString();
        }
        inputFormattCls = tbl.getInputFormatClass().getName();
        outputFormattCls = tbl.getOutputFormatClass().getName();
      }
      String owner = tbl.getOwner();
      List<FieldSchema> cols = tbl.getCols();
      String ddlCols = MetaStoreUtils.getDDLFromFieldSchema("columns", cols);
      boolean isPartitioned = tbl.isPartitioned();
      String partitionCols = "";
      if (isPartitioned) {
        partitionCols = MetaStoreUtils.getDDLFromFieldSchema("partition_columns", tbl.getPartCols());
      }
      outStream.write(("tableName:" + tableName).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("owner:" + owner).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("location:" + tblLoc).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("inputformat:" + inputFormattCls).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("outputformat:" + outputFormattCls).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("columns:" + ddlCols).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("partitioned:" + isPartitioned).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("partitionColumns:" + partitionCols).getBytes("UTF-8"));
      outStream.write(terminator);
      // output file system information
      Path tblPath = tbl.getPath();
      List<Path> locations = new ArrayList<Path>();
      if (isPartitioned) {
        if (par == null) {
          for (Partition curPart : db.getPartitions(tbl)) {
            if (curPart.getLocation() != null) {
              locations.add(new Path(curPart.getLocation()));
            }
          }
        } else {
          if (par.getLocation() != null) {
            locations.add(new Path(par.getLocation()));
          }
        }
      } else {
        if (tblPath != null) {
          locations.add(tblPath);
        }
      }
      if (!locations.isEmpty()) {
        writeFileSystemStats(outStream, conf, locations, tblPath, false, 0);
      }
      outStream.write(terminator);
    }
  } catch (IOException e) {
    throw new HiveException(e);
  }
}
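Each metadata field above is written as a "key:value" line followed by the formatter's terminator byte. A minimal sketch of that write pattern factored into a helper; writeKeyValue is hypothetical and not part of the Hive formatter API, it only illustrates the pattern the method repeats:

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

// Hypothetical helper showing the "key:value" + terminator pattern used by showTableStatus above.
static void writeKeyValue(DataOutputStream out, String key, String value, byte terminator)
    throws IOException {
  out.write((key + ":" + value).getBytes(StandardCharsets.UTF_8));
  out.write(terminator);
}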
Use of org.apache.hadoop.hive.ql.metadata.Table in project hive by apache.
The class DynamicPartitionPruningOptimization, method generateSemiJoinOperatorPlan.
// Generates plan for min/max when dynamic partition pruning is ruled out.
private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContext parseContext, TableScanOperator ts, String keyBaseAlias) throws SemanticException {
  // we will put a fork in the plan at the source of the reduce sink
  Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
  // we need the expr that generated the key of the reduce sink
  ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
  String internalColName = null;
  ExprNodeDesc exprNodeDesc = key;
  // Find the ExprNodeColumnDesc
  while (!(exprNodeDesc instanceof ExprNodeColumnDesc) && (exprNodeDesc.getChildren() != null)) {
    exprNodeDesc = exprNodeDesc.getChildren().get(0);
  }
  if (!(exprNodeDesc instanceof ExprNodeColumnDesc)) {
    // Bail out
    return false;
  }
  internalColName = ((ExprNodeColumnDesc) exprNodeDesc).getColumn();
  if (parentOfRS instanceof SelectOperator) {
    // Make sure the semijoin branch is not on partition column.
    ExprNodeDesc expr = parentOfRS.getColumnExprMap().get(internalColName);
    while (!(expr instanceof ExprNodeColumnDesc) && (expr.getChildren() != null)) {
      expr = expr.getChildren().get(0);
    }
    if (!(expr instanceof ExprNodeColumnDesc)) {
      // Bail out
      return false;
    }
    ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc) expr;
    String colName = ExprNodeDescUtils.extractColName(colExpr);
    // Fetch the TableScan Operator.
    Operator<?> op = parentOfRS.getParentOperators().get(0);
    while (op != null && !(op instanceof TableScanOperator)) {
      op = op.getParentOperators().get(0);
    }
    assert op != null;
    Table table = ((TableScanOperator) op).getConf().getTableMetadata();
    if (table.isPartitionKey(colName)) {
      // The column is partition column, skip the optimization.
      return false;
    }
  }
  List<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
  keyExprs.add(key);
  // group by requires "ArrayList", don't ask.
  ArrayList<String> outputNames = new ArrayList<String>();
  outputNames.add(HiveConf.getColumnInternalName(0));
  // project the relevant key column
  SelectDesc select = new SelectDesc(keyExprs, outputNames);
  // Create the new RowSchema for the projected column
  ColumnInfo columnInfo = parentOfRS.getSchema().getColumnInfo(internalColName);
  ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
  signature.add(columnInfo);
  RowSchema rowSchema = new RowSchema(signature);
  // Create the column expr map
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  ExprNodeDesc exprNode = null;
  if (parentOfRS.getColumnExprMap() != null) {
    exprNode = parentOfRS.getColumnExprMap().get(internalColName).clone();
  } else {
    exprNode = new ExprNodeColumnDesc(columnInfo);
  }
  if (exprNode instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc encd = (ExprNodeColumnDesc) exprNode;
    encd.setColumn(internalColName);
  }
  colExprMap.put(internalColName, exprNode);
  // Create the Select Operator
  SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select, rowSchema, colExprMap, parentOfRS);
  // do a group by to aggregate min,max and bloom filter.
  float groupByMemoryUsage = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
  float memoryThreshold = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
  ArrayList<ExprNodeDesc> groupByExprs = new ArrayList<ExprNodeDesc>();
  // Add min/max and bloom filter aggregations
  List<ObjectInspector> aggFnOIs = new ArrayList<ObjectInspector>();
  aggFnOIs.add(key.getWritableObjectInspector());
  ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
  params.add(new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), "", false));
  ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
  try {
    AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
    AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
    AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
    GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
    bloomFilterEval.setSourceOperator(selectOp);
    bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
    bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
    aggs.add(min);
    aggs.add(max);
    aggs.add(bloomFilter);
  } catch (SemanticException e) {
    LOG.error("Error creating min/max aggregations on key", e);
    throw new IllegalStateException("Error creating min/max aggregations on key", e);
  }
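  // Note: the three aggregations above run in Mode.PARTIAL1, i.e. this hash-mode GroupByOperator
  // produces partial min/max/bloom_filter results on the map side. They are shuffled through the
  // ReduceSinkOperator created below and merged by a second GroupByOperator in Mode.FINAL, so the
  // branch ends up with exactly one global min, max and bloom filter for the join key.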
  // Create the Group by Operator
  ArrayList<String> gbOutputNames = new ArrayList<String>();
  gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
  gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
  gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
  GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggs, false, groupByMemoryUsage, memoryThreshold, null, false, 0, false);
  ArrayList<ColumnInfo> groupbyColInfos = new ArrayList<ColumnInfo>();
  groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(0), key.getTypeInfo(), "", false));
  groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(1), key.getTypeInfo(), "", false));
  groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(2), key.getTypeInfo(), "", false));
  GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, new RowSchema(groupbyColInfos), selectOp);
  groupByOp.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
  // Get the column names of the aggregations for reduce sink
  int colPos = 0;
  ArrayList<ExprNodeDesc> rsValueCols = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < aggs.size() - 1; i++) {
    ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(key.getTypeInfo(), gbOutputNames.get(colPos++), "", false);
    rsValueCols.add(colExpr);
  }
  // Bloom Filter uses binary
  ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo, gbOutputNames.get(colPos++), "", false);
  rsValueCols.add(colExpr);
  // Create the reduce sink operator
  ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID);
  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(groupByOp.getSchema()), groupByOp);
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  rsOp.setColumnExprMap(columnExprMap);
  // Create the final Group By Operator
  ArrayList<AggregationDesc> aggsFinal = new ArrayList<AggregationDesc>();
  try {
    List<ObjectInspector> minFinalFnOIs = new ArrayList<ObjectInspector>();
    List<ObjectInspector> maxFinalFnOIs = new ArrayList<ObjectInspector>();
    List<ObjectInspector> bloomFilterFinalFnOIs = new ArrayList<ObjectInspector>();
    ArrayList<ExprNodeDesc> minFinalParams = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> maxFinalParams = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> bloomFilterFinalParams = new ArrayList<ExprNodeDesc>();
    // Use the expressions from Reduce Sink.
    minFinalFnOIs.add(rsValueCols.get(0).getWritableObjectInspector());
    maxFinalFnOIs.add(rsValueCols.get(1).getWritableObjectInspector());
    bloomFilterFinalFnOIs.add(rsValueCols.get(2).getWritableObjectInspector());
    // Coming from a ReduceSink the aggregations would be in the form VALUE._col0, VALUE._col1
    minFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(0).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(0), "", false));
    maxFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(1).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(1), "", false));
    bloomFilterFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(2).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(2), "", false));
    AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", minFinalFnOIs, false, false), minFinalParams, false, Mode.FINAL);
    AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", maxFinalFnOIs, false, false), maxFinalParams, false, Mode.FINAL);
    AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", bloomFilterFinalFnOIs, false, false), bloomFilterFinalParams, false, Mode.FINAL);
    GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
    bloomFilterEval.setSourceOperator(selectOp);
    bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
    bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
    aggsFinal.add(min);
    aggsFinal.add(max);
    aggsFinal.add(bloomFilter);
  } catch (SemanticException e) {
    LOG.error("Error creating min/max aggregations on key", e);
    throw new IllegalStateException("Error creating min/max aggregations on key", e);
  }
  GroupByDesc groupByDescFinal = new GroupByDesc(GroupByDesc.Mode.FINAL, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggsFinal, false, groupByMemoryUsage, memoryThreshold, null, false, 0, false);
  GroupByOperator groupByOpFinal = (GroupByOperator) OperatorFactory.getAndMakeChild(groupByDescFinal, new RowSchema(rsOp.getSchema()), rsOp);
  groupByOpFinal.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
  // for explain purpose
  if (parseContext.getContext().getExplainConfig() != null && parseContext.getContext().getExplainConfig().isFormatted()) {
    List<String> outputOperators = new ArrayList<>();
    outputOperators.add(groupByOpFinal.getOperatorId());
    rsOp.getConf().setOutputOperators(outputOperators);
  }
  // Create the final Reduce Sink Operator
  ReduceSinkDesc rsDescFinal = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID);
  ReduceSinkOperator rsOpFinal = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDescFinal, new RowSchema(groupByOpFinal.getSchema()), groupByOpFinal);
  rsOpFinal.setColumnExprMap(columnExprMap);
  LOG.debug("DynamicMinMaxPushdown: Saving RS to TS mapping: " + rsOpFinal + ": " + ts);
  parseContext.getRsOpToTsOpMap().put(rsOpFinal, ts);
  // for explain purpose
  if (parseContext.getContext().getExplainConfig() != null && parseContext.getContext().getExplainConfig().isFormatted()) {
    List<String> outputOperators = new ArrayList<>();
    outputOperators.add(ts.getOperatorId());
    rsOpFinal.getConf().setOutputOperators(outputOperators);
  }
  // Save the info that is required at query time to resolve dynamic/runtime values.
  RuntimeValuesInfo runtimeValuesInfo = new RuntimeValuesInfo();
  TableDesc rsFinalTableDesc = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(rsValueCols, "_col"));
  List<String> dynamicValueIDs = new ArrayList<String>();
  dynamicValueIDs.add(keyBaseAlias + "_min");
  dynamicValueIDs.add(keyBaseAlias + "_max");
  dynamicValueIDs.add(keyBaseAlias + "_bloom_filter");
  runtimeValuesInfo.setTableDesc(rsFinalTableDesc);
  runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs);
  runtimeValuesInfo.setColExprs(rsValueCols);
  parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo);
  return true;
}
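The shape of the branch this method builds can be read directly off the operator constructions above; the sketch below is an inferred summary, not output from Hive itself:

// Semijoin-reduction branch built by generateSemiJoinOperatorPlan (sketch):
//
//   parentOfRS (e.g. SelectOperator feeding the original ReduceSink)
//     └─ SEL   projects only the join key column
//         └─ GBY   min / max / bloom_filter, Mode.PARTIAL1 (map-side hash aggregation)
//             └─ RS
//                 └─ GBY   min / max / bloom_filter, Mode.FINAL
//                     └─ RS    registered in rsOpToTsOpMap and rsToRuntimeValuesInfoMap,
//                              pointing at the target TableScanOperator ts
//
// The dynamic value IDs keyBaseAlias + "_min" / "_max" / "_bloom_filter" are what the
// target table scan resolves at runtime to prune the data it reads.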
Use of org.apache.hadoop.hive.ql.metadata.Table in project hive by apache.
The class SemanticAnalyzer, method addDbAndTabToOutputs.
private void addDbAndTabToOutputs(String[] qualifiedTabName, TableType type) throws SemanticException {
  Database database = getDatabase(qualifiedTabName[0]);
  outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_SHARED));
  Table t = new Table(qualifiedTabName[0], qualifiedTabName[1]);
  t.setTableType(type);
  outputs.add(new WriteEntity(t, WriteEntity.WriteType.DDL_NO_LOCK));
}
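The indices used above imply that qualifiedTabName is laid out as {database, table}. A hypothetical call site, purely for illustration:

// Illustrative usage: register write entities for a table being created in the default database.
String[] qualifiedTabName = new String[] { "default", "my_table" };
addDbAndTabToOutputs(qualifiedTabName, TableType.MANAGED_TABLE);
// outputs now contains a DDL_SHARED WriteEntity for the database and a
// DDL_NO_LOCK WriteEntity for the new Table object.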
Use of org.apache.hadoop.hive.ql.metadata.Table in project hive by apache.
The class SemanticAnalyzer, method getTableObjectByName.
private Table getTableObjectByName(String tableName) throws HiveException {
  if (!tabNameToTabObject.containsKey(tableName)) {
    Table table = db.getTable(tableName);
    tabNameToTabObject.put(tableName, table);
    return table;
  } else {
    return tabNameToTabObject.get(tableName);
  }
}
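This is a memoized metastore lookup: the first request for a table name goes through db.getTable (a metastore round trip), later requests are served from the tabNameToTabObject cache. A minimal standalone sketch of the same pattern; the field and method names here are illustrative, not the actual SemanticAnalyzer members:

// Memoized table lookup (sketch). HiveException is checked, so Map.computeIfAbsent
// is not a drop-in replacement for this explicit get/put sequence.
private final Map<String, Table> tableCache = new HashMap<>();

private Table lookupTable(Hive db, String tableName) throws HiveException {
  Table table = tableCache.get(tableName);
  if (table == null) {
    table = db.getTable(tableName);   // metastore call only on a cache miss
    tableCache.put(tableName, table);
  }
  return table;
}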
Use of org.apache.hadoop.hive.ql.metadata.Table in project hive by apache.
The class SemanticAnalyzer, method walkASTMarkTABREF.
private void walkASTMarkTABREF(ASTNode ast, Set<String> cteAlias) throws SemanticException {
  Queue<Node> queue = new LinkedList<>();
  queue.add(ast);
  Map<HivePrivilegeObject, MaskAndFilterInfo> basicInfos = new LinkedHashMap<>();
  while (!queue.isEmpty()) {
    ASTNode astNode = (ASTNode) queue.poll();
    if (astNode.getToken().getType() == HiveParser.TOK_TABREF) {
      int aliasIndex = 0;
      StringBuffer additionalTabInfo = new StringBuffer();
      for (int index = 1; index < astNode.getChildCount(); index++) {
        ASTNode ct = (ASTNode) astNode.getChild(index);
        if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE || ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE || ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) {
          additionalTabInfo.append(ctx.getTokenRewriteStream().toString(ct.getTokenStartIndex(), ct.getTokenStopIndex()));
        } else {
          aliasIndex = index;
        }
      }
      ASTNode tableTree = (ASTNode) (astNode.getChild(0));
      String tabIdName = getUnescapedName(tableTree);
      String alias;
      if (aliasIndex != 0) {
        alias = unescapeIdentifier(astNode.getChild(aliasIndex).getText());
      } else {
        alias = getUnescapedUnqualifiedTableName(tableTree);
      }
      // select * from TAB2 [no masking]
      if (cteAlias.contains(tabIdName)) {
        continue;
      }
      String replacementText = null;
      Table table = null;
      try {
        table = getTableObjectByName(tabIdName);
      } catch (HiveException e) {
        // Table may not be found when materialization of CTE is on.
        LOG.info("Table " + tabIdName + " is not found in walkASTMarkTABREF.");
        continue;
      }
      List<String> colNames = new ArrayList<>();
      List<String> colTypes = new ArrayList<>();
      for (FieldSchema col : table.getAllCols()) {
        colNames.add(col.getName());
        colTypes.add(col.getType());
      }
      basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames), new MaskAndFilterInfo(colTypes, additionalTabInfo.toString(), alias, astNode, table.isView()));
    }
    if (astNode.getChildCount() > 0 && !ignoredTokens.contains(astNode.getToken().getType())) {
      for (Node child : astNode.getChildren()) {
        queue.offer(child);
      }
    }
  }
  List<HivePrivilegeObject> basicPrivObjs = new ArrayList<>();
  basicPrivObjs.addAll(basicInfos.keySet());
  List<HivePrivilegeObject> needRewritePrivObjs = tableMask.applyRowFilterAndColumnMasking(basicPrivObjs);
  if (needRewritePrivObjs != null && !needRewritePrivObjs.isEmpty()) {
    for (HivePrivilegeObject privObj : needRewritePrivObjs) {
      MaskAndFilterInfo info = basicInfos.get(privObj);
      String replacementText = tableMask.create(privObj, info);
      if (replacementText != null) {
        // We don't support masking/filtering against ACID query at the moment
        if (ctx.getIsUpdateDeleteMerge()) {
          throw new SemanticException(ErrorMsg.MASKING_FILTERING_ON_ACID_NOT_SUPPORTED, privObj.getDbname(), privObj.getObjectName());
        }
        tableMask.setNeedsRewrite(true);
        tableMask.addTranslation(info.astNode, replacementText);
      }
    }
  }
}
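The method is a breadth-first walk over the ANTLR AST that collects one HivePrivilegeObject per TOK_TABREF and then asks tableMask which of them carry row-filter or column-masking policies. When tableMask.create returns a non-null replacement, the table reference is rewritten into a generated subquery. The following is a hedged illustration of that effect, not output captured from Hive:

// Original query:   SELECT * FROM employees;
// If a masking policy applies to column "ssn", the TOK_TABREF for "employees" is
// replaced with something along the lines of:
//   (SELECT `id`, `name`, mask(`ssn`) AS `ssn` FROM `default`.`employees`) `employees`
// via tableMask.addTranslation(info.astNode, replacementText); setNeedsRewrite(true)
// signals that the query text must be regenerated and re-analyzed with the rewrites applied.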