Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class DDLTask, method dropPartitions:
private void dropPartitions(Hive db, Table tbl, DropTableDesc dropTbl) throws HiveException {
ReplicationSpec replicationSpec = dropTbl.getReplicationSpec();
if (replicationSpec.isInReplicationScope()) {
// In replication scope, drop only those partitions that the replication spec allows this event to replace (judged from the partitions' parameter key values).
for (DropTableDesc.PartSpec partSpec : dropTbl.getPartSpecs()) {
List<Partition> partitions = new ArrayList<>();
try {
db.getPartitionsByExpr(tbl, partSpec.getPartSpec(), conf, partitions);
for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
db.dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
}
} catch (NoSuchObjectException e) {
// ignore NSOE because that means there's nothing to drop.
} catch (Exception e) {
throw new HiveException(e.getMessage(), e);
}
}
return;
}
// ifExists is currently verified in DDLSemanticAnalyzer
List<Partition> droppedParts = db.dropPartitions(dropTbl.getTableName(), dropTbl.getPartSpecs(), PartitionDropOptions.instance().deleteData(true).ifExists(true).purgeData(dropTbl.getIfPurge()));
for (Partition partition : droppedParts) {
console.printInfo("Dropped the partition " + partition.getName());
// We have already locked the table, don't lock the partitions.
addIfAbsentByName(new WriteEntity(partition, WriteEntity.WriteType.DDL_NO_LOCK));
}
}
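Outside of DDLTask, the same Partition calls can be exercised directly against a Hive session handle. The following is a minimal sketch, assuming an available Hive db object; the table name and partition spec are placeholders, not values taken from the snippet above.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class DropPartitionSketch {

  // Hedged sketch: "default.sales" and the ds value are hypothetical.
  public static void dropOnePartition(Hive db) throws HiveException {
    Table tbl = db.getTable("default.sales");
    Map<String, String> spec = new HashMap<>();
    spec.put("ds", "2018-01-01");

    // Resolve the partition without creating it (forceCreate = false).
    Partition p = db.getPartition(tbl, spec, false);
    if (p != null) {
      // deleteData = true also removes the partition's data files, as in the
      // replication branch of dropPartitions above.
      db.dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
    }
  }
}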
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class DDLTask, method renamePartition:
/**
* Rename a partition in a table
*
* @param db
* Database handle in which to rename the partition.
* @param renamePartitionDesc
* Descriptor holding the old partition spec and the new one to rename it to.
* @return Returns 0 when execution succeeds and above 0 if it fails.
* @throws HiveException
*/
private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) throws HiveException {
String tableName = renamePartitionDesc.getTableName();
LinkedHashMap<String, String> oldPartSpec = renamePartitionDesc.getOldPartSpec();
if (!allowOperationInReplicationScope(db, tableName, oldPartSpec, renamePartitionDesc.getReplicationSpec())) {
// No rename: the table is missing (dropped or renamed by a later event in the replication stream),
// or the existing table is newer than our update.
if (LOG.isDebugEnabled()) {
LOG.debug("DDLTask: Rename Partition is skipped as table {} / partition {} is newer than update", tableName, FileUtils.makePartName(new ArrayList<>(oldPartSpec.keySet()), new ArrayList<>(oldPartSpec.values())));
}
return 0;
}
String[] names = Utilities.getDbTableName(tableName);
if (Utils.isBootstrapDumpInProgress(db, names[0])) {
LOG.error("DDLTask: Rename Partition not allowed as bootstrap dump in progress");
throw new HiveException("Rename Partition: Not allowed as bootstrap dump in progress");
}
Table tbl = db.getTable(tableName);
Partition oldPart = db.getPartition(tbl, oldPartSpec, false);
if (oldPart == null) {
String partName = FileUtils.makePartName(new ArrayList<String>(oldPartSpec.keySet()), new ArrayList<String>(oldPartSpec.values()));
throw new HiveException("Rename partition: source partition [" + partName + "] does not exist.");
}
Partition part = db.getPartition(tbl, oldPartSpec, false);
part.setValues(renamePartitionDesc.getNewPartSpec());
db.renamePartition(tbl, oldPartSpec, part);
Partition newPart = db.getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false);
work.getInputs().add(new ReadEntity(oldPart));
// We've already obtained a lock on the table, don't lock the partition too
addIfAbsentByName(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK));
return 0;
}
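Stripped of the DDLTask bookkeeping (replication checks, read/write entities), the rename itself reduces to three calls on the Hive handle. A minimal sketch follows, with a placeholder table name and partition specs:

import java.util.LinkedHashMap;

import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class RenamePartitionSketch {

  // Hedged sketch: table name and ds values are hypothetical.
  public static void rename(Hive db) throws HiveException {
    Table tbl = db.getTable("default.sales");

    LinkedHashMap<String, String> oldSpec = new LinkedHashMap<>();
    oldSpec.put("ds", "2018-01-01");
    LinkedHashMap<String, String> newSpec = new LinkedHashMap<>();
    newSpec.put("ds", "2018-01-02");

    // Fetch the existing partition (forceCreate = false), point it at the
    // new spec values, and let the metastore perform the rename.
    Partition part = db.getPartition(tbl, oldSpec, false);
    if (part == null) {
      throw new HiveException("source partition does not exist");
    }
    part.setValues(newSpec);
    db.renamePartition(tbl, oldSpec, part);
  }
}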
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class SemanticAnalyzer, method doPhase1:
/**
* Phase 1: (including, but not limited to):
*
* 1. Gets all the aliases for all the tables / subqueries and makes the
*    appropriate mapping in aliasToTabs, aliasToSubq
* 2. Gets the location of the destination and names the clause "inclause" + i
* 3. Creates a map from a string representation of an aggregation tree to the
*    actual aggregation AST
* 4. Creates a mapping from the clause name to the select expression AST in
*    destToSelExpr
* 5. Creates a mapping from a table alias to the lateral view AST's in
*    aliasToLateralViews
*
* @param ast
* @param qb
* @param ctx_1
* @throws SemanticException
*/
@SuppressWarnings({ "fallthrough", "nls" })
public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) throws SemanticException {
boolean phase1Result = true;
QBParseInfo qbp = qb.getParseInfo();
boolean skipRecursion = false;
if (ast.getToken() != null) {
skipRecursion = true;
switch(ast.getToken().getType()) {
case HiveParser.TOK_SELECTDI:
qb.countSelDi();
// fall through
case HiveParser.TOK_SELECT:
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
int posn = 0;
if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
ParseDriver pd = new ParseDriver();
String queryHintStr = ast.getChild(0).getText();
if (LOG.isDebugEnabled()) {
LOG.debug("QUERY HINT: " + queryHintStr);
}
try {
ASTNode hintNode = pd.parseHint(queryHintStr);
qbp.setHints(hintNode);
posn++;
} catch (ParseException e) {
throw new SemanticException("failed to parse query hint: " + e.getMessage(), e);
}
}
if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) {
queryProperties.setUsesScript(true);
}
LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest);
doPhase1GetColumnAliasesFromSelect(ast, qbp);
qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
qbp.setDistinctFuncExprsForClause(ctx_1.dest, doPhase1GetDistinctFuncExprs(aggregations));
break;
case HiveParser.TOK_WHERE:
qbp.setWhrExprForClause(ctx_1.dest, ast);
if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty()) {
queryProperties.setFilterWithSubQuery(true);
}
break;
case HiveParser.TOK_INSERT_INTO:
String currentDatabase = SessionState.get().getCurrentDatabase();
String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
qbp.addInsertIntoTable(tab_name, ast);
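// fall through: the INSERT INTO destination is then processed by the TOK_DESTINATION arm below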
case HiveParser.TOK_DESTINATION:
ctx_1.dest = this.ctx.getDestNamePrefix(ast, qb).toString() + ctx_1.nextNum;
ctx_1.nextNum++;
boolean isTmpFileDest = false;
if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
ASTNode ch = (ASTNode) ast.getChild(0);
if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0 && ch.getChild(0) instanceof ASTNode) {
ch = (ASTNode) ch.getChild(0);
isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
} else {
if (ast.getToken().getType() == HiveParser.TOK_DESTINATION && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), SessionState.get().getCurrentDatabase());
qbp.getInsertOverwriteTables().put(fullTableName.toLowerCase(), ast);
qbp.setDestToOpType(ctx_1.dest, true);
}
}
}
// is there an insert in the subquery?
if (qbp.getIsSubQ() && !isTmpFileDest) {
throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
if (qbp.getClauseNamesForDest().size() == 2) {
// From the moment that we have two destination clauses,
// we know that this is a multi-insert query.
// Thus, set property to right value.
// Using qbp.getClauseNamesForDest().size() >= 2 would be
// equivalent, but we use == to avoid setting the property
// multiple times
queryProperties.setMultiDestQuery(true);
}
if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
plannerCtx.setInsertToken(ast, isTmpFileDest);
} else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
// For multi-insert query, currently we only optimize the FROM clause.
// Hence, introduce multi-insert token on top of it.
// However, first we need to reset existing token (insert).
// Using qbp.getClauseNamesForDest().size() >= 2 would be
// equivalent, but we use == to avoid setting the property
// multiple times
plannerCtx.resetToken();
plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
}
break;
case HiveParser.TOK_FROM:
int child_count = ast.getChildCount();
if (child_count != 1) {
throw new SemanticException(generateErrorMessage(ast, "Multiple Children " + child_count));
}
if (!qbp.getIsSubQ()) {
qbp.setQueryFromExpr(ast);
}
// Check if this is a subquery / lateral view
ASTNode frm = (ASTNode) ast.getChild(0);
if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW || frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
queryProperties.setHasLateralViews(true);
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
processJoin(qb, frm);
qbp.setJoinExpr(frm);
} else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
queryProperties.setHasPTF(true);
processPTF(qb, frm);
}
break;
case HiveParser.TOK_CLUSTERBY:
// Get the clusterby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasClusterBy(true);
qbp.setClusterByExprForClause(ctx_1.dest, ast);
break;
case HiveParser.TOK_DISTRIBUTEBY:
// Get the distribute by aliases - these are aliased to the entries in the select list
queryProperties.setHasDistributeBy(true);
qbp.setDistributeByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast, ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_SORTBY:
// Get the sort by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasSortBy(true);
qbp.setSortByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast, ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_ORDERBY:
// Get the order by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasOrderBy(true);
qbp.setOrderByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
}
// If there are aggregations in order by, we need to remember them in qb.
qbp.addAggregationExprsForClause(ctx_1.dest, doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
break;
case HiveParser.TOK_GROUPBY:
case HiveParser.TOK_ROLLUP_GROUPBY:
case HiveParser.TOK_CUBE_GROUPBY:
case HiveParser.TOK_GROUPING_SETS:
// Get the groupby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasGroupBy(true);
if (qbp.getJoinExpr() != null) {
queryProperties.setHasJoinFollowedByGroupBy(true);
}
if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
throw new SemanticException(generateErrorMessage(ast, ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
}
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
// Rollup and Cubes are syntactic sugar on top of grouping sets
if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
qbp.getDestRollups().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
qbp.getDestCubes().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
qbp.getDestGroupingSets().add(ctx_1.dest);
}
break;
case HiveParser.TOK_HAVING:
qbp.setHavingExprForClause(ctx_1.dest, ast);
qbp.addAggregationExprsForClause(ctx_1.dest, doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
break;
case HiveParser.KW_WINDOW:
if (!qb.hasWindowingSpec(ctx_1.dest)) {
throw new SemanticException(generateErrorMessage(ast, "Query has no Cluster/Distribute By; but has a Window definition"));
}
handleQueryWindowClauses(qb, ctx_1, ast);
break;
case HiveParser.TOK_LIMIT:
if (ast.getChildCount() == 2) {
qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText()), new Integer(ast.getChild(1).getText()));
} else {
qbp.setDestLimit(ctx_1.dest, new Integer(0), new Integer(ast.getChild(0).getText()));
}
break;
case HiveParser.TOK_ANALYZE:
// Case of analyze command
String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
qb.setTabAlias(table_name, table_name);
qb.addAlias(table_name);
qb.getParseInfo().setIsAnalyzeCommand(true);
qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
// Allow analyzing the whole table and dynamic partitions
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
break;
case HiveParser.TOK_UNIONALL:
if (!qbp.getIsSubQ()) {
// this shouldn't happen. The parser should have converted the union to be
// contained in a subquery. Just in case, we keep the error as a fallback.
throw new SemanticException(generateErrorMessage(ast, ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
}
skipRecursion = false;
break;
case HiveParser.TOK_INSERT:
ASTNode destination = (ASTNode) ast.getChild(0);
Tree tab = destination.getChild(0);
// Proceed if AST contains partition & If Not Exists
if (destination.getChildCount() == 2 && tab.getChildCount() == 2 && destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
String tableName = tab.getChild(0).getChild(0).getText();
Tree partitions = tab.getChild(1);
int childCount = partitions.getChildCount();
HashMap<String, String> partition = new HashMap<String, String>();
for (int i = 0; i < childCount; i++) {
String partitionName = partitions.getChild(i).getChild(0).getText();
// Convert to lowercase for the comparison
partitionName = partitionName.toLowerCase();
Tree pvalue = partitions.getChild(i).getChild(1);
if (pvalue == null) {
break;
}
String partitionVal = stripQuotes(pvalue.getText());
partition.put(partitionName, partitionVal);
}
// if it is a dynamic partition throw the exception
if (childCount != partition.size()) {
throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS.getMsg(partition.toString()));
}
Table table = null;
try {
table = this.getTableObjectByName(tableName);
} catch (HiveException ex) {
throw new SemanticException(ex);
}
try {
Partition parMetaData = db.getPartition(table, partition, false);
// Check whether the partition exists; if it does, skip the overwrite
if (parMetaData != null) {
phase1Result = false;
skipRecursion = true;
LOG.info("Partition already exists so insert into overwrite " + "skipped for partition : " + parMetaData.toString());
break;
}
} catch (HiveException e) {
LOG.info("Error while getting metadata : ", e);
}
validatePartSpec(table, partition, (ASTNode) tab, conf, false);
}
skipRecursion = false;
break;
case HiveParser.TOK_LATERAL_VIEW:
case HiveParser.TOK_LATERAL_VIEW_OUTER:
// todo: nested LV
assert ast.getChildCount() == 1;
qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
break;
case HiveParser.TOK_CTE:
processCTE(qb, ast);
break;
default:
skipRecursion = false;
break;
}
}
if (!skipRecursion) {
// Iterate over the rest of the children
int child_count = ast.getChildCount();
for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
// Recurse
phase1Result = phase1Result && doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1, plannerCtx);
}
}
return phase1Result;
}
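The method above is essentially a pre-order walk over the parse tree: when the current token is recognized, one arm of the switch handles it; most arms consume their whole subtree (so recursion is skipped), and only a few arms (the default case, TOK_UNIONALL, TOK_INSERT) let the walk continue into the children. Below is a stripped-down sketch of that shape on a stand-in node type; it is not Hive's ASTNode, and the token constants are placeholders.

import java.util.List;

public class Phase1WalkSketch {

  // Stand-in for org.apache.hadoop.hive.ql.parse.ASTNode, just enough for the sketch.
  interface Node {
    // 0 means "no token"
    int tokenType();
    List<Node> children();
  }

  static final int TOK_SELECT = 1;   // handled arm: consumes its subtree
  static final int TOK_UNIONALL = 2; // handled arm: still wants recursion

  static boolean walk(Node node) {
    boolean result = true;
    boolean skipRecursion = false;
    if (node.tokenType() != 0) {
      // Default to skipping recursion once a token is recognized; individual
      // arms opt back in, mirroring doPhase1's skipRecursion flag.
      skipRecursion = true;
      switch (node.tokenType()) {
        case TOK_SELECT:
          // record clause info here; subtree handled, no recursion
          break;
        case TOK_UNIONALL:
          skipRecursion = false;
          break;
        default:
          skipRecursion = false;
          break;
      }
    }
    if (!skipRecursion) {
      // Visit children until one of them reports failure, like phase1Result.
      for (Node child : node.children()) {
        result = result && walk(child);
        if (!result) {
          break;
        }
      }
    }
    return result;
  }
}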
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class PartitionPruner, method getPartitionsFromServer:
private static PrunedPartitionList getPartitionsFromServer(Table tab, final String key, final ExprNodeGenericFuncDesc compactExpr, HiveConf conf, String alias, Set<String> partColsUsedInFilter, boolean isPruningByExactFilter) throws SemanticException {
try {
// Finally, check the filter for non-built-in UDFs. If these are present, we cannot
// do filtering on the server, and have to fall back to client path.
boolean doEvalClientSide = hasUserFunctions(compactExpr);
// Now filter.
List<Partition> partitions = new ArrayList<Partition>();
boolean hasUnknownPartitions = false;
PerfLogger perfLogger = SessionState.getPerfLogger();
if (!doEvalClientSide) {
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
try {
hasUnknownPartitions = Hive.get().getPartitionsByExpr(tab, compactExpr, conf, partitions);
} catch (IMetaStoreClient.IncompatibleMetastoreException ime) {
// TODO: backward compat for Hive <= 0.12. Can be removed later.
LOG.warn("Metastore doesn't support getPartitionsByExpr", ime);
doEvalClientSide = true;
} finally {
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
}
}
if (doEvalClientSide) {
// Either we have user functions, or metastore is old version - filter names locally.
hasUnknownPartitions = pruneBySequentialScan(tab, partitions, compactExpr, conf);
}
// The partitions are "unknown" if the call says so due to the expression evaluator
// returning null for a partition, or if we sent a partial expression to the
// metastore and so some partitions may have no data based on other filters.
return new PrunedPartitionList(tab, key, new LinkedHashSet<Partition>(partitions), new ArrayList<String>(partColsUsedInFilter), hasUnknownPartitions || !isPruningByExactFilter);
} catch (SemanticException e) {
throw e;
} catch (Exception e) {
throw new SemanticException(e);
}
}
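The retrieval call at the heart of the method has a small contract worth spelling out: it both fills the partition list and reports whether the metastore could evaluate the filter exactly. A minimal sketch, assuming the compacted filter expression has already been built elsewhere:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;

public class ServerSidePruneSketch {

  // Hedged sketch: only the retrieval call and its contract are shown; the
  // client-side fallback and PrunedPartitionList construction are omitted.
  static List<Partition> prune(Table tab, ExprNodeGenericFuncDesc filterExpr, HiveConf conf)
      throws Exception {
    List<Partition> partitions = new ArrayList<>();
    // Returns true when the metastore could not evaluate the filter exactly,
    // i.e. the result may contain "unknown" partitions that need re-checking.
    boolean hasUnknown = Hive.get().getPartitionsByExpr(tab, filterExpr, conf, partitions);
    if (hasUnknown) {
      // PartitionPruner reflects this in the PrunedPartitionList it returns.
    }
    return partitions;
  }
}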
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class TaskCompiler, method genTableStats:
private Task<?> genTableStats(ParseContext parseContext, TableScanOperator tableScan, Task currentTask, final HashSet<WriteEntity> outputs) throws HiveException {
Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata().getInputFormatClass();
Table table = tableScan.getConf().getTableMetadata();
List<Partition> partitions = new ArrayList<>();
if (table.isPartitioned()) {
partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
for (Partition partn : partitions) {
LOG.trace("adding part: " + partn);
outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
}
}
TableSpec tableSpec = new TableSpec(table, partitions);
tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
if (inputFormat.equals(OrcInputFormat.class)) {
// For ORC, there is no Tez Job for table stats.
StatsWork columnStatsWork = new StatsWork(table, parseContext.getConf());
columnStatsWork.setFooterScan();
// If partition is specified, get pruned partition list
if (partitions.size() > 0) {
columnStatsWork.addInputPartitions(parseContext.getPrunedPartitions(tableScan).getPartitions());
}
return TaskFactory.get(columnStatsWork);
} else {
BasicStatsWork statsWork = new BasicStatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
StatsWork columnStatsWork = new StatsWork(table, statsWork, parseContext.getConf());
columnStatsWork.collectStatsFromAggregator(tableScan.getConf());
columnStatsWork.setSourceTask(currentTask);
return TaskFactory.get(columnStatsWork);
}
}
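For partitioned tables, the only Partition-specific work in genTableStats is registering each pruned partition as a no-lock write entity. A minimal extraction of that step, assuming the pruned partitions have already been obtained:

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Partition;

public class StatsOutputsSketch {

  // Hedged sketch: mirrors the loop above, registering each partition of the
  // scanned table as a DDL_NO_LOCK output for the stats task.
  static Set<WriteEntity> toOutputs(Iterable<Partition> prunedPartitions) {
    Set<WriteEntity> outputs = new HashSet<>();
    for (Partition partn : prunedPartitions) {
      outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
    }
    return outputs;
  }
}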