Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
From the class StatsUtils, method getBasicStatForPartitions.
/**
 * Get basic stats of partitions
 * @param table
 *          - table
 * @param parts
 *          - partitions
 * @param statType
 *          - type of stats
 * @return list of stat values, one entry per partition that has parameters
 */
public static List<Long> getBasicStatForPartitions(Table table, List<Partition> parts, String statType) {
  List<Long> stats = Lists.newArrayList();
  for (Partition part : parts) {
    Map<String, String> params = part.getParameters();
    long result = 0;
    if (params != null) {
      try {
        result = Long.parseLong(params.get(statType));
      } catch (NumberFormatException e) {
        result = 0;
      }
      stats.add(result);
    }
  }
  return stats;
}
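For context, a minimal sketch of how a caller might use this helper to total up row counts across a table's partitions. StatsSetupConst.ROW_COUNT and Hive.getPartitions(Table) are existing Hive APIs, but the wrapper class and totalRows method below are hypothetical illustrations:

import java.util.List;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.stats.StatsUtils;

// Hypothetical caller: sum the "numRows" stat over all partitions of a table.
public final class PartitionRowCount {
  public static long totalRows(Hive db, Table table) throws HiveException {
    List<Partition> parts = db.getPartitions(table);
    // One entry per partition that carries parameters; unparsable values come back as 0.
    List<Long> rowCounts =
        StatsUtils.getBasicStatForPartitions(table, parts, StatsSetupConst.ROW_COUNT);
    long total = 0;
    for (Long count : rowCounts) {
      total += count;
    }
    return total;
  }
}

Note that partitions whose parameters map is null contribute no entry at all, so the returned list may be shorter than the partition list.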
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
From the class SemanticAnalyzer, method doPhase1.
/**
 * Phase 1: (including, but not limited to):
 *
 * 1. Gets all the aliases for all the tables / subqueries and makes the
 *    appropriate mapping in aliasToTabs, aliasToSubq
 * 2. Gets the location of the destination and names the clause "inclause" + i
 * 3. Creates a map from a string representation of an aggregation tree to the
 *    actual aggregation AST
 * 4. Creates a mapping from the clause name to the select expression AST in
 *    destToSelExpr
 * 5. Creates a mapping from a table alias to the lateral view AST's in
 *    aliasToLateralViews
 *
 * @param ast
 * @param qb
 * @param ctx_1
 * @param plannerCtx
 * @throws SemanticException
 */
@SuppressWarnings({ "fallthrough", "nls" })
public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) throws SemanticException {
  boolean phase1Result = true;
  QBParseInfo qbp = qb.getParseInfo();
  boolean skipRecursion = false;
  if (ast.getToken() != null) {
    skipRecursion = true;
    switch (ast.getToken().getType()) {
    case HiveParser.TOK_SELECTDI:
      qb.countSelDi();
      // fall through
    case HiveParser.TOK_SELECT:
      qb.countSel();
      qbp.setSelExprForClause(ctx_1.dest, ast);
      int posn = 0;
      if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
        ParseDriver pd = new ParseDriver();
        String queryHintStr = ast.getChild(0).getText();
        if (LOG.isDebugEnabled()) {
          LOG.debug("QUERY HINT: " + queryHintStr);
        }
        try {
          ASTNode hintNode = pd.parseHint(queryHintStr);
          qbp.setHints((ASTNode) hintNode);
          posn++;
        } catch (ParseException e) {
          throw new SemanticException("failed to parse query hint: " + e.getMessage(), e);
        }
      }
      if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
        queryProperties.setUsesScript(true);
      LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest);
      doPhase1GetColumnAliasesFromSelect(ast, qbp);
      qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
      qbp.setDistinctFuncExprsForClause(ctx_1.dest, doPhase1GetDistinctFuncExprs(aggregations));
      break;
    case HiveParser.TOK_WHERE:
      qbp.setWhrExprForClause(ctx_1.dest, ast);
      if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
        queryProperties.setFilterWithSubQuery(true);
      break;
    case HiveParser.TOK_INSERT_INTO:
      String currentDatabase = SessionState.get().getCurrentDatabase();
      String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
      qbp.addInsertIntoTable(tab_name, ast);
      // fall through to TOK_DESTINATION: an INSERT INTO also sets the destination clause
    case HiveParser.TOK_DESTINATION:
      ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + ctx_1.nextNum;
      ctx_1.nextNum++;
      boolean isTmpFileDest = false;
      if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
        ASTNode ch = (ASTNode) ast.getChild(0);
        if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0 && ch.getChild(0) instanceof ASTNode) {
          ch = (ASTNode) ch.getChild(0);
          isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
        } else {
          if (ast.getToken().getType() == HiveParser.TOK_DESTINATION && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
            String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), SessionState.get().getCurrentDatabase());
            qbp.getInsertOverwriteTables().put(fullTableName, ast);
          }
        }
      }
      // is there an insert in the subquery?
      if (qbp.getIsSubQ() && !isTmpFileDest) {
        throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
      }
      qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
      handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
      if (qbp.getClauseNamesForDest().size() == 2) {
        // From the moment that we have two destination clauses,
        // we know that this is a multi-insert query.
        // Thus, set property to right value.
        // Using qbp.getClauseNamesForDest().size() >= 2 would be
        // equivalent, but we use == to avoid setting the property
        // multiple times
        queryProperties.setMultiDestQuery(true);
      }
      if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
        plannerCtx.setInsertToken(ast, isTmpFileDest);
      } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
        // For multi-insert query, currently we only optimize the FROM clause.
        // Hence, introduce multi-insert token on top of it.
        // However, first we need to reset existing token (insert).
        // Using qbp.getClauseNamesForDest().size() >= 2 would be
        // equivalent, but we use == to avoid setting the property
        // multiple times
        plannerCtx.resetToken();
        plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
      }
      break;
    case HiveParser.TOK_FROM:
      int child_count = ast.getChildCount();
      if (child_count != 1) {
        throw new SemanticException(generateErrorMessage(ast, "Multiple Children " + child_count));
      }
      if (!qbp.getIsSubQ()) {
        qbp.setQueryFromExpr(ast);
      }
      // Check if this is a subquery / lateral view
      ASTNode frm = (ASTNode) ast.getChild(0);
      if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
        processTable(qb, frm);
      } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
        // Create a temp table with the passed values in it then rewrite this portion of the
        // tree to be from that table.
        ASTNode newFrom = genValuesTempTable(frm, qb);
        ast.setChild(0, newFrom);
        processTable(qb, newFrom);
      } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
        processSubQuery(qb, frm);
      } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW || frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
        queryProperties.setHasLateralViews(true);
        processLateralView(qb, frm);
      } else if (isJoinToken(frm)) {
        processJoin(qb, frm);
        qbp.setJoinExpr(frm);
      } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
        queryProperties.setHasPTF(true);
        processPTF(qb, frm);
      }
      break;
    case HiveParser.TOK_CLUSTERBY:
      // Get the clusterby aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasClusterBy(true);
      qbp.setClusterByExprForClause(ctx_1.dest, ast);
      break;
    case HiveParser.TOK_DISTRIBUTEBY:
      // Get the distribute by aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasDistributeBy(true);
      qbp.setDistributeByExprForClause(ctx_1.dest, ast);
      if (qbp.getClusterByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
      } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast, ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
      }
      break;
    case HiveParser.TOK_SORTBY:
      // Get the sort by aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasSortBy(true);
      qbp.setSortByExprForClause(ctx_1.dest, ast);
      if (qbp.getClusterByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
      } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast, ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
      }
      break;
    case HiveParser.TOK_ORDERBY:
      // Get the order by aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasOrderBy(true);
      qbp.setOrderByExprForClause(ctx_1.dest, ast);
      if (qbp.getClusterByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
      }
      break;
    case HiveParser.TOK_GROUPBY:
    case HiveParser.TOK_ROLLUP_GROUPBY:
    case HiveParser.TOK_CUBE_GROUPBY:
    case HiveParser.TOK_GROUPING_SETS:
      // Get the groupby aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasGroupBy(true);
      if (qbp.getJoinExpr() != null) {
        queryProperties.setHasJoinFollowedByGroupBy(true);
      }
      if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
        throw new SemanticException(generateErrorMessage(ast, ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
      }
      qbp.setGroupByExprForClause(ctx_1.dest, ast);
      skipRecursion = true;
      // Rollup and Cubes are syntactic sugar on top of grouping sets
      if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
        qbp.getDestRollups().add(ctx_1.dest);
      } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
        qbp.getDestCubes().add(ctx_1.dest);
      } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
        qbp.getDestGroupingSets().add(ctx_1.dest);
      }
      break;
    case HiveParser.TOK_HAVING:
      qbp.setHavingExprForClause(ctx_1.dest, ast);
      qbp.addAggregationExprsForClause(ctx_1.dest, doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
      break;
    case HiveParser.KW_WINDOW:
      if (!qb.hasWindowingSpec(ctx_1.dest)) {
        throw new SemanticException(generateErrorMessage(ast, "Query has no Cluster/Distribute By; but has a Window definition"));
      }
      handleQueryWindowClauses(qb, ctx_1, ast);
      break;
    case HiveParser.TOK_LIMIT:
      if (ast.getChildCount() == 2) {
        qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText()), new Integer(ast.getChild(1).getText()));
      } else {
        qbp.setDestLimit(ctx_1.dest, new Integer(0), new Integer(ast.getChild(0).getText()));
      }
      break;
    case HiveParser.TOK_ANALYZE:
      // Case of analyze command
      String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
      qb.setTabAlias(table_name, table_name);
      qb.addAlias(table_name);
      qb.getParseInfo().setIsAnalyzeCommand(true);
      qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
      qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
      // Allow analyzing the whole table and dynamic partitions
      HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
      HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
      break;
    case HiveParser.TOK_UNIONALL:
      if (!qbp.getIsSubQ()) {
        // this shouldn't happen: the parser should have converted the union to be
        // contained in a subquery. Just in case, we keep the error as a fallback.
        throw new SemanticException(generateErrorMessage(ast, ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
      }
      skipRecursion = false;
      break;
    case HiveParser.TOK_INSERT:
      ASTNode destination = (ASTNode) ast.getChild(0);
      Tree tab = destination.getChild(0);
      // Proceed if the AST contains a partition spec & IF NOT EXISTS
      if (destination.getChildCount() == 2 && tab.getChildCount() == 2 && destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
        String tableName = tab.getChild(0).getChild(0).getText();
        Tree partitions = tab.getChild(1);
        int childCount = partitions.getChildCount();
        HashMap<String, String> partition = new HashMap<String, String>();
        for (int i = 0; i < childCount; i++) {
          String partitionName = partitions.getChild(i).getChild(0).getText();
          Tree pvalue = partitions.getChild(i).getChild(1);
          if (pvalue == null) {
            break;
          }
          String partitionVal = stripQuotes(pvalue.getText());
          partition.put(partitionName, partitionVal);
        }
        // if it is a dynamic partition, throw the exception
        if (childCount != partition.size()) {
          throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS.getMsg(partition.toString()));
        }
        Table table = null;
        try {
          table = this.getTableObjectByName(tableName);
        } catch (HiveException ex) {
          throw new SemanticException(ex);
        }
        try {
          Partition parMetaData = db.getPartition(table, partition, false);
          // Check if the partition exists; if it does, skip the overwrite
          if (parMetaData != null) {
            phase1Result = false;
            skipRecursion = true;
            LOG.info("Partition already exists so insert into overwrite " + "skipped for partition : " + parMetaData.toString());
            break;
          }
        } catch (HiveException e) {
          LOG.info("Error while getting metadata : ", e);
        }
        validatePartSpec(table, partition, (ASTNode) tab, conf, false);
      }
      skipRecursion = false;
      break;
    case HiveParser.TOK_LATERAL_VIEW:
    case HiveParser.TOK_LATERAL_VIEW_OUTER:
      // todo: nested LV
      assert ast.getChildCount() == 1;
      qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
      break;
    case HiveParser.TOK_CTE:
      processCTE(qb, ast);
      break;
    default:
      skipRecursion = false;
      break;
    }
  }
  if (!skipRecursion) {
    // Iterate over the rest of the children
    int child_count = ast.getChildCount();
    for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
      // Recurse
      phase1Result = phase1Result && doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1, plannerCtx);
    }
  }
  return phase1Result;
}
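Stripped of the Hive-specific handlers, doPhase1 follows a common tree-walking pattern: dispatch on the node's token, prune recursion for tokens that consume their whole subtree, and otherwise recurse over the children while short-circuiting as soon as any subtree reports failure. A minimal, self-contained sketch of that pattern (the Node class, token constants, and Phase1Walker are hypothetical illustrations, not Hive API):

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for the ANTLR ASTNode that doPhase1 walks.
final class Node {
  final int tokenType;
  final List<Node> children = new ArrayList<>();
  Node(int tokenType) { this.tokenType = tokenType; }
}

final class Phase1Walker {
  static final int TOK_GROUPBY = 1; // handled in place, children not recursed into
  static final int TOK_INSERT = 2;  // may abort the whole walk

  boolean walk(Node node) {
    boolean result = true;
    boolean skipRecursion = false;
    switch (node.tokenType) {
    case TOK_GROUPBY:
      // record the clause here; its children are consumed by the handler
      skipRecursion = true;
      break;
    case TOK_INSERT:
      // e.g. IF NOT EXISTS against an existing partition: stop the phase
      result = false;
      skipRecursion = true;
      break;
    default:
      break;
    }
    if (!skipRecursion) {
      for (Node child : node.children) {
        // short-circuit: once any subtree fails, stop descending further
        result = result && walk(child);
        if (!result) {
          break;
        }
      }
    }
    return result;
  }
}

This mirrors how the real method's loop condition (child_pos < child_count && phase1Result) stops the recursion once phase1Result turns false.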
Use of org.apache.hadoop.hive.ql.metadata.Partition in project incubator-atlas by apache.
From the class HiveMetaStoreBridgeTest, method testImportWhenPartitionKeysAreNull.
@Test
public void testImportWhenPartitionKeysAreNull() throws Exception {
  setupDB(hiveClient, TEST_DB_NAME);
  List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
  Table hiveTable = hiveTables.get(0);
  returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
  when(atlasClient.getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
      HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME)))
      .thenReturn(getEntityReference(HiveDataTypes.HIVE_TABLE.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
  String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME, hiveTable);
  when(atlasClient.getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQualifiedName))
      .thenReturn(getEntityReference(HiveDataTypes.HIVE_PROCESS.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
  when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());
  Partition partition = mock(Partition.class);
  when(partition.getTable()).thenReturn(hiveTable);
  List partitionValues = Arrays.asList(new String[] {});
  when(partition.getValues()).thenReturn(partitionValues);
  when(hiveClient.getPartitions(hiveTable)).thenReturn(Arrays.asList(new Partition[] { partition }));
  HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
  try {
    bridge.importHiveMetadata(true);
  } catch (Exception e) {
    Assert.fail("Partition with null key caused import to fail with exception ", e);
  }
}
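As a stripped-down companion to the test above, the sketch below isolates the stubbing pattern it relies on: Mockito can mock the concrete Partition class, and getValues() can be stubbed to return an empty list so a consumer sees zero partition keys rather than a null. The test class and method names here are hypothetical; Mockito and TestNG are assumed on the classpath, matching the test above:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.Partition;
import org.testng.Assert;
import org.testng.annotations.Test;

public class EmptyPartitionValuesTest {

  @Test
  public void emptyValuesListIsHandled() {
    // Mock the concrete Partition class and stub an empty (not null) values list.
    Partition partition = mock(Partition.class);
    when(partition.getValues()).thenReturn(Arrays.asList(new String[] {}));

    List<String> values = partition.getValues();
    // A consumer should treat "no partition keys" as zero iterations,
    // not as a null dereference.
    Assert.assertNotNull(values);
    Assert.assertEquals(values.size(), 0);
  }
}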