Example 81 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class StatsUtils method getBasicStatForPartitions.

/**
   * Get basic stats of partitions
   * @param table
   *          - table
   * @param parts
   *          - partitions
   * @param statType
   *          - type of stats (a key in the partition parameters, e.g. row count)
   * @return list of stat values, one per partition that has parameters
   */
public static List<Long> getBasicStatForPartitions(Table table, List<Partition> parts, String statType) {
    List<Long> stats = Lists.newArrayList();
    for (Partition part : parts) {
        Map<String, String> params = part.getParameters();
        long result = 0;
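        // A missing or unparsable value falls back to 0; partitions with
        // no parameters at all contribute nothing to the returned list.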
        if (params != null) {
            try {
                result = Long.parseLong(params.get(statType));
            } catch (NumberFormatException e) {
                result = 0;
            }
            stats.add(result);
        }
    }
    return stats;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition)
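
For context, a minimal sketch of how a caller might total row counts across partitions with this helper. The method name is illustrative; it assumes the Table and its Partition list are already resolved and that org.apache.hadoop.hive.common.StatsSetupConst is imported (ROW_COUNT is the "numRows" parameter key Hive stores per partition):

// Hypothetical helper (name illustrative): total row count across partitions,
// using the stat key Hive keeps under StatsSetupConst.ROW_COUNT.
public static long getTotalRowCount(Table table, List<Partition> parts) {
    long total = 0;
    for (Long rows : StatsUtils.getBasicStatForPartitions(table, parts, StatsSetupConst.ROW_COUNT)) {
        total += rows;
    }
    return total;
}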

Example 82 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class SemanticAnalyzer method doPhase1.

/**
   * Phase 1: (including, but not limited to):
   *
   * 1. Gets all the aliases for all the tables / subqueries and makes the
   *    appropriate mapping in aliasToTabs, aliasToSubq
   * 2. Gets the location of the destination and names the clause "inclause" + i
   * 3. Creates a map from a string representation of an aggregation tree to the
   *    actual aggregation AST
   * 4. Creates a mapping from the clause name to the select expression AST in
   *    destToSelExpr
   * 5. Creates a mapping from a table alias to the lateral view AST's in
   *    aliasToLateralViews
   *
   * @param ast the AST node currently being walked
   * @param qb the query block being populated
   * @param ctx_1 phase-1 context holding the current clause name and counter
   * @param plannerCtx planner context, used to mark insert/multi-insert tokens
   * @throws SemanticException
   */
@SuppressWarnings({ "fallthrough", "nls" })
public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) throws SemanticException {
    boolean phase1Result = true;
    QBParseInfo qbp = qb.getParseInfo();
    boolean skipRecursion = false;
    if (ast.getToken() != null) {
        skipRecursion = true;
        switch(ast.getToken().getType()) {
            case HiveParser.TOK_SELECTDI:
                qb.countSelDi();
            // fall through
            case HiveParser.TOK_SELECT:
                qb.countSel();
                qbp.setSelExprForClause(ctx_1.dest, ast);
                int posn = 0;
                if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
                    ParseDriver pd = new ParseDriver();
                    String queryHintStr = ast.getChild(0).getText();
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("QUERY HINT: " + queryHintStr);
                    }
                    try {
                        ASTNode hintNode = pd.parseHint(queryHintStr);
                        qbp.setHints(hintNode);
                        posn++;
                    } catch (ParseException e) {
                        throw new SemanticException("failed to parse query hint: " + e.getMessage(), e);
                    }
                }
                if (ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM) {
                    queryProperties.setUsesScript(true);
                }
                LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest);
                doPhase1GetColumnAliasesFromSelect(ast, qbp);
                qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
                qbp.setDistinctFuncExprsForClause(ctx_1.dest, doPhase1GetDistinctFuncExprs(aggregations));
                break;
            case HiveParser.TOK_WHERE:
                qbp.setWhrExprForClause(ctx_1.dest, ast);
                if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty()) {
                    queryProperties.setFilterWithSubQuery(true);
                }
                break;
            case HiveParser.TOK_INSERT_INTO:
                String currentDatabase = SessionState.get().getCurrentDatabase();
                String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
                qbp.addInsertIntoTable(tab_name, ast);
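            // fall through: an INSERT INTO target is also a destination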
            case HiveParser.TOK_DESTINATION:
                ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + ctx_1.nextNum;
                ctx_1.nextNum++;
                boolean isTmpFileDest = false;
                if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
                    ASTNode ch = (ASTNode) ast.getChild(0);
                    if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0 && ch.getChild(0) instanceof ASTNode) {
                        ch = (ASTNode) ch.getChild(0);
                        isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
                    } else {
                        if (ast.getToken().getType() == HiveParser.TOK_DESTINATION && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
                            String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), SessionState.get().getCurrentDatabase());
                            qbp.getInsertOverwriteTables().put(fullTableName, ast);
                        }
                    }
                }
                // is there an insert in the subquery?
                if (qbp.getIsSubQ() && !isTmpFileDest) {
                    throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
                }
                qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
                handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
                if (qbp.getClauseNamesForDest().size() == 2) {
                    // Once there are two destination clauses, we know this is a
                    // multi-insert query, so set the property accordingly.
                    // Using qbp.getClauseNamesForDest().size() >= 2 would be
                    // equivalent, but == avoids setting the property
                    // multiple times.
                    queryProperties.setMultiDestQuery(true);
                }
                if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
                    plannerCtx.setInsertToken(ast, isTmpFileDest);
                } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
                    // For multi-insert query, currently we only optimize the FROM clause.
                    // Hence, introduce multi-insert token on top of it.
                    // However, first we need to reset existing token (insert).
                    // Using qbp.getClauseNamesForDest().size() >= 2 would be
                    // equivalent, but == avoids resetting the token more than once.
                    plannerCtx.resetToken();
                    plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
                }
                break;
            case HiveParser.TOK_FROM:
                int child_count = ast.getChildCount();
                if (child_count != 1) {
                    throw new SemanticException(generateErrorMessage(ast, "Multiple Children " + child_count));
                }
                if (!qbp.getIsSubQ()) {
                    qbp.setQueryFromExpr(ast);
                }
                // Check if this is a subquery / lateral view
                ASTNode frm = (ASTNode) ast.getChild(0);
                if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
                    processTable(qb, frm);
                } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
                    // Create a temp table with the passed values in it then rewrite this portion of the
                    // tree to be from that table.
                    ASTNode newFrom = genValuesTempTable(frm, qb);
                    ast.setChild(0, newFrom);
                    processTable(qb, newFrom);
                } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
                    processSubQuery(qb, frm);
                } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW || frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
                    queryProperties.setHasLateralViews(true);
                    processLateralView(qb, frm);
                } else if (isJoinToken(frm)) {
                    processJoin(qb, frm);
                    qbp.setJoinExpr(frm);
                } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
                    queryProperties.setHasPTF(true);
                    processPTF(qb, frm);
                }
                break;
            case HiveParser.TOK_CLUSTERBY:
                // Get the clusterby aliases - these are aliased to the entries in the
                // select list
                queryProperties.setHasClusterBy(true);
                qbp.setClusterByExprForClause(ctx_1.dest, ast);
                break;
            case HiveParser.TOK_DISTRIBUTEBY:
                // Get the distribute by aliases - these are aliased to the entries in
                // the
                // select list
                queryProperties.setHasDistributeBy(true);
                qbp.setDistributeByExprForClause(ctx_1.dest, ast);
                if (qbp.getClusterByForClause(ctx_1.dest) != null) {
                    throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
                } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
                    throw new SemanticException(generateErrorMessage(ast, ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
                }
                break;
            case HiveParser.TOK_SORTBY:
                // Get the sort by aliases - these are aliased to the entries in the
                // select list
                queryProperties.setHasSortBy(true);
                qbp.setSortByExprForClause(ctx_1.dest, ast);
                if (qbp.getClusterByForClause(ctx_1.dest) != null) {
                    throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
                } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
                    throw new SemanticException(generateErrorMessage(ast, ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
                }
                break;
            case HiveParser.TOK_ORDERBY:
                // Get the order by aliases - these are aliased to the entries in the
                // select list
                queryProperties.setHasOrderBy(true);
                qbp.setOrderByExprForClause(ctx_1.dest, ast);
                if (qbp.getClusterByForClause(ctx_1.dest) != null) {
                    throw new SemanticException(generateErrorMessage(ast, ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
                }
                break;
            case HiveParser.TOK_GROUPBY:
            case HiveParser.TOK_ROLLUP_GROUPBY:
            case HiveParser.TOK_CUBE_GROUPBY:
            case HiveParser.TOK_GROUPING_SETS:
                // Get the groupby aliases - these are aliased to the entries in the
                // select list
                queryProperties.setHasGroupBy(true);
                if (qbp.getJoinExpr() != null) {
                    queryProperties.setHasJoinFollowedByGroupBy(true);
                }
                if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
                    throw new SemanticException(generateErrorMessage(ast, ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
                }
                qbp.setGroupByExprForClause(ctx_1.dest, ast);
                skipRecursion = true;
                // Rollup and Cubes are syntactic sugar on top of grouping sets
                if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
                    qbp.getDestRollups().add(ctx_1.dest);
                } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
                    qbp.getDestCubes().add(ctx_1.dest);
                } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
                    qbp.getDestGroupingSets().add(ctx_1.dest);
                }
                break;
            case HiveParser.TOK_HAVING:
                qbp.setHavingExprForClause(ctx_1.dest, ast);
                qbp.addAggregationExprsForClause(ctx_1.dest, doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
                break;
            case HiveParser.KW_WINDOW:
                if (!qb.hasWindowingSpec(ctx_1.dest)) {
                    throw new SemanticException(generateErrorMessage(ast, "Query has no Cluster/Distribute By; but has a Window definition"));
                }
                handleQueryWindowClauses(qb, ctx_1, ast);
                break;
            case HiveParser.TOK_LIMIT:
                if (ast.getChildCount() == 2) {
                    qbp.setDestLimit(ctx_1.dest, Integer.valueOf(ast.getChild(0).getText()), Integer.valueOf(ast.getChild(1).getText()));
                } else {
                    qbp.setDestLimit(ctx_1.dest, Integer.valueOf(0), Integer.valueOf(ast.getChild(0).getText()));
                }
                break;
            case HiveParser.TOK_ANALYZE:
                // Case of analyze command
                String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
                qb.setTabAlias(table_name, table_name);
                qb.addAlias(table_name);
                qb.getParseInfo().setIsAnalyzeCommand(true);
                qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
                qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
                // Allow analyze the whole table and dynamic partitions
                HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
                HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
                break;
            case HiveParser.TOK_UNIONALL:
                if (!qbp.getIsSubQ()) {
                    // This shouldn't happen: the parser should have converted the union to be
                    // contained in a subquery. Just in case, we keep the error as a fallback.
                    throw new SemanticException(generateErrorMessage(ast, ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
                }
                skipRecursion = false;
                break;
            case HiveParser.TOK_INSERT:
                ASTNode destination = (ASTNode) ast.getChild(0);
                Tree tab = destination.getChild(0);
                // Proceed only if the AST contains a partition spec and IF NOT EXISTS
                if (destination.getChildCount() == 2 && tab.getChildCount() == 2 && destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
                    String tableName = tab.getChild(0).getChild(0).getText();
                    Tree partitions = tab.getChild(1);
                    int childCount = partitions.getChildCount();
                    HashMap<String, String> partition = new HashMap<String, String>();
                    for (int i = 0; i < childCount; i++) {
                        String partitionName = partitions.getChild(i).getChild(0).getText();
                        Tree pvalue = partitions.getChild(i).getChild(1);
                        if (pvalue == null) {
                            break;
                        }
                        String partitionVal = stripQuotes(pvalue.getText());
                        partition.put(partitionName, partitionVal);
                    }
                    // if it is a dynamic partition, throw the exception
                    if (childCount != partition.size()) {
                        throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS.getMsg(partition.toString()));
                    }
                    Table table = null;
                    try {
                        table = this.getTableObjectByName(tableName);
                    } catch (HiveException ex) {
                        throw new SemanticException(ex);
                    }
                    try {
                        Partition parMetaData = db.getPartition(table, partition, false);
                        // Check whether the partition exists; if it does, skip the overwrite
                        if (parMetaData != null) {
                            phase1Result = false;
                            skipRecursion = true;
                            LOG.info("Partition already exists so insert into overwrite " + "skipped for partition : " + parMetaData.toString());
                            break;
                        }
                    } catch (HiveException e) {
                        LOG.info("Error while getting metadata : ", e);
                    }
                    validatePartSpec(table, partition, (ASTNode) tab, conf, false);
                }
                skipRecursion = false;
                break;
            case HiveParser.TOK_LATERAL_VIEW:
            case HiveParser.TOK_LATERAL_VIEW_OUTER:
                // todo: nested LV
                assert ast.getChildCount() == 1;
                qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
                break;
            case HiveParser.TOK_CTE:
                processCTE(qb, ast);
                break;
            default:
                skipRecursion = false;
                break;
        }
    }
    if (!skipRecursion) {
        // Iterate over the rest of the children
        int child_count = ast.getChildCount();
        for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
            // Recurse
            phase1Result = phase1Result && doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1, plannerCtx);
        }
    }
    return phase1Result;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) Tree(org.antlr.runtime.tree.Tree) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
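
For orientation, a minimal sketch of how doPhase1 is typically driven from within SemanticAnalyzer. The surrounding calls, initPhase1Ctx() and getMetaData(), are per the Hive source, though the exact call site varies between versions; treat this as illustrative rather than the verbatim entry point:

// Phase 1 walks the parse tree and populates the QB/QBParseInfo structures;
// a false result means analysis can stop early (e.g. INSERT ... IF NOT EXISTS
// into a partition that already exists, as handled above).
Phase1Ctx ctx_1 = initPhase1Ctx();
if (!doPhase1(ast, qb, ctx_1, plannerCtx)) {
    return false;
}
// Phase 2 then resolves the collected aliases against the metastore.
getMetaData(qb);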

Example 83 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project incubator-atlas by apache.

the class HiveMetaStoreBridgeTest method testImportWhenPartitionKeysAreNull.

@Test
public void testImportWhenPartitionKeysAreNull() throws Exception {
    setupDB(hiveClient, TEST_DB_NAME);
    List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
    Table hiveTable = hiveTables.get(0);
    returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
    when(atlasClient.getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME))).thenReturn(getEntityReference(HiveDataTypes.HIVE_TABLE.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
    String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME, hiveTable);
    when(atlasClient.getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQualifiedName)).thenReturn(getEntityReference(HiveDataTypes.HIVE_PROCESS.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
    when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());
    Partition partition = mock(Partition.class);
    when(partition.getTable()).thenReturn(hiveTable);
    List<String> partitionValues = Arrays.asList(new String[] {});
    when(partition.getValues()).thenReturn(partitionValues);
    when(hiveClient.getPartitions(hiveTable)).thenReturn(Arrays.asList(partition));
    HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
    try {
        bridge.importHiveMetadata(true);
    } catch (Exception e) {
        Assert.fail("Partition with null key caused import to fail with exception ", e);
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) List(java.util.List) AtlasServiceException(org.apache.atlas.AtlasServiceException) JSONException(org.codehaus.jettison.json.JSONException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Test(org.testng.annotations.Test)
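
The essential stubbing in this test is the empty values list, which is what simulates a partition with null/absent partition keys. A pared-down sketch of just that setup, assuming Mockito's static mock/when imports as in the test above plus java.util.Collections:

// An empty getValues() list is what exercises the null-partition-key path.
Partition partition = mock(Partition.class);
when(partition.getTable()).thenReturn(hiveTable);
when(partition.getValues()).thenReturn(Collections.<String>emptyList());
when(hiveClient.getPartitions(hiveTable)).thenReturn(Collections.singletonList(partition));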

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition): 83 uses
Table (org.apache.hadoop.hive.ql.metadata.Table): 48 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 42 uses
ArrayList (java.util.ArrayList): 35 uses
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 23 uses
Path (org.apache.hadoop.fs.Path): 21 uses
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 21 uses
HashMap (java.util.HashMap): 17 uses
LinkedHashMap (java.util.LinkedHashMap): 17 uses
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 16 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 16 uses
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 15 uses
IOException (java.io.IOException): 13 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 13 uses
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 12 uses
FileNotFoundException (java.io.FileNotFoundException): 10 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 10 uses
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 10 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 10 uses
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 10 uses