
Example 31 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class TextMetaDataFormatter method showTableStatus.

@Override
public void showTableStatus(DataOutputStream outStream, Hive db, HiveConf conf, List<Table> tbls, Map<String, String> part, Partition par) throws HiveException {
    try {
        Iterator<Table> iterTables = tbls.iterator();
        while (iterTables.hasNext()) {
            // create a row per table name
            Table tbl = iterTables.next();
            String tableName = tbl.getTableName();
            String tblLoc = null;
            String inputFormattCls = null;
            String outputFormattCls = null;
            if (part != null) {
                if (par != null) {
                    if (par.getLocation() != null) {
                        tblLoc = par.getDataLocation().toString();
                    }
                    inputFormattCls = par.getInputFormatClass().getName();
                    outputFormattCls = par.getOutputFormatClass().getName();
                }
            } else {
                if (tbl.getPath() != null) {
                    tblLoc = tbl.getDataLocation().toString();
                }
                inputFormattCls = tbl.getInputFormatClass().getName();
                outputFormattCls = tbl.getOutputFormatClass().getName();
            }
            String owner = tbl.getOwner();
            List<FieldSchema> cols = tbl.getCols();
            String ddlCols = MetaStoreUtils.getDDLFromFieldSchema("columns", cols);
            boolean isPartitioned = tbl.isPartitioned();
            String partitionCols = "";
            if (isPartitioned) {
                partitionCols = MetaStoreUtils.getDDLFromFieldSchema("partition_columns", tbl.getPartCols());
            }
            outStream.write(("tableName:" + tableName).getBytes("UTF-8"));
            outStream.write(terminator);
            outStream.write(("owner:" + owner).getBytes("UTF-8"));
            outStream.write(terminator);
            outStream.write(("location:" + tblLoc).getBytes("UTF-8"));
            outStream.write(terminator);
            outStream.write(("inputformat:" + inputFormattCls).getBytes("UTF-8"));
            outStream.write(terminator);
            outStream.write(("outputformat:" + outputFormattCls).getBytes("UTF-8"));
            outStream.write(terminator);
            outStream.write(("columns:" + ddlCols).getBytes("UTF-8"));
            outStream.write(terminator);
            outStream.write(("partitioned:" + isPartitioned).getBytes("UTF-8"));
            outStream.write(terminator);
            outStream.write(("partitionColumns:" + partitionCols).getBytes("UTF-8"));
            outStream.write(terminator);
            // output file system information
            Path tblPath = tbl.getPath();
            List<Path> locations = new ArrayList<Path>();
            if (isPartitioned) {
                if (par == null) {
                    for (Partition curPart : db.getPartitions(tbl)) {
                        if (curPart.getLocation() != null) {
                            locations.add(new Path(curPart.getLocation()));
                        }
                    }
                } else {
                    if (par.getLocation() != null) {
                        locations.add(new Path(par.getLocation()));
                    }
                }
            } else {
                if (tblPath != null) {
                    locations.add(tblPath);
                }
            }
            if (!locations.isEmpty()) {
                writeFileSystemStats(outStream, conf, locations, tblPath, false, 0);
            }
            outStream.write(terminator);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) IOException(java.io.IOException)
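
The method emits one key:value record per line, each followed by a terminator byte. Below is a minimal, self-contained sketch of that record format — it is not the Hive API; the terminator is assumed to be a newline and the field values are made up for illustration.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class TableStatusFormatSketch {
    // Assumption: the terminator used by the formatter is a single newline byte.
    private static final byte TERMINATOR = (byte) '\n';

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        DataOutputStream outStream = new DataOutputStream(buffer);
        // Hypothetical values standing in for tbl.getTableName() / tbl.getOwner() / tbl.isPartitioned().
        writeField(outStream, "tableName", "web_logs");
        writeField(outStream, "owner", "hive");
        writeField(outStream, "partitioned", "true");
        System.out.print(buffer.toString("UTF-8"));
    }

    private static void writeField(DataOutputStream out, String key, String value) throws IOException {
        // Same pattern as the snippet above: UTF-8 bytes of "key:value", then the terminator.
        out.write((key + ":" + value).getBytes("UTF-8"));
        out.write(TERMINATOR);
    }
}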

Example 32 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class AbstractBucketJoinProc method convertMapJoinToBucketMapJoin.

/*
   * Convert mapjoin to a bucketed mapjoin.
   * The operator tree is not changed, but the mapjoin descriptor in the big table is
   * enhanced to keep the big table bucket -> small table buckets mapping.
   */
protected void convertMapJoinToBucketMapJoin(MapJoinOperator mapJoinOp, BucketJoinProcCtx context) throws SemanticException {
    MapJoinDesc desc = mapJoinOp.getConf();
    Map<String, Map<String, List<String>>> aliasBucketFileNameMapping = new LinkedHashMap<String, Map<String, List<String>>>();
    Map<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition = context.getTblAliasToNumberOfBucketsInEachPartition();
    Map<String, List<List<String>>> tblAliasToBucketedFilePathsInEachPartition = context.getTblAliasToBucketedFilePathsInEachPartition();
    Map<Partition, List<String>> bigTblPartsToBucketFileNames = context.getBigTblPartsToBucketFileNames();
    Map<Partition, Integer> bigTblPartsToBucketNumber = context.getBigTblPartsToBucketNumber();
    List<String> joinAliases = context.getJoinAliases();
    String baseBigAlias = context.getBaseBigAlias();
    // sort bucket names for the big table
    for (List<String> partBucketNames : bigTblPartsToBucketFileNames.values()) {
        Collections.sort(partBucketNames);
    }
    // Build the mapping from bucket file names in the big table to bucket file names in small tables.
    for (int j = 0; j < joinAliases.size(); j++) {
        String alias = joinAliases.get(j);
        if (alias.equals(baseBigAlias)) {
            continue;
        }
        for (List<String> names : tblAliasToBucketedFilePathsInEachPartition.get(alias)) {
            Collections.sort(names);
        }
        List<Integer> smallTblBucketNums = tblAliasToNumberOfBucketsInEachPartition.get(alias);
        List<List<String>> smallTblFilesList = tblAliasToBucketedFilePathsInEachPartition.get(alias);
        Map<String, List<String>> mappingBigTableBucketFileNameToSmallTableBucketFileNames = new LinkedHashMap<String, List<String>>();
        aliasBucketFileNameMapping.put(alias, mappingBigTableBucketFileNameToSmallTableBucketFileNames);
        // For each bucket file in the big table, get the corresponding bucket file
        // names in the small table. If the big table has more than one partition,
        // do the mapping for each partition.
        Iterator<Entry<Partition, List<String>>> bigTblPartToBucketNames = bigTblPartsToBucketFileNames.entrySet().iterator();
        Iterator<Entry<Partition, Integer>> bigTblPartToBucketNum = bigTblPartsToBucketNumber.entrySet().iterator();
        while (bigTblPartToBucketNames.hasNext()) {
            assert bigTblPartToBucketNum.hasNext();
            int bigTblBucketNum = bigTblPartToBucketNum.next().getValue();
            List<String> bigTblBucketNameList = bigTblPartToBucketNames.next().getValue();
            fillMappingBigTableBucketFileNameToSmallTableBucketFileNames(smallTblBucketNums, smallTblFilesList, mappingBigTableBucketFileNameToSmallTableBucketFileNames, bigTblBucketNum, bigTblBucketNameList, desc.getBigTableBucketNumMapping());
        }
    }
    desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
    desc.setBigTableAlias(baseBigAlias);
    boolean bigTablePartitioned = context.isBigTablePartitioned();
    if (bigTablePartitioned) {
        desc.setBigTablePartSpecToFileMapping(convert(bigTblPartsToBucketFileNames));
    }
    Map<Integer, Set<String>> posToAliasMap = mapJoinOp.getPosToAliasMap();
    Map<String, String> aliasToNewAliasMap = context.getAliasToNewAliasMap();
    if (aliasToNewAliasMap != null && posToAliasMap != null) {
        for (Map.Entry<String, String> entry : aliasToNewAliasMap.entrySet()) {
            for (Set<String> aliases : posToAliasMap.values()) {
                if (aliases.remove(entry.getKey())) {
                    aliases.add(entry.getValue());
                }
            }
        }
    }
    // successfully convert to bucket map join
    desc.setBucketMapJoin(true);
}
Also used : Set(java.util.Set) LinkedHashMap(java.util.LinkedHashMap) Entry(java.util.Map.Entry) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) Partition(org.apache.hadoop.hive.ql.metadata.Partition) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
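
The essential output of the conversion is aliasBucketFileNameMapping: for each small-table alias, every big-table bucket file is paired with the small-table bucket files it must read. The sketch below illustrates that pairing with made-up file names, assuming the usual index-modulo pairing when the bucket counts differ; the real pairing is computed by fillMappingBigTableBucketFileNameToSmallTableBucketFileNames.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class BucketMappingSketch {
    public static void main(String[] args) {
        // Hypothetical, already-sorted bucket file names: big table has 4 buckets, small table has 2.
        List<String> bigBuckets = Arrays.asList("big/000000_0", "big/000001_0", "big/000002_0", "big/000003_0");
        List<String> smallBuckets = Arrays.asList("small/000000_0", "small/000001_0");

        // big-table bucket file -> small-table bucket files it joins with
        Map<String, List<String>> mapping = new LinkedHashMap<String, List<String>>();
        for (int i = 0; i < bigBuckets.size(); i++) {
            List<String> matched = new ArrayList<String>();
            // Assumption: buckets are paired by index modulo the smaller bucket count.
            matched.add(smallBuckets.get(i % smallBuckets.size()));
            mapping.put(bigBuckets.get(i), matched);
        }
        System.out.println(mapping);
    }
}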

Example 33 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class AvgPartitionSizeBasedBigTableSelectorForAutoSMJ method getBigTablePosition.

public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp, Set<Integer> bigTableCandidates) throws SemanticException {
    int bigTablePos = -1;
    long maxSize = -1;
    // number of partitions for the chosen big table
    int numPartitionsCurrentBigTable = 0;
    HiveConf conf = parseCtx.getConf();
    try {
        List<TableScanOperator> topOps = new ArrayList<TableScanOperator>();
        getListTopOps(joinOp, topOps);
        int currentPos = 0;
        for (TableScanOperator topOp : topOps) {
            if (topOp == null) {
                return -1;
            }
            if (!bigTableCandidates.contains(currentPos)) {
                currentPos++;
                continue;
            }
            // in case the sizes match, preference is given to the table with fewer partitions
            int numPartitions = 1;
            Table table = topOp.getConf().getTableMetadata();
            long averageSize = 0;
            if (!table.isPartitioned()) {
                averageSize = getSize(conf, table);
            } else {
                // For partitioned tables, get the size of all the partitions
                PrunedPartitionList partsList = PartitionPruner.prune(topOp, parseCtx, null);
                numPartitions = partsList.getNotDeniedPartns().size();
                long totalSize = 0;
                for (Partition part : partsList.getNotDeniedPartns()) {
                    totalSize += getSize(conf, part);
                }
                averageSize = numPartitions == 0 ? 0 : totalSize / numPartitions;
            }
            if (averageSize > maxSize) {
                maxSize = averageSize;
                bigTablePos = currentPos;
                numPartitionsCurrentBigTable = numPartitions;
            } else if (averageSize == maxSize) {
                // If the sizes match, prefer the table with fewer partitions
                if (numPartitions < numPartitionsCurrentBigTable) {
                    bigTablePos = currentPos;
                    numPartitionsCurrentBigTable = numPartitions;
                }
            }
            currentPos++;
        }
    } catch (HiveException e) {
        throw new SemanticException(e.getMessage());
    }
    return bigTablePos;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) HiveConf(org.apache.hadoop.hive.conf.HiveConf) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
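
The selection rule is: pick the table with the largest average partition size, and on a tie prefer the one with fewer partitions. A self-contained sketch of that rule over made-up (total size, partition count) pairs:

public class BigTableSelectionSketch {
    public static void main(String[] args) {
        // Hypothetical candidates: total size in bytes and number of (not denied) partitions.
        long[] totalSizes = { 4_000L, 6_000L, 6_000L };
        int[] numPartitions = { 4, 3, 2 };

        int bigTablePos = -1;
        long maxAvg = -1;
        int partsOfCurrentBig = 0;
        for (int pos = 0; pos < totalSizes.length; pos++) {
            long avg = numPartitions[pos] == 0 ? 0 : totalSizes[pos] / numPartitions[pos];
            if (avg > maxAvg) {
                maxAvg = avg;
                bigTablePos = pos;
                partsOfCurrentBig = numPartitions[pos];
            } else if (avg == maxAvg && numPartitions[pos] < partsOfCurrentBig) {
                // Tie on average size: the table with fewer partitions wins.
                bigTablePos = pos;
                partsOfCurrentBig = numPartitions[pos];
            }
        }
        // Averages are 1000, 2000, 2000: positions 1 and 2 tie, and position 2 wins with fewer partitions.
        System.out.println("big table position: " + bigTablePos);
    }
}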

Example 34 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class JsonMetaDataFormatter method makeTableStatusLocations.

private List<Path> makeTableStatusLocations(Table tbl, Hive db, Partition par) throws HiveException {
    // output file system information
    Path tblPath = tbl.getPath();
    List<Path> locations = new ArrayList<Path>();
    if (tbl.isPartitioned()) {
        if (par == null) {
            for (Partition curPart : db.getPartitions(tbl)) {
                if (curPart.getLocation() != null) {
                    locations.add(new Path(curPart.getLocation()));
                }
            }
        } else {
            if (par.getLocation() != null) {
                locations.add(new Path(par.getLocation()));
            }
        }
    } else {
        if (tblPath != null) {
            locations.add(tblPath);
        }
    }
    return locations;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList)
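
The returned locations are what the formatter's file-system statistics are computed over. Below is a hedged sketch of what a caller could do with them using plain Hadoop FileSystem calls; the real formatter delegates to its own stats writer, whose exact output format is not shown here.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LocationStatsSketch {
    /** Sums the total length and file count of every location that exists. */
    public static void printStats(List<Path> locations, Configuration conf) throws IOException {
        long totalBytes = 0;
        long totalFiles = 0;
        for (Path location : locations) {
            FileSystem fs = location.getFileSystem(conf);
            if (!fs.exists(location)) {
                continue;
            }
            ContentSummary summary = fs.getContentSummary(location);
            totalBytes += summary.getLength();
            totalFiles += summary.getFileCount();
        }
        System.out.println("totalFileSize: " + totalBytes + ", totalNumberFiles: " + totalFiles);
    }
}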

Example 35 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class PartitionPruner method prune.

/**
   * Get the partition list for the table that satisfies the partition pruner
   * condition.
   *
   * @param tab
   *          the table object for the alias
   * @param prunerExpr
   *          the pruner expression for the alias
   * @param conf
   *          for checking whether "strict" mode is on.
   * @param alias
   *          for generating error message only.
   * @param prunedPartitionsMap
   *          cached result for the table
   * @return the partition list for the table that satisfies the partition
   *         pruner condition.
   * @throws SemanticException
   */
public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, HiveConf conf, String alias, Map<String, PrunedPartitionList> prunedPartitionsMap) throws SemanticException {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Started pruning partition");
        LOG.trace("dbname = " + tab.getDbName());
        LOG.trace("tabname = " + tab.getTableName());
        LOG.trace("prune Expression = " + (prunerExpr == null ? "" : prunerExpr));
    }
    String key = tab.getDbName() + "." + tab.getTableName() + ";";
    if (!tab.isPartitioned()) {
        // If the table is not partitioned, there is nothing to prune; get everything.
        return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
    }
    if (!hasColumnExpr(prunerExpr)) {
        // If the "strict" mode is on, we have to provide partition pruner for each table.
        String error = StrictChecks.checkNoPartitionFilter(conf);
        if (error != null) {
            throw new SemanticException(error + " No partition predicate for Alias \"" + alias + "\" Table \"" + tab.getTableName() + "\"");
        }
    }
    if (prunerExpr == null) {
        // Non-strict mode and no predicates at all - get everything.
        return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
    }
    Set<String> partColsUsedInFilter = new LinkedHashSet<String>();
    // Replace virtual columns with nulls. See javadoc for details.
    prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter);
    // Remove all parts that are not partition columns. See javadoc for details.
    ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
    String oldFilter = prunerExpr.getExprString();
    if (compactExpr == null || isBooleanExpr(compactExpr)) {
        if (isFalseExpr(compactExpr)) {
            return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(0), new ArrayList<String>(0), false);
        }
        // For null and true values, return every partition
        return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Filter w/ compacting: " + compactExpr.getExprString() + "; filter w/o compacting: " + oldFilter);
    }
    key = key + compactExpr.getExprString();
    PrunedPartitionList ppList = prunedPartitionsMap.get(key);
    if (ppList != null) {
        return ppList;
    }
    ppList = getPartitionsFromServer(tab, (ExprNodeGenericFuncDesc) compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
    prunedPartitionsMap.put(key, ppList);
    return ppList;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Partition(org.apache.hadoop.hive.ql.metadata.Partition) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
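
Two details worth noting: the cache key is the qualified table name plus the compacted filter string, so the same filter against the same table only hits the metastore once, and a filter that compacts to constant false short-circuits to an empty partition list. Below is a small standalone sketch of that memoization pattern, with a plain String filter and List<String> standing in for ExprNodeDesc and PrunedPartitionList; the names and partition values are made up.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PruneCacheSketch {
    private final Map<String, List<String>> prunedPartitionsMap = new HashMap<String, List<String>>();

    public List<String> prune(String dbName, String tableName, String compactFilter) {
        // Same key layout as the real method: "db.table;" plus the compacted filter string.
        String key = dbName + "." + tableName + ";" + compactFilter;
        List<String> cached = prunedPartitionsMap.get(key);
        if (cached != null) {
            return cached;
        }
        // Stand-in for getPartitionsFromServer(...): pretend the metastore returned these partitions.
        List<String> fromServer = Arrays.asList("ds=2024-01-01", "ds=2024-01-02");
        prunedPartitionsMap.put(key, fromServer);
        return fromServer;
    }

    public static void main(String[] args) {
        PruneCacheSketch cache = new PruneCacheSketch();
        List<String> first = cache.prune("default", "web_logs", "(ds = '2024-01-01')");
        List<String> second = cache.prune("default", "web_logs", "(ds = '2024-01-01')");
        // The second call is served from the cache; both calls return the same list instance.
        System.out.println(first == second);
    }
}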

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition) 83
Table (org.apache.hadoop.hive.ql.metadata.Table) 48
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 42
ArrayList (java.util.ArrayList) 35
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) 23
Path (org.apache.hadoop.fs.Path) 21
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity) 21
HashMap (java.util.HashMap) 17
LinkedHashMap (java.util.LinkedHashMap) 17
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList) 16
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 16
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity) 15
IOException (java.io.IOException) 13
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 13
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 12
FileNotFoundException (java.io.FileNotFoundException) 10
FileSystem (org.apache.hadoop.fs.FileSystem) 10
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException) 10
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 10
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException) 10