Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache: class TextMetaDataFormatter, method showTableStatus.
@Override
public void showTableStatus(DataOutputStream outStream, Hive db, HiveConf conf, List<Table> tbls, Map<String, String> part, Partition par) throws HiveException {
  try {
    Iterator<Table> iterTables = tbls.iterator();
    while (iterTables.hasNext()) {
      // create a row per table name
      Table tbl = iterTables.next();
      String tableName = tbl.getTableName();
      String tblLoc = null;
      String inputFormattCls = null;
      String outputFormattCls = null;
      if (part != null) {
        if (par != null) {
          if (par.getLocation() != null) {
            tblLoc = par.getDataLocation().toString();
          }
          inputFormattCls = par.getInputFormatClass().getName();
          outputFormattCls = par.getOutputFormatClass().getName();
        }
      } else {
        if (tbl.getPath() != null) {
          tblLoc = tbl.getDataLocation().toString();
        }
        inputFormattCls = tbl.getInputFormatClass().getName();
        outputFormattCls = tbl.getOutputFormatClass().getName();
      }
      String owner = tbl.getOwner();
      List<FieldSchema> cols = tbl.getCols();
      String ddlCols = MetaStoreUtils.getDDLFromFieldSchema("columns", cols);
      boolean isPartitioned = tbl.isPartitioned();
      String partitionCols = "";
      if (isPartitioned) {
        partitionCols = MetaStoreUtils.getDDLFromFieldSchema("partition_columns", tbl.getPartCols());
      }
      outStream.write(("tableName:" + tableName).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("owner:" + owner).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("location:" + tblLoc).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("inputformat:" + inputFormattCls).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("outputformat:" + outputFormattCls).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("columns:" + ddlCols).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("partitioned:" + isPartitioned).getBytes("UTF-8"));
      outStream.write(terminator);
      outStream.write(("partitionColumns:" + partitionCols).getBytes("UTF-8"));
      outStream.write(terminator);
      // output file system information
      Path tblPath = tbl.getPath();
      List<Path> locations = new ArrayList<Path>();
      if (isPartitioned) {
        if (par == null) {
          for (Partition curPart : db.getPartitions(tbl)) {
            if (curPart.getLocation() != null) {
              locations.add(new Path(curPart.getLocation()));
            }
          }
        } else {
          if (par.getLocation() != null) {
            locations.add(new Path(par.getLocation()));
          }
        }
      } else {
        if (tblPath != null) {
          locations.add(tblPath);
        }
      }
      if (!locations.isEmpty()) {
        writeFileSystemStats(outStream, conf, locations, tblPath, false, 0);
      }
      outStream.write(terminator);
    }
  } catch (IOException e) {
    throw new HiveException(e);
  }
}
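The Partition usage above boils down to collecting per-partition data locations with db.getPartitions(tbl) and Partition.getLocation(). A minimal standalone sketch of that pattern, assuming a HiveConf that points at a metastore; the class, method, database, and table names are placeholders, not part of the formatter above:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class PartitionLocationsExample {
  public static List<Path> partitionLocations(HiveConf conf, String dbName, String tableName) throws HiveException {
    // Hive.get(conf) hands back the thread-local metastore client wrapper
    Hive db = Hive.get(conf);
    Table tbl = db.getTable(dbName, tableName);
    List<Path> locations = new ArrayList<Path>();
    if (!tbl.isPartitioned()) {
      // a non-partitioned table has a single data location
      locations.add(tbl.getDataLocation());
      return locations;
    }
    for (Partition curPart : db.getPartitions(tbl)) {
      // skip partitions that have no physical data location
      if (curPart.getLocation() != null) {
        locations.add(new Path(curPart.getLocation()));
      }
    }
    return locations;
  }
}

Note that Partition.getLocation() returns the location as a String while getDataLocation() returns it as a Path, which is why the formatter code wraps getLocation() in new Path(...).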
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache: class AbstractBucketJoinProc, method convertMapJoinToBucketMapJoin.
/*
 * Convert mapjoin to a bucketed mapjoin.
 * The operator tree is not changed, but the mapjoin descriptor in the big table is
 * enhanced to keep the big table bucket -> small table buckets mapping.
 */
protected void convertMapJoinToBucketMapJoin(MapJoinOperator mapJoinOp, BucketJoinProcCtx context) throws SemanticException {
  MapJoinDesc desc = mapJoinOp.getConf();
  Map<String, Map<String, List<String>>> aliasBucketFileNameMapping = new LinkedHashMap<String, Map<String, List<String>>>();
  Map<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition = context.getTblAliasToNumberOfBucketsInEachPartition();
  Map<String, List<List<String>>> tblAliasToBucketedFilePathsInEachPartition = context.getTblAliasToBucketedFilePathsInEachPartition();
  Map<Partition, List<String>> bigTblPartsToBucketFileNames = context.getBigTblPartsToBucketFileNames();
  Map<Partition, Integer> bigTblPartsToBucketNumber = context.getBigTblPartsToBucketNumber();
  List<String> joinAliases = context.getJoinAliases();
  String baseBigAlias = context.getBaseBigAlias();
  // sort bucket names for the big table
  for (List<String> partBucketNames : bigTblPartsToBucketFileNames.values()) {
    Collections.sort(partBucketNames);
  }
  // for each small table alias, build the mapping from bucket file names
  // in the big table to bucket file names in small tables.
  for (int j = 0; j < joinAliases.size(); j++) {
    String alias = joinAliases.get(j);
    if (alias.equals(baseBigAlias)) {
      continue;
    }
    for (List<String> names : tblAliasToBucketedFilePathsInEachPartition.get(alias)) {
      Collections.sort(names);
    }
    List<Integer> smallTblBucketNums = tblAliasToNumberOfBucketsInEachPartition.get(alias);
    List<List<String>> smallTblFilesList = tblAliasToBucketedFilePathsInEachPartition.get(alias);
    Map<String, List<String>> mappingBigTableBucketFileNameToSmallTableBucketFileNames = new LinkedHashMap<String, List<String>>();
    aliasBucketFileNameMapping.put(alias, mappingBigTableBucketFileNameToSmallTableBucketFileNames);
    // for each bucket file in big table, get the corresponding bucket file
    // name in the small table.
    // more than 1 partition in the big table, do the mapping for each partition
    Iterator<Entry<Partition, List<String>>> bigTblPartToBucketNames = bigTblPartsToBucketFileNames.entrySet().iterator();
    Iterator<Entry<Partition, Integer>> bigTblPartToBucketNum = bigTblPartsToBucketNumber.entrySet().iterator();
    while (bigTblPartToBucketNames.hasNext()) {
      assert bigTblPartToBucketNum.hasNext();
      int bigTblBucketNum = bigTblPartToBucketNum.next().getValue();
      List<String> bigTblBucketNameList = bigTblPartToBucketNames.next().getValue();
      fillMappingBigTableBucketFileNameToSmallTableBucketFileNames(smallTblBucketNums, smallTblFilesList, mappingBigTableBucketFileNameToSmallTableBucketFileNames, bigTblBucketNum, bigTblBucketNameList, desc.getBigTableBucketNumMapping());
    }
  }
  desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
  desc.setBigTableAlias(baseBigAlias);
  boolean bigTablePartitioned = context.isBigTablePartitioned();
  if (bigTablePartitioned) {
    desc.setBigTablePartSpecToFileMapping(convert(bigTblPartsToBucketFileNames));
  }
  Map<Integer, Set<String>> posToAliasMap = mapJoinOp.getPosToAliasMap();
  Map<String, String> aliasToNewAliasMap = context.getAliasToNewAliasMap();
  if (aliasToNewAliasMap != null && posToAliasMap != null) {
    for (Map.Entry<String, String> entry : aliasToNewAliasMap.entrySet()) {
      for (Set<String> aliases : posToAliasMap.values()) {
        if (aliases.remove(entry.getKey())) {
          aliases.add(entry.getValue());
        }
      }
    }
  }
  // successfully convert to bucket map join
  desc.setBucketMapJoin(true);
}
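fillMappingBigTableBucketFileNameToSmallTableBucketFileNames is not shown here. The usual bucket map join pairing that this mapping captures matches buckets by index modulo the smaller bucket count, which is why one table's bucket count must be a multiple of the other's. A rough, self-contained illustration of that pairing rule; the class and method names, and the bucket counts in main, are invented for the example and are not part of the Hive code above:

import java.util.ArrayList;
import java.util.List;

public class BucketMappingSketch {
  // Pair big-table bucket i with the small-table buckets it must read.
  // Assumes one bucket count evenly divides the other, as bucket map join requires.
  static List<Integer> smallBucketsForBigBucket(int bigBucket, int bigBucketCount, int smallBucketCount) {
    List<Integer> result = new ArrayList<Integer>();
    if (bigBucketCount >= smallBucketCount) {
      // more big buckets: each big bucket reads exactly one small bucket
      result.add(bigBucket % smallBucketCount);
    } else {
      // fewer big buckets: each big bucket reads every small bucket with the same remainder
      for (int j = bigBucket; j < smallBucketCount; j += bigBucketCount) {
        result.add(j);
      }
    }
    return result;
  }

  public static void main(String[] args) {
    // big table with 4 buckets joined to a small table with 2 buckets:
    // big bucket 3 reads small bucket 1 (3 % 2)
    System.out.println(smallBucketsForBigBucket(3, 4, 2));
    // big table with 2 buckets joined to a small table with 4 buckets:
    // big bucket 1 reads small buckets 1 and 3
    System.out.println(smallBucketsForBigBucket(1, 2, 4));
  }
}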
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache: class AvgPartitionSizeBasedBigTableSelectorForAutoSMJ, method getBigTablePosition.
public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp, Set<Integer> bigTableCandidates) throws SemanticException {
  int bigTablePos = -1;
  long maxSize = -1;
  // number of partitions for the chosen big table
  int numPartitionsCurrentBigTable = 0;
  HiveConf conf = parseCtx.getConf();
  try {
    List<TableScanOperator> topOps = new ArrayList<TableScanOperator>();
    getListTopOps(joinOp, topOps);
    int currentPos = 0;
    for (TableScanOperator topOp : topOps) {
      if (topOp == null) {
        return -1;
      }
      if (!bigTableCandidates.contains(currentPos)) {
        currentPos++;
        continue;
      }
      // in case the sizes match, preference is
      // given to the table with fewer partitions
      int numPartitions = 1;
      Table table = topOp.getConf().getTableMetadata();
      long averageSize = 0;
      if (!table.isPartitioned()) {
        averageSize = getSize(conf, table);
      } else {
        // For partitioned tables, get the size of all the partitions
        PrunedPartitionList partsList = PartitionPruner.prune(topOp, parseCtx, null);
        numPartitions = partsList.getNotDeniedPartns().size();
        long totalSize = 0;
        for (Partition part : partsList.getNotDeniedPartns()) {
          totalSize += getSize(conf, part);
        }
        averageSize = numPartitions == 0 ? 0 : totalSize / numPartitions;
      }
      if (averageSize > maxSize) {
        maxSize = averageSize;
        bigTablePos = currentPos;
        numPartitionsCurrentBigTable = numPartitions;
      } else if (averageSize == maxSize) {
        // If the sizes match, prefer the table with fewer partitions
        if (numPartitions < numPartitionsCurrentBigTable) {
          bigTablePos = currentPos;
          numPartitionsCurrentBigTable = numPartitions;
        }
      }
      currentPos++;
    }
  } catch (HiveException e) {
    throw new SemanticException(e.getMessage());
  }
  return bigTablePos;
}
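getSize is a private helper that is not part of this snippet; presumably it measures the bytes stored under the table's or partition's data directory. A hedged sketch of that idea for a single partition, using the FileSystem content summary; the class and method names are invented for illustration and are not the Hive helper itself:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Partition;

public class PartitionSizeSketch {
  // Sum of the file bytes under a partition's data directory; 0 if the path is missing.
  static long partitionSizeInBytes(HiveConf conf, Partition part) throws IOException {
    Path location = part.getDataLocation();
    if (location == null) {
      return 0L;
    }
    FileSystem fs = location.getFileSystem(conf);
    // getContentSummary walks the directory tree and reports the total length
    return fs.getContentSummary(location).getLength();
  }
}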
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache: class JsonMetaDataFormatter, method makeTableStatusLocations.
private List<Path> makeTableStatusLocations(Table tbl, Hive db, Partition par) throws HiveException {
  // output file system information
  Path tblPath = tbl.getPath();
  List<Path> locations = new ArrayList<Path>();
  if (tbl.isPartitioned()) {
    if (par == null) {
      for (Partition curPart : db.getPartitions(tbl)) {
        if (curPart.getLocation() != null) {
          locations.add(new Path(curPart.getLocation()));
        }
      }
    } else {
      if (par.getLocation() != null) {
        locations.add(new Path(par.getLocation()));
      }
    }
  } else {
    if (tblPath != null) {
      locations.add(tblPath);
    }
  }
  return locations;
}
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache: class PartitionPruner, method prune.
/**
 * Get the partition list for the table that satisfies the partition pruner
 * condition.
 *
 * @param tab
 *          the table object for the alias
 * @param prunerExpr
 *          the pruner expression for the alias
 * @param conf
 *          for checking whether "strict" mode is on.
 * @param alias
 *          for generating error message only.
 * @param prunedPartitionsMap
 *          cached result for the table
 * @return the partition list for the table that satisfies the partition
 *         pruner condition.
 * @throws SemanticException
 */
public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, HiveConf conf, String alias, Map<String, PrunedPartitionList> prunedPartitionsMap) throws SemanticException {
  if (LOG.isTraceEnabled()) {
    LOG.trace("Started pruning partition");
    LOG.trace("dbname = " + tab.getDbName());
    LOG.trace("tabname = " + tab.getTableName());
    LOG.trace("prune Expression = " + (prunerExpr == null ? "" : prunerExpr));
  }
  String key = tab.getDbName() + "." + tab.getTableName() + ";";
  if (!tab.isPartitioned()) {
    // If the table is not partitioned, return empty list.
    return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
  }
  if (!hasColumnExpr(prunerExpr)) {
    // If the "strict" mode is on, we have to provide partition pruner for each table.
    String error = StrictChecks.checkNoPartitionFilter(conf);
    if (error != null) {
      throw new SemanticException(error + " No partition predicate for Alias \"" + alias + "\" Table \"" + tab.getTableName() + "\"");
    }
  }
  if (prunerExpr == null) {
    // In non-strict mode with no predicate at all, get everything.
    return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
  }
  Set<String> partColsUsedInFilter = new LinkedHashSet<String>();
  // Replace virtual columns with nulls. See javadoc for details.
  prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter);
  // Remove all parts that are not partition columns. See javadoc for details.
  ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
  String oldFilter = prunerExpr.getExprString();
  if (compactExpr == null || isBooleanExpr(compactExpr)) {
    if (isFalseExpr(compactExpr)) {
      return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(0), new ArrayList<String>(0), false);
    }
    // For null and true values, return every partition
    return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Filter w/ compacting: " + compactExpr.getExprString() + "; filter w/o compacting: " + oldFilter);
  }
  key = key + compactExpr.getExprString();
  PrunedPartitionList ppList = prunedPartitionsMap.get(key);
  if (ppList != null) {
    return ppList;
  }
  ppList = getPartitionsFromServer(tab, (ExprNodeGenericFuncDesc) compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
  prunedPartitionsMap.put(key, ppList);
  return ppList;
}
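For callers outside the compiler, the main inputs are a Table handle, a predicate over partition columns expressed as an ExprNodeDesc, and a cache map. A minimal sketch of invoking prune with an equality predicate, assuming the table is partitioned by a string column named ds; the class name, column name, and date literal are placeholders:

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionPrunerUsageSketch {
  // Prune tbl down to the partitions where ds = '2024-01-01'.
  static PrunedPartitionList pruneByDs(Table tbl, HiveConf conf) throws Exception {
    // ds is assumed to be a string partition column of the table
    ExprNodeDesc dsCol = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "ds", tbl.getTableName(), true);
    ExprNodeDesc dsVal = new ExprNodeConstantDesc("2024-01-01");
    ExprNodeDesc predicate = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Arrays.asList(dsCol, dsVal));
    Map<String, PrunedPartitionList> cache = new HashMap<String, PrunedPartitionList>();
    PrunedPartitionList pruned = PartitionPruner.prune(tbl, predicate, conf, tbl.getTableName(), cache);
    for (Partition part : pruned.getPartitions()) {
      System.out.println(part.getName() + " -> " + part.getLocation());
    }
    return pruned;
  }
}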