Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
Class GenMRTableScan1, method process.
/**
 * Table scan encountered.
 *
 * @param nd
 *          the table scan operator encountered
 * @param opProcCtx
 *          context
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
    throws SemanticException {
  TableScanOperator op = (TableScanOperator) nd;
  GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
  ParseContext parseCtx = ctx.getParseCtx();
  Table table = op.getConf().getTableMetadata();
  Class<? extends InputFormat> inputFormat = table.getInputFormatClass();
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

  // create a dummy MapReduce task
  MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
  MapRedTask currTask = (MapRedTask) TaskFactory.get(currWork);
  ctx.setCurrTask(currTask);
  ctx.setCurrTopOp(op);

  for (String alias : parseCtx.getTopOps().keySet()) {
    Operator<? extends OperatorDesc> currOp = parseCtx.getTopOps().get(alias);
    if (currOp == op) {
      String currAliasId = alias;
      ctx.setCurrAliasId(currAliasId);
      mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));
      if (parseCtx.getQueryProperties().isAnalyzeCommand()) {
        boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand();
        if (OrcInputFormat.class.isAssignableFrom(inputFormat)
            || MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
          // For ORC and Parquet, the following statements are equivalent:
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // There will not be any MR or Tez job above this task.
          StatsWork statWork = new StatsWork(table, parseCtx.getConf());
          statWork.setFooterScan();
          // If a partition is specified, get the pruned partition list.
          Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(op);
          if (confirmedParts.size() > 0) {
            List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
            PrunedPartitionList partList = new PrunedPartitionList(table, confirmedParts, partCols, false);
            statWork.addInputPartitions(partList.getPartitions());
          }
          Task<StatsWork> snjTask = TaskFactory.get(statWork);
          ctx.setCurrTask(snjTask);
          ctx.setCurrTopOp(null);
          ctx.getRootTasks().clear();
          ctx.getRootTasks().add(snjTask);
        } else {
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator.
          BasicStatsWork statsWork = new BasicStatsWork(table.getTableSpec());
          statsWork.setNoScanAnalyzeCommand(noScan);
          StatsWork columnStatsWork = new StatsWork(table, statsWork, parseCtx.getConf());
          columnStatsWork.collectStatsFromAggregator(op.getConf());
          columnStatsWork.setSourceTask(currTask);
          Task<StatsWork> columnStatsTask = TaskFactory.get(columnStatsWork);
          currTask.addDependentTask(columnStatsTask);
          if (!ctx.getRootTasks().contains(currTask)) {
            ctx.getRootTasks().add(currTask);
          }
          // For a noscan command, the plan consists of a StatsTask only.
          if (noScan) {
            columnStatsTask.setParentTasks(null);
            ctx.getRootTasks().remove(currTask);
            ctx.getRootTasks().add(columnStatsTask);
          }
          currWork.getMapWork().setGatheringStats(true);
          if (currWork.getReduceWork() != null) {
            currWork.getReduceWork().setGatheringStats(true);
          }
          // NOTE: here we should use the new partition predicate pushdown API to get
          // the pruned partition list and pass it to setTaskPlan as the last parameter.
          Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(op);
          if (confirmedPartns.size() > 0) {
            List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
            PrunedPartitionList partList = new PrunedPartitionList(table, confirmedPartns, partCols, false);
            GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx, partList);
          } else {
            // non-partitioned table
            GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx);
          }
        }
      }
      return true;
    }
  }
  assert false;
  return null;
}
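For context, GenMRTableScan1 is not invoked directly: the MapReduce compiler registers it against an operator-tree pattern and a graph walker dispatches matching nodes to it. A minimal sketch of that wiring, assuming Hive's org.apache.hadoop.hive.ql.lib classes and a GenMRProcContext (procCtx) built elsewhere; the rule name "R1" and the context construction are illustrative only:

Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
// Fire GenMRTableScan1 whenever a TableScanOperator is visited.
opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), new GenMRTableScan1());
// No default processor in this sketch; only the registered rule fires.
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
GraphWalker walker = new GenMapRedWalker(disp);
List<Node> topNodes = new ArrayList<Node>(parseCtx.getTopOps().values());
walker.startWalking(topNodes, null);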
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
Class TestHCatMultiOutputFormat, method getTableData.
/**
 * Method to fetch table data.
 *
 * @param table table name
 * @param database database name
 * @return list of rows, with the columns of each row comma-separated
 * @throws Exception if any error occurs
 */
private List<String> getTableData(String table, String database) throws Exception {
  QueryState queryState = new QueryState.Builder().build();
  HiveConf conf = queryState.getConf();
  conf.addResource("hive-site.xml");
  ArrayList<String> results = new ArrayList<String>();
  ArrayList<String> temp = new ArrayList<String>();
  Hive hive = Hive.get(conf);
  org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
  FetchWork work;
  if (!tbl.getPartCols().isEmpty()) {
    List<Partition> partitions = hive.getPartitions(tbl);
    List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    List<Path> partLocs = new ArrayList<Path>();
    TableDesc tableDesc = Utilities.getTableDesc(tbl);
    for (Partition part : partitions) {
      partLocs.add(part.getDataLocation());
      partDesc.add(Utilities.getPartitionDescFromTableDesc(tableDesc, part, true));
    }
    work = new FetchWork(partLocs, partDesc, tableDesc);
    work.setLimit(100);
  } else {
    work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
  }
  FetchTask task = new FetchTask();
  task.setWork(work);
  task.initialize(queryState, null, null, new CompilationOpContext());
  task.fetch(temp);
  for (String str : temp) {
    results.add(str.replace("\t", ","));
  }
  return results;
}
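A typical use in a test, assuming a hypothetical partitioned table named employee in the default database was created and loaded earlier in the test (table name, database, and expected values are all illustrative):

// Rows come back with columns comma-joined, so they can be compared as strings.
List<String> rows = getTableData("employee", "default");
Assert.assertEquals(2, rows.size());
Assert.assertTrue(rows.contains("1,alice,2011"));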
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
Class LoadPartitions, method forExistingTable.
private TaskTracker forExistingTable(AddPartitionDesc lastPartitionReplicated) throws Exception {
  boolean encounteredTheLastReplicatedPartition = (lastPartitionReplicated == null);
  Map<String, String> lastReplicatedPartSpec = null;
  if (!encounteredTheLastReplicatedPartition) {
    lastReplicatedPartSpec = lastPartitionReplicated.getPartition(0).getPartSpec();
    LOG.info("Start processing from partition info spec : {}",
        StringUtils.mapToString(lastReplicatedPartSpec));
  }

  ReplicationSpec replicationSpec = event.replicationSpec();
  Iterator<AddPartitionDesc> partitionIterator = event.partitionDescriptions(tableDesc).iterator();
  // Skip past the partitions that were already replicated in a previous run.
  while (!encounteredTheLastReplicatedPartition && partitionIterator.hasNext()) {
    AddPartitionDesc addPartitionDesc = partitionIterator.next();
    Map<String, String> currentSpec = addPartitionDesc.getPartition(0).getPartSpec();
    encounteredTheLastReplicatedPartition = lastReplicatedPartSpec.equals(currentSpec);
  }

  while (partitionIterator.hasNext() && tracker.canAddMoreTasks()) {
    AddPartitionDesc addPartitionDesc = partitionIterator.next();
    Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
    Partition ptn = context.hiveDb.getPartition(table, partSpec, false);
    if (ptn == null) {
      if (!replicationSpec.isMetadataOnly()) {
        addPartition(partitionIterator.hasNext(), addPartitionDesc);
      }
    } else {
      // Replace only if the destination ptn's repl.last.id is older than the replacement's.
      if (replicationSpec.allowReplacementInto(ptn.getParameters())) {
        if (replicationSpec.isMetadataOnly()) {
          tracker.addTask(alterSinglePartition(addPartitionDesc, replicationSpec, ptn));
          if (!tracker.canAddMoreTasks()) {
            tracker.setReplicationState(new ReplicationState(
                new PartitionState(table.getTableName(), addPartitionDesc)));
          }
        } else {
          addPartition(partitionIterator.hasNext(), addPartitionDesc);
        }
      } else {
        // ignore this ptn, do nothing, not an error.
      }
    }
  }
  return tracker;
}
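The allowReplacementInto check referenced above boils down to comparing replication event IDs. The following is only a simplified sketch of that idea, not Hive's actual implementation; the repl.last.id key matches the parameter name mentioned in the comment, and the helper name is hypothetical:

// Hypothetical helper: allow replacement only when the incoming event is newer
// than the replication state recorded on the destination partition.
static boolean allowReplacement(Map<String, String> destPtnParams, long incomingEventId) {
  String lastReplId = destPtnParams.get("repl.last.id");
  if (lastReplId == null) {
    return true; // destination has no replication state; safe to replace
  }
  return Long.parseLong(lastReplId) < incomingEventId;
}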
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
Class BasicStatsNoJobTask, method aggregateStats.
private int aggregateStats(ExecutorService threadPool, Hive db) {
  int ret = 0;
  try {
    JobConf jc = new JobConf(conf);
    TableSpec tableSpecs = work.getTableSpecs();
    if (tableSpecs == null) {
      throw new RuntimeException("this is unexpected...needs some investigation");
    }
    Table table = tableSpecs.tableHandle;
    Collection<Partition> partitions = null;
    if (work.getPartitions() == null || work.getPartitions().isEmpty()) {
      if (table.isPartitioned()) {
        partitions = tableSpecs.partitions;
      }
    } else {
      partitions = work.getPartitions();
    }
    LinkedList<Partish> partishes = Lists.newLinkedList();
    if (partitions == null) {
      partishes.add(Partish.buildFor(table));
    } else {
      for (Partition part : partitions) {
        partishes.add(Partish.buildFor(table, part));
      }
    }
    List<FooterStatCollector> scs = Lists.newArrayList();
    for (Partish partish : partishes) {
      scs.add(new FooterStatCollector(jc, partish));
    }
    for (FooterStatCollector sc : scs) {
      sc.init(conf, console);
      threadPool.execute(sc);
    }
    LOG.debug("Stats collection waiting for threadpool to shut down..");
    shutdownAndAwaitTermination(threadPool);
    LOG.debug("Stats collection threadpool shutdown successful.");
    ret = updatePartitions(db, scs, table);
  } catch (Exception e) {
    console.printError("Failed to collect footer statistics.",
        "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
    // Fail the query if the stats are supposed to be reliable.
    if (work.isStatsReliable()) {
      ret = -1;
    }
  }
  // A return value of 0 indicates success; anything else indicates failure.
  return ret;
}
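The shutdownAndAwaitTermination helper is not shown in this snippet. A minimal sketch following the standard two-phase shutdown pattern from the java.util.concurrent.ExecutorService javadoc (the 100-second timeout is an assumption, not taken from the Hive source):

// Standard two-phase shutdown: stop accepting new tasks, wait, then force.
private static void shutdownAndAwaitTermination(ExecutorService pool) {
  pool.shutdown(); // Disable new tasks from being submitted.
  try {
    // Wait a while for existing tasks to terminate.
    if (!pool.awaitTermination(100, TimeUnit.SECONDS)) {
      pool.shutdownNow(); // Cancel currently executing tasks.
      // Wait a while for tasks to respond to being cancelled.
      pool.awaitTermination(100, TimeUnit.SECONDS);
    }
  } catch (InterruptedException ie) {
    pool.shutdownNow();
    Thread.currentThread().interrupt(); // Preserve interrupt status.
  }
}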
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
Class BasicStatsTask, method getPartitionsList.
/**
 * Get the list of partitions whose statistics need to be updated.
 * TODO: we should reuse the Partitions generated at compile time
 * since getting the list of partitions is quite expensive.
 *
 * @return a list of partitions whose statistics need to be updated.
 * @throws HiveException
 */
private List<Partition> getPartitionsList(Hive db) throws HiveException {
  if (work.getLoadFileDesc() != null) {
    // we are in CTAS, so we know there are no partitions
    return null;
  }
  List<Partition> list = new ArrayList<Partition>();
  if (work.getTableSpecs() != null) {
    // ANALYZE command
    TableSpec tblSpec = work.getTableSpecs();
    table = tblSpec.tableHandle;
    if (!table.isPartitioned()) {
      return null;
    }
    // get all partitions that match the partition spec
    List<Partition> partitions = tblSpec.partitions;
    if (partitions != null) {
      for (Partition partn : partitions) {
        list.add(partn);
      }
    }
  } else if (work.getLoadTableDesc() != null) {
    // INSERT OVERWRITE command
    LoadTableDesc tbd = work.getLoadTableDesc();
    table = db.getTable(tbd.getTable().getTableName());
    if (!table.isPartitioned()) {
      return null;
    }
    DynamicPartitionCtx dpCtx = tbd.getDPCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // If no dynamic partitions are generated, dpPartSpecs may not be initialized.
      if (dpPartSpecs != null) {
        // load the list of DP partitions and return the list of partition specs
        list.addAll(dpPartSpecs);
      }
    } else {
      // static partition
      Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
      list.add(partn);
    }
  }
  return list;
}
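The TODO above refers to the cost of listing partitions from the metastore. For reference, a partial partition spec can narrow that listing, along the lines of this sketch (the partition column and value are hypothetical):

// Fetch only the partitions matching a partial spec instead of all partitions;
// listing every partition of a large table is an expensive metastore call.
Map<String, String> partialSpec = new HashMap<String, String>();
partialSpec.put("ds", "2018-01-01"); // hypothetical partition column/value
List<Partition> matched = db.getPartitions(table, partialSpec);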