Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class DDLSemanticAnalyzer, method addTablePartsOutputs.
/**
 * Add the table partitions to be modified in the output, so that they are available to the
 * pre-execution hook. If a partition does not exist, throw an error if
 * throwIfNonExistent is true; otherwise ignore it.
 */
private void addTablePartsOutputs(Table table, List<Map<String, String>> partSpecs,
    boolean throwIfNonExistent, boolean allowMany, ASTNode ast, WriteEntity.WriteType writeType)
    throws SemanticException {
  Iterator<Map<String, String>> i;
  int index;
  for (i = partSpecs.iterator(), index = 1; i.hasNext(); ++index) {
    Map<String, String> partSpec = i.next();
    List<Partition> parts = null;
    if (allowMany) {
      try {
        parts = db.getPartitions(table, partSpec);
      } catch (HiveException e) {
        LOG.error("Got HiveException during obtaining list of partitions"
            + StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    } else {
      parts = new ArrayList<Partition>();
      try {
        Partition p = db.getPartition(table, partSpec, false);
        if (p != null) {
          parts.add(p);
        }
      } catch (HiveException e) {
        LOG.debug("Wrong specification" + StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }
    if (parts.isEmpty()) {
      if (throwIfNonExistent) {
        throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(ast.getChild(index)));
      }
    }
    for (Partition p : parts) {
      // Don't request any locks here, as the table has already been locked.
      outputs.add(new WriteEntity(p, writeType));
    }
  }
}
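The allowMany flag above selects between two lookups on the org.apache.hadoop.hive.ql.metadata.Hive handle: getPartitions(table, partSpec) treats the spec as a partial partition specification and can match many partitions, while getPartition(table, partSpec, false) expects a full spec and returns at most one (the false argument means the partition is not created if missing). A minimal sketch of that branching pulled out into a helper; the resolvePartitions name and the explicit db parameter are assumptions for illustration:

private List<Partition> resolvePartitions(Hive db, Table table, Map<String, String> partSpec,
    boolean allowMany) throws HiveException {
  if (allowMany) {
    // Partial spec, e.g. {ds=2024-01-01} on a table partitioned by (ds, hr),
    // may resolve to several partitions.
    return db.getPartitions(table, partSpec);
  }
  // Full spec: at most one partition; 'false' means do not create it if it is missing.
  List<Partition> parts = new ArrayList<Partition>();
  Partition p = db.getPartition(table, partSpec, false);
  if (p != null) {
    parts.add(p);
  }
  return parts;
}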
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class DDLSemanticAnalyzer, method addTableDropPartsOutputs.
/**
 * Add the table partitions to be modified in the output, so that they are available to the
 * pre-execution hook. If a partition does not exist, throw an error if
 * throwIfNonExistent is true; otherwise ignore it.
 */
private void addTableDropPartsOutputs(Table tab, Collection<List<ExprNodeGenericFuncDesc>> partSpecs,
    boolean throwIfNonExistent) throws SemanticException {
  for (List<ExprNodeGenericFuncDesc> specs : partSpecs) {
    for (ExprNodeGenericFuncDesc partSpec : specs) {
      List<Partition> parts = new ArrayList<Partition>();
      boolean hasUnknown = false;
      try {
        hasUnknown = db.getPartitionsByExpr(tab, partSpec, conf, parts);
      } catch (Exception e) {
        throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(partSpec.getExprString()), e);
      }
      if (hasUnknown) {
        throw new SemanticException("Unexpected unknown partitions for " + partSpec.getExprString());
      }
      // earlier... If we get rid of output, we can get rid of this.
      if (parts.isEmpty()) {
        if (throwIfNonExistent) {
          throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(partSpec.getExprString()));
        }
      }
      for (Partition p : parts) {
        outputs.add(new WriteEntity(p, WriteEntity.WriteType.DDL_EXCLUSIVE));
      }
    }
  }
}
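Here db.getPartitionsByExpr fills the caller-supplied list with the partitions matching the drop expression and returns true when the expression could not be decided for some candidate partitions; since DROP PARTITION must resolve its predicate completely, that flag is turned into an error. A hedged sketch of the same contract as a standalone helper (the resolveDropPartitions name is hypothetical; db and conf are assumed to be the analyzer's fields):

private List<Partition> resolveDropPartitions(Table tab, ExprNodeGenericFuncDesc expr)
    throws SemanticException {
  List<Partition> matched = new ArrayList<Partition>();
  boolean hasUnknown;
  try {
    // Fills 'matched'; returns true if some partitions could not be evaluated
    // against the expression.
    hasUnknown = db.getPartitionsByExpr(tab, expr, conf, matched);
  } catch (Exception e) {
    throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(expr.getExprString()), e);
  }
  if (hasUnknown) {
    throw new SemanticException("Unexpected unknown partitions for " + expr.getExprString());
  }
  return matched;
}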
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class LoadSemanticAnalyzer, method analyzeInternal.
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
  boolean isLocal = false;
  boolean isOverWrite = false;
  Tree fromTree = ast.getChild(0);
  Tree tableTree = ast.getChild(1);
  if (ast.getChildCount() == 4) {
    isLocal = true;
    isOverWrite = true;
  }
  if (ast.getChildCount() == 3) {
    if (ast.getChild(2).getText().toLowerCase().equals("local")) {
      isLocal = true;
    } else {
      isOverWrite = true;
    }
  }
  // initialize load path
  URI fromURI;
  try {
    String fromPath = stripQuotes(fromTree.getText());
    fromURI = initializeFromURI(fromPath, isLocal);
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
  } catch (URISyntaxException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
  }
  // initialize destination table/partition
  TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree);
  if (ts.tableHandle.isView() || ts.tableHandle.isMaterializedView()) {
    throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
  }
  if (ts.tableHandle.isNonNative()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
  }
  if (ts.tableHandle.isStoredAsSubDirectories()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
  }
  List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
  if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
    throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
  }
  List<String> bucketCols = ts.tableHandle.getBucketCols();
  if (bucketCols != null && !bucketCols.isEmpty()) {
    String error = StrictChecks.checkBucketing(conf);
    if (error != null) {
      throw new SemanticException("Please load into an intermediate table"
          + " and use 'insert... select' to allow Hive to enforce bucketing. " + error);
    }
  }
  // make sure the arguments make sense
  List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, fromTree, isLocal, ts.tableHandle);
  // for managed tables, make sure the file formats match
  if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())
      && conf.getBoolVar(HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
    ensureFileFormatsMatch(ts, files, fromURI);
  }
  inputs.add(toReadEntity(new Path(fromURI)));
  Task<? extends Serializable> rTask = null;
  // create final load/move work
  boolean preservePartitionSpecs = false;
  Map<String, String> partSpec = ts.getPartSpec();
  if (partSpec == null) {
    partSpec = new LinkedHashMap<String, String>();
    outputs.add(new WriteEntity(ts.tableHandle,
        (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
  } else {
    try {
      Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
      if (part != null) {
        if (isOverWrite) {
          outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT_OVERWRITE));
        } else {
          outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT));
          // If the partition already exists and we aren't overwriting it, then respect
          // its current location info rather than picking it from the parent TableDesc.
          preservePartitionSpecs = true;
        }
      } else {
        outputs.add(new WriteEntity(ts.tableHandle,
            (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
      }
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
  }
  Long writeId = null;
  int stmtId = -1;
  if (AcidUtils.isTransactionalTable(ts.tableHandle)) {
    try {
      writeId = SessionState.get().getTxnMgr().getTableWriteId(
          ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
    } catch (LockException ex) {
      throw new SemanticException("Failed to allocate the write id", ex);
    }
    stmtId = SessionState.get().getTxnMgr().getStmtIdAndIncrement();
  }
  // Note: this sets LoadFileType incorrectly for ACID; is that relevant for load?
  // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
  LoadTableDesc loadTableWork = new LoadTableDesc(new Path(fromURI),
      Utilities.getTableDesc(ts.tableHandle), partSpec,
      isOverWrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING, writeId);
  loadTableWork.setStmtId(stmtId);
  if (preservePartitionSpecs) {
    // Note: preservePartitionSpecs=true implies inheritTableSpecs=false, but
    // preservePartitionSpecs=false (the default) is not by itself enough
    // information to set inheritTableSpecs=true.
    loadTableWork.setInheritTableSpecs(false);
  }
  Task<? extends Serializable> childTask = TaskFactory.get(
      new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true, isLocal));
  if (rTask != null) {
    rTask.addDependentTask(childTask);
  } else {
    rTask = childTask;
  }
  rootTasks.add(rTask);
  // The user asked for stats to be collected.
  // Some stats, like the number of rows, require a scan of the data;
  // however, some other stats, like the number of files, do not require a complete scan.
  // Update the stats which do not require a complete scan.
  Task<? extends Serializable> statTask = null;
  if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    BasicStatsWork basicStatsWork = new BasicStatsWork(loadTableWork);
    basicStatsWork.setNoStatsAggregator(true);
    basicStatsWork.setClearAggregatorStats(true);
    StatsWork columnStatsWork = new StatsWork(ts.tableHandle, basicStatsWork, conf);
    statTask = TaskFactory.get(columnStatsWork);
  }
  if (statTask != null) {
    childTask.addDependentTask(statTask);
  }
}
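The write-entity selection above determines what the pre-execution hooks and the lock manager see: with no partition spec the table itself is the write target; with a spec, an existing partition becomes the target (and, when appending, its current location is preserved via preservePartitionSpecs), while a not-yet-existing partition falls back to the table, since the move task will create it. A condensed, hypothetical helper capturing that decision (chooseLoadOutput is not part of the analyzer; ts, partSpec, and isOverWrite are as above):

private WriteEntity chooseLoadOutput(TableSpec ts, Map<String, String> partSpec, boolean isOverWrite)
    throws HiveException {
  WriteEntity.WriteType type =
      isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT;
  if (partSpec == null || partSpec.isEmpty()) {
    // Unpartitioned load (or no target partition named): track the table itself.
    return new WriteEntity(ts.tableHandle, type);
  }
  Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
  if (part != null) {
    // Existing partition: it is the write target; the analyzer also keeps its
    // current location when appending (preservePartitionSpecs).
    return new WriteEntity(part, type);
  }
  // Partition does not exist yet; the move task will create it, so the table
  // remains the tracked output.
  return new WriteEntity(ts.tableHandle, type);
}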
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class ProcessAnalyzeTable, method process.
@SuppressWarnings("unchecked")
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
    Object... nodeOutputs) throws SemanticException {
  GenTezProcContext context = (GenTezProcContext) procContext;
  TableScanOperator tableScan = (TableScanOperator) nd;
  ParseContext parseContext = context.parseContext;
  Table table = tableScan.getConf().getTableMetadata();
  Class<? extends InputFormat> inputFormat = table.getInputFormatClass();
  if (parseContext.getQueryProperties().isAnalyzeCommand()) {
    assert tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0;
    String alias = null;
    for (String a : parseContext.getTopOps().keySet()) {
      if (tableScan == parseContext.getTopOps().get(a)) {
        alias = a;
      }
    }
    assert alias != null;
    TezWork tezWork = context.currentTask.getWork();
    if (OrcInputFormat.class.isAssignableFrom(inputFormat)
        || MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
      // For ORC and Parquet, all of the following statements are handled the same way:
      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
      // There will not be any Tez job above this task.
      StatsWork statWork = new StatsWork(table, parseContext.getConf());
      statWork.setFooterScan();
      // If a partition is specified, get the pruned partition list.
      Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
      if (confirmedParts.size() > 0) {
        List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
        PrunedPartitionList partList = new PrunedPartitionList(table, confirmedParts, partCols, false);
        statWork.addInputPartitions(partList.getPartitions());
      }
      Task<StatsWork> snjTask = TaskFactory.get(statWork);
      snjTask.setParentTasks(null);
      context.rootTasks.remove(context.currentTask);
      context.rootTasks.add(snjTask);
      return true;
    } else {
      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
      // The plan consists of a simple TezTask followed by a StatsTask.
      // The Tez task is just a simple TableScanOperator.
      BasicStatsWork basicStatsWork = new BasicStatsWork(table.getTableSpec());
      basicStatsWork.setNoScanAnalyzeCommand(parseContext.getQueryProperties().isNoScanAnalyzeCommand());
      StatsWork columnStatsWork = new StatsWork(table, basicStatsWork, parseContext.getConf());
      columnStatsWork.collectStatsFromAggregator(tableScan.getConf());
      columnStatsWork.setSourceTask(context.currentTask);
      Task<StatsWork> statsTask = TaskFactory.get(columnStatsWork);
      context.currentTask.addDependentTask(statsTask);
      // For a noscan analyze command, the plan consists of a StatsTask only.
      if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) {
        statsTask.setParentTasks(null);
        context.rootTasks.remove(context.currentTask);
        context.rootTasks.add(statsTask);
      }
      // NOTE: here we should use the new partition predicate pushdown API to
      // get the pruned partition list, and pass it to setTaskPlan as the last parameter.
      Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
      PrunedPartitionList partitions = null;
      if (confirmedPartns.size() > 0) {
        List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
        partitions = new PrunedPartitionList(table, confirmedPartns, partCols, false);
      }
      MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
      w.setGatheringStats(true);
      return true;
    }
  }
  return null;
}
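For the ORC/Parquet branch, the interesting step is attaching the statically known (confirmed) partitions to the footer-scan StatsWork, so the stats task knows exactly which partition directories to read footers from. The same steps condensed into a standalone sketch, reusing only calls shown above (table, tableScan, and parseContext are assumed to be in scope):

StatsWork statWork = new StatsWork(table, parseContext.getConf());
// Footer scan: basic stats come from ORC/Parquet file footers, so no Tez scan job is needed.
statWork.setFooterScan();
Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
if (!confirmedParts.isEmpty()) {
  List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
  // Same constructor arguments as in the source; the list carries the partitions
  // named in the ANALYZE statement.
  PrunedPartitionList partList = new PrunedPartitionList(table, confirmedParts, partCols, false);
  statWork.addInputPartitions(partList.getPartitions());
}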
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class TableSerializer, method writePartitions.
private void writePartitions(JsonWriter writer, ReplicationSpec additionalPropertiesProvider)
    throws SemanticException, IOException {
  writer.jsonGenerator.writeStartArray();
  if (partitions != null) {
    for (org.apache.hadoop.hive.ql.metadata.Partition partition : partitions) {
      new PartitionSerializer(partition.getTPartition()).writeTo(writer, additionalPropertiesProvider);
    }
  }
  writer.jsonGenerator.writeEndArray();
}