Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
Class SemanticAnalyzer, method genConstraintsPlan.
private Operator genConstraintsPlan(String dest, QB qb, Operator input) throws SemanticException {
  if (deleting(dest)) {
    // for DELETE statements NOT NULL constraints need not be checked
    return input;
  }
  // MERGE statements could have inserted a cardinality-violation branch; we need to avoid that
  if (mergeCardinalityViolationBranch(input)) {
    return input;
  }
  // if this is an INSERT INTO statement we might need to add a constraint check
  Table targetTable = null;
  Integer dest_type = qb.getMetaData().getDestTypeForAlias(dest);
  if (dest_type == QBMetaData.DEST_TABLE) {
    targetTable = qb.getMetaData().getDestTableForAlias(dest);
  } else if (dest_type == QBMetaData.DEST_PARTITION) {
    Partition dest_part = qb.getMetaData().getDestPartitionForAlias(dest);
    targetTable = dest_part.getTable();
  } else {
    throw new SemanticException("Generating constraint check plan: Invalid target type: " + dest);
  }
  RowResolver inputRR = opParseCtx.get(input).getRowResolver();
  ExprNodeDesc nullConstraintExpr = getNotNullConstraintExpr(targetTable, input, dest);
  ExprNodeDesc checkConstraintExpr = getCheckConstraintExpr(targetTable, input, inputRR, dest);
  ExprNodeDesc combinedConstraintExpr = null;
  if (nullConstraintExpr != null && checkConstraintExpr != null) {
    assert (input.getParentOperators().size() == 1);
    combinedConstraintExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("and",
        nullConstraintExpr, checkConstraintExpr);
  } else if (nullConstraintExpr != null) {
    combinedConstraintExpr = nullConstraintExpr;
  } else if (checkConstraintExpr != null) {
    combinedConstraintExpr = checkConstraintExpr;
  }
  if (combinedConstraintExpr != null) {
    ExprNodeDesc constraintUDF = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc(
        "enforce_constraint", combinedConstraintExpr);
    Operator newConstraintFilter = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new FilterDesc(constraintUDF, false), new RowSchema(inputRR.getColumnInfos()), input), inputRR);
    return newConstraintFilter;
  }
  return input;
}
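The DEST_PARTITION branch above is why Partition appears at all: NOT NULL and CHECK constraints are defined on the table, so when the insert destination is a partition the analyzer reaches the owning table through dest_part.getTable(). Below is a minimal, self-contained sketch of that resolution pattern; the class and method names are hypothetical, and only the Partition and Table calls already used in the snippet are assumed.

// Hypothetical helper, not Hive code: resolve the table whose constraints apply to a write target.
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public final class ConstraintTargetResolver {

  private ConstraintTargetResolver() {
  }

  /**
   * Resolve the table whose NOT NULL / CHECK constraints apply to a write.
   * Exactly one of the two arguments is expected to be non-null.
   */
  public static Table resolveConstraintTable(Table destTable, Partition destPartition) {
    if (destTable != null) {
      return destTable;                 // DEST_TABLE case
    }
    if (destPartition != null) {
      return destPartition.getTable();  // DEST_PARTITION case: constraints live on the owning table
    }
    throw new IllegalArgumentException("Neither a destination table nor a destination partition was given");
  }
}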
Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
Class SemanticAnalyzer, method validate.
@Override
public void validate() throws SemanticException {
  LOG.debug("validation start");
  boolean wasAcidChecked = false;
  // Validate that inputs and outputs have the right protectmode to execute the query
  for (ReadEntity readEntity : getInputs()) {
    ReadEntity.Type type = readEntity.getType();
    if (type != ReadEntity.Type.TABLE && type != ReadEntity.Type.PARTITION) {
      // only tables and partitions are validated; the check is kept here to make the logic complete
      continue;
    }
    Table tbl = readEntity.getTable();
    Partition p = readEntity.getPartition();
    if (p != null) {
      tbl = p.getTable();
    }
    if (tbl != null && AcidUtils.isTransactionalTable(tbl)) {
      transactionalInQuery = true;
      if (!wasAcidChecked) {
        checkAcidTxnManager(tbl);
      }
      wasAcidChecked = true;
    }
  }
  for (WriteEntity writeEntity : getOutputs()) {
    WriteEntity.Type type = writeEntity.getType();
    if (type == WriteEntity.Type.PARTITION || type == WriteEntity.Type.DUMMYPARTITION) {
      String conflictingArchive = null;
      try {
        Partition usedp = writeEntity.getPartition();
        Table tbl = usedp.getTable();
        if (AcidUtils.isTransactionalTable(tbl)) {
          transactionalInQuery = true;
          if (!wasAcidChecked) {
            checkAcidTxnManager(tbl);
          }
          wasAcidChecked = true;
        }
        LOG.debug("validated " + usedp.getName());
        LOG.debug(usedp.getTable().getTableName());
        WriteEntity.WriteType writeType = writeEntity.getWriteType();
        if (writeType != WriteType.UPDATE && writeType != WriteType.DELETE) {
          // Do not check for ACID; it does not create new parts and this is expensive as hell.
          // TODO: add an API to get table name list for archived parts with a single call;
          //       nobody uses this so we could skip the whole thing.
          conflictingArchive = ArchiveUtils.conflictingArchiveNameOrNull(db, tbl, usedp.getSpec());
        }
      } catch (HiveException e) {
        throw new SemanticException(e);
      }
      if (conflictingArchive != null) {
        String message = String.format("Insert conflict with existing archive: %s", conflictingArchive);
        throw new SemanticException(message);
      }
    } else if (type == WriteEntity.Type.TABLE) {
      Table tbl = writeEntity.getTable();
      if (AcidUtils.isTransactionalTable(tbl)) {
        transactionalInQuery = true;
        if (!wasAcidChecked) {
          checkAcidTxnManager(tbl);
        }
        wasAcidChecked = true;
      }
    }
    if (type != WriteEntity.Type.TABLE && type != WriteEntity.Type.PARTITION) {
      LOG.debug("not validating writeEntity, because entity is neither table nor partition");
      continue;
    }
  }
  boolean reworkMapredWork = HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_REWORK_MAPREDWORK);
  // validate all tasks
  for (Task<? extends Serializable> rootTask : rootTasks) {
    validate(rootTask, reworkMapredWork);
  }
}
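Both ReadEntity and WriteEntity can wrap a Partition, and in every branch the ACID check is made against the owning Table. The following is a hedged, self-contained sketch of that "resolve the table, then check isTransactionalTable" pattern for the input side; the class and method names are hypothetical, and only the entity and AcidUtils calls seen in the method above are assumed.

// Hypothetical helper, not Hive code: detect transactional (ACID) tables among read entities.
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public final class TransactionalInputCheck {

  private TransactionalInputCheck() {
  }

  /** Returns true if any table or partition input belongs to a transactional (ACID) table. */
  public static boolean anyTransactionalInput(Iterable<ReadEntity> inputs) {
    for (ReadEntity readEntity : inputs) {
      ReadEntity.Type type = readEntity.getType();
      if (type != ReadEntity.Type.TABLE && type != ReadEntity.Type.PARTITION) {
        continue;  // other entity kinds carry no table metadata to check
      }
      Table tbl = readEntity.getTable();
      Partition p = readEntity.getPartition();
      if (p != null) {
        tbl = p.getTable();  // a partition entity is checked through its owning table
      }
      if (tbl != null && AcidUtils.isTransactionalTable(tbl)) {
        return true;
      }
    }
    return false;
  }
}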
Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
Class SemanticAnalyzer, method setupStats.
private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch)
    throws SemanticException {
  // if it is not an analyze command and not a column stats rewrite, do not gather stats
  if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
    tsDesc.setGatherStats(false);
  } else {
    if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
      String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
      LOG.debug("Set stats collection dir : " + statsTmpLoc);
      tsDesc.setTmpStatsDir(statsTmpLoc);
    }
    tsDesc.setGatherStats(true);
    tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    // append additional virtual columns for storing statistics
    Iterator<VirtualColumn> vcs = VirtualColumn.getStatsRegistry(conf).iterator();
    List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
    while (vcs.hasNext()) {
      VirtualColumn vc = vcs.next();
      rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
      vcList.add(vc);
    }
    tsDesc.addVirtualCols(vcList);
    String tblName = tab.getTableName();
    // Theoretically the key prefix could be any unique string shared
    // between TableScanOperator (when publishing) and StatsTask (when aggregating).
    // Here we use
    //   db_name.table_name + partitionSpec
    // as the prefix for ease of reading during explain and debugging.
    // Currently, the partition spec can only be a static partition.
    String k = org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
    tsDesc.setStatsAggPrefix(tab.getDbName() + "." + k);
    // set up WriteEntity for replication
    outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
    // add WriteEntity for each matching partition
    if (tab.isPartitioned()) {
      List<String> cols = new ArrayList<String>();
      if (qbp.getAnalyzeRewrite() != null) {
        List<FieldSchema> partitionCols = tab.getPartCols();
        for (FieldSchema fs : partitionCols) {
          cols.add(fs.getName());
        }
        tsDesc.setPartColumns(cols);
        return;
      }
      TableSpec tblSpec = qbp.getTableSpec(alias);
      Map<String, String> partSpec = tblSpec.getPartSpec();
      if (partSpec != null) {
        cols.addAll(partSpec.keySet());
        tsDesc.setPartColumns(cols);
      } else {
        throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
      }
      List<Partition> partitions = qbp.getTableSpec().partitions;
      if (partitions != null) {
        for (Partition partn : partitions) {
          // inputs.add(new ReadEntity(partn)); // is this needed at all?
          LOG.info("XXX: adding part: " + partn);
          outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
        }
      }
    }
  }
}
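For a partitioned table, each matched Partition is registered as a WriteEntity with DDL_NO_LOCK, while the table itself carries the shared DDL lock. The sketch below isolates that registration step; the helper is hypothetical and uses only the WriteEntity constructors and lock types that appear above.

// Hypothetical helper, not Hive code: build the output WriteEntity list for a stats-gathering scan.
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public final class StatsOutputEntities {

  private StatsOutputEntities() {
  }

  public static List<WriteEntity> forAnalyze(Table tab, List<Partition> matchedPartitions) {
    List<WriteEntity> outputs = new ArrayList<WriteEntity>();
    // the table gets a shared DDL lock
    outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
    // the matched partitions are recorded but need no extra lock of their own
    if (matchedPartitions != null) {
      for (Partition partn : matchedPartitions) {
        outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
      }
    }
    return outputs;
  }
}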
Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
Class DDLSemanticAnalyzer, method addInputsOutputsAlterTable.
private void addInputsOutputsAlterTable(String tableName, Map<String, String> partSpec, AlterTableDesc desc,
    AlterTableTypes op, boolean doForceExclusive) throws SemanticException {
  boolean isCascade = desc != null && desc.getIsCascade();
  boolean alterPartitions = partSpec != null && !partSpec.isEmpty();
  // CASCADE is only supported at the table level; it then cascades down to the partitions
  if (isCascade && alterPartitions) {
    throw new SemanticException(ErrorMsg.ALTER_TABLE_PARTITION_CASCADE_NOT_SUPPORTED, op.getName());
  }
  Table tab = getTable(tableName, true);
  // CASCADE only makes sense for partitioned tables
  if (isCascade && !tab.isPartitioned()) {
    throw new SemanticException(ErrorMsg.ALTER_TABLE_NON_PARTITIONED_TABLE_CASCADE_NOT_SUPPORTED);
  }
  // Determine the lock type to acquire
  WriteEntity.WriteType writeType = doForceExclusive
      ? WriteType.DDL_EXCLUSIVE : determineAlterTableWriteType(tab, desc, op);
  if (!alterPartitions) {
    inputs.add(new ReadEntity(tab));
    alterTableOutput = new WriteEntity(tab, writeType);
    outputs.add(alterTableOutput);
    // the partitions do not need their own lock since they are covered by the table lock
    if (isCascade) {
      for (Partition part : getPartitions(tab, partSpec, false)) {
        outputs.add(new WriteEntity(part, WriteEntity.WriteType.DDL_NO_LOCK));
      }
    }
  } else {
    ReadEntity re = new ReadEntity(tab);
    // When altering a table for its partitions we don't need to lock the table itself,
    // just the partitions. But the table will have a ReadEntity, so mark that
    // ReadEntity as requiring no lock.
    re.noLockNeeded();
    inputs.add(re);
    if (isFullSpec(tab, partSpec)) {
      // Fully specified partition spec
      Partition part = getPartition(tab, partSpec, true);
      outputs.add(new WriteEntity(part, writeType));
    } else {
      // Partial partition spec supplied. Make sure this is allowed.
      if (!AlterTableDesc.doesAlterTableTypeSupportPartialPartitionSpec(op)) {
        throw new SemanticException(ErrorMsg.ALTER_TABLE_TYPE_PARTIAL_PARTITION_SPEC_NO_SUPPORTED, op.getName());
      } else if (!conf.getBoolVar(HiveConf.ConfVars.DYNAMICPARTITIONING)) {
        throw new SemanticException(ErrorMsg.DYNAMIC_PARTITION_DISABLED);
      }
      for (Partition part : getPartitions(tab, partSpec, true)) {
        outputs.add(new WriteEntity(part, writeType));
      }
    }
  }
  if (desc != null) {
    validateAlterTableType(tab, op, desc.getExpectView());
    // validate that UNSET TBLPROPERTIES does not reference non-existent properties
    if (op == AlterTableDesc.AlterTableTypes.DROPPROPS && !desc.getIsDropIfExists()) {
      Map<String, String> tableParams = tab.getTTable().getParameters();
      for (String currKey : desc.getProps().keySet()) {
        if (!tableParams.containsKey(currKey)) {
          String errorMsg = "The following property " + currKey + " does not exist in " + tab.getTableName();
          throw new SemanticException(ErrorMsg.ALTER_TBL_UNSET_NON_EXIST_PROPERTY.getMsg(errorMsg));
        }
      }
    }
  }
}
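The branch on isFullSpec decides whether a single Partition is written (fully specified spec) or a set of partitions matching a partial spec. As an illustration only, the sketch below shows one way such a "full spec" test can be expressed against the table's partition columns; it is not the actual DDLSemanticAnalyzer.isFullSpec implementation, and the helper name is hypothetical.

// Hypothetical helper, not Hive code: a spec is "full" only if it pins every partition column.
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Table;

public final class PartitionSpecs {

  private PartitionSpecs() {
  }

  public static boolean isFullPartitionSpec(Table tab, Map<String, String> partSpec) {
    if (partSpec == null || partSpec.isEmpty()) {
      return false;
    }
    for (FieldSchema partCol : tab.getPartCols()) {
      if (!partSpec.containsKey(partCol.getName())) {
        return false;  // at least one partition column is not pinned => partial spec
      }
    }
    return true;
  }
}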
Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
Class DDLSemanticAnalyzer, method analyzeAlterTablePartMergeFiles.
private void analyzeAlterTablePartMergeFiles(ASTNode ast, String tableName, HashMap<String, String> partSpec)
    throws SemanticException {
  AlterTablePartMergeFilesDesc mergeDesc = new AlterTablePartMergeFilesDesc(tableName, partSpec);
  List<Path> inputDir = new ArrayList<Path>();
  Path oldTblPartLoc = null;
  Path newTblPartLoc = null;
  Table tblObj = null;
  ListBucketingCtx lbCtx = null;
  try {
    tblObj = getTable(tableName);
    // TODO: we should probably block all ACID tables here.
    if (AcidUtils.isInsertOnlyTable(tblObj.getParameters())) {
      throw new SemanticException("Merge is not supported for MM tables");
    }
    mergeDesc.setTableDesc(Utilities.getTableDesc(tblObj));
    List<String> bucketCols = null;
    Class<? extends InputFormat> inputFormatClass = null;
    boolean isArchived = false;
    if (tblObj.isPartitioned()) {
      if (partSpec == null) {
        throw new SemanticException("source table " + tableName + " is partitioned but no partition desc found.");
      } else {
        Partition part = getPartition(tblObj, partSpec, false);
        if (part == null) {
          throw new SemanticException("source table " + tableName + " is partitioned but partition not found.");
        }
        bucketCols = part.getBucketCols();
        inputFormatClass = part.getInputFormatClass();
        isArchived = ArchiveUtils.isArchived(part);
        Path tabPath = tblObj.getPath();
        Path partPath = part.getDataLocation();
        // if the table is on a different dfs than the partition,
        // replace the partition's dfs with the table's dfs.
        newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(),
            partPath.toUri().getPath());
        oldTblPartLoc = partPath;
        lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(),
            part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
      }
    } else {
      inputFormatClass = tblObj.getInputFormatClass();
      bucketCols = tblObj.getBucketCols();
      // input and output are the same
      oldTblPartLoc = tblObj.getPath();
      newTblPartLoc = tblObj.getPath();
      lbCtx = constructListBucketingCtx(tblObj.getSkewedColNames(), tblObj.getSkewedColValues(),
          tblObj.getSkewedColValueLocationMaps(), tblObj.isStoredAsSubDirectories(), conf);
    }
    // reject file formats other than RCFile and ORC
    if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass.equals(OrcInputFormat.class))) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_FILE_FORMAT.getMsg());
    }
    mergeDesc.setInputFormatClass(inputFormatClass);
    // reject bucketed tables/partitions
    if (bucketCols != null && bucketCols.size() > 0) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_BUCKETED.getMsg());
    }
    // reject archived partitions
    if (isArchived) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED.getMsg());
    }
    // non-native and non-managed tables are not supported: the subsequent move requires filenames
    // to be in a specific format, violating which can cause data loss
    if (tblObj.isNonNative()) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE.getMsg());
    }
    if (tblObj.getTableType() != TableType.MANAGED_TABLE) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED.getMsg());
    }
    // transactional tables are compacted and no longer need to be bucketed, so merge/concatenation is not safe
    boolean isAcid = AcidUtils.isTransactionalTable(tblObj);
    if (isAcid) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL.getMsg());
    }
    inputDir.add(oldTblPartLoc);
    mergeDesc.setInputDir(inputDir);
    mergeDesc.setLbCtx(lbCtx);
    addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.MERGEFILES);
    DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), mergeDesc);
    ddlWork.setNeedLock(true);
    Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork);
    TableDesc tblDesc = Utilities.getTableDesc(tblObj);
    Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
    mergeDesc.setOutputDir(queryTmpdir);
    // No need to handle MM tables - unsupported path.
    LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap<>() : partSpec);
    ltd.setLbCtx(lbCtx);
    Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
    mergeTask.addDependentTask(moveTsk);
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      BasicStatsWork basicStatsWork;
      if (oldTblPartLoc.equals(newTblPartLoc)) {
        // If we're merging to the same location, we can avoid some metastore calls
        TableSpec tableSpec = new TableSpec(db, tableName, partSpec);
        basicStatsWork = new BasicStatsWork(tableSpec);
      } else {
        basicStatsWork = new BasicStatsWork(ltd);
      }
      basicStatsWork.setNoStatsAggregator(true);
      basicStatsWork.setClearAggregatorStats(true);
      StatsWork columnStatsWork = new StatsWork(tblObj, basicStatsWork, conf);
      Task<? extends Serializable> statTask = TaskFactory.get(columnStatsWork);
      moveTsk.addDependentTask(statTask);
    }
    rootTasks.add(mergeTask);
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
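Most of the eligibility checks above read physical-layout metadata straight off the Partition: its input format, bucket columns, archive status, and data location. The following hedged sketch condenses those checks into a standalone precheck; the class and method are hypothetical, and only Partition and ArchiveUtils calls already used in the analyzer are assumed.

// Hypothetical precheck, not the Hive implementation: mirrors the SemanticException branches above.
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.mapred.InputFormat;

public final class ConcatenationPrecheck {

  private ConcatenationPrecheck() {
  }

  /** Returns the partition's data directory if it looks eligible for merging, or null otherwise. */
  public static Path mergeableLocationOrNull(Partition part) throws HiveException {
    Class<? extends InputFormat> inputFormatClass = part.getInputFormatClass();
    boolean supportedFormat = inputFormatClass.equals(RCFileInputFormat.class)
        || inputFormatClass.equals(OrcInputFormat.class);
    List<String> bucketCols = part.getBucketCols();
    boolean bucketed = bucketCols != null && !bucketCols.isEmpty();
    if (!supportedFormat || bucketed || ArchiveUtils.isArchived(part)) {
      return null;  // only non-archived, non-bucketed RCFile/ORC partitions are merged
    }
    return part.getDataLocation();
  }
}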