Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
Class DDLSemanticAnalyzer, method analyzeDropDatabase.
private void analyzeDropDatabase(ASTNode ast) throws SemanticException {
  String dbName = unescapeIdentifier(ast.getChild(0).getText());
  boolean ifExists = false;
  boolean ifCascade = false;
  if (null != ast.getFirstChildWithType(HiveParser.TOK_IFEXISTS)) {
    ifExists = true;
  }
  if (null != ast.getFirstChildWithType(HiveParser.TOK_CASCADE)) {
    ifCascade = true;
  }
  Database database = getDatabase(dbName, !ifExists);
  if (database == null) {
    return;
  }
  // if cascade=true, then we need to authorize the drop table action as well
  if (ifCascade) {
    // add the tables as well to outputs
    List<String> tableNames;
    // get names of all tables under this dbName
    try {
      tableNames = db.getAllTables(dbName);
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
    // add tables to outputs
    if (tableNames != null) {
      for (String tableName : tableNames) {
        Table table = getTable(dbName, tableName, true);
        // We want no lock here, as the database lock will cover the tables,
        // and putting a lock will actually cause us to deadlock on ourselves.
        outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
      }
    }
  }
  inputs.add(new ReadEntity(database));
  outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_EXCLUSIVE));
  DropDatabaseDesc dropDatabaseDesc = new DropDatabaseDesc(dbName, ifExists, ifCascade);
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropDatabaseDesc), conf));
}
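This is the pattern that recurs throughout Hive's semantic analyzers: metastore-backed calls such as db.getAllTables throw the checked HiveException, and the analyzer rethrows it as a SemanticException so the failure surfaces as a compile-time error. A minimal sketch of that pattern, with listTablesOrFail as a hypothetical helper name:

private List<String> listTablesOrFail(Hive db, String dbName) throws SemanticException {
  try {
    // Any metastore access may fail with the checked HiveException.
    return db.getAllTables(dbName);
  } catch (HiveException e) {
    // Rethrow as SemanticException so the compiler reports it as a semantic error.
    throw new SemanticException(e);
  }
}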
Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
Class LoadSemanticAnalyzer, method ensureFileFormatsMatch.
private void ensureFileFormatsMatch(TableSpec ts, List<FileStatus> fileStatuses, final URI fromURI) throws SemanticException {
  final Class<? extends InputFormat> destInputFormat;
  try {
    if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
      destInputFormat = ts.tableHandle.getInputFormatClass();
    } else {
      destInputFormat = ts.partHandle.getInputFormatClass();
    }
  } catch (HiveException e) {
    throw new SemanticException(e);
  }
  try {
    FileSystem fs = FileSystem.get(fromURI, conf);
    boolean validFormat = HiveFileFormatUtils.checkInputFormat(fs, conf, destInputFormat, fileStatuses);
    if (!validFormat) {
      throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg());
    }
  } catch (Exception e) {
    throw new SemanticException("Unable to load data to destination table." + " Error: " + e.getMessage());
  }
}
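Note that the SemanticException thrown for an invalid file format sits inside the same try block, so the catch (Exception e) clause re-wraps it and the caller always sees the generic "Unable to load data" message; the root cause is also dropped, because only e.getMessage() is kept. If the original stack trace matters, a hedged variant of the catch block (assuming SemanticException exposes a (String, Throwable) constructor, as HiveException does) could chain the cause:

} catch (Exception e) {
  // Chain the original exception so the full stack trace survives in logs.
  throw new SemanticException("Unable to load data to destination table. Error: " + e.getMessage(), e);
}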
Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
Class DDLSemanticAnalyzer, method analyzeAlterTablePartMergeFiles.
private void analyzeAlterTablePartMergeFiles(ASTNode ast, String tableName, HashMap<String, String> partSpec) throws SemanticException {
  AlterTablePartMergeFilesDesc mergeDesc = new AlterTablePartMergeFilesDesc(tableName, partSpec);
  List<Path> inputDir = new ArrayList<Path>();
  Path oldTblPartLoc = null;
  Path newTblPartLoc = null;
  Table tblObj = null;
  ListBucketingCtx lbCtx = null;
  try {
    tblObj = getTable(tableName);
    List<String> bucketCols = null;
    Class<? extends InputFormat> inputFormatClass = null;
    boolean isArchived = false;
    boolean checkIndex = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CONCATENATE_CHECK_INDEX);
    if (checkIndex) {
      List<Index> indexes = db.getIndexes(tblObj.getDbName(), tblObj.getTableName(), Short.MAX_VALUE);
      if (indexes != null && indexes.size() > 0) {
        throw new SemanticException("can not do merge because source table " + tableName + " is indexed.");
      }
    }
    if (tblObj.isPartitioned()) {
      if (partSpec == null) {
        throw new SemanticException("source table " + tableName + " is partitioned but no partition desc found.");
      } else {
        Partition part = getPartition(tblObj, partSpec, false);
        if (part == null) {
          throw new SemanticException("source table " + tableName + " is partitioned but partition not found.");
        }
        bucketCols = part.getBucketCols();
        inputFormatClass = part.getInputFormatClass();
        isArchived = ArchiveUtils.isArchived(part);
        Path tabPath = tblObj.getPath();
        Path partPath = part.getDataLocation();
        // if the table is in a different dfs than the partition,
        // replace the partition's dfs with the table's dfs.
        newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
        oldTblPartLoc = partPath;
        lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(), part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
      }
    } else {
      inputFormatClass = tblObj.getInputFormatClass();
      bucketCols = tblObj.getBucketCols();
      // input and output are the same
      oldTblPartLoc = tblObj.getPath();
      newTblPartLoc = tblObj.getPath();
      lbCtx = constructListBucketingCtx(tblObj.getSkewedColNames(), tblObj.getSkewedColValues(), tblObj.getSkewedColValueLocationMaps(), tblObj.isStoredAsSubDirectories(), conf);
    }
    // throw a SemanticException for any format other than RCFile and ORC
    if (!((inputFormatClass.equals(RCFileInputFormat.class) || (inputFormatClass.equals(OrcInputFormat.class))))) {
      throw new SemanticException("Only RCFile and ORCFile Formats are supported right now.");
    }
    mergeDesc.setInputFormatClass(inputFormatClass);
    // throw a SemanticException if the table/partition is bucketed
    if (bucketCols != null && bucketCols.size() > 0) {
      throw new SemanticException("Merge can not perform on bucketized partition/table.");
    }
    // throw a SemanticException if the partition is archived
    if (isArchived) {
      throw new SemanticException("Merge can not perform on archived partitions.");
    }
    inputDir.add(oldTblPartLoc);
    mergeDesc.setInputDir(inputDir);
    mergeDesc.setLbCtx(lbCtx);
    addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.MERGEFILES);
    DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), mergeDesc);
    ddlWork.setNeedLock(true);
    Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork, conf);
    TableDesc tblDesc = Utilities.getTableDesc(tblObj);
    Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
    mergeDesc.setOutputDir(queryTmpdir);
    LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap<String, String>() : partSpec);
    ltd.setLbCtx(lbCtx);
    Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
    mergeTask.addDependentTask(moveTsk);
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      StatsWork statDesc;
      if (oldTblPartLoc.equals(newTblPartLoc)) {
        // If we're merging to the same location, we can avoid some metastore calls
        TableSpec tablepart = new TableSpec(db, conf, tableName, partSpec);
        statDesc = new StatsWork(tablepart);
      } else {
        statDesc = new StatsWork(ltd);
      }
      statDesc.setNoStatsAggregator(true);
      statDesc.setClearAggregatorStats(true);
      statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
      Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
      moveTsk.addDependentTask(statTask);
    }
    rootTasks.add(mergeTask);
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
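The second half of the method wires up the execution chain for ALTER TABLE ... CONCATENATE: the merge itself is a DDL task, the move of the merged files back into place depends on it, and the optional stats task depends on the move. Condensed from the snippet above, with the same names and the setup details elided:

Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork, conf);      // performs the file merge
Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
mergeTask.addDependentTask(moveTsk);                                          // move runs only after the merge succeeds
if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
  moveTsk.addDependentTask(TaskFactory.get(statDesc, conf));                  // stats gathering runs last
}
rootTasks.add(mergeTask);                                                     // only the head of the chain is a root task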
Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
Class DDLSemanticAnalyzer, method preparePartitions.
private List<Partition> preparePartitions(org.apache.hadoop.hive.ql.metadata.Table baseTbl, HashMap<String, String> partSpec, org.apache.hadoop.hive.ql.metadata.Table indexTbl, Hive db, List<Partition> indexTblPartitions) throws HiveException, MetaException {
  List<Partition> baseTblPartitions = new ArrayList<Partition>();
  if (partSpec != null) {
    // if a partSpec is specified, only build the index for that partition
    Partition part = db.getPartition(baseTbl, partSpec, false);
    if (part == null) {
      throw new HiveException("Partition " + Warehouse.makePartName(partSpec, false) + " does not exist in table " + baseTbl.getTableName());
    }
    baseTblPartitions.add(part);
    Partition indexPart = db.getPartition(indexTbl, partSpec, false);
    if (indexPart == null) {
      indexPart = db.createPartition(indexTbl, partSpec);
    }
    indexTblPartitions.add(indexPart);
  } else if (baseTbl.isPartitioned()) {
    // if no partition is specified, create indexes for all partitions one by one.
    baseTblPartitions = db.getPartitions(baseTbl);
    for (Partition basePart : baseTblPartitions) {
      HashMap<String, String> pSpec = basePart.getSpec();
      Partition indexPart = db.getPartition(indexTbl, pSpec, false);
      if (indexPart == null) {
        indexPart = db.createPartition(indexTbl, pSpec);
      }
      indexTblPartitions.add(indexPart);
    }
  }
  return baseTblPartitions;
}
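Both branches rely on the same get-or-create idiom for the index table's partitions: look the partition up with forceCreate set to false, and create it only on a miss. A minimal sketch of that idiom, with getOrCreatePartition as a hypothetical helper built on the getPartition/createPartition calls used above:

private Partition getOrCreatePartition(Hive db, Table tbl, Map<String, String> spec) throws HiveException {
  // forceCreate = false: returns null instead of creating the partition on a miss
  Partition part = db.getPartition(tbl, spec, false);
  if (part == null) {
    part = db.createPartition(tbl, spec);
  }
  return part;
}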
Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
Class TaskCompiler, method compile.
@SuppressWarnings({ "nls", "unchecked" })
public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks, final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException {
  Context ctx = pCtx.getContext();
  GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
  List<Task<MoveWork>> mvTask = new ArrayList<Task<MoveWork>>();
  List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
  List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
  boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
  int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
  if (pCtx.getFetchTask() != null) {
    if (pCtx.getFetchTask().getTblDesc() == null) {
      return;
    }
    pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
    TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
    // If the serializer is ThriftJDBCBinarySerDe, it requires that NoOpFetchFormatter be used; when it isn't,
    // then either the ThriftFormatter or the DefaultFetchFormatter should be used.
    if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
      if (SessionState.get().isHiveServerQuery()) {
        conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
      } else {
        String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
        if (formatterName == null || formatterName.isEmpty()) {
          conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
        }
      }
    }
    return;
  }
  optimizeOperatorPlan(pCtx, inputs, outputs);
  /*
   * In case of a select, use a fetch task instead of a move task.
   * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
   * a column stats task later.
   */
  if (pCtx.getQueryProperties().isQuery() && !isCStats) {
    if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
      throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
    }
    LoadFileDesc loadFileDesc = loadFileWork.get(0);
    String cols = loadFileDesc.getColumns();
    String colTypes = loadFileDesc.getColumnTypes();
    String resFileFormat;
    TableDesc resultTab = pCtx.getFetchTableDesc();
    if (resultTab == null) {
      resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
      if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && (resFileFormat.equalsIgnoreCase("SequenceFile"))) {
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class);
        // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
        // read formatted thrift objects from the output SequenceFile written by Tasks.
        conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
      } else {
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class);
      }
    } else {
      if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB).equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
        // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
        // read formatted thrift objects from the output SequenceFile written by Tasks.
        conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
      }
    }
    FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
    boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
    fetch.setHiveServerQuery(isHiveServerQuery);
    fetch.setSource(pCtx.getFetchSource());
    fetch.setSink(pCtx.getFetchSink());
    if (isHiveServerQuery && null != resultTab && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
      fetch.setIsUsingThriftJDBCBinarySerDe(true);
    } else {
      fetch.setIsUsingThriftJDBCBinarySerDe(false);
    }
    pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf));
    // For the FetchTask, the limit optimization requires we fetch all the rows
    // in memory and count how many rows we get. It's not practical if the
    // limit factor is too big
    int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
    if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
      LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization.");
      globalLimitCtx.disableOpt();
    }
    if (outerQueryLimit == 0) {
      // Believe it or not, some tools do generate queries with limit 0 and then expect
      // the query to run quickly. Let's meet their requirement.
      LOG.info("Limit 0. No query execution needed.");
      return;
    }
  } else if (!isCStats) {
    for (LoadTableDesc ltd : loadTableWork) {
      Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
      mvTask.add(tsk);
      // Check to see if we are stale'ing any indexes and auto-update them if we want
      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
        IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf);
        try {
          List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
          for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
            tsk.addDependentTask(updateTask);
          }
        } catch (HiveException e) {
          console.printInfo("WARNING: could not auto-update stale indexes, which are not in sync");
        }
      }
    }
    boolean oneLoadFile = true;
    for (LoadFileDesc lfd : loadFileWork) {
      if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
        // should not have more than 1 load file for CTAS
        assert (oneLoadFile);
        // make the movetask's destination directory the table's destination.
        Path location;
        String loc = pCtx.getQueryProperties().isCTAS() ? pCtx.getCreateTable().getLocation() : pCtx.getCreateViewDesc().getLocation();
        if (loc == null) {
          // get the default location
          Path targetPath;
          try {
            String protoName = null;
            if (pCtx.getQueryProperties().isCTAS()) {
              protoName = pCtx.getCreateTable().getTableName();
            } else if (pCtx.getQueryProperties().isMaterializedView()) {
              protoName = pCtx.getCreateViewDesc().getViewName();
            }
            String[] names = Utilities.getDbTableName(protoName);
            if (!db.databaseExists(names[0])) {
              throw new SemanticException("ERROR: The database " + names[0] + " does not exist.");
            }
            Warehouse wh = new Warehouse(conf);
            targetPath = wh.getTablePath(db.getDatabase(names[0]), names[1]);
          } catch (HiveException e) {
            throw new SemanticException(e);
          } catch (MetaException e) {
            throw new SemanticException(e);
          }
          location = targetPath;
        } else {
          location = new Path(loc);
        }
        lfd.setTargetDir(location);
        oneLoadFile = false;
      }
      mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf));
    }
  }
  generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);
  // For each task, set the key descriptor for the reducer
  for (Task<? extends Serializable> rootTask : rootTasks) {
    GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
  }
  // If a task contains an operator which instructs bucketizedhiveinputformat
  // to be used, please do so
  for (Task<? extends Serializable> rootTask : rootTasks) {
    setInputFormat(rootTask);
  }
  optimizeTaskPlan(rootTasks, pCtx, ctx);
  /*
   * If the query was the result of analyze table column compute statistics rewrite, create
   * a column stats task instead of a fetch task to persist stats to the metastore.
   */
  if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
    Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
    getLeafTasks(rootTasks, leafTasks);
    if (isCStats) {
      genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0);
    } else {
      for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx.getColumnStatsAutoGatherContexts()) {
        if (!columnStatsAutoGatherContext.isInsertInto()) {
          genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0);
        } else {
          int numBitVector;
          try {
            numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
          } catch (Exception e) {
            throw new SemanticException(e.getMessage());
          }
          genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, numBitVector);
        }
      }
    }
  }
  decideExecMode(rootTasks, ctx, globalLimitCtx);
  if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) {
    // generate a DDL task and make it a dependent task of the leaf
    CreateTableDesc crtTblDesc = pCtx.getCreateTable();
    crtTblDesc.validate(conf);
    Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc), conf);
    patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask);
  } else if (pCtx.getQueryProperties().isMaterializedView()) {
    // generate a DDL task and make it a dependent task of the leaf
    CreateViewDesc viewDesc = pCtx.getCreateViewDesc();
    Task<? extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc), conf);
    patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask);
  }
  if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
    LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
    pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
  }
  if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
    LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
    globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
    List<ExecDriver> mrTasks = Utilities.getMRTasks(rootTasks);
    for (ExecDriver tsk : mrTasks) {
      tsk.setRetryCmdWhenFail(true);
    }
    List<SparkTask> sparkTasks = Utilities.getSparkTasks(rootTasks);
    for (SparkTask sparkTask : sparkTasks) {
      sparkTask.setRetryCmdWhenFail(true);
    }
  }
  Interner<TableDesc> interner = Interners.newStrongInterner();
  for (Task<? extends Serializable> rootTask : rootTasks) {
    GenMapRedUtils.internTableDesc(rootTask, interner);
    GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
  }
}
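The final loop interns every TableDesc in the task tree through a Guava Interner so that large plans referencing the same table from many tasks can share one descriptor instead of carrying duplicates. A small illustration of the interning semantics (descFromTaskA and descFromTaskB are hypothetical placeholders, and value-based equality on TableDesc is assumed, since that is what an Interner keys on):

// Equal descriptors collapse to a single canonical instance.
Interner<TableDesc> interner = Interners.newStrongInterner();
TableDesc a = interner.intern(descFromTaskA);
TableDesc b = interner.intern(descFromTaskB);
// If descFromTaskA.equals(descFromTaskB), then a == b after interning.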