Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From the class ImportSemanticAnalyzer, method addSinglePartition.
private static Task<?> addSinglePartition(URI fromURI, FileSystem fs, ImportTableDesc tblDesc, Table table,
    Warehouse wh, AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec,
    EximUtil.SemanticAnalyzerWrapperContext x) throws MetaException, IOException, HiveException {
  AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0);
  if (tblDesc.isExternal() && tblDesc.getLocation() == null) {
    x.getLOG().debug("Importing in-place: adding AddPart for partition "
        + partSpecToString(partSpec.getPartSpec()));
    // addPartitionDesc already has the right partition location
    Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf());
    return addPartTask;
  } else {
    String srcLocation = partSpec.getLocation();
    fixLocationInPartSpec(fs, tblDesc, table, wh, replicationSpec, partSpec, x);
    x.getLOG().debug("adding dependent CopyWork/AddPart/MoveWork for partition "
        + partSpecToString(partSpec.getPartSpec()) + " with source location: " + srcLocation);
    Path tgtLocation = new Path(partSpec.getLocation());
    Path tmpPath = x.getCtx().getExternalTmpPath(tgtLocation);
    Task<?> copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, new Path(srcLocation), tmpPath, x.getConf());
    Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf());
    LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, Utilities.getTableDesc(table),
        partSpec.getPartSpec(), true);
    loadTableWork.setInheritTableSpecs(false);
    Task<?> loadPartTask = TaskFactory.get(new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false), x.getConf());
    copyTask.addDependentTask(loadPartTask);
    addPartTask.addDependentTask(loadPartTask);
    x.getTasks().add(copyTask);
    return addPartTask;
  }
}
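The pattern worth isolating here is the task wiring: the data copy and the ADD PARTITION run as siblings, and the MoveWork built from the LoadTableDesc fires only after both complete. Below is a minimal sketch of just that wiring as a hypothetical helper (not part of ImportSemanticAnalyzer), reusing only the constructors and calls shown in the snippet above; copyTask, addPartTask, tmpPath, tableDesc, partSpec, and conf are assumed to be supplied by a caller such as addSinglePartition.

import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Hypothetical helper: wires a partition load so it runs only after both the
// data copy and the ADD PARTITION tasks have completed.
static Task<?> wirePartitionLoad(Task<?> copyTask, Task<?> addPartTask, Path tmpPath,
    TableDesc tableDesc, Map<String, String> partSpec, HiveConf conf) {
  // replace = true, as in the snippet above; the partition does not inherit table specs
  LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, tableDesc, partSpec, true);
  loadTableWork.setInheritTableSpecs(false);
  Task<?> loadPartTask = TaskFactory.get(
      new MoveWork(null, null, loadTableWork, null, false), conf);
  // the move/load task depends on both prerequisites
  copyTask.addDependentTask(loadPartTask);
  addPartTask.addDependentTask(loadPartTask);
  return loadPartTask;
}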
Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From the class ImportSemanticAnalyzer, method loadTable.
private static Task<?> loadTable(URI fromURI, Table table, boolean replace, Path tgtPath,
    ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x) {
  Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
  Path tmpPath = x.getCtx().getExternalTmpPath(tgtPath);
  Task<?> copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, tmpPath, x.getConf());
  LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, Utilities.getTableDesc(table),
      new TreeMap<String, String>(), replace);
  Task<?> loadTableTask = TaskFactory.get(new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false), x.getConf());
  copyTask.addDependentTask(loadTableTask);
  x.getTasks().add(copyTask);
  return loadTableTask;
}
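For an unpartitioned import the LoadTableDesc carries an empty partition spec, and the replace flag chooses between overwrite and append semantics when the MoveTask eventually runs. A minimal sketch, assuming the same tmpPath and table objects as in the snippet above:

// Sketch only: both descriptors reuse the constructor shown above;
// replace = true overwrites existing table data, replace = false appends to it.
LoadTableDesc overwriteLoad =
    new LoadTableDesc(tmpPath, Utilities.getTableDesc(table), new TreeMap<String, String>(), true);
LoadTableDesc appendLoad =
    new LoadTableDesc(tmpPath, Utilities.getTableDesc(table), new TreeMap<String, String>(), false);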
Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From the class IndexUpdater, method generateUpdateTasks.
public List<Task<? extends Serializable>> generateUpdateTasks() throws HiveException {
  hive = Hive.get(this.conf);
  for (LoadTableDesc ltd : loadTableWork) {
    TableDesc td = ltd.getTable();
    Table srcTable = hive.getTable(td.getTableName());
    List<Index> tblIndexes = IndexUtils.getAllIndexes(srcTable, (short) -1);
    Map<String, String> partSpec = ltd.getPartitionSpec();
    if (partSpec == null || partSpec.size() == 0) {
      // unpartitioned table, update whole index
      doIndexUpdate(tblIndexes);
    } else {
      doIndexUpdate(tblIndexes, partSpec);
    }
  }
  return tasks;
}
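The branch above keys entirely off the partition spec attached to each LoadTableDesc: an empty or missing spec means the load touched the whole (unpartitioned) table, so every index is rebuilt rather than a single partition. A hypothetical helper (not part of IndexUpdater) capturing that check, using only the getter shown above:

import java.util.Map;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;

// Hypothetical helper: true when the load affects the whole table rather than one partition.
static boolean affectsWholeTable(LoadTableDesc ltd) {
  Map<String, String> partSpec = ltd.getPartitionSpec();
  return partSpec == null || partSpec.isEmpty();
}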
Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From the class StatsTask, method getPartitionsList.
/**
 * Get the list of partitions whose statistics need to be updated.
 * TODO: we should reuse the Partitions generated at compile time
 * since getting the list of partitions is quite expensive.
 *
 * @return a list of partitions whose statistics need to be updated.
 * @throws HiveException
 */
private List<Partition> getPartitionsList(Hive db) throws HiveException {
  if (work.getLoadFileDesc() != null) {
    // we are in CTAS, so we know there are no partitions
    return null;
  }
  List<Partition> list = new ArrayList<Partition>();
  if (work.getTableSpecs() != null) {
    // ANALYZE command
    TableSpec tblSpec = work.getTableSpecs();
    table = tblSpec.tableHandle;
    if (!table.isPartitioned()) {
      return null;
    }
    // get all partitions that match the partition spec
    List<Partition> partitions = tblSpec.partitions;
    if (partitions != null) {
      for (Partition partn : partitions) {
        list.add(partn);
      }
    }
  } else if (work.getLoadTableDesc() != null) {
    // INSERT OVERWRITE command
    LoadTableDesc tbd = work.getLoadTableDesc();
    table = db.getTable(tbd.getTable().getTableName());
    if (!table.isPartitioned()) {
      return null;
    }
    DynamicPartitionCtx dpCtx = tbd.getDPCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // If no dynamic partitions are generated, dpPartSpecs may not be initialized
      if (dpPartSpecs != null) {
        // load the list of DP partitions and return the list of partition specs
        list.addAll(dpPartSpecs);
      }
    } else {
      // static partition
      Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
      list.add(partn);
    }
  }
  return list;
}
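The LoadTableDesc also tells StatsTask whether the insert used dynamic or static partitioning: a DynamicPartitionCtx with at least one dynamic-partition column means the affected partitions are only known at runtime (dpPartSpecs), while otherwise the static spec pins down a single partition. A hypothetical helper (not part of StatsTask) expressing that distinction, using only the getters shown above:

import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;

// Hypothetical helper: true when the load used dynamic partitioning,
// so the affected partitions must be collected at runtime.
static boolean isDynamicPartitionLoad(LoadTableDesc tbd) {
  DynamicPartitionCtx dpCtx = tbd.getDPCtx();
  return dpCtx != null && dpCtx.getNumDPCols() > 0;
}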
Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From the class DDLSemanticAnalyzer, method analyzeAlterTablePartMergeFiles.
private void analyzeAlterTablePartMergeFiles(ASTNode ast, String tableName,
    HashMap<String, String> partSpec) throws SemanticException {
  AlterTablePartMergeFilesDesc mergeDesc = new AlterTablePartMergeFilesDesc(tableName, partSpec);
  List<Path> inputDir = new ArrayList<Path>();
  Path oldTblPartLoc = null;
  Path newTblPartLoc = null;
  Table tblObj = null;
  ListBucketingCtx lbCtx = null;
  try {
    tblObj = getTable(tableName);
    // TODO: we should probably block all ACID tables here.
    if (AcidUtils.isInsertOnlyTable(tblObj.getParameters())) {
      throw new SemanticException("Merge is not supported for MM tables");
    }
    mergeDesc.setTableDesc(Utilities.getTableDesc(tblObj));
    List<String> bucketCols = null;
    Class<? extends InputFormat> inputFormatClass = null;
    boolean isArchived = false;
    if (tblObj.isPartitioned()) {
      if (partSpec == null) {
        throw new SemanticException("source table " + tableName
            + " is partitioned but no partition desc found.");
      } else {
        Partition part = getPartition(tblObj, partSpec, false);
        if (part == null) {
          throw new SemanticException("source table " + tableName
              + " is partitioned but partition not found.");
        }
        bucketCols = part.getBucketCols();
        inputFormatClass = part.getInputFormatClass();
        isArchived = ArchiveUtils.isArchived(part);
        Path tabPath = tblObj.getPath();
        Path partPath = part.getDataLocation();
        // if the table is in a different dfs than the partition,
        // replace the partition's dfs with the table's dfs.
        newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(),
            partPath.toUri().getPath());
        oldTblPartLoc = partPath;
        lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(),
            part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
      }
    } else {
      inputFormatClass = tblObj.getInputFormatClass();
      bucketCols = tblObj.getBucketCols();
      // input and output are the same
      oldTblPartLoc = tblObj.getPath();
      newTblPartLoc = tblObj.getPath();
      lbCtx = constructListBucketingCtx(tblObj.getSkewedColNames(), tblObj.getSkewedColValues(),
          tblObj.getSkewedColValueLocationMaps(), tblObj.isStoredAsSubDirectories(), conf);
    }
    // throw a SemanticException for file formats other than RCFile and ORC.
    if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass.equals(OrcInputFormat.class))) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_FILE_FORMAT.getMsg());
    }
    mergeDesc.setInputFormatClass(inputFormatClass);
    // throw a SemanticException if the table/partition is bucketed
    if (bucketCols != null && bucketCols.size() > 0) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_BUCKETED.getMsg());
    }
    // throw a SemanticException if the table/partition is archived
    if (isArchived) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED.getMsg());
    }
    // non-native tables are not supported: merging relies on Hive-managed file layout,
    // violating which can cause data loss
    if (tblObj.isNonNative()) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE.getMsg());
    }
    if (tblObj.getTableType() != TableType.MANAGED_TABLE) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED.getMsg());
    }
    // transactional tables are compacted and no longer need to be bucketed,
    // so merge/concatenation is not safe for them
    boolean isAcid = AcidUtils.isTransactionalTable(tblObj);
    if (isAcid) {
      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL.getMsg());
    }
    inputDir.add(oldTblPartLoc);
    mergeDesc.setInputDir(inputDir);
    mergeDesc.setLbCtx(lbCtx);
    addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.MERGEFILES);
    DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), mergeDesc);
    ddlWork.setNeedLock(true);
    Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork);
    TableDesc tblDesc = Utilities.getTableDesc(tblObj);
    Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
    mergeDesc.setOutputDir(queryTmpdir);
    // No need to handle MM tables - unsupported path.
    LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc,
        partSpec == null ? new HashMap<>() : partSpec);
    ltd.setLbCtx(lbCtx);
    Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
    mergeTask.addDependentTask(moveTsk);
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      BasicStatsWork basicStatsWork;
      if (oldTblPartLoc.equals(newTblPartLoc)) {
        // If we're merging to the same location, we can avoid some metastore calls
        TableSpec tableSpec = new TableSpec(db, tableName, partSpec);
        basicStatsWork = new BasicStatsWork(tableSpec);
      } else {
        basicStatsWork = new BasicStatsWork(ltd);
      }
      basicStatsWork.setNoStatsAggregator(true);
      basicStatsWork.setClearAggregatorStats(true);
      StatsWork columnStatsWork = new StatsWork(tblObj, basicStatsWork, conf);
      Task<? extends Serializable> statTask = TaskFactory.get(columnStatsWork);
      moveTsk.addDependentTask(statTask);
    }
    rootTasks.add(mergeTask);
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
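As in the import path, the LoadTableDesc here is just the glue between the produced files and the table: the merged output in queryTmpdir is moved into either the whole table or the single target partition, depending on the partition-spec map. A minimal sketch of that tail end of the wiring, assuming mergeTask, queryTmpdir, tblDesc, partSpec, and lbCtx are built as in the snippet above:

// Sketch only, reusing the constructors shown above: an empty map targets the
// whole table, a populated map targets one partition.
Map<String, String> spec = (partSpec == null) ? new HashMap<>() : partSpec;
LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, spec);
ltd.setLbCtx(lbCtx);
Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
// the merged files are moved into place only after the concatenation task finishes
mergeTask.addDependentTask(moveTsk);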