Use of org.apache.hadoop.hive.ql.plan.ListBucketingCtx in project hive by apache.
In class DDLSemanticAnalyzer, method analyzeAlterTablePartMergeFiles:
private void analyzeAlterTablePartMergeFiles(ASTNode ast, String tableName, HashMap<String, String> partSpec) throws SemanticException {
AlterTablePartMergeFilesDesc mergeDesc = new AlterTablePartMergeFilesDesc(tableName, partSpec);
List<Path> inputDir = new ArrayList<Path>();
Path oldTblPartLoc = null;
Path newTblPartLoc = null;
Table tblObj = null;
ListBucketingCtx lbCtx = null;
try {
tblObj = getTable(tableName);
List<String> bucketCols = null;
Class<? extends InputFormat> inputFormatClass = null;
boolean isArchived = false;
boolean checkIndex = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CONCATENATE_CHECK_INDEX);
if (checkIndex) {
List<Index> indexes = db.getIndexes(tblObj.getDbName(), tblObj.getTableName(), Short.MAX_VALUE);
if (indexes != null && indexes.size() > 0) {
throw new SemanticException("can not do merge because source table " + tableName + " is indexed.");
}
}
if (tblObj.isPartitioned()) {
if (partSpec == null) {
throw new SemanticException("source table " + tableName + " is partitioned but no partition desc found.");
} else {
Partition part = getPartition(tblObj, partSpec, false);
if (part == null) {
throw new SemanticException("source table " + tableName + " is partitioned but partition not found.");
}
bucketCols = part.getBucketCols();
inputFormatClass = part.getInputFormatClass();
isArchived = ArchiveUtils.isArchived(part);
Path tabPath = tblObj.getPath();
Path partPath = part.getDataLocation();
// if the table is in a different dfs than the partition,
// replace the partition's dfs with the table's dfs.
newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
oldTblPartLoc = partPath;
lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(), part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
}
} else {
inputFormatClass = tblObj.getInputFormatClass();
bucketCols = tblObj.getBucketCols();
// input and output are the same
oldTblPartLoc = tblObj.getPath();
newTblPartLoc = tblObj.getPath();
lbCtx = constructListBucketingCtx(tblObj.getSkewedColNames(), tblObj.getSkewedColValues(), tblObj.getSkewedColValueLocationMaps(), tblObj.isStoredAsSubDirectories(), conf);
}
// throw a SemanticException for input formats other than RCFile and ORC.
if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass.equals(OrcInputFormat.class))) {
throw new SemanticException("Only RCFile and ORCFile Formats are supported right now.");
}
mergeDesc.setInputFormatClass(inputFormatClass);
// throw a SemanticException if the table/partition is bucketed
if (bucketCols != null && bucketCols.size() > 0) {
throw new SemanticException("Merge can not perform on bucketized partition/table.");
}
// throw a HiveException if the table/partition is archived
if (isArchived) {
throw new SemanticException("Merge can not perform on archived partitions.");
}
inputDir.add(oldTblPartLoc);
mergeDesc.setInputDir(inputDir);
mergeDesc.setLbCtx(lbCtx);
addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.MERGEFILES);
DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), mergeDesc);
ddlWork.setNeedLock(true);
Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork, conf);
TableDesc tblDesc = Utilities.getTableDesc(tblObj);
Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
mergeDesc.setOutputDir(queryTmpdir);
LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap<String, String>() : partSpec);
ltd.setLbCtx(lbCtx);
Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
mergeTask.addDependentTask(moveTsk);
if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsWork statDesc;
if (oldTblPartLoc.equals(newTblPartLoc)) {
// If we're merging to the same location, we can avoid some metastore calls
TableSpec tablepart = new TableSpec(db, conf, tableName, partSpec);
statDesc = new StatsWork(tablepart);
} else {
statDesc = new StatsWork(ltd);
}
statDesc.setNoStatsAggregator(true);
statDesc.setClearAggregatorStats(true);
statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
moveTsk.addDependentTask(statTask);
}
rootTasks.add(mergeTask);
} catch (Exception e) {
throw new SemanticException(e);
}
}
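The path rewriting near the top of the partitioned branch above keeps the partition's directory but swaps in the table's filesystem scheme and authority. A minimal, self-contained sketch of that pattern; the URIs below are made up for illustration:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathRewriteSketch {
    public static void main(String[] args) {
        // Hypothetical locations: the table lives on one HDFS, the partition metadata still points at another.
        Path tabPath = new Path("hdfs://nn1:8020/warehouse/t");
        Path partPath = new Path("hdfs://old-nn:8020/warehouse/t/ds=2017-01-01");
        URI tabUri = tabPath.toUri();
        // Keep the partition's relative path but adopt the table's scheme and authority,
        // mirroring what analyzeAlterTablePartMergeFiles does above.
        Path newTblPartLoc = new Path(tabUri.getScheme(), tabUri.getAuthority(), partPath.toUri().getPath());
        System.out.println(newTblPartLoc); // hdfs://nn1:8020/warehouse/t/ds=2017-01-01
    }
}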
Use of org.apache.hadoop.hive.ql.plan.ListBucketingCtx in project hive by apache.
In class BaseSemanticAnalyzer, method constructListBucketingCtx:
/**
* Construct a list bucketing context from a table's or partition's skew metadata.
*
* @param skewedColNames skewed column names
* @param skewedValues skewed column values, one list per skewed value tuple
* @param skewedColValueLocationMaps mapping from skewed value tuples to their directory locations
* @param isStoredAsSubDirectories whether skewed values are stored as sub-directories
* @param conf Hive configuration
* @return the populated ListBucketingCtx
*/
protected ListBucketingCtx constructListBucketingCtx(List<String> skewedColNames, List<List<String>> skewedValues, Map<List<String>, String> skewedColValueLocationMaps, boolean isStoredAsSubDirectories, HiveConf conf) {
ListBucketingCtx lbCtx = new ListBucketingCtx();
lbCtx.setSkewedColNames(skewedColNames);
lbCtx.setSkewedColValues(skewedValues);
lbCtx.setLbLocationMap(skewedColValueLocationMaps);
lbCtx.setStoredAsSubDirectories(isStoredAsSubDirectories);
lbCtx.setDefaultKey(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_KEY);
lbCtx.setDefaultDirName(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME);
return lbCtx;
}
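For reference, a minimal sketch of building the same kind of context directly, using only the setters shown above. The skewed column name, values, and location map are hypothetical, and the import path for ListBucketingPrunerUtils is assumed from the Hive codebase:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;

public class ListBucketingCtxSketch {
    public static ListBucketingCtx build() {
        // Hypothetical skew metadata: a table skewed on "state" with two hot values.
        List<String> skewedColNames = Arrays.asList("state");
        List<List<String>> skewedColValues = Arrays.asList(Arrays.asList("CA"), Arrays.asList("NY"));
        Map<List<String>, String> locationMap = new HashMap<>();
        locationMap.put(Arrays.asList("CA"), "/warehouse/t/state=CA");

        ListBucketingCtx lbCtx = new ListBucketingCtx();
        lbCtx.setSkewedColNames(skewedColNames);
        lbCtx.setSkewedColValues(skewedColValues);
        lbCtx.setLbLocationMap(locationMap);
        lbCtx.setStoredAsSubDirectories(true);
        lbCtx.setDefaultKey(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_KEY);
        lbCtx.setDefaultDirName(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME);
        return lbCtx;
    }
}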
Use of org.apache.hadoop.hive.ql.plan.ListBucketingCtx in project hive by apache.
In class SemanticAnalyzer, method genFileSinkPlan:
@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
QBMetaData qbm = qb.getMetaData();
Integer dest_type = qbm.getDestTypeForAlias(dest);
// destination table if any
Table dest_tab = null;
// should the destination table be written to using ACID
boolean destTableIsAcid = false;
boolean destTableIsTemporary = false;
boolean destTableIsMaterialization = false;
// destination partition if any
Partition dest_part = null;
// the intermediate destination directory
Path queryTmpdir = null;
// the final destination directory
Path dest_path = null;
TableDesc table_desc = null;
int currentTableId = 0;
boolean isLocal = false;
SortBucketRSCtx rsCtx = new SortBucketRSCtx();
DynamicPartitionCtx dpCtx = null;
LoadTableDesc ltd = null;
ListBucketingCtx lbCtx = null;
Map<String, String> partSpec = null;
switch(dest_type.intValue()) {
case QBMetaData.DEST_TABLE:
{
dest_tab = qbm.getDestTableForAlias(dest);
destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
destTableIsTemporary = dest_tab.isTemporary();
// Is the user trying to insert into an external table?
if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) {
throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
}
partSpec = qbm.getPartSpecForAlias(dest);
dest_path = dest_tab.getPath();
// verify that our destination is empty before proceeding
if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
try {
FileSystem fs = dest_path.getFileSystem(conf);
if (!MetaStoreUtils.isDirEmpty(fs, dest_path)) {
LOG.warn("Attempted write into an immutable table : " + dest_tab.getTableName() + " : " + dest_path);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
}
} catch (IOException ioe) {
LOG.warn("Error while trying to determine if immutable table has any data : " + dest_tab.getTableName() + " : " + dest_path);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
}
}
// check for partition
List<FieldSchema> parts = dest_tab.getPartitionKeys();
if (parts != null && parts.size() > 0) {
// table is partitioned
if (partSpec == null || partSpec.size() == 0) {
// user did NOT specify partition
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
}
dpCtx = qbm.getDPCtx(dest);
if (dpCtx == null) {
dest_tab.validatePartColumnNames(partSpec, false);
dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
}
if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) {
// dynamic partitioning is disabled
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg()));
}
if (dpCtx.getSPPath() != null) {
dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
}
if ((dest_tab.getNumBuckets() > 0)) {
dpCtx.setNumBuckets(dest_tab.getNumBuckets());
}
}
boolean isNonNativeTable = dest_tab.isNonNative();
if (isNonNativeTable) {
queryTmpdir = dest_path;
} else {
queryTmpdir = ctx.getTempDirForPath(dest_path, true);
}
if (dpCtx != null) {
// set the root of the temporary path under which the dynamic partition directories will be created
dpCtx.setRootPath(queryTmpdir);
}
// this table_desc does not contain the partitioning columns
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
lbCtx = constructListBucketingCtx(dest_tab.getSkewedColNames(), dest_tab.getSkewedColValues(), dest_tab.getSkewedColValueLocationMaps(), dest_tab.isStoredAsSubDirectories(), conf);
// NOTE: specify Dynamic partitions in dest_tab for WriteEntity
if (!isNonNativeTable) {
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsAcid) {
acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
checkAcidConstraints(qb, table_desc, dest_tab);
}
ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp);
ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
} else {
// This is a non-native table.
// We need to set stats as inaccurate.
setStatsForNonNativeTable(dest_tab);
// true if it is insert overwrite.
boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName()));
createInsertDesc(dest_tab, overwrite);
}
WriteEntity output = null;
// If there are no dynamic partitions, register the whole table as an output here; in the dynamic
// partitioning case, the WriteEntity is registered in MoveTask once the list of dynamically created partitions is known.
if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) {
output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest));
if (!outputs.add(output)) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName()));
}
}
if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) {
// No static partition specified
if (dpCtx.getNumSPCols() == 0) {
output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest), false);
outputs.add(output);
output.setDynamicPartitionWrite(true);
} else {
// Only part of the partition is specified: create a DummyPartition in this case, since
// the metastore does not currently store partial partitions and we need to store dummy partitions.
try {
String ppath = dpCtx.getSPPath();
ppath = ppath.substring(0, ppath.length() - 1);
DummyPartition p = new DummyPartition(dest_tab, dest_tab.getDbName() + "@" + dest_tab.getTableName() + "@" + ppath, partSpec);
output = new WriteEntity(p, getWriteType(dest), false);
output.setDynamicPartitionWrite(true);
outputs.add(output);
} catch (HiveException e) {
throw new SemanticException(e.getMessage(), e);
}
}
}
ctx.getLoadTableOutputMap().put(ltd, output);
break;
}
case QBMetaData.DEST_PARTITION:
{
dest_part = qbm.getDestPartitionForAlias(dest);
dest_tab = dest_part.getTable();
destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE)) {
throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
}
Path tabPath = dest_tab.getPath();
Path partPath = dest_part.getDataLocation();
// verify that our destination is empty before proceeding
if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
try {
FileSystem fs = partPath.getFileSystem(conf);
if (!MetaStoreUtils.isDirEmpty(fs, partPath)) {
LOG.warn("Attempted write into an immutable table partition : " + dest_tab.getTableName() + " : " + partPath);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
}
} catch (IOException ioe) {
LOG.warn("Error while trying to determine if immutable table partition has any data : " + dest_tab.getTableName() + " : " + partPath);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
}
}
// if the table is in a different dfs than the partition,
// replace the partition's dfs with the table's dfs.
dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
queryTmpdir = ctx.getTempDirForPath(dest_path, true);
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(), dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(), dest_part.isStoredAsSubDirectories(), conf);
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsAcid) {
acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
checkAcidConstraints(qb, table_desc, dest_tab);
}
ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp);
ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
if (!outputs.add(new WriteEntity(dest_part, determineWriteType(ltd, dest_tab.isNonNative(), dest)))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
}
break;
}
case QBMetaData.DEST_LOCAL_FILE:
isLocal = true;
// fall through
case QBMetaData.DEST_DFS_FILE:
{
dest_path = new Path(qbm.getDestFileForAlias(dest));
if (isLocal) {
// for local directory - we always write to map-red intermediate
// store and then copy to local fs
queryTmpdir = ctx.getMRTmpPath();
} else {
try {
Path qPath = FileUtils.makeQualified(dest_path, conf);
queryTmpdir = ctx.getTempDirForPath(qPath, true);
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: " + dest_path, e);
}
}
String cols = "";
String colTypes = "";
ArrayList<ColumnInfo> colInfos = inputRR.getColumnInfos();
// CTAS case: the file output format and serde are defined by the create
// table command rather than taking the default value
List<FieldSchema> field_schemas = null;
CreateTableDesc tblDesc = qb.getTableDesc();
CreateViewDesc viewDesc = qb.getViewDesc();
if (tblDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
destTableIsTemporary = tblDesc.isTemporary();
destTableIsMaterialization = tblDesc.isMaterialization();
} else if (viewDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
destTableIsTemporary = false;
}
boolean first = true;
for (ColumnInfo colInfo : colInfos) {
String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
if (nm[1] != null) {
// non-null column alias
colInfo.setAlias(nm[1]);
}
//default column name
String colName = colInfo.getInternalName();
if (field_schemas != null) {
FieldSchema col = new FieldSchema();
if (!("".equals(nm[0])) && nm[1] != null) {
// strip surrounding backticks from the column alias
colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase();
}
colName = fixCtasColumnName(colName);
col.setName(colName);
String typeName = colInfo.getType().getTypeName();
// CTAS should NOT create a VOID type
if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE.getMsg(colName));
}
col.setType(typeName);
field_schemas.add(col);
}
if (!first) {
cols = cols.concat(",");
colTypes = colTypes.concat(":");
}
first = false;
cols = cols.concat(colName);
// Replace VOID type with string when the output is a temp table or
// local files.
// A VOID type can be generated under the query:
//
// select NULL from tt;
// or
// insert overwrite local directory "abc" select NULL from tt;
//
// where there is no column type to which the NULL value should be
// converted.
//
String tName = colInfo.getType().getTypeName();
if (tName.equals(serdeConstants.VOID_TYPE_NAME)) {
colTypes = colTypes.concat(serdeConstants.STRING_TYPE_NAME);
} else {
colTypes = colTypes.concat(tName);
}
}
// update the create table descriptor with the resulting schema.
if (tblDesc != null) {
tblDesc.setCols(new ArrayList<FieldSchema>(field_schemas));
} else if (viewDesc != null) {
viewDesc.setSchema(new ArrayList<FieldSchema>(field_schemas));
}
boolean isDestTempFile = true;
if (!ctx.isMRTmpFileURI(dest_path.toUri().toString())) {
idToTableNameMap.put(String.valueOf(destTableId), dest_path.toUri().toString());
currentTableId = destTableId;
destTableId++;
isDestTempFile = false;
}
boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes));
if (tblDesc == null) {
if (viewDesc != null) {
table_desc = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
} else if (qb.getIsQuery()) {
String fileFormat;
if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
fileFormat = "SequenceFile";
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT, fileFormat);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, ThriftJDBCBinarySerDe.class);
// Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
// write out formatted thrift objects to SequenceFile
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
} else {
fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, LazySimpleSerDe.class);
}
} else {
table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
}
} else {
table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
}
if (!outputs.add(new WriteEntity(dest_path, !isDfsDir, isDestTempFile))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_path.toUri().toString()));
}
break;
}
default:
throw new SemanticException("Unknown destination type: " + dest_type);
}
input = genConversionSelectOperator(dest, qb, input, table_desc, dpCtx);
inputRR = opParseCtx.get(input).getRowResolver();
ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
if (updating(dest) || deleting(dest)) {
vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
} else {
try {
StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc.getDeserializer(conf).getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), "", false));
}
} catch (Exception e) {
throw new SemanticException(e.getMessage(), e);
}
}
RowSchema fsRS = new RowSchema(vecCol);
// The output files of a FileSink can be merged if they are either not being written to a table,
// or are being written to a table which is not bucketed
// and the table is not sorted
boolean canBeMerged = (dest_tab == null || !((dest_tab.getNumBuckets() > 0) || (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0)));
// If this table is working with ACID semantics, turn off merging
canBeMerged &= !destTableIsAcid;
// Generate the partition columns from the parent input
if (dest_type.intValue() == QBMetaData.DEST_TABLE || dest_type.intValue() == QBMetaData.DEST_PARTITION) {
genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx);
}
FileSinkDesc fileSinkDesc = new FileSinkDesc(queryTmpdir, table_desc, conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId, rsCtx.isMultiFileSpray(), canBeMerged, rsCtx.getNumFiles(), rsCtx.getTotalFiles(), rsCtx.getPartnCols(), dpCtx, dest_path);
boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
fileSinkDesc.setHiveServerQuery(isHiveServerQuery);
// If this is a write to an ACID table, record the write type so the FileSinkOperator knows how to properly write to it.
if (destTableIsAcid) {
AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT);
fileSinkDesc.setWriteType(wt);
acidFileSinks.add(fileSinkDesc);
}
fileSinkDesc.setTemporary(destTableIsTemporary);
fileSinkDesc.setMaterialization(destTableIsMaterialization);
/* Set List Bucketing context. */
if (lbCtx != null) {
lbCtx.processRowSkewedIndex(fsRS);
lbCtx.calculateSkewedValueSubDirList();
}
fileSinkDesc.setLbCtx(lbCtx);
// set the stats publishing/aggregating key prefix to the directory name. The directory name
// can be changed by the optimizer, but the key should not change:
// it should stay the same as the MoveWork's sourceDir.
fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
if (!destTableIsMaterialization && HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
String statsTmpLoc = ctx.getTempDirForPath(dest_path).toString();
fileSinkDesc.setStatsTmpDir(statsTmpLoc);
LOG.debug("Set stats collection dir : " + statsTmpLoc);
}
if (dest_part != null) {
try {
String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
fileSinkDesc.setStaticSpec(staticSpec);
} catch (MetaException e) {
throw new SemanticException(e);
}
} else if (dpCtx != null) {
fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
}
if (isHiveServerQuery && null != table_desc && table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true);
} else {
fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
if (ltd != null && SessionState.get() != null) {
SessionState.get().getLineageState().mapDirToOp(ltd.getSourcePath(), (FileSinkOperator) output);
} else if (queryState.getCommandType().equals(HiveOperation.CREATETABLE_AS_SELECT.getOperationName())) {
Path tlocation = null;
String tName = Utilities.getDbTableName(tableDesc.getTableName())[1];
try {
Warehouse wh = new Warehouse(conf);
tlocation = wh.getTablePath(db.getDatabase(tableDesc.getDatabaseName()), tName);
} catch (MetaException | HiveException e) {
throw new SemanticException(e);
}
SessionState.get().getLineageState().mapDirToOp(tlocation, (FileSinkOperator) output);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + dest_path + " row schema: " + inputRR.toString());
}
FileSinkOperator fso = (FileSinkOperator) output;
fso.getConf().setTable(dest_tab);
fsopToTable.put(fso, dest_tab);
// Auto-gather column statistics when enabled and this is an insert overwrite or insert into a table.
if (dest_tab != null && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
if (dest_type.intValue() == QBMetaData.DEST_TABLE) {
genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
} else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) {
genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
}
}
return output;
}
Use of org.apache.hadoop.hive.ql.plan.ListBucketingCtx in project hive by apache.
In class DDLSemanticAnalyzer, method analyzeTruncateTable:
private void analyzeTruncateTable(ASTNode ast) throws SemanticException {
// TOK_TABLE_PARTITION
ASTNode root = (ASTNode) ast.getChild(0);
String tableName = getUnescapedName((ASTNode) root.getChild(0));
Table table = getTable(tableName, true);
if (table.getTableType() != TableType.MANAGED_TABLE) {
throw new SemanticException(ErrorMsg.TRUNCATE_FOR_NON_MANAGED_TABLE.format(tableName));
}
if (table.isNonNative()) {
//TODO
throw new SemanticException(ErrorMsg.TRUNCATE_FOR_NON_NATIVE_TABLE.format(tableName));
}
if (!table.isPartitioned() && root.getChildCount() > 1) {
throw new SemanticException(ErrorMsg.PARTSPEC_FOR_NON_PARTITIONED_TABLE.format(tableName));
}
Map<String, String> partSpec = getPartSpec((ASTNode) root.getChild(1));
if (partSpec == null) {
if (!table.isPartitioned()) {
outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_EXCLUSIVE));
} else {
for (Partition partition : getPartitions(table, null, false)) {
outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
}
}
} else {
if (isFullSpec(table, partSpec)) {
validatePartSpec(table, partSpec, (ASTNode) root.getChild(1), conf, true);
Partition partition = getPartition(table, partSpec, true);
outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
} else {
validatePartSpec(table, partSpec, (ASTNode) root.getChild(1), conf, false);
for (Partition partition : getPartitions(table, partSpec, false)) {
outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
}
}
}
TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec);
DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), truncateTblDesc);
Task<? extends Serializable> truncateTask = TaskFactory.get(ddlWork, conf);
// Is this a truncate column command
List<String> columnNames = null;
if (ast.getChildCount() == 2) {
try {
columnNames = getColumnNames((ASTNode) ast.getChild(1));
// Throw an error if the table is indexed
List<Index> indexes = db.getIndexes(table.getDbName(), tableName, (short) 1);
if (indexes != null && indexes.size() > 0) {
throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_INDEXED_TABLE.getMsg());
}
List<String> bucketCols = null;
Class<? extends InputFormat> inputFormatClass = null;
boolean isArchived = false;
Path newTblPartLoc = null;
Path oldTblPartLoc = null;
List<FieldSchema> cols = null;
ListBucketingCtx lbCtx = null;
boolean isListBucketed = false;
List<String> listBucketColNames = null;
if (table.isPartitioned()) {
Partition part = db.getPartition(table, partSpec, false);
Path tabPath = table.getPath();
Path partPath = part.getDataLocation();
// if the table is in a different dfs than the partition,
// replace the partition's dfs with the table's dfs.
newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
oldTblPartLoc = partPath;
cols = part.getCols();
bucketCols = part.getBucketCols();
inputFormatClass = part.getInputFormatClass();
isArchived = ArchiveUtils.isArchived(part);
lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(), part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
isListBucketed = part.isStoredAsSubDirectories();
listBucketColNames = part.getSkewedColNames();
} else {
// input and output are the same
oldTblPartLoc = table.getPath();
newTblPartLoc = table.getPath();
cols = table.getCols();
bucketCols = table.getBucketCols();
inputFormatClass = table.getInputFormatClass();
lbCtx = constructListBucketingCtx(table.getSkewedColNames(), table.getSkewedColValues(), table.getSkewedColValueLocationMaps(), table.isStoredAsSubDirectories(), conf);
isListBucketed = table.isStoredAsSubDirectories();
listBucketColNames = table.getSkewedColNames();
}
// throw a SemanticException for non-RCFile input formats; column truncation is only supported for RCFile.
if (!inputFormatClass.equals(RCFileInputFormat.class)) {
throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_NOT_RC.getMsg());
}
// throw a HiveException if the table/partition is archived
if (isArchived) {
throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_ARCHIVED.getMsg());
}
Set<Integer> columnIndexes = new HashSet<Integer>();
for (String columnName : columnNames) {
boolean found = false;
for (int columnIndex = 0; columnIndex < cols.size(); columnIndex++) {
if (columnName.equalsIgnoreCase(cols.get(columnIndex).getName())) {
columnIndexes.add(columnIndex);
found = true;
break;
}
}
// Throw an exception if the user is trying to truncate a column which doesn't exist
if (!found) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(columnName));
}
// Throw an exception if the table/partition is bucketed on one of the columns
for (String bucketCol : bucketCols) {
if (bucketCol.equalsIgnoreCase(columnName)) {
throw new SemanticException(ErrorMsg.TRUNCATE_BUCKETED_COLUMN.getMsg(columnName));
}
}
if (isListBucketed) {
for (String listBucketCol : listBucketColNames) {
if (listBucketCol.equalsIgnoreCase(columnName)) {
throw new SemanticException(ErrorMsg.TRUNCATE_LIST_BUCKETED_COLUMN.getMsg(columnName));
}
}
}
}
truncateTblDesc.setColumnIndexes(new ArrayList<Integer>(columnIndexes));
truncateTblDesc.setInputDir(oldTblPartLoc);
truncateTblDesc.setLbCtx(lbCtx);
addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.TRUNCATE);
ddlWork.setNeedLock(true);
TableDesc tblDesc = Utilities.getTableDesc(table);
// Write the output to temporary directory and move it to the final location at the end
// so the operation is atomic.
Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
truncateTblDesc.setOutputDir(queryTmpdir);
LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap<String, String>() : partSpec);
ltd.setLbCtx(lbCtx);
Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
truncateTask.addDependentTask(moveTsk);
// Recalculate the HDFS stats if auto gather stats is set
if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsWork statDesc;
if (oldTblPartLoc.equals(newTblPartLoc)) {
// If we're merging to the same location, we can avoid some metastore calls
TableSpec tablepart = new TableSpec(this.db, conf, root);
statDesc = new StatsWork(tablepart);
} else {
statDesc = new StatsWork(ltd);
}
statDesc.setNoStatsAggregator(true);
statDesc.setClearAggregatorStats(true);
statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
moveTsk.addDependentTask(statTask);
}
} catch (HiveException e) {
throw new SemanticException(e);
}
}
rootTasks.add(truncateTask);
}
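The comment above about writing to a temporary directory and moving the result into place describes a common staged-rewrite pattern. A rough sketch of that pattern with the plain Hadoop FileSystem API; Hive itself does this through ctx.getExternalTmpPath plus a MoveWork/MoveTask, and the paths below are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class StagedRewriteSketch {
    public static void swapIntoPlace(Configuration conf) throws Exception {
        // Hypothetical locations; in Hive the staging dir comes from ctx.getExternalTmpPath(newTblPartLoc).
        Path finalDir = new Path("hdfs://nn1:8020/warehouse/t/ds=2017-01-01");
        Path stagingDir = new Path("hdfs://nn1:8020/tmp/hive-staging/ds=2017-01-01");

        FileSystem fs = finalDir.getFileSystem(conf);
        // 1. The truncate/merge task first writes its output under stagingDir.
        // 2. Only after that write succeeds is the staged output swapped into place, so readers
        //    never see a half-written partition. Hive's MoveTask handles the replace semantics;
        //    this simplified delete-then-rename leaves a brief window and is only illustrative.
        fs.delete(finalDir, true);
        fs.rename(stagingDir, finalDir);
    }
}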
Use of org.apache.hadoop.hive.ql.plan.ListBucketingCtx in project hive by apache.
In class DDLTask, method mergeFiles:
/**
* First, make sure the source table/partition is not archived, indexed, or stored in a
* format other than RCFile/ORC; if any of these is true, an exception is thrown (these
* checks are performed during semantic analysis).
*
* The merge itself is done by building a MergeFileWork from the mergeFilesDesc and
* running it as a MergeFileTask (or inside a TezTask when the execution engine is Tez).
*
* @param db
* @param mergeFilesDesc
* @param driverContext
* @return the exit code of the merge task
* @throws HiveException
*/
private int mergeFiles(Hive db, AlterTablePartMergeFilesDesc mergeFilesDesc, DriverContext driverContext) throws HiveException {
ListBucketingCtx lbCtx = mergeFilesDesc.getLbCtx();
boolean lbatc = lbCtx == null ? false : lbCtx.isSkewedStoredAsDir();
int lbd = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel();
// merge work only needs input and output.
MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(), mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName());
LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
ArrayList<String> inputDirstr = new ArrayList<String>(1);
inputDirstr.add(mergeFilesDesc.getInputDir().toString());
pathToAliases.put(mergeFilesDesc.getInputDir().get(0), inputDirstr);
mergeWork.setPathToAliases(pathToAliases);
mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
mergeWork.resolveConcatenateMerge(db.getConf());
mergeWork.setMapperCannotSpanPartns(true);
mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass().getName());
final FileMergeDesc fmd;
if (mergeFilesDesc.getInputFormatClass().equals(RCFileInputFormat.class)) {
fmd = new RCFileMergeDesc();
} else {
// otherwise it must be ORC, since the semantic analyzer only allows RCFile or ORC here
fmd = new OrcFileMergeDesc();
}
fmd.setDpCtx(null);
fmd.setHasDynamicPartitions(false);
fmd.setListBucketingAlterTableConcatenate(lbatc);
fmd.setListBucketingDepth(lbd);
fmd.setOutputPath(mergeFilesDesc.getOutputDir());
CompilationOpContext opContext = driverContext.getCtx().getOpContext();
Operator<? extends OperatorDesc> mergeOp = OperatorFactory.get(opContext, fmd);
LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
aliasToWork.put(mergeFilesDesc.getInputDir().toString(), mergeOp);
mergeWork.setAliasToWork(aliasToWork);
DriverContext driverCxt = new DriverContext();
Task task;
if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
TezWork tezWork = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
mergeWork.setName("File Merge");
tezWork.add(mergeWork);
task = new TezTask();
task.setWork(tezWork);
} else {
task = new MergeFileTask();
task.setWork(mergeWork);
}
// initialize the task and execute
task.initialize(queryState, getQueryPlan(), driverCxt, opContext);
subtask = task;
int ret = task.execute(driverCxt);
if (subtask.getException() != null) {
setException(subtask.getException());
}
return ret;
}
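The two derived values at the top of mergeFiles (whether the source is list-bucketed into sub-directories, and how deep that nesting goes) can be exercised on their own. A small sketch with hypothetical skew metadata, assuming the getters behave as their names suggest:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;

public class MergeFlagsSketch {
    public static void main(String[] args) {
        // Hypothetical context: two skewed columns stored as sub-directories.
        ListBucketingCtx lbCtx = new ListBucketingCtx();
        lbCtx.setSkewedColNames(Arrays.asList("state", "zip"));
        lbCtx.setSkewedColValues(Arrays.asList(Arrays.asList("CA", "94105")));
        lbCtx.setStoredAsSubDirectories(true);

        // The same two values mergeFiles() feeds into FileMergeDesc above.
        boolean lbatc = lbCtx.isSkewedStoredAsDir();    // list bucketing "alter table concatenate" flag
        int lbd = lbCtx.calculateListBucketingLevel();  // sub-directory nesting depth for skewed values
        System.out.println("storedAsDir=" + lbatc + ", level=" + lbd);
    }
}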