use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.
the class FileSinkOperator method jobCloseOp.
@Override
public void jobCloseOp(Configuration hconf, boolean success) throws HiveException {
try {
if ((conf != null) && isNativeTable) {
Path specPath = conf.getDirName();
DynamicPartitionCtx dpCtx = conf.getDynPartCtx();
if (conf.isLinkedFileSink() && (dpCtx != null)) {
specPath = conf.getParentDir();
}
Utilities.mvFileToFinalPath(specPath, hconf, success, LOG, dpCtx, conf, reporter);
}
} catch (IOException e) {
throw new HiveException(e);
}
super.jobCloseOp(hconf, success);
}
use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.
the class StatsTask method getPartitionsList.
/**
* Get the list of partitions that need to update statistics.
* TODO: we should reuse the Partitions generated at compile time
* since getting the list of partitions is quite expensive.
*
* @return a list of partitions that need to update statistics.
* @throws HiveException
*/
private List<Partition> getPartitionsList(Hive db) throws HiveException {
if (work.getLoadFileDesc() != null) {
//we are in CTAS, so we know there are no partitions
return null;
}
List<Partition> list = new ArrayList<Partition>();
if (work.getTableSpecs() != null) {
// ANALYZE command
TableSpec tblSpec = work.getTableSpecs();
table = tblSpec.tableHandle;
if (!table.isPartitioned()) {
return null;
}
// get all partitions that matches with the partition spec
List<Partition> partitions = tblSpec.partitions;
if (partitions != null) {
for (Partition partn : partitions) {
list.add(partn);
}
}
} else if (work.getLoadTableDesc() != null) {
// INSERT OVERWRITE command
LoadTableDesc tbd = work.getLoadTableDesc();
table = db.getTable(tbd.getTable().getTableName());
if (!table.isPartitioned()) {
return null;
}
DynamicPartitionCtx dpCtx = tbd.getDPCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// If no dynamic partitions are generated, dpPartSpecs may not be initialized
if (dpPartSpecs != null) {
// load the list of DP partitions and return the list of partition specs
list.addAll(dpPartSpecs);
}
} else {
// static partition
Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
list.add(partn);
}
}
return list;
}
use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.
the class GenMapRedUtils method createMRWorkForMergingFiles.
/**
* @param fsInput The FileSink operator.
* @param ctx The MR processing context.
* @param finalName the final destination path the merge job should output.
* @param dependencyTask
* @param mvTasks
* @param conf
* @param currTask
* @throws SemanticException
* create a Map-only merge job using CombineHiveInputFormat for all partitions with
* following operators:
* MR job J0:
* ...
* |
* v
* FileSinkOperator_1 (fsInput)
* |
* v
* Merge job J1:
* |
* v
* TableScan (using CombineHiveInputFormat) (tsMerge)
* |
* v
* FileSinkOperator (fsMerge)
*
* Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths
* do
* not contain the dynamic partitions (their parent). So after the dynamic partitions are
* created (after the first job finished before the moveTask or ConditionalTask start),
* we need to change the pathToPartitionInfo & pathToAlias to include the dynamic
* partition
* directories.
*
*/
public static void createMRWorkForMergingFiles(FileSinkOperator fsInput, Path finalName, DependencyCollectionTask dependencyTask, List<Task<MoveWork>> mvTasks, HiveConf conf, Task<? extends Serializable> currTask) throws SemanticException {
//
// 1. create the operator tree
//
FileSinkDesc fsInputDesc = fsInput.getConf();
// Create a TableScan operator
RowSchema inputRS = fsInput.getSchema();
TableScanOperator tsMerge = GenMapRedUtils.createTemporaryTableScanOperator(fsInput.getCompilationOpContext(), inputRS);
// Create a FileSink operator
TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT));
FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge);
// If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema
// needs to include the partition column, and the fsOutput should have
// a DynamicPartitionCtx to indicate that it needs to dynamically partitioned.
DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// adding DP ColumnInfo to the RowSchema signature
ArrayList<ColumnInfo> signature = inputRS.getSignature();
String tblAlias = fsInputDesc.getTableInfo().getTableName();
for (String dpCol : dpCtx.getDPColNames()) {
ColumnInfo colInfo = new ColumnInfo(dpCol, // all partition column type should be string
TypeInfoFactory.stringTypeInfo, tblAlias, // partition column is virtual column
true);
signature.add(colInfo);
}
inputRS.setSignature(signature);
// create another DynamicPartitionCtx, which has a different input-to-DP column mapping
DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
fsOutputDesc.setDynPartCtx(dpCtx2);
// update the FileSinkOperator to include partition columns
usePartitionColumns(fsInputDesc.getTableInfo().getProperties(), dpCtx.getDPColNames());
} else {
// non-partitioned table
fsInputDesc.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
}
//
// 2. Constructing a conditional task consisting of a move task and a map reduce task
//
MoveWork dummyMv = new MoveWork(null, null, null, new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false);
MapWork cplan;
Serializable work;
if ((conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) || (conf.getBoolVar(ConfVars.HIVEMERGEORCFILESTRIPELEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class))) {
cplan = GenMapRedUtils.createMergeTask(fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, fsInput.getCompilationOpContext());
if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
cplan.setName("File Merge");
((TezWork) work).add(cplan);
} else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("Spark Merge File Work");
((SparkWork) work).add(cplan);
} else {
work = cplan;
}
} else {
cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
cplan.setName("File Merge");
((TezWork) work).add(cplan);
} else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("Spark Merge File Work");
((SparkWork) work).add(cplan);
} else {
work = new MapredWork();
((MapredWork) work).setMapWork(cplan);
}
}
// use CombineHiveInputFormat for map-only merging
cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
// NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
// know if merge MR2 will be triggered at execution time
Task<MoveWork> mvTask = GenMapRedUtils.findMoveTask(mvTasks, fsOutput);
ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work, fsInputDesc.getFinalDirName(), finalName, mvTask, dependencyTask);
// keep the dynamic partition context in conditional task resolver context
ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx();
mrCtx.setDPCtx(fsInputDesc.getDynPartCtx());
mrCtx.setLbCtx(fsInputDesc.getLbCtx());
}
use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.
the class SemanticAnalyzer method genFileSinkPlan.
@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
QBMetaData qbm = qb.getMetaData();
Integer dest_type = qbm.getDestTypeForAlias(dest);
// destination table if any
Table dest_tab = null;
// should the destination table be written to using ACID
boolean destTableIsAcid = false;
boolean destTableIsTemporary = false;
boolean destTableIsMaterialization = false;
// destination partition if any
Partition dest_part = null;
// the intermediate destination directory
Path queryTmpdir = null;
// the final destination directory
Path dest_path = null;
TableDesc table_desc = null;
int currentTableId = 0;
boolean isLocal = false;
SortBucketRSCtx rsCtx = new SortBucketRSCtx();
DynamicPartitionCtx dpCtx = null;
LoadTableDesc ltd = null;
ListBucketingCtx lbCtx = null;
Map<String, String> partSpec = null;
switch(dest_type.intValue()) {
case QBMetaData.DEST_TABLE:
{
dest_tab = qbm.getDestTableForAlias(dest);
destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
destTableIsTemporary = dest_tab.isTemporary();
// Is the user trying to insert into a external tables
if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) {
throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
}
partSpec = qbm.getPartSpecForAlias(dest);
dest_path = dest_tab.getPath();
// verify that our destination is empty before proceeding
if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
try {
FileSystem fs = dest_path.getFileSystem(conf);
if (!MetaStoreUtils.isDirEmpty(fs, dest_path)) {
LOG.warn("Attempted write into an immutable table : " + dest_tab.getTableName() + " : " + dest_path);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
}
} catch (IOException ioe) {
LOG.warn("Error while trying to determine if immutable table has any data : " + dest_tab.getTableName() + " : " + dest_path);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
}
}
// check for partition
List<FieldSchema> parts = dest_tab.getPartitionKeys();
if (parts != null && parts.size() > 0) {
// table is partitioned
if (partSpec == null || partSpec.size() == 0) {
// user did NOT specify partition
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
}
dpCtx = qbm.getDPCtx(dest);
if (dpCtx == null) {
dest_tab.validatePartColumnNames(partSpec, false);
dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
}
if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) {
// allow DP
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg()));
}
if (dpCtx.getSPPath() != null) {
dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
}
if ((dest_tab.getNumBuckets() > 0)) {
dpCtx.setNumBuckets(dest_tab.getNumBuckets());
}
}
boolean isNonNativeTable = dest_tab.isNonNative();
if (isNonNativeTable) {
queryTmpdir = dest_path;
} else {
queryTmpdir = ctx.getTempDirForPath(dest_path, true);
}
if (dpCtx != null) {
// set the root of the temporary path where dynamic partition columns will populate
dpCtx.setRootPath(queryTmpdir);
}
// this table_desc does not contain the partitioning columns
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
lbCtx = constructListBucketingCtx(dest_tab.getSkewedColNames(), dest_tab.getSkewedColValues(), dest_tab.getSkewedColValueLocationMaps(), dest_tab.isStoredAsSubDirectories(), conf);
// NOTE: specify Dynamic partitions in dest_tab for WriteEntity
if (!isNonNativeTable) {
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsAcid) {
acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
checkAcidConstraints(qb, table_desc, dest_tab);
}
ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp);
ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
} else {
// This is a non-native table.
// We need to set stats as inaccurate.
setStatsForNonNativeTable(dest_tab);
// true if it is insert overwrite.
boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName()));
createInsertDesc(dest_tab, overwrite);
}
WriteEntity output = null;
// list of dynamically created partitions are known.
if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) {
output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest));
if (!outputs.add(output)) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName()));
}
}
if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) {
// No static partition specified
if (dpCtx.getNumSPCols() == 0) {
output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest), false);
outputs.add(output);
output.setDynamicPartitionWrite(true);
} else // part of the partition specified
// Create a DummyPartition in this case. Since, the metastore does not store partial
// partitions currently, we need to store dummy partitions
{
try {
String ppath = dpCtx.getSPPath();
ppath = ppath.substring(0, ppath.length() - 1);
DummyPartition p = new DummyPartition(dest_tab, dest_tab.getDbName() + "@" + dest_tab.getTableName() + "@" + ppath, partSpec);
output = new WriteEntity(p, getWriteType(dest), false);
output.setDynamicPartitionWrite(true);
outputs.add(output);
} catch (HiveException e) {
throw new SemanticException(e.getMessage(), e);
}
}
}
ctx.getLoadTableOutputMap().put(ltd, output);
break;
}
case QBMetaData.DEST_PARTITION:
{
dest_part = qbm.getDestPartitionForAlias(dest);
dest_tab = dest_part.getTable();
destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE)) {
throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
}
Path tabPath = dest_tab.getPath();
Path partPath = dest_part.getDataLocation();
// verify that our destination is empty before proceeding
if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
try {
FileSystem fs = partPath.getFileSystem(conf);
if (!MetaStoreUtils.isDirEmpty(fs, partPath)) {
LOG.warn("Attempted write into an immutable table partition : " + dest_tab.getTableName() + " : " + partPath);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
}
} catch (IOException ioe) {
LOG.warn("Error while trying to determine if immutable table partition has any data : " + dest_tab.getTableName() + " : " + partPath);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
}
}
// if the table is in a different dfs than the partition,
// replace the partition's dfs with the table's dfs.
dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
queryTmpdir = ctx.getTempDirForPath(dest_path, true);
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(), dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(), dest_part.isStoredAsSubDirectories(), conf);
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsAcid) {
acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
checkAcidConstraints(qb, table_desc, dest_tab);
}
ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp);
ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
if (!outputs.add(new WriteEntity(dest_part, determineWriteType(ltd, dest_tab.isNonNative(), dest)))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
}
break;
}
case QBMetaData.DEST_LOCAL_FILE:
isLocal = true;
// fall through
case QBMetaData.DEST_DFS_FILE:
{
dest_path = new Path(qbm.getDestFileForAlias(dest));
if (isLocal) {
// for local directory - we always write to map-red intermediate
// store and then copy to local fs
queryTmpdir = ctx.getMRTmpPath();
} else {
try {
Path qPath = FileUtils.makeQualified(dest_path, conf);
queryTmpdir = ctx.getTempDirForPath(qPath, true);
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: " + dest_path, e);
}
}
String cols = "";
String colTypes = "";
ArrayList<ColumnInfo> colInfos = inputRR.getColumnInfos();
// CTAS case: the file output format and serde are defined by the create
// table command rather than taking the default value
List<FieldSchema> field_schemas = null;
CreateTableDesc tblDesc = qb.getTableDesc();
CreateViewDesc viewDesc = qb.getViewDesc();
if (tblDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
destTableIsTemporary = tblDesc.isTemporary();
destTableIsMaterialization = tblDesc.isMaterialization();
} else if (viewDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
destTableIsTemporary = false;
}
boolean first = true;
for (ColumnInfo colInfo : colInfos) {
String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
if (nm[1] != null) {
// non-null column alias
colInfo.setAlias(nm[1]);
}
//default column name
String colName = colInfo.getInternalName();
if (field_schemas != null) {
FieldSchema col = new FieldSchema();
if (!("".equals(nm[0])) && nm[1] != null) {
// remove ``
colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase();
}
colName = fixCtasColumnName(colName);
col.setName(colName);
String typeName = colInfo.getType().getTypeName();
// CTAS should NOT create a VOID type
if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE.getMsg(colName));
}
col.setType(typeName);
field_schemas.add(col);
}
if (!first) {
cols = cols.concat(",");
colTypes = colTypes.concat(":");
}
first = false;
cols = cols.concat(colName);
// Replace VOID type with string when the output is a temp table or
// local files.
// A VOID type can be generated under the query:
//
// select NULL from tt;
// or
// insert overwrite local directory "abc" select NULL from tt;
//
// where there is no column type to which the NULL value should be
// converted.
//
String tName = colInfo.getType().getTypeName();
if (tName.equals(serdeConstants.VOID_TYPE_NAME)) {
colTypes = colTypes.concat(serdeConstants.STRING_TYPE_NAME);
} else {
colTypes = colTypes.concat(tName);
}
}
// update the create table descriptor with the resulting schema.
if (tblDesc != null) {
tblDesc.setCols(new ArrayList<FieldSchema>(field_schemas));
} else if (viewDesc != null) {
viewDesc.setSchema(new ArrayList<FieldSchema>(field_schemas));
}
boolean isDestTempFile = true;
if (!ctx.isMRTmpFileURI(dest_path.toUri().toString())) {
idToTableNameMap.put(String.valueOf(destTableId), dest_path.toUri().toString());
currentTableId = destTableId;
destTableId++;
isDestTempFile = false;
}
boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes));
if (tblDesc == null) {
if (viewDesc != null) {
table_desc = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
} else if (qb.getIsQuery()) {
String fileFormat;
if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
fileFormat = "SequenceFile";
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT, fileFormat);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, ThriftJDBCBinarySerDe.class);
// Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
// write out formatted thrift objects to SequenceFile
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
} else {
fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, LazySimpleSerDe.class);
}
} else {
table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
}
} else {
table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
}
if (!outputs.add(new WriteEntity(dest_path, !isDfsDir, isDestTempFile))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_path.toUri().toString()));
}
break;
}
default:
throw new SemanticException("Unknown destination type: " + dest_type);
}
input = genConversionSelectOperator(dest, qb, input, table_desc, dpCtx);
inputRR = opParseCtx.get(input).getRowResolver();
ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
if (updating(dest) || deleting(dest)) {
vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
} else {
try {
StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc.getDeserializer(conf).getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), "", false));
}
} catch (Exception e) {
throw new SemanticException(e.getMessage(), e);
}
}
RowSchema fsRS = new RowSchema(vecCol);
// The output files of a FileSink can be merged if they are either not being written to a table
// or are being written to a table which is not bucketed
// and table the table is not sorted
boolean canBeMerged = (dest_tab == null || !((dest_tab.getNumBuckets() > 0) || (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0)));
// If this table is working with ACID semantics, turn off merging
canBeMerged &= !destTableIsAcid;
// Generate the partition columns from the parent input
if (dest_type.intValue() == QBMetaData.DEST_TABLE || dest_type.intValue() == QBMetaData.DEST_PARTITION) {
genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx);
}
FileSinkDesc fileSinkDesc = new FileSinkDesc(queryTmpdir, table_desc, conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId, rsCtx.isMultiFileSpray(), canBeMerged, rsCtx.getNumFiles(), rsCtx.getTotalFiles(), rsCtx.getPartnCols(), dpCtx, dest_path);
boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
fileSinkDesc.setHiveServerQuery(isHiveServerQuery);
// FileSinkOperator knows how to properly write to it.
if (destTableIsAcid) {
AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT);
fileSinkDesc.setWriteType(wt);
acidFileSinks.add(fileSinkDesc);
}
fileSinkDesc.setTemporary(destTableIsTemporary);
fileSinkDesc.setMaterialization(destTableIsMaterialization);
/* Set List Bucketing context. */
if (lbCtx != null) {
lbCtx.processRowSkewedIndex(fsRS);
lbCtx.calculateSkewedValueSubDirList();
}
fileSinkDesc.setLbCtx(lbCtx);
// set the stats publishing/aggregating key prefix
// the same as directory name. The directory name
// can be changed in the optimizer but the key should not be changed
// it should be the same as the MoveWork's sourceDir.
fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
if (!destTableIsMaterialization && HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
String statsTmpLoc = ctx.getTempDirForPath(dest_path).toString();
fileSinkDesc.setStatsTmpDir(statsTmpLoc);
LOG.debug("Set stats collection dir : " + statsTmpLoc);
}
if (dest_part != null) {
try {
String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
fileSinkDesc.setStaticSpec(staticSpec);
} catch (MetaException e) {
throw new SemanticException(e);
}
} else if (dpCtx != null) {
fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
}
if (isHiveServerQuery && null != table_desc && table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true);
} else {
fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
if (ltd != null && SessionState.get() != null) {
SessionState.get().getLineageState().mapDirToOp(ltd.getSourcePath(), (FileSinkOperator) output);
} else if (queryState.getCommandType().equals(HiveOperation.CREATETABLE_AS_SELECT.getOperationName())) {
Path tlocation = null;
String tName = Utilities.getDbTableName(tableDesc.getTableName())[1];
try {
Warehouse wh = new Warehouse(conf);
tlocation = wh.getTablePath(db.getDatabase(tableDesc.getDatabaseName()), tName);
} catch (MetaException | HiveException e) {
throw new SemanticException(e);
}
SessionState.get().getLineageState().mapDirToOp(tlocation, (FileSinkOperator) output);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + dest_path + " row schema: " + inputRR.toString());
}
FileSinkOperator fso = (FileSinkOperator) output;
fso.getConf().setTable(dest_tab);
fsopToTable.put(fso, dest_tab);
// and it is an insert overwrite or insert into table
if (dest_tab != null && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
if (dest_type.intValue() == QBMetaData.DEST_TABLE) {
genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
} else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) {
genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
}
}
return output;
}
use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.
the class MoveTask method execute.
@Override
public int execute(DriverContext driverContext) {
try {
if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
return 0;
}
Hive db = getHive();
// Do any hive related operations like moving tables and files
// to appropriate locations
LoadFileDesc lfd = work.getLoadFileWork();
if (lfd != null) {
Path targetPath = lfd.getTargetDir();
Path sourcePath = lfd.getSourcePath();
moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
}
// Multi-file load is for dynamic partitions when some partitions do not
// need to merge and they can simply be moved to the target directory.
LoadMultiFilesDesc lmfd = work.getLoadMultiFilesWork();
if (lmfd != null) {
boolean isDfsDir = lmfd.getIsDfsDir();
int i = 0;
while (i < lmfd.getSourceDirs().size()) {
Path srcPath = lmfd.getSourceDirs().get(i);
Path destPath = lmfd.getTargetDirs().get(i);
FileSystem fs = destPath.getFileSystem(conf);
if (!fs.exists(destPath.getParent())) {
fs.mkdirs(destPath.getParent());
}
moveFile(srcPath, destPath, isDfsDir);
i++;
}
}
// Next we do this for tables and partitions
LoadTableDesc tbd = work.getLoadTableWork();
if (tbd != null) {
StringBuilder mesg = new StringBuilder("Loading data to table ").append(tbd.getTable().getTableName());
if (tbd.getPartitionSpec().size() > 0) {
mesg.append(" partition (");
Map<String, String> partSpec = tbd.getPartitionSpec();
for (String key : partSpec.keySet()) {
mesg.append(key).append('=').append(partSpec.get(key)).append(", ");
}
mesg.setLength(mesg.length() - 2);
mesg.append(')');
}
String mesg_detail = " from " + tbd.getSourcePath();
console.printInfo(mesg.toString(), mesg_detail);
Table table = db.getTable(tbd.getTable().getTableName());
if (work.getCheckFileFormat()) {
// Get all files from the src directory
FileStatus[] dirs;
ArrayList<FileStatus> files;
// source filesystem
FileSystem srcFs;
try {
srcFs = tbd.getSourcePath().getFileSystem(conf);
dirs = srcFs.globStatus(tbd.getSourcePath());
files = new ArrayList<FileStatus>();
for (int i = 0; (dirs != null && i < dirs.length); i++) {
files.addAll(Arrays.asList(srcFs.listStatus(dirs[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER)));
// one.
if (files.size() > 0) {
break;
}
}
} catch (IOException e) {
throw new HiveException("addFiles: filesystem error in check phase", e);
}
// handle file format check for table level
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
boolean flag = true;
// dynamic partition context is null
if (tbd.getDPCtx() == null) {
if (tbd.getPartitionSpec() == null || tbd.getPartitionSpec().isEmpty()) {
// Check if the file format of the file matches that of the table.
flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
} else {
// Check if the file format of the file matches that of the partition
Partition oldPart = db.getPartition(table, tbd.getPartitionSpec(), false);
if (oldPart == null) {
// this means we have just created a table and are specifying partition in the
// load statement (without pre-creating the partition), in which case lets use
// table input format class. inheritTableSpecs defaults to true so when a new
// partition is created later it will automatically inherit input format
// from table object
flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
} else {
flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, oldPart.getInputFormatClass(), files);
}
}
if (!flag) {
throw new HiveException("Wrong file format. Please check the file's format.");
}
} else {
LOG.warn("Skipping file format check as dpCtx is not null");
}
}
}
// Create a data container
DataContainer dc = null;
if (tbd.getPartitionSpec().size() == 0) {
dc = new DataContainer(table.getTTable());
db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(), work.isSrcLocal(), isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask());
if (work.getOutputs() != null) {
DDLTask.addIfAbsentByName(new WriteEntity(table, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
}
} else {
LOG.info("Partition is: " + tbd.getPartitionSpec().toString());
// Check if the bucketing and/or sorting columns were inferred
List<BucketCol> bucketCols = null;
List<SortCol> sortCols = null;
int numBuckets = -1;
Task task = this;
String path = tbd.getSourcePath().toUri().toString();
// (Either standard, local, or a merge)
while (task.getParentTasks() != null && task.getParentTasks().size() == 1) {
task = (Task) task.getParentTasks().get(0);
// If it was a merge task or a local map reduce task, nothing can be inferred
if (task instanceof MergeFileTask || task instanceof MapredLocalTask) {
break;
}
// the directory this move task is moving
if (task instanceof MapRedTask) {
MapredWork work = (MapredWork) task.getWork();
MapWork mapWork = work.getMapWork();
bucketCols = mapWork.getBucketedColsByDirectory().get(path);
sortCols = mapWork.getSortedColsByDirectory().get(path);
if (work.getReduceWork() != null) {
numBuckets = work.getReduceWork().getNumReduceTasks();
}
if (bucketCols != null || sortCols != null) {
// operator that writes the final output)
assert work.isFinalMapRed();
}
break;
}
// condition for merging is not met, see GenMRFileSink1.
if (task instanceof MoveTask) {
if (((MoveTask) task).getWork().getLoadFileWork() != null) {
path = ((MoveTask) task).getWork().getLoadFileWork().getSourcePath().toUri().toString();
}
}
}
// deal with dynamic partitions
DynamicPartitionCtx dpCtx = tbd.getDPCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// dynamic partitions
List<LinkedHashMap<String, String>> dps = Utilities.getFullDPSpecs(conf, dpCtx);
console.printInfo(System.getProperty("line.separator"));
long startTime = System.currentTimeMillis();
// load the list of DP partitions and return the list of partition specs
// TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions
// to use Utilities.getFullDPSpecs() to get the list of full partSpecs.
// After that check the number of DPs created to not exceed the limit and
// iterate over it and call loadPartition() here.
// The reason we don't do inside HIVE-1361 is the latter is large and we
// want to isolate any potential issue it may introduce.
Map<Map<String, String>, Partition> dp = db.loadDynamicPartitions(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), dpCtx.getNumDPCols(), isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask(), work.getLoadTableWork().getWriteType());
// publish DP columns to its subscribers
if (dps != null && dps.size() > 0) {
pushFeed(FeedType.DYNAMIC_PARTITIONS, dp.values());
}
String loadTime = "\t Time taken to load dynamic partitions: " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds";
console.printInfo(loadTime);
LOG.info(loadTime);
if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) {
throw new HiveException("This query creates no partitions." + " To turn off this error, set hive.error.on.empty.partition=false.");
}
startTime = System.currentTimeMillis();
// and put it to WriteEntity for post-exec hook
for (Map.Entry<Map<String, String>, Partition> entry : dp.entrySet()) {
Partition partn = entry.getValue();
if (bucketCols != null || sortCols != null) {
updatePartitionBucketSortColumns(db, table, partn, bucketCols, numBuckets, sortCols);
}
WriteEntity enty = new WriteEntity(partn, getWriteType(tbd, work.getLoadTableWork().getWriteType()));
if (work.getOutputs() != null) {
DDLTask.addIfAbsentByName(enty, work.getOutputs());
}
// queryPlan here.
if (queryPlan.getOutputs() == null) {
queryPlan.setOutputs(new LinkedHashSet<WriteEntity>());
}
queryPlan.getOutputs().add(enty);
// update columnar lineage for each partition
dc = new DataContainer(table.getTTable(), partn.getTPartition());
// Don't set lineage on delete as we don't have all the columns
if (SessionState.get() != null && work.getLoadTableWork().getWriteType() != AcidUtils.Operation.DELETE && work.getLoadTableWork().getWriteType() != AcidUtils.Operation.UPDATE) {
SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, table.getCols());
}
LOG.info("\tLoading partition " + entry.getKey());
}
console.printInfo("\t Time taken for adding to write entity : " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
// reset data container to prevent it being added again.
dc = null;
} else {
// static partitions
List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec());
db.validatePartitionNameCharacters(partVals);
db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask());
Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
if (bucketCols != null || sortCols != null) {
updatePartitionBucketSortColumns(db, table, partn, bucketCols, numBuckets, sortCols);
}
dc = new DataContainer(table.getTTable(), partn.getTPartition());
// add this partition to post-execution hook
if (work.getOutputs() != null) {
DDLTask.addIfAbsentByName(new WriteEntity(partn, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
}
}
}
if (SessionState.get() != null && dc != null) {
// If we are doing an update or a delete the number of columns in the table will not
// match the number of columns in the file sink. For update there will be one too many
// (because of the ROW__ID), and in the case of the delete there will be just the
// ROW__ID, which we don't need to worry about from a lineage perspective.
List<FieldSchema> tableCols = null;
switch(work.getLoadTableWork().getWriteType()) {
case DELETE:
case UPDATE:
// Pass an empty list as no columns will be written to the file.
// TODO I should be able to make this work for update
tableCols = new ArrayList<FieldSchema>();
break;
default:
tableCols = table.getCols();
break;
}
SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, tableCols);
}
releaseLocks(tbd);
}
return 0;
} catch (Exception e) {
console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
setException(e);
return (1);
}
}
Aggregations