use of org.apache.hadoop.hive.metastore.api.FieldSchema in project hive by apache.
the class SemanticAnalyzer method genFileSinkPlan.
@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
QBMetaData qbm = qb.getMetaData();
Integer dest_type = qbm.getDestTypeForAlias(dest);
// destination table if any
Table dest_tab = null;
// should the destination table be written to using ACID
boolean destTableIsAcid = false;
boolean destTableIsTemporary = false;
boolean destTableIsMaterialization = false;
// destination partition if any
Partition dest_part = null;
// the intermediate destination directory
Path queryTmpdir = null;
// the final destination directory
Path dest_path = null;
TableDesc table_desc = null;
int currentTableId = 0;
boolean isLocal = false;
SortBucketRSCtx rsCtx = new SortBucketRSCtx();
DynamicPartitionCtx dpCtx = null;
LoadTableDesc ltd = null;
ListBucketingCtx lbCtx = null;
Map<String, String> partSpec = null;
switch(dest_type.intValue()) {
case QBMetaData.DEST_TABLE:
{
dest_tab = qbm.getDestTableForAlias(dest);
destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
destTableIsTemporary = dest_tab.isTemporary();
// Is the user trying to insert into a external tables
if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) {
throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
}
partSpec = qbm.getPartSpecForAlias(dest);
dest_path = dest_tab.getPath();
// verify that our destination is empty before proceeding
if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
try {
FileSystem fs = dest_path.getFileSystem(conf);
if (!MetaStoreUtils.isDirEmpty(fs, dest_path)) {
LOG.warn("Attempted write into an immutable table : " + dest_tab.getTableName() + " : " + dest_path);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
}
} catch (IOException ioe) {
LOG.warn("Error while trying to determine if immutable table has any data : " + dest_tab.getTableName() + " : " + dest_path);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
}
}
// check for partition
List<FieldSchema> parts = dest_tab.getPartitionKeys();
if (parts != null && parts.size() > 0) {
// table is partitioned
if (partSpec == null || partSpec.size() == 0) {
// user did NOT specify partition
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
}
dpCtx = qbm.getDPCtx(dest);
if (dpCtx == null) {
dest_tab.validatePartColumnNames(partSpec, false);
dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
}
if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) {
// allow DP
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg()));
}
if (dpCtx.getSPPath() != null) {
dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
}
if ((dest_tab.getNumBuckets() > 0)) {
dpCtx.setNumBuckets(dest_tab.getNumBuckets());
}
}
boolean isNonNativeTable = dest_tab.isNonNative();
if (isNonNativeTable) {
queryTmpdir = dest_path;
} else {
queryTmpdir = ctx.getTempDirForPath(dest_path, true);
}
if (dpCtx != null) {
// set the root of the temporary path where dynamic partition columns will populate
dpCtx.setRootPath(queryTmpdir);
}
// this table_desc does not contain the partitioning columns
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
lbCtx = constructListBucketingCtx(dest_tab.getSkewedColNames(), dest_tab.getSkewedColValues(), dest_tab.getSkewedColValueLocationMaps(), dest_tab.isStoredAsSubDirectories(), conf);
// NOTE: specify Dynamic partitions in dest_tab for WriteEntity
if (!isNonNativeTable) {
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsAcid) {
acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
checkAcidConstraints(qb, table_desc, dest_tab);
}
ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp);
ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
} else {
// This is a non-native table.
// We need to set stats as inaccurate.
setStatsForNonNativeTable(dest_tab);
// true if it is insert overwrite.
boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName()));
createInsertDesc(dest_tab, overwrite);
}
WriteEntity output = null;
// list of dynamically created partitions are known.
if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) {
output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest));
if (!outputs.add(output)) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName()));
}
}
if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) {
// No static partition specified
if (dpCtx.getNumSPCols() == 0) {
output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest), false);
outputs.add(output);
output.setDynamicPartitionWrite(true);
} else // part of the partition specified
// Create a DummyPartition in this case. Since, the metastore does not store partial
// partitions currently, we need to store dummy partitions
{
try {
String ppath = dpCtx.getSPPath();
ppath = ppath.substring(0, ppath.length() - 1);
DummyPartition p = new DummyPartition(dest_tab, dest_tab.getDbName() + "@" + dest_tab.getTableName() + "@" + ppath, partSpec);
output = new WriteEntity(p, getWriteType(dest), false);
output.setDynamicPartitionWrite(true);
outputs.add(output);
} catch (HiveException e) {
throw new SemanticException(e.getMessage(), e);
}
}
}
ctx.getLoadTableOutputMap().put(ltd, output);
break;
}
case QBMetaData.DEST_PARTITION:
{
dest_part = qbm.getDestPartitionForAlias(dest);
dest_tab = dest_part.getTable();
destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE)) {
throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
}
Path tabPath = dest_tab.getPath();
Path partPath = dest_part.getDataLocation();
// verify that our destination is empty before proceeding
if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
try {
FileSystem fs = partPath.getFileSystem(conf);
if (!MetaStoreUtils.isDirEmpty(fs, partPath)) {
LOG.warn("Attempted write into an immutable table partition : " + dest_tab.getTableName() + " : " + partPath);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
}
} catch (IOException ioe) {
LOG.warn("Error while trying to determine if immutable table partition has any data : " + dest_tab.getTableName() + " : " + partPath);
throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
}
}
// if the table is in a different dfs than the partition,
// replace the partition's dfs with the table's dfs.
dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
queryTmpdir = ctx.getTempDirForPath(dest_path, true);
table_desc = Utilities.getTableDesc(dest_tab);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(), dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(), dest_part.isStoredAsSubDirectories(), conf);
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsAcid) {
acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
checkAcidConstraints(qb, table_desc, dest_tab);
}
ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp);
ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
if (!outputs.add(new WriteEntity(dest_part, determineWriteType(ltd, dest_tab.isNonNative(), dest)))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
}
break;
}
case QBMetaData.DEST_LOCAL_FILE:
isLocal = true;
// fall through
case QBMetaData.DEST_DFS_FILE:
{
dest_path = new Path(qbm.getDestFileForAlias(dest));
if (isLocal) {
// for local directory - we always write to map-red intermediate
// store and then copy to local fs
queryTmpdir = ctx.getMRTmpPath();
} else {
try {
Path qPath = FileUtils.makeQualified(dest_path, conf);
queryTmpdir = ctx.getTempDirForPath(qPath, true);
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: " + dest_path, e);
}
}
String cols = "";
String colTypes = "";
ArrayList<ColumnInfo> colInfos = inputRR.getColumnInfos();
// CTAS case: the file output format and serde are defined by the create
// table command rather than taking the default value
List<FieldSchema> field_schemas = null;
CreateTableDesc tblDesc = qb.getTableDesc();
CreateViewDesc viewDesc = qb.getViewDesc();
if (tblDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
destTableIsTemporary = tblDesc.isTemporary();
destTableIsMaterialization = tblDesc.isMaterialization();
} else if (viewDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
destTableIsTemporary = false;
}
boolean first = true;
for (ColumnInfo colInfo : colInfos) {
String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
if (nm[1] != null) {
// non-null column alias
colInfo.setAlias(nm[1]);
}
//default column name
String colName = colInfo.getInternalName();
if (field_schemas != null) {
FieldSchema col = new FieldSchema();
if (!("".equals(nm[0])) && nm[1] != null) {
// remove ``
colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase();
}
colName = fixCtasColumnName(colName);
col.setName(colName);
String typeName = colInfo.getType().getTypeName();
// CTAS should NOT create a VOID type
if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE.getMsg(colName));
}
col.setType(typeName);
field_schemas.add(col);
}
if (!first) {
cols = cols.concat(",");
colTypes = colTypes.concat(":");
}
first = false;
cols = cols.concat(colName);
// Replace VOID type with string when the output is a temp table or
// local files.
// A VOID type can be generated under the query:
//
// select NULL from tt;
// or
// insert overwrite local directory "abc" select NULL from tt;
//
// where there is no column type to which the NULL value should be
// converted.
//
String tName = colInfo.getType().getTypeName();
if (tName.equals(serdeConstants.VOID_TYPE_NAME)) {
colTypes = colTypes.concat(serdeConstants.STRING_TYPE_NAME);
} else {
colTypes = colTypes.concat(tName);
}
}
// update the create table descriptor with the resulting schema.
if (tblDesc != null) {
tblDesc.setCols(new ArrayList<FieldSchema>(field_schemas));
} else if (viewDesc != null) {
viewDesc.setSchema(new ArrayList<FieldSchema>(field_schemas));
}
boolean isDestTempFile = true;
if (!ctx.isMRTmpFileURI(dest_path.toUri().toString())) {
idToTableNameMap.put(String.valueOf(destTableId), dest_path.toUri().toString());
currentTableId = destTableId;
destTableId++;
isDestTempFile = false;
}
boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes));
if (tblDesc == null) {
if (viewDesc != null) {
table_desc = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
} else if (qb.getIsQuery()) {
String fileFormat;
if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
fileFormat = "SequenceFile";
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT, fileFormat);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, ThriftJDBCBinarySerDe.class);
// Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
// write out formatted thrift objects to SequenceFile
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
} else {
fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, LazySimpleSerDe.class);
}
} else {
table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
}
} else {
table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
}
if (!outputs.add(new WriteEntity(dest_path, !isDfsDir, isDestTempFile))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_path.toUri().toString()));
}
break;
}
default:
throw new SemanticException("Unknown destination type: " + dest_type);
}
input = genConversionSelectOperator(dest, qb, input, table_desc, dpCtx);
inputRR = opParseCtx.get(input).getRowResolver();
ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
if (updating(dest) || deleting(dest)) {
vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
} else {
try {
StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc.getDeserializer(conf).getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), "", false));
}
} catch (Exception e) {
throw new SemanticException(e.getMessage(), e);
}
}
RowSchema fsRS = new RowSchema(vecCol);
// The output files of a FileSink can be merged if they are either not being written to a table
// or are being written to a table which is not bucketed
// and table the table is not sorted
boolean canBeMerged = (dest_tab == null || !((dest_tab.getNumBuckets() > 0) || (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0)));
// If this table is working with ACID semantics, turn off merging
canBeMerged &= !destTableIsAcid;
// Generate the partition columns from the parent input
if (dest_type.intValue() == QBMetaData.DEST_TABLE || dest_type.intValue() == QBMetaData.DEST_PARTITION) {
genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx);
}
FileSinkDesc fileSinkDesc = new FileSinkDesc(queryTmpdir, table_desc, conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId, rsCtx.isMultiFileSpray(), canBeMerged, rsCtx.getNumFiles(), rsCtx.getTotalFiles(), rsCtx.getPartnCols(), dpCtx, dest_path);
boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
fileSinkDesc.setHiveServerQuery(isHiveServerQuery);
// FileSinkOperator knows how to properly write to it.
if (destTableIsAcid) {
AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT);
fileSinkDesc.setWriteType(wt);
acidFileSinks.add(fileSinkDesc);
}
fileSinkDesc.setTemporary(destTableIsTemporary);
fileSinkDesc.setMaterialization(destTableIsMaterialization);
/* Set List Bucketing context. */
if (lbCtx != null) {
lbCtx.processRowSkewedIndex(fsRS);
lbCtx.calculateSkewedValueSubDirList();
}
fileSinkDesc.setLbCtx(lbCtx);
// set the stats publishing/aggregating key prefix
// the same as directory name. The directory name
// can be changed in the optimizer but the key should not be changed
// it should be the same as the MoveWork's sourceDir.
fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
if (!destTableIsMaterialization && HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
String statsTmpLoc = ctx.getTempDirForPath(dest_path).toString();
fileSinkDesc.setStatsTmpDir(statsTmpLoc);
LOG.debug("Set stats collection dir : " + statsTmpLoc);
}
if (dest_part != null) {
try {
String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
fileSinkDesc.setStaticSpec(staticSpec);
} catch (MetaException e) {
throw new SemanticException(e);
}
} else if (dpCtx != null) {
fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
}
if (isHiveServerQuery && null != table_desc && table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true);
} else {
fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
if (ltd != null && SessionState.get() != null) {
SessionState.get().getLineageState().mapDirToOp(ltd.getSourcePath(), (FileSinkOperator) output);
} else if (queryState.getCommandType().equals(HiveOperation.CREATETABLE_AS_SELECT.getOperationName())) {
Path tlocation = null;
String tName = Utilities.getDbTableName(tableDesc.getTableName())[1];
try {
Warehouse wh = new Warehouse(conf);
tlocation = wh.getTablePath(db.getDatabase(tableDesc.getDatabaseName()), tName);
} catch (MetaException | HiveException e) {
throw new SemanticException(e);
}
SessionState.get().getLineageState().mapDirToOp(tlocation, (FileSinkOperator) output);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + dest_path + " row schema: " + inputRR.toString());
}
FileSinkOperator fso = (FileSinkOperator) output;
fso.getConf().setTable(dest_tab);
fsopToTable.put(fso, dest_tab);
// and it is an insert overwrite or insert into table
if (dest_tab != null && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
if (dest_type.intValue() == QBMetaData.DEST_TABLE) {
genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
} else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) {
genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
}
}
return output;
}
use of org.apache.hadoop.hive.metastore.api.FieldSchema in project hive by apache.
the class SemanticAnalyzer method analyzeCreateTable.
/**
* Analyze the create table command. If it is a regular create-table or
* create-table-like statements, we create a DDLWork and return true. If it is
* a create-table-as-select, we get the necessary info such as the SerDe and
* Storage Format and put it in QB, and return false, indicating the rest of
* the semantic analyzer need to deal with the select statement with respect
* to the SerDe and Storage Format.
*/
ASTNode analyzeCreateTable(ASTNode ast, QB qb, PlannerContext plannerCtx) throws SemanticException {
String[] qualifiedTabName = getQualifiedTableName((ASTNode) ast.getChild(0));
String dbDotTab = getDotName(qualifiedTabName);
String likeTableName = null;
List<FieldSchema> cols = new ArrayList<FieldSchema>();
List<FieldSchema> partCols = new ArrayList<FieldSchema>();
List<String> bucketCols = new ArrayList<String>();
List<SQLPrimaryKey> primaryKeys = new ArrayList<SQLPrimaryKey>();
List<SQLForeignKey> foreignKeys = new ArrayList<SQLForeignKey>();
List<Order> sortCols = new ArrayList<Order>();
int numBuckets = -1;
String comment = null;
String location = null;
Map<String, String> tblProps = null;
boolean ifNotExists = false;
boolean isExt = false;
boolean isTemporary = false;
boolean isMaterialization = false;
ASTNode selectStmt = null;
// regular CREATE TABLE
final int CREATE_TABLE = 0;
// CREATE TABLE LIKE ... (CTLT)
final int CTLT = 1;
// CREATE TABLE AS SELECT ... (CTAS)
final int CTAS = 2;
int command_type = CREATE_TABLE;
List<String> skewedColNames = new ArrayList<String>();
List<List<String>> skewedValues = new ArrayList<List<String>>();
Map<List<String>, String> listBucketColValuesMapping = new HashMap<List<String>, String>();
boolean storedAsDirs = false;
boolean isUserStorageFormat = false;
RowFormatParams rowFormatParams = new RowFormatParams();
StorageFormat storageFormat = new StorageFormat(conf);
LOG.info("Creating table " + dbDotTab + " position=" + ast.getCharPositionInLine());
int numCh = ast.getChildCount();
/*
* Check the 1st-level children and do simple semantic checks: 1) CTLT and
* CTAS should not coexists. 2) CTLT or CTAS should not coexists with column
* list (target table schema). 3) CTAS does not support partitioning (for
* now).
*/
for (int num = 1; num < numCh; num++) {
ASTNode child = (ASTNode) ast.getChild(num);
if (storageFormat.fillStorageFormat(child)) {
isUserStorageFormat = true;
continue;
}
switch(child.getToken().getType()) {
case HiveParser.TOK_IFNOTEXISTS:
ifNotExists = true;
break;
case HiveParser.KW_EXTERNAL:
isExt = true;
break;
case HiveParser.KW_TEMPORARY:
isTemporary = true;
isMaterialization = MATERIALIZATION_MARKER.equals(child.getText());
break;
case HiveParser.TOK_LIKETABLE:
if (child.getChildCount() > 0) {
likeTableName = getUnescapedName((ASTNode) child.getChild(0));
if (likeTableName != null) {
if (command_type == CTAS) {
throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg());
}
if (cols.size() != 0) {
throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg());
}
}
command_type = CTLT;
}
break;
case // CTAS
HiveParser.TOK_QUERY:
if (command_type == CTLT) {
throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg());
}
if (cols.size() != 0) {
throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg());
}
if (partCols.size() != 0 || bucketCols.size() != 0) {
boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
if (dynPart == false) {
throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
} else {
// TODO: support dynamic partition for CTAS
throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
}
}
if (isExt) {
throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg());
}
command_type = CTAS;
if (plannerCtx != null) {
plannerCtx.setCTASToken(child);
}
selectStmt = child;
break;
case HiveParser.TOK_TABCOLLIST:
cols = getColumns(child, true, primaryKeys, foreignKeys);
break;
case HiveParser.TOK_TABLECOMMENT:
comment = unescapeSQLString(child.getChild(0).getText());
break;
case HiveParser.TOK_TABLEPARTCOLS:
partCols = getColumns((ASTNode) child.getChild(0), false);
break;
case HiveParser.TOK_ALTERTABLE_BUCKETS:
bucketCols = getColumnNames((ASTNode) child.getChild(0));
if (child.getChildCount() == 2) {
numBuckets = Integer.parseInt(child.getChild(1).getText());
} else {
sortCols = getColumnNamesOrder((ASTNode) child.getChild(1));
numBuckets = Integer.parseInt(child.getChild(2).getText());
}
break;
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(child);
break;
case HiveParser.TOK_TABLELOCATION:
location = unescapeSQLString(child.getChild(0).getText());
location = EximUtil.relativeToAbsolutePath(conf, location);
inputs.add(toReadEntity(location));
break;
case HiveParser.TOK_TABLEPROPERTIES:
tblProps = DDLSemanticAnalyzer.getProps((ASTNode) child.getChild(0));
break;
case HiveParser.TOK_TABLESERIALIZER:
child = (ASTNode) child.getChild(0);
storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText()));
if (child.getChildCount() == 2) {
readProps((ASTNode) (child.getChild(1).getChild(0)), storageFormat.getSerdeProps());
}
break;
case HiveParser.TOK_TABLESKEWED:
/**
* Throw an error if the user tries to use the DDL with
* hive.internal.ddl.list.bucketing.enable set to false.
*/
HiveConf hiveConf = SessionState.get().getConf();
// skewed column names
skewedColNames = analyzeSkewedTablDDLColNames(skewedColNames, child);
// skewed value
analyzeDDLSkewedValues(skewedValues, child);
// stored as directories
storedAsDirs = analyzeStoredAdDirs(child);
break;
default:
throw new AssertionError("Unknown token: " + child.getToken());
}
}
if (command_type == CREATE_TABLE || command_type == CTLT) {
queryState.setCommandType(HiveOperation.CREATETABLE);
} else if (command_type == CTAS) {
queryState.setCommandType(HiveOperation.CREATETABLE_AS_SELECT);
} else {
throw new SemanticException("Unrecognized command.");
}
storageFormat.fillDefaultStorageFormat(isExt, false);
// check for existence of table
if (ifNotExists) {
try {
Table table = getTable(qualifiedTabName, false);
if (table != null) {
// table exists
return null;
}
} catch (HiveException e) {
// should not occur since second parameter to getTableWithQN is false
throw new IllegalStateException("Unexpected Exception thrown: " + e.getMessage(), e);
}
}
addDbAndTabToOutputs(qualifiedTabName, TableType.MANAGED_TABLE);
if (isTemporary) {
if (partCols.size() > 0) {
throw new SemanticException("Partition columns are not supported on temporary tables");
}
if (location == null) {
// it has the same life cycle as the tmp table
try {
// Generate a unique ID for temp table path.
// This path will be fixed for the life of the temp table.
Path path = new Path(SessionState.getTempTableSpace(conf), UUID.randomUUID().toString());
path = Warehouse.getDnsPath(path, conf);
location = path.toString();
} catch (MetaException err) {
throw new SemanticException("Error while generating temp table path:", err);
}
}
}
// Handle different types of CREATE TABLE command
switch(command_type) {
case // REGULAR CREATE TABLE DDL
CREATE_TABLE:
tblProps = addDefaultProperties(tblProps);
CreateTableDesc crtTblDesc = new CreateTableDesc(dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues, primaryKeys, foreignKeys);
crtTblDesc.setStoredAsSubDirectories(storedAsDirs);
crtTblDesc.setNullFormat(rowFormatParams.nullFormat);
crtTblDesc.validate(conf);
// outputs is empty, which means this create table happens in the current
// database.
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblDesc), conf));
break;
case // create table like <tbl_name>
CTLT:
tblProps = addDefaultProperties(tblProps);
if (isTemporary) {
Table likeTable = getTable(likeTableName, false);
if (likeTable != null && likeTable.getPartCols().size() > 0) {
throw new SemanticException("Partition columns are not supported on temporary tables " + "and source table in CREATE TABLE LIKE is partitioned.");
}
}
CreateTableLikeDesc crtTblLikeDesc = new CreateTableLikeDesc(dbDotTab, isExt, isTemporary, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getSerdeProps(), tblProps, ifNotExists, likeTableName, isUserStorageFormat);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblLikeDesc), conf));
break;
case // create table as select
CTAS:
if (isTemporary) {
if (!ctx.isExplainSkipExecution() && !isMaterialization) {
String dbName = qualifiedTabName[0];
String tblName = qualifiedTabName[1];
SessionState ss = SessionState.get();
if (ss == null) {
throw new SemanticException("No current SessionState, cannot create temporary table " + dbName + "." + tblName);
}
Map<String, Table> tables = SessionHiveMetaStoreClient.getTempTablesForDatabase(dbName);
if (tables != null && tables.containsKey(tblName)) {
throw new SemanticException("Temporary table " + dbName + "." + tblName + " already exists");
}
}
} else {
// dumpTable is only used to check the conflict for non-temporary tables
try {
Table dumpTable = db.newTable(dbDotTab);
if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false) && !ctx.isExplainSkipExecution()) {
throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTab));
}
} catch (HiveException e) {
throw new SemanticException(e);
}
}
if (location != null && location.length() != 0) {
Path locPath = new Path(location);
FileSystem curFs = null;
FileStatus locStats = null;
try {
curFs = locPath.getFileSystem(conf);
if (curFs != null) {
locStats = curFs.getFileStatus(locPath);
}
if (locStats != null && locStats.isDir()) {
FileStatus[] lStats = curFs.listStatus(locPath);
if (lStats != null && lStats.length != 0) {
// Don't throw an exception if the target location only contains the staging-dirs
for (FileStatus lStat : lStats) {
if (!lStat.getPath().getName().startsWith(HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR))) {
throw new SemanticException(ErrorMsg.CTAS_LOCATION_NONEMPTY.getMsg(location));
}
}
}
}
} catch (FileNotFoundException nfe) {
//we will create the folder if it does not exist.
} catch (IOException ioE) {
if (LOG.isDebugEnabled()) {
LOG.debug("Exception when validate folder ", ioE);
}
}
}
tblProps = addDefaultProperties(tblProps);
tableDesc = new CreateTableDesc(qualifiedTabName[0], dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues, true, primaryKeys, foreignKeys);
tableDesc.setMaterialization(isMaterialization);
tableDesc.setStoredAsSubDirectories(storedAsDirs);
tableDesc.setNullFormat(rowFormatParams.nullFormat);
qb.setTableDesc(tableDesc);
return selectStmt;
default:
throw new SemanticException("Unrecognized command.");
}
return null;
}
use of org.apache.hadoop.hive.metastore.api.FieldSchema in project hive by apache.
the class SemanticAnalyzer method handleInsertStatementSpec.
/**
* This modifies the Select projections when the Select is part of an insert statement and
* the insert statement specifies a column list for the target table, e.g.
* create table source (a int, b int);
* create table target (x int, y int, z int);
* insert into target(z,x) select * from source
*
* Once the * is resolved to 'a,b', this list needs to rewritten to 'b,null,a' so that it looks
* as if the original query was written as
* insert into target select b, null, a from source
*
* if target schema is not specified, this is no-op
*
* @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
* @throws SemanticException
*/
public RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, RowResolver outputRR, RowResolver inputRR, QB qb, ASTNode selExprList) throws SemanticException {
//(z,x)
//specified in the query
List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);
if (targetTableSchema == null) {
//no insert schema was specified
return outputRR;
}
if (targetTableSchema.size() != col_list.size()) {
Table target = qb.getMetaData().getDestTableForAlias(dest);
Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
throw new SemanticException(generateErrorMessage(selExprList, "Expected " + targetTableSchema.size() + " columns for " + dest + (target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) + "; select produces " + col_list.size() + " columns"));
}
//e.g. map z->expr for a
Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>();
//e.g. map z->ColumnInfo for a
Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>();
int colListPos = 0;
for (String targetCol : targetTableSchema) {
targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos));
targetCol2Projection.put(targetCol, col_list.get(colListPos++));
}
Table target = qb.getMetaData().getDestTableForAlias(dest);
Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
if (target == null && partition == null) {
throw new SemanticException(generateErrorMessage(selExprList, "No table/partition found in QB metadata for dest='" + dest + "'"));
}
ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
colListPos = 0;
List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols();
List<String> targetTableColNames = new ArrayList<String>();
List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>();
for (FieldSchema fs : targetTableCols) {
targetTableColNames.add(fs.getName());
targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
}
Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest);
if (partSpec != null) {
//relies on consistent order via LinkedHashMap
for (Map.Entry<String, String> partKeyVal : partSpec.entrySet()) {
if (partKeyVal.getValue() == null) {
//these must be after non-partition cols
targetTableColNames.add(partKeyVal.getKey());
targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
}
}
}
RowResolver newOutputRR = new RowResolver();
//where missing columns are NULL-filled
for (int i = 0; i < targetTableColNames.size(); i++) {
String f = targetTableColNames.get(i);
if (targetCol2Projection.containsKey(f)) {
//put existing column in new list to make sure it is in the right position
new_col_list.add(targetCol2Projection.get(f));
//todo: is this OK?
ColumnInfo ci = targetCol2ColumnInfo.get(f);
ci.setInternalName(getColumnInternalName(colListPos));
newOutputRR.put(ci.getTabAlias(), ci.getInternalName(), ci);
} else {
//add new 'synthetic' columns for projections not provided by Select
ExprNodeDesc exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null);
new_col_list.add(exp);
//this column doesn't come from any table
final String tableAlias = null;
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos), exp.getWritableObjectInspector(), tableAlias, false);
newOutputRR.put(colInfo.getTabAlias(), colInfo.getInternalName(), colInfo);
}
colListPos++;
}
col_list.clear();
col_list.addAll(new_col_list);
return newOutputRR;
}
use of org.apache.hadoop.hive.metastore.api.FieldSchema in project hive by apache.
the class SemanticAnalyzer method analyzeCreateView.
protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCtx) throws SemanticException {
String[] qualTabName = getQualifiedTableName((ASTNode) ast.getChild(0));
String dbDotTable = getDotName(qualTabName);
List<FieldSchema> cols = null;
boolean ifNotExists = false;
boolean rewriteEnabled = false;
boolean orReplace = false;
boolean isAlterViewAs = false;
String comment = null;
ASTNode selectStmt = null;
Map<String, String> tblProps = null;
List<String> partColNames = null;
boolean isMaterialized = ast.getToken().getType() == HiveParser.TOK_CREATE_MATERIALIZED_VIEW;
String location = null;
RowFormatParams rowFormatParams = new RowFormatParams();
StorageFormat storageFormat = new StorageFormat(conf);
LOG.info("Creating view " + dbDotTable + " position=" + ast.getCharPositionInLine());
int numCh = ast.getChildCount();
for (int num = 1; num < numCh; num++) {
ASTNode child = (ASTNode) ast.getChild(num);
if (storageFormat.fillStorageFormat(child)) {
continue;
}
switch(child.getToken().getType()) {
case HiveParser.TOK_IFNOTEXISTS:
ifNotExists = true;
break;
case HiveParser.TOK_REWRITE_ENABLED:
rewriteEnabled = true;
break;
case HiveParser.TOK_ORREPLACE:
orReplace = true;
break;
case HiveParser.TOK_QUERY:
// For CBO
if (plannerCtx != null) {
plannerCtx.setViewToken(child);
}
selectStmt = child;
break;
case HiveParser.TOK_TABCOLNAME:
cols = getColumns(child);
break;
case HiveParser.TOK_TABLECOMMENT:
comment = unescapeSQLString(child.getChild(0).getText());
break;
case HiveParser.TOK_TABLEPROPERTIES:
tblProps = DDLSemanticAnalyzer.getProps((ASTNode) child.getChild(0));
break;
case HiveParser.TOK_VIEWPARTCOLS:
partColNames = getColumnNames((ASTNode) child.getChild(0));
break;
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(child);
break;
case HiveParser.TOK_TABLELOCATION:
location = unescapeSQLString(child.getChild(0).getText());
location = EximUtil.relativeToAbsolutePath(conf, location);
inputs.add(toReadEntity(location));
break;
case HiveParser.TOK_TABLESERIALIZER:
child = (ASTNode) child.getChild(0);
storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText()));
if (child.getChildCount() == 2) {
readProps((ASTNode) (child.getChild(1).getChild(0)), storageFormat.getSerdeProps());
}
break;
default:
assert false;
}
}
storageFormat.fillDefaultStorageFormat(false, isMaterialized);
if (ifNotExists && orReplace) {
throw new SemanticException("Can't combine IF NOT EXISTS and OR REPLACE.");
}
if (ast.getToken().getType() == HiveParser.TOK_ALTERVIEW && ast.getChild(1).getType() == HiveParser.TOK_QUERY) {
isAlterViewAs = true;
orReplace = true;
}
unparseTranslator.enable();
if (isMaterialized) {
createVwDesc = new CreateViewDesc(dbDotTable, cols, comment, tblProps, partColNames, ifNotExists, orReplace, rewriteEnabled, isAlterViewAs, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps());
addDbAndTabToOutputs(qualTabName, TableType.MATERIALIZED_VIEW);
queryState.setCommandType(HiveOperation.CREATE_MATERIALIZED_VIEW);
} else {
createVwDesc = new CreateViewDesc(dbDotTable, cols, comment, tblProps, partColNames, ifNotExists, orReplace, isAlterViewAs, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), storageFormat.getSerde());
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), createVwDesc), conf));
addDbAndTabToOutputs(qualTabName, TableType.VIRTUAL_VIEW);
queryState.setCommandType(HiveOperation.CREATEVIEW);
}
qb.setViewDesc(createVwDesc);
return selectStmt;
}
use of org.apache.hadoop.hive.metastore.api.FieldSchema in project hive by apache.
the class SemanticAnalyzer method convertRowSchemaToResultSetSchema.
static List<FieldSchema> convertRowSchemaToResultSetSchema(RowResolver rr, boolean useTabAliasIfAvailable) {
List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
String[] qualifiedColName;
String colName;
for (ColumnInfo colInfo : rr.getColumnInfos()) {
if (colInfo.isHiddenVirtualCol()) {
continue;
}
qualifiedColName = rr.reverseLookup(colInfo.getInternalName());
if (useTabAliasIfAvailable && qualifiedColName[0] != null && !qualifiedColName[0].isEmpty()) {
colName = qualifiedColName[0] + "." + qualifiedColName[1];
} else {
colName = qualifiedColName[1];
}
fieldSchemas.add(new FieldSchema(colName, colInfo.getType().getTypeName(), null));
}
return fieldSchemas;
}
Aggregations