Use of org.apache.hadoop.hive.serde2.SerDeSpec in project hive by apache.
The class DDLTask, method createTableLike.
/**
 * Create a new table like an existing table.
 *
 * @param db
 *          The database in question.
 * @param crtTbl
 *          This is the table we're creating.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 */
private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exception {
  // Get the existing table
  Table oldtbl = db.getTable(crtTbl.getLikeTableName());
  Table tbl;
  if (oldtbl.getTableType() == TableType.VIRTUAL_VIEW || oldtbl.getTableType() == TableType.MATERIALIZED_VIEW) {
    String targetTableName = crtTbl.getTableName();
    tbl = db.newTable(targetTableName);
    if (crtTbl.getTblProps() != null) {
      tbl.getTTable().getParameters().putAll(crtTbl.getTblProps());
    }
    tbl.setTableType(TableType.MANAGED_TABLE);
    if (crtTbl.isExternal()) {
      tbl.setProperty("EXTERNAL", "TRUE");
      tbl.setTableType(TableType.EXTERNAL_TABLE);
    }
    tbl.setFields(oldtbl.getCols());
    tbl.setPartCols(oldtbl.getPartCols());
    if (crtTbl.getDefaultSerName() == null) {
      LOG.info("Default to LazySimpleSerDe for table " + crtTbl.getTableName());
      tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    } else {
      // let's validate that the serde exists
      validateSerDe(crtTbl.getDefaultSerName());
      tbl.setSerializationLib(crtTbl.getDefaultSerName());
    }
    if (crtTbl.getDefaultSerdeProps() != null) {
      Iterator<Entry<String, String>> iter = crtTbl.getDefaultSerdeProps().entrySet().iterator();
      while (iter.hasNext()) {
        Entry<String, String> m = iter.next();
        tbl.setSerdeParam(m.getKey(), m.getValue());
      }
    }
    tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
    tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
    tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
    tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
  } else {
    tbl = oldtbl;
    // figure out the database name and table name of the target table
    String targetTableName = crtTbl.getTableName();
    String[] names = Utilities.getDbTableName(targetTableName);
    tbl.setDbName(names[0]);
    tbl.setTableName(names[1]);
    // we are reusing the old table object, so reset the owner to the current user for the new table
    tbl.setOwner(SessionState.getUserFromAuthenticator());
    if (crtTbl.getLocation() != null) {
      tbl.setDataLocation(new Path(crtTbl.getLocation()));
    } else {
      tbl.unsetDataLocation();
    }
    Class<? extends Deserializer> serdeClass = oldtbl.getDeserializerClass();
    Map<String, String> params = tbl.getParameters();
    // We should copy only those table parameters that are specified in the config.
    SerDeSpec spec = AnnotationUtils.getAnnotation(serdeClass, SerDeSpec.class);
    String paramsStr = HiveConf.getVar(conf, HiveConf.ConfVars.DDL_CTL_PARAMETERS_WHITELIST);
    Set<String> retainer = new HashSet<String>();
    // for a non-native table, the storage_handler property should be retained
    retainer.add(META_TABLE_STORAGE);
    if (spec != null && spec.schemaProps() != null) {
      retainer.addAll(Arrays.asList(spec.schemaProps()));
    }
    if (paramsStr != null) {
      retainer.addAll(Arrays.asList(paramsStr.split(",")));
    }
    // Retain the Parquet INT96 write zone property to keep Parquet timezone bugfixes.
    if (params.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY) != null) {
      retainer.add(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY);
    }
    if (!retainer.isEmpty()) {
      params.keySet().retainAll(retainer);
    } else {
      params.clear();
    }
    if (crtTbl.getTblProps() != null) {
      params.putAll(crtTbl.getTblProps());
    }
    if (crtTbl.isUserStorageFormat()) {
      tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
      tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
      tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
      tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
      if (crtTbl.getDefaultSerName() == null) {
        LOG.info("Default to LazySimpleSerDe for like table " + crtTbl.getTableName());
        tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
      } else {
        // let's validate that the serde exists
        validateSerDe(crtTbl.getDefaultSerName());
        tbl.setSerializationLib(crtTbl.getDefaultSerName());
      }
    }
    tbl.getTTable().setTemporary(crtTbl.isTemporary());
    if (crtTbl.isExternal()) {
      tbl.setProperty("EXTERNAL", "TRUE");
      tbl.setTableType(TableType.EXTERNAL_TABLE);
    } else {
      tbl.getParameters().remove("EXTERNAL");
    }
  }
  if (!Utilities.isDefaultNameNode(conf)) {
    // If a location is specified, ensure that it is a fully qualified name
    makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName(), conf);
  }
  if (crtTbl.getLocation() == null && !tbl.isPartitioned()
      && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), StatsSetupConst.TRUE);
  }
  // create the table
  db.createTable(tbl, crtTbl.getIfNotExists());
  addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
  return 0;
}
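The parameter-retention logic above hinges on the SerDeSpec annotation: a SerDe advertises which table properties define its schema through schemaProps(), and createTableLike retains exactly those, plus the storage handler key, the comma-separated whitelist from HiveConf.ConfVars.DDL_CTL_PARAMETERS_WHITELIST, and the Parquet INT96 write zone property. A minimal standalone sketch of reading that annotation, assuming Hive's serde2 and common-util jars are on the classpath (the demo class ReadSerDeSpec is ours for illustration, not part of Hive):

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.SerDeSpec;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hive.common.util.AnnotationUtils;

// Hypothetical demo, not part of Hive: print the schema properties that
// LazySimpleSerDe declares through its @SerDeSpec annotation. These are the
// same keys createTableLike() above adds to its retainer set.
public class ReadSerDeSpec {
  public static void main(String[] args) {
    SerDeSpec spec = AnnotationUtils.getAnnotation(LazySimpleSerDe.class, SerDeSpec.class);
    if (spec == null) {
      System.out.println("no @SerDeSpec annotation present");
    } else {
      System.out.println(Arrays.toString(spec.schemaProps()));
    }
  }
}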
Use of org.apache.hadoop.hive.serde2.SerDeSpec in project hive by apache.
The class FetchOperator, method needConversion.
// if the table and all of its partitions have the same schema and serde, there is no need to convert
private boolean needConversion(TableDesc tableDesc, List<PartitionDesc> partDescs) {
  Class<?> tableSerDe = tableDesc.getDeserializerClass();
  SerDeSpec spec = AnnotationUtils.getAnnotation(tableSerDe, SerDeSpec.class);
  if (null == spec) {
    // no @SerDeSpec annotation on the SerDe, so be conservative
    // and say conversion is needed.
    return true;
  }
  String[] schemaProps = spec.schemaProps();
  Properties tableProps = tableDesc.getProperties();
  for (PartitionDesc partitionDesc : partDescs) {
    if (!tableSerDe.getName().equals(partitionDesc.getDeserializerClassName())) {
      return true;
    }
    Properties partProps = partitionDesc.getProperties();
    for (String schemaProp : schemaProps) {
      if (!org.apache.commons.lang3.StringUtils.equals(tableProps.getProperty(schemaProp),
          partProps.getProperty(schemaProp))) {
        return true;
      }
    }
  }
  return false;
}
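needConversion only works for SerDes that declare their schema-defining properties; without a @SerDeSpec annotation it conservatively reports that conversion is needed. A hedged sketch of such a declaration on a custom SerDe (MySerDe and the my.custom.schema.version key are illustrative, not from Hive):

import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeSpec;

// Illustrative only. needConversion() compares each property listed here
// between the table descriptor and every partition descriptor; any mismatch
// (or a different SerDe class on a partition) forces conversion.
@SerDeSpec(schemaProps = {
    serdeConstants.LIST_COLUMNS,      // "columns"
    serdeConstants.LIST_COLUMN_TYPES, // "columns.types"
    "my.custom.schema.version"        // hypothetical SerDe-specific key
})
public abstract class MySerDe extends AbstractSerDe {
  // initialize()/serialize()/deserialize() omitted from this sketch
}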