Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
In the class TestHCatUtil, method testGetTableSchemaWithPtnColsSerDeReportedFields.
/**
 * Hive models a table with two classes:
 * <ul>
 * <li>org.apache.hadoop.hive.metastore.api.Table - exactly what's stored in the metastore</li>
 * <li>org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table</li>
 * </ul>
 * This test builds an api.Table whose StorageDescriptor declares no columns, wraps it
 * in a metadata.Table, and verifies that the fields reported by the SerDe show up in
 * the schema returned by {@code HCatUtil.getTableSchemaWithPtnCols}.
 */
@Test
public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException {
  // SerDe configuration: Thrift deserializer over the IntString test class.
  Map<String, String> serdeParams = Maps.newHashMap();
  serdeParams.put(serdeConstants.SERIALIZATION_CLASS, "org.apache.hadoop.hive.serde2.thrift.test.IntString");
  serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol");
  SerDeInfo serDeInfo = new SerDeInfo(null, "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", serdeParams);

  // The column list is deliberately empty - the SerDe is expected to report the fields.
  List<FieldSchema> noColumns = new ArrayList<FieldSchema>();
  List<String> noBucketCols = new ArrayList<String>();
  List<Order> noSortCols = new ArrayList<Order>();
  Map<String, String> sdParams = new HashMap<String, String>();
  StorageDescriptor storageDescriptor = new StorageDescriptor(
      noColumns,
      "location",
      "org.apache.hadoop.mapred.TextInputFormat",
      "org.apache.hadoop.mapred.TextOutputFormat",
      false,
      -1,
      serDeInfo,
      noBucketCols,
      noSortCols,
      sdParams);

  // Metastore-level table with no partition keys and no table parameters.
  List<FieldSchema> noPartitionKeys = new ArrayList<FieldSchema>();
  Map<String, String> tableParams = new HashMap<String, String>();
  org.apache.hadoop.hive.metastore.api.Table metastoreTable =
      new org.apache.hadoop.hive.metastore.api.Table(
          "test_tblname",
          "test_dbname",
          "test_owner",
          0,
          0,
          0,
          storageDescriptor,
          noPartitionKeys,
          tableParams,
          "viewOriginalText",
          "viewExpandedText",
          TableType.EXTERNAL_TABLE.name());
  Table wrappedTable = new Table(metastoreTable);

  // Fields the test expects the SerDe to report for IntString.
  List<HCatFieldSchema> expectedFields = Lists.newArrayList(
      new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null),
      new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null),
      new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null));
  HCatSchema expectedSchema = new HCatSchema(expectedFields);

  HCatSchema actualSchema = HCatUtil.getTableSchemaWithPtnCols(wrappedTable);
  Assert.assertEquals(expectedSchema, actualSchema);
}
Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
In the class AlterTableSetSerdePropsOperation, method doAlteration.
/**
 * Copies every property from the operation descriptor ({@code desc.getProps()}) into
 * the SerDe parameters of the storage descriptor resolved for the given table/partition.
 *
 * @param table the table being altered
 * @param partition the partition being altered; passed through to getStorageDescriptor
 * @throws HiveException declared by the overridden method
 */
@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
  getStorageDescriptor(table, partition)
      .getSerdeInfo()
      .getParameters()
      .putAll(desc.getProps());
}
Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
In the class AlterTableNotClusteredOperation, method doAlteration.
/**
 * Removes the clustering definition from the storage descriptor resolved for the
 * given table/partition: bucket columns and sort columns are replaced with empty
 * lists and the bucket count is set to -1.
 *
 * @param table the table being altered
 * @param partition the partition being altered; passed through to getStorageDescriptor
 * @throws HiveException declared by the overridden method
 */
@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
  StorageDescriptor descriptor = getStorageDescriptor(table, partition);

  descriptor.setBucketCols(new ArrayList<>());
  // A bucket count of -1 turns bucketing off.
  descriptor.setNumBuckets(-1);
  descriptor.setSortCols(new ArrayList<>());
}
Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
In the class AlterTableNotSortedOperation, method doAlteration.
/**
 * Removes the sort definition from the storage descriptor resolved for the given
 * table/partition by replacing its sort columns with an empty list.
 *
 * @param table the table being altered
 * @param partition the partition being altered; passed through to getStorageDescriptor
 * @throws HiveException declared by the overridden method
 */
@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
  StorageDescriptor descriptor = getStorageDescriptor(table, partition);
  descriptor.setSortCols(new ArrayList<>());
}
Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
In the class HCatOutputFormat, method setOutput.
/**
 * Set the information about the output to write for the job. This queries the metadata server
 * for the target table, validates that the table can be written to, resolves static and dynamic
 * partition keys, looks up the StorageHandler to use for the table, and records the resulting
 * settings on {@code outputJobInfo} and {@code conf}.
 * <p>
 * The write is rejected with an {@code ERROR_NOT_SUPPORTED} HCatException when the table's
 * storage descriptor is compressed, declares bucket columns, declares sort columns, or when the
 * table is transactional. It is rejected with {@code ERROR_INVALID_PARTITION_VALUES} when
 * partition values are supplied for a non-partitioned table or when unknown partition keys
 * are supplied.
 * <p>
 * NOTE(review): earlier documentation stated that an error is thrown if the partition is already
 * published; no such check is performed directly in this method - confirm whether one of the
 * helpers called here does it.
 *
 * @param conf the Configuration object
 * @param credentials the Credentials object
 * @param outputJobInfo the table output information for the job
 * @throws IOException the exception in communicating with the metadata server; any failure that
 *         is not already an HCatException is wrapped in an HCatException with ERROR_SET_OUTPUT
 */
@SuppressWarnings("unchecked")
public static void setOutput(Configuration conf, Credentials credentials, OutputJobInfo outputJobInfo) throws IOException {
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatUtil.getHiveConf(conf);
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());

    // Reject table layouts that cannot be written from Pig/Mapreduce.
    StorageDescriptor sd = table.getTTable().getSd();
    if (sd.isCompressed()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported");
    }
    if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported");
    }
    if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported");
    }
    if (AcidUtils.isTransactionalTable(table)) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a transactional table " + table.getFullyQualifiedName() + " from Pig/Mapreduce is not supported");
    }

    // Set up a common id hash for this job, so that when we create any temporary directory
    // later on, it is guaranteed to be unique. An id already present in conf is reused.
    String idHash = conf.get(HCatConstants.HCAT_OUTPUT_ID_HASH);
    if (idHash == null) {
      DecimalFormat df = new DecimalFormat("#.####################");
      idHash = df.format(Math.random());
    }
    conf.set(HCatConstants.HCAT_OUTPUT_ID_HASH, idHash);

    if (table.getTTable().getPartitionKeysSize() == 0) {
      if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) {
        // attempt made to save partition values in non-partitioned table - throw error.
        throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Partition values specified for non-partitioned table");
      }
      // non-partitioned table
      outputJobInfo.setPartitionValues(new HashMap<String, String>());
    } else {
      // partitioned table, we expect partition values
      // convert user specified map to have lower case key names
      // NOTE(review): toLowerCase() uses the default locale here and below - confirm that is intended.
      Map<String, String> valueMap = new HashMap<String, String>();
      if (outputJobInfo.getPartitionValues() != null) {
        for (Map.Entry<String, String> entry : outputJobInfo.getPartitionValues().entrySet()) {
          valueMap.put(entry.getKey().toLowerCase(), entry.getValue());
        }
      }
      if ((outputJobInfo.getPartitionValues() == null) || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) {
        // dynamic partition usecase - partition values were null, or not all were specified
        // need to figure out which keys are not specified.
        List<String> dynamicPartitioningKeys = new ArrayList<String>();
        for (FieldSchema fs : table.getPartitionKeys()) {
          if (!valueMap.containsKey(fs.getName().toLowerCase())) {
            dynamicPartitioningKeys.add(fs.getName().toLowerCase());
          }
        }
        if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) {
          // If this isn't equal, then bogus key values have been inserted, error out.
          throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified");
        }
        outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys);

        // Reuse the dynamic-partitioning job id from conf when present, otherwise generate one.
        String dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID);
        if (dynHash == null) {
          dynHash = String.valueOf(Math.random());
        }
        conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);

        // if custom pattern is set in case of dynamic partitioning, configure custom path
        String customPattern = conf.get(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN);
        if (customPattern != null) {
          HCatFileUtil.setCustomPath(customPattern, outputJobInfo);
        }
      }
      outputJobInfo.setPartitionValues(valueMap);
    }

    // To get around hbase failure on single node, see BUG-4383
    conf.set("dfs.client.read.shortcircuit", "false");
    HCatSchema tableSchema = HCatUtil.extractSchema(table);
    StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());

    // NOTE(review): partitionCols is not read again in this method; the loop is left in place in
    // case table.getPartitionKeys() has side effects - confirm before removing.
    List<String> partitionCols = new ArrayList<String>();
    for (FieldSchema schema : table.getPartitionKeys()) {
      partitionCols.add(schema.getName());
    }
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);

    // Serialize the output info into the configuration
    outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
    outputJobInfo.setOutputSchema(tableSchema);
    harRequested = getHarRequested(hiveConf);
    outputJobInfo.setHarRequested(harRequested);
    maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
    outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
    HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo);

    Path tblPath = new Path(table.getTTable().getSd().getLocation());
    /* Set the umask in conf such that files/dirs get created with table-dir
     * permissions. Following three assumptions are made:
     * 1. Actual files/dirs creation is done by RecordWriter of underlying
     * output format. It is assumed that they use default permissions while creation.
     * 2. Default Permissions = FsPermission.getDefault() = 777.
     * 3. UMask is honored by underlying filesystem.
     */
    FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask(tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission()));

    if (Security.getInstance().isSecurityEnabled()) {
      Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested);
    }
  } catch (HCatException e) {
    // Already carries a specific error type - propagate unchanged.
    throw e;
  } catch (Exception e) {
    // Anything else is reported as a generic set-output failure, keeping the cause.
    throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
}
Aggregations