Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.
The class HCatBaseInputFormat, method getSplits.
/**
 * Logically split the set of input files for the job. Returns the
 * underlying InputFormat's splits.
 * @param jobContext the job context object
 * @return the splits, an HCatSplit wrapper over the storage
 *         handler InputSplits
 * @throws IOException on errors from the underlying InputFormat
 * @throws InterruptedException if split computation is interrupted
 */
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
  Configuration conf = jobContext.getConfiguration();

  // Get the job info from the configuration; throws an exception if not initialized.
  InputJobInfo inputJobInfo;
  try {
    inputJobInfo = getJobInfo(conf);
  } catch (Exception e) {
    throw new IOException(e);
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  List<PartInfo> partitionInfoList = inputJobInfo.getPartitions();
  if (partitionInfoList == null) {
    // No partitions match the specified partition filter.
    return splits;
  }

  HiveStorageHandler storageHandler;
  JobConf jobConf;
  // For each matching partition, call getSplits on the underlying InputFormat.
  for (PartInfo partitionInfo : partitionInfoList) {
    jobConf = HCatUtil.getJobConfFromContext(jobContext);
    List<String> setInputPath = setInputPath(jobConf, partitionInfo.getLocation());
    if (setInputPath.isEmpty()) {
      continue;
    }
    Map<String, String> jobProperties = partitionInfo.getJobProperties();
    HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf);
    storageHandler = HCatUtil.getStorageHandler(jobConf, partitionInfo);

    // Get the input format.
    Class inputFormatClass = storageHandler.getInputFormatClass();
    org.apache.hadoop.mapred.InputFormat inputFormat = getMapRedInputFormat(jobConf, inputFormatClass);

    // Call getSplits on the InputFormat and create an HCatSplit for each
    // underlying split. When no desired number of input splits is configured,
    // zero is passed, letting the InputFormat choose its own default.
    // TODO(malewicz): Currently each partition is split independently into
    // a desired number. However, we want the union of all partitions to be
    // split into a desired number while maintaining balanced sizes of input
    // splits.
    int desiredNumSplits = conf.getInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 0);
    org.apache.hadoop.mapred.InputSplit[] baseSplits = inputFormat.getSplits(jobConf, desiredNumSplits);
    for (org.apache.hadoop.mapred.InputSplit split : baseSplits) {
      splits.add(new HCatSplit(partitionInfo, split));
    }
  }
  return splits;
}
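For context, here is a minimal driver-side sketch of how this getSplits() path is typically reached: the client configures HCatInputFormat on the job, and the MapReduce framework then calls getSplits() at submission time. This is not part of the Hive sources; the database, table, filter string, and split count are placeholder values, and the package names and the setInput/setFilter signatures are assumed to match the org.apache.hive.hcatalog namespace of recent releases.

// Hypothetical driver wiring; "default", "page_views", and the filter are example values.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class ReadDriverSketch {
  public static Job configure() throws IOException {
    Configuration conf = new Configuration();
    // The same knob read in getSplits() above; 0 (the default) lets each
    // partition's underlying InputFormat pick its own split count.
    conf.setInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 16);

    Job job = Job.getInstance(conf, "hcat-read-sketch");
    // Selects the table and a partition filter; getSplits() later walks the
    // partitions that match this filter.
    HCatInputFormat.setInput(job, "default", "page_views").setFilter("ds=\"2024-01-01\"");
    job.setInputFormatClass(HCatInputFormat.class);
    // Mapper/reducer setup omitted; the mapper would receive HCatRecord values.
    return job;
  }
}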
Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.
The class HCatBaseOutputFormat, method getOutputFormat.
/**
 * Gets the output format instance.
 * @param context the job context
 * @return the output format instance
 * @throws IOException
 */
protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context) throws IOException {
  OutputJobInfo jobInfo = getJobInfo(context.getConfiguration());
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
      context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
  // Always configure the storage handler with jobProperties/jobConf before calling any methods on it.
  configureOutputStorageHandler(context);
  if (storageHandler instanceof FosterStorageHandler) {
    return new FileOutputFormatContainer(
        ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), context.getConfiguration()));
  } else {
    return new DefaultOutputFormatContainer(
        ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), context.getConfiguration()));
  }
}
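A note on the branch above: FosterStorageHandler is HCatalog's internal shim for tables that have no real storage handler (plain file-backed tables described only by an input format, output format, and SerDe), so those writes go through FileOutputFormatContainer, while tables backed by a genuine HiveStorageHandler get the more generic DefaultOutputFormatContainer.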
Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.
The class HCatOutputFormat, method setOutput.
/**
 * Set the information about the output to write for the job. This queries the metadata server
 * to find the StorageHandler to use for the table. It throws an error if the
 * partition is already published.
 * @param conf the Configuration object
 * @param credentials the Credentials object
 * @param outputJobInfo the table output information for the job
 * @throws IOException when communication with the metadata server fails
 */
@SuppressWarnings("unchecked")
public static void setOutput(Configuration conf, Credentials credentials, OutputJobInfo outputJobInfo) throws IOException {
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatUtil.getHiveConf(conf);
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());

    List<String> indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE);
    for (String indexName : indexList) {
      Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName);
      if (!index.isDeferredRebuild()) {
        throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported");
      }
    }

    StorageDescriptor sd = table.getTTable().getSd();
    if (sd.isCompressed()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported");
    }
    if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported");
    }
    if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported");
    }

    // Set up a common id hash for this job, so that any temporary directory
    // created later on is guaranteed to be unique.
    String idHash;
    if ((idHash = conf.get(HCatConstants.HCAT_OUTPUT_ID_HASH)) == null) {
      idHash = String.valueOf(Math.random());
    }
    conf.set(HCatConstants.HCAT_OUTPUT_ID_HASH, idHash);

    if (table.getTTable().getPartitionKeysSize() == 0) {
      if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) {
        // Attempt made to save partition values in a non-partitioned table - throw an error.
        throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Partition values specified for non-partitioned table");
      }
      // Non-partitioned table.
      outputJobInfo.setPartitionValues(new HashMap<String, String>());
    } else {
      // Partitioned table, so partition values are expected.
      // Convert the user-specified map to lower-case key names.
      Map<String, String> valueMap = new HashMap<String, String>();
      if (outputJobInfo.getPartitionValues() != null) {
        for (Map.Entry<String, String> entry : outputJobInfo.getPartitionValues().entrySet()) {
          valueMap.put(entry.getKey().toLowerCase(), entry.getValue());
        }
      }
      if ((outputJobInfo.getPartitionValues() == null) || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) {
        // Dynamic partitioning use case - partition values were null, or not all were
        // specified; figure out which keys are not specified.
        List<String> dynamicPartitioningKeys = new ArrayList<String>();
        boolean firstItem = true;
        for (FieldSchema fs : table.getPartitionKeys()) {
          if (!valueMap.containsKey(fs.getName().toLowerCase())) {
            dynamicPartitioningKeys.add(fs.getName().toLowerCase());
          }
        }
        if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) {
          // If this isn't equal, then bogus key values have been inserted; error out.
          throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified");
        }
        outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys);
        String dynHash;
        if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) {
          dynHash = String.valueOf(Math.random());
        }
        conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);
        // If a custom pattern is set for dynamic partitioning, configure the custom path.
        String customPattern = conf.get(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN);
        if (customPattern != null) {
          HCatFileUtil.setCustomPath(customPattern, outputJobInfo);
        }
      }
      outputJobInfo.setPartitionValues(valueMap);
    }

    // To get around an HBase failure on a single node, see BUG-4383.
    conf.set("dfs.client.read.shortcircuit", "false");

    HCatSchema tableSchema = HCatUtil.extractSchema(table);
    StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());
    List<String> partitionCols = new ArrayList<String>();
    for (FieldSchema schema : table.getPartitionKeys()) {
      partitionCols.add(schema.getName());
    }
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);

    // Serialize the output info into the configuration.
    outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
    outputJobInfo.setOutputSchema(tableSchema);
    harRequested = getHarRequested(hiveConf);
    outputJobInfo.setHarRequested(harRequested);
    maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
    outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
    HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo);

    Path tblPath = new Path(table.getTTable().getSd().getLocation());
    /* Set the umask in conf such that files/dirs get created with table-dir
     * permissions. The following three assumptions are made:
     * 1. Actual file/dir creation is done by the RecordWriter of the underlying
     *    output format, which is assumed to use default permissions at creation time.
     * 2. Default permissions = FsPermission.getDefault() = 777.
     * 3. The umask is honored by the underlying filesystem.
     */
    FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask(tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission()));

    if (Security.getInstance().isSecurityEnabled()) {
      Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested);
    }
  } catch (Exception e) {
    if (e instanceof HCatException) {
      throw (HCatException) e;
    } else {
      throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
    }
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
}
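For orientation, here is a minimal sketch of the write path that leads into setOutput(): a client builds an OutputJobInfo and hands it to HCatOutputFormat before submitting the job. This is not Hive source code; the database, table, and partition values are placeholders, the package names assume the org.apache.hive.hcatalog namespace, and the Configuration-based getTableSchema overload is assumed (its signature has varied across releases).

// Hypothetical driver wiring; "default", "page_views", and "ds" are example values only.
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class WriteDriverSketch {
  public static Job configure() throws IOException {
    Job job = Job.getInstance(new Configuration(), "hcat-write-sketch");

    // Static partition spec; leaving keys out of this map is what triggers the
    // dynamic-partitioning branch shown in setOutput() above.
    Map<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("ds", "2024-01-01");

    OutputJobInfo info = OutputJobInfo.create("default", "page_views", partitionValues);
    // Internally this resolves the table's HiveStorageHandler via the metastore,
    // as shown in setOutput() above.
    HCatOutputFormat.setOutput(job, info);

    // Write with the table's own schema (a narrower projection could be set instead).
    HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
    HCatOutputFormat.setSchema(job, schema);

    job.setOutputFormatClass(HCatOutputFormat.class);
    return job;
  }
}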
Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.
The class CreateTableHook, method postAnalyze.
@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<? extends Serializable>> rootTasks) throws SemanticException {
  if (rootTasks.size() == 0) {
    // No DDL task is created in the case of CREATE TABLE IF NOT EXISTS
    // when the table already exists.
    return;
  }
  CreateTableDesc desc = ((DDLTask) rootTasks.get(rootTasks.size() - 1)).getWork().getCreateTblDesc();
  if (desc == null) {
    // desc is null for CREATE TABLE LIKE, whose information is carried in
    // CreateTableLikeDesc. HCat disallows CREATE TABLE LIKE in the pre-hook,
    // so desc should never be null here; this is just a safeguard.
    return;
  }
  Map<String, String> tblProps = desc.getTblProps();
  if (tblProps == null) {
    // tblProps will be null if the user didn't specify TBLPROPERTIES in the
    // CREATE TABLE command.
    tblProps = new HashMap<String, String>();
  }

  // First check whether we will allow the user to create the table.
  String storageHandler = desc.getStorageHandler();
  if (!StringUtils.isEmpty(storageHandler)) {
    try {
      HiveStorageHandler storageHandlerInst = HCatUtil.getStorageHandler(context.getConf(), desc.getStorageHandler(), desc.getSerName(), desc.getInputFormat(), desc.getOutputFormat());
      // Authorization checks are performed by storageHandler.getAuthorizationProvider()
      // if StorageDelegationAuthorizationProvider is used.
    } catch (IOException e) {
      throw new SemanticException(e);
    }
  }

  if (desc != null) {
    try {
      Table table = context.getHive().newTable(desc.getTableName());
      if (desc.getLocation() != null) {
        table.setDataLocation(new Path(desc.getLocation()));
      }
      if (desc.getStorageHandler() != null) {
        table.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, desc.getStorageHandler());
      }
      for (Map.Entry<String, String> prop : tblProps.entrySet()) {
        table.setProperty(prop.getKey(), prop.getValue());
      }
      for (Map.Entry<String, String> prop : desc.getSerdeProps().entrySet()) {
        table.setSerdeParam(prop.getKey(), prop.getValue());
      }
      if (HCatAuthUtil.isAuthorizationEnabled(context.getConf())) {
        authorize(table, Privilege.CREATE);
      }
    } catch (HiveException ex) {
      throw new SemanticException(ex);
    }
  }

  desc.setTblProps(tblProps);
  // tableName is an instance field of this hook, captured during pre-analysis.
  context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName);
}
Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.
The class InitializeInput, method extractPartInfo.
private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd, Map<String, String> parameters, Configuration conf, InputJobInfo inputJobInfo) throws IOException {
  StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters);
  Properties hcatProperties = new Properties();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);
  // Copy the properties contributed by the storage handler into jobProperties.
  Map<String, String> jobProperties = HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);
  for (String key : parameters.keySet()) {
    hcatProperties.put(key, parameters.get(key));
  }
  // Bloating PartInfo with inputJobInfo is not good.
  return new PartInfo(schema, storageHandler, sd.getLocation(), hcatProperties, jobProperties, inputJobInfo.getTableInfo());
}
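To make the jobProperties flow above concrete, here is a minimal sketch of a custom storage handler. The class name, property key, and choice of TextInputFormat are hypothetical, and it assumes a Hive version where HiveStorageHandler exposes configureInputJobProperties (which is what the HCatUtil.getInputJobProperties call above ultimately invokes); extending DefaultStorageHandler keeps the example short.

// Hypothetical handler; the property key and TextInputFormat are examples only.
import java.util.Map;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.TextInputFormat;

public class ExampleStorageHandler extends DefaultStorageHandler {

  @Override
  public Class<? extends InputFormat> getInputFormatClass() {
    // HCatBaseInputFormat.getSplits() asks the handler for this class and then
    // delegates split computation to it.
    return TextInputFormat.class;
  }

  @Override
  public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    // Anything put here is collected by HCatUtil.getInputJobProperties(), stored in
    // PartInfo.getJobProperties(), and later copied onto the JobConf before the
    // underlying InputFormat's getSplits() runs.
    jobProperties.put("example.scan.batch.size", "1000");
  }
}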