use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class FileOutputFormatContainer method handleDuplicatePublish.
/**
 * Handles a duplicate publish of a partition into a partitioned table, or of data
 * into an unpartitioned table, when the table is immutable.
 *
 * For partitioned tables, fails if the partition already exists.
 * For non-partitioned tables, fails if files are present in the table directory.
 * For dynamic partitioned publish, does nothing - the check would need to be done at record-writer time.
 * @param context the job
 * @param outputInfo the output info
 * @param client the metastore client
 * @param table the table being written to
 * @throws IOException
 * @throws org.apache.hadoop.hive.metastore.api.MetaException
 * @throws org.apache.thrift.TException
 */
private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo, IMetaStoreClient client, Table table) throws IOException, MetaException, TException, NoSuchObjectException {
  if (!table.isImmutable()) {
    return;
  }
  if (table.getPartitionKeys().size() > 0) {
    if (!outputInfo.isDynamicPartitioningUsed()) {
      List<String> partitionValues = getPartitionValueList(table, outputInfo.getPartitionValues());
      // fully-specified partition
      List<String> currentParts = client.listPartitionNames(outputInfo.getDatabaseName(), outputInfo.getTableName(), partitionValues, (short) 1);
      if (currentParts.size() > 0) {
        // no need to check the partition for emptiness: its mere existence means a duplicate publish
        throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION);
      }
    }
  } else {
    List<String> partitionValues = getPartitionValueList(table, outputInfo.getPartitionValues());
    // non-partitioned table
    Path tablePath = new Path(table.getTTable().getSd().getLocation());
    FileSystem fs = tablePath.getFileSystem(context.getConfiguration());
    if (!MetaStoreUtils.isDirEmpty(fs, tablePath)) {
      throw new HCatException(ErrorType.ERROR_NON_EMPTY_TABLE, table.getDbName() + "." + table.getTableName());
    }
  }
}
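The duplicate-publish failure surfaces to callers as an HCatException. The hedged sketch below is for illustration only: it assumes HCatException exposes its ErrorType via getErrorType() and that checkOutputSpecs() on HCatOutputFormat is the call path that eventually reaches handleDuplicatePublish(); the surrounding driver method is presumed to declare IOException and InterruptedException.

// Illustrative handling of a duplicate publish into an immutable table.
try {
  new HCatOutputFormat().checkOutputSpecs(job);
} catch (HCatException e) {
  if (e.getErrorType() == ErrorType.ERROR_DUPLICATE_PARTITION) {
    // The target partition already exists; decide whether to skip the write or fail the job.
    System.err.println("Duplicate publish detected: " + e.getMessage());
  } else {
    throw e;
  }
}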
use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class HCatInputFormatReader method prepareRead.
@Override
public ReaderContext prepareRead() throws HCatException {
  try {
    Job job = new Job(conf);
    HCatInputFormat hcif = HCatInputFormat.setInput(job, re.getDbName(), re.getTableName(), re.getFilterString());
    ReaderContextImpl cntxt = new ReaderContextImpl();
    cntxt.setInputSplits(hcif.getSplits(ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null)));
    cntxt.setConf(job.getConfiguration());
    return cntxt;
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}
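For orientation, prepareRead() is normally reached through HCatalog's data-transfer API on the master node: a ReadEntity names the table, DataTransferFactory returns an HCatReader (HCatInputFormatReader is the implementation shown above), and the resulting ReaderContext is serialized out to the slave tasks. A minimal sketch, assuming the builder-style API; "default" and "mytbl" are placeholders:

// Master-side read setup; any failure is reported as HCatException(ERROR_NOT_INITIALIZED).
ReadEntity entity = new ReadEntity.Builder().withDatabase("default").withTable("mytbl").build();
Map<String, String> config = new HashMap<String, String>();
HCatReader reader = DataTransferFactory.getHCatReader(entity, config);
ReaderContext readCntxt = reader.prepareRead();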
use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class HCatOutputFormatWriter method prepareWrite.
@Override
public WriterContext prepareWrite() throws HCatException {
  OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), we.getTableName(), we.getPartitionKVs());
  Job job;
  try {
    job = new Job(conf);
    HCatOutputFormat.setOutput(job, jobInfo);
    HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));
    HCatOutputFormat outFormat = new HCatOutputFormat();
    outFormat.checkOutputSpecs(job);
    outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  WriterContextImpl cntxt = new WriterContextImpl();
  cntxt.setConf(job.getConfiguration());
  return cntxt;
}
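The write side mirrors the read side: a WriteEntity identifies the target table, DataTransferFactory returns an HCatWriter (HCatOutputFormatWriter above), and prepareWrite() produces the WriterContext handed to the slave tasks. A minimal sketch under the same assumptions as the reader example; the database and table names are placeholders:

// Master-side write setup; failures are wrapped in HCatException(ERROR_NOT_INITIALIZED).
WriteEntity entity = new WriteEntity.Builder().withDatabase("default").withTable("mytbl").build();
Map<String, String> config = new HashMap<String, String>();
HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config);
WriterContext writeCntxt = writer.prepareWrite();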
use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class HarOutputCommitterPostProcessor method makeHar.
/**
 * Creates a HAR file from the contents of a given directory, using that directory as the archive root.
 * @param context the job context
 * @param dir the directory to archive
 * @param harFile the HAR file to create
 * @throws IOException if the archive cannot be created
 */
public static void makeHar(JobContext context, String dir, String harFile) throws IOException {
  // Configuration conf = context.getConfiguration();
  // Credentials creds = context.getCredentials();
  // HCatUtil.logAllTokens(LOG,context);
  int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR);
  Path archivePath = new Path(harFile.substring(0, lastSep));
  final String[] args = { "-archiveName", harFile.substring(lastSep + 1, harFile.length()), "-p", dir, "*", archivePath.toString() };
  // }
  try {
    Configuration newConf = new Configuration();
    FileSystem fs = archivePath.getFileSystem(newConf);
    String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION);
    if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) {
      newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting);
      // LOG.info("System.getenv(\"HADOOP_TOKEN_FILE_LOCATION\") =["+ System.getenv("HADOOP_TOKEN_FILE_LOCATION")+"]");
    }
    // for (FileStatus ds : fs.globStatus(new Path(dir, "*"))){
    // LOG.info("src : "+ds.getPath().toUri().toString());
    // }
    final HadoopArchives har = new HadoopArchives(newConf);
    int rc = ToolRunner.run(har, args);
    if (rc != 0) {
      throw new Exception("Har returned error code " + rc);
    }
    // for (FileStatus hs : fs.globStatus(new Path(harFile, "*"))){
    // LOG.info("dest : "+hs.getPath().toUri().toString());
    // }
    // doHarCheck(fs,harFile);
    // LOG.info("Nuking " + dir);
    fs.delete(new Path(dir), true);
  } catch (Exception e) {
    throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e);
  }
}
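The args array assembled above corresponds to the standard hadoop archive invocation (-archiveName <name> -p <parent> <glob> <dest>), so makeHar() archives everything under dir into the given HAR file and then deletes the source directory. A hypothetical call, with placeholder HDFS paths and a jobContext assumed to be in scope:

// Archive a finished partition directory into a sibling .har file (paths are placeholders).
String partDir = "hdfs:///warehouse/mytbl/ds=2011-01-01";
String harPath = "hdfs:///warehouse/mytbl/ds=2011-01-01.har";
HarOutputCommitterPostProcessor.makeHar(jobContext, partDir, harPath);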
use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class HCatOutputFormat method setOutput.
/**
 * Set the information about the output to write for the job. This queries the metadata server
 * to find the StorageHandler to use for the table. It throws an error if the
 * partition is already published.
 * @param conf the Configuration object
 * @param credentials the Credentials object
 * @param outputJobInfo the table output information for the job
 * @throws IOException the exception in communicating with the metadata server
 */
@SuppressWarnings("unchecked")
public static void setOutput(Configuration conf, Credentials credentials, OutputJobInfo outputJobInfo) throws IOException {
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatUtil.getHiveConf(conf);
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());
    List<String> indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE);
    for (String indexName : indexList) {
      Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName);
      if (!index.isDeferredRebuild()) {
        throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported");
      }
    }
    StorageDescriptor sd = table.getTTable().getSd();
    if (sd.isCompressed()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported");
    }
    if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported");
    }
    if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
      throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported");
    }
    // Set up a common id hash for this job, so that when we create any temporary directory
    // later on, it is guaranteed to be unique.
    String idHash;
    if ((idHash = conf.get(HCatConstants.HCAT_OUTPUT_ID_HASH)) == null) {
      idHash = String.valueOf(Math.random());
    }
    conf.set(HCatConstants.HCAT_OUTPUT_ID_HASH, idHash);
    if (table.getTTable().getPartitionKeysSize() == 0) {
      if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) {
        // attempt made to save partition values in non-partitioned table - throw error.
        throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Partition values specified for non-partitioned table");
      }
      // non-partitioned table
      outputJobInfo.setPartitionValues(new HashMap<String, String>());
    } else {
      // partitioned table, we expect partition values
      // convert user specified map to have lower case key names
      Map<String, String> valueMap = new HashMap<String, String>();
      if (outputJobInfo.getPartitionValues() != null) {
        for (Map.Entry<String, String> entry : outputJobInfo.getPartitionValues().entrySet()) {
          valueMap.put(entry.getKey().toLowerCase(), entry.getValue());
        }
      }
      if ((outputJobInfo.getPartitionValues() == null) || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) {
        // dynamic partition usecase - partition values were null, or not all were specified
        // need to figure out which keys are not specified.
        List<String> dynamicPartitioningKeys = new ArrayList<String>();
        boolean firstItem = true;
        for (FieldSchema fs : table.getPartitionKeys()) {
          if (!valueMap.containsKey(fs.getName().toLowerCase())) {
            dynamicPartitioningKeys.add(fs.getName().toLowerCase());
          }
        }
        if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) {
          // If this isn't equal, then bogus key values have been inserted, error out.
          throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified");
        }
        outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys);
        String dynHash;
        if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) {
          dynHash = String.valueOf(Math.random());
        }
        conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);
        // if custom pattern is set in case of dynamic partitioning, configure custom path
        String customPattern = conf.get(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN);
        if (customPattern != null) {
          HCatFileUtil.setCustomPath(customPattern, outputJobInfo);
        }
      }
      outputJobInfo.setPartitionValues(valueMap);
    }
    // To get around hbase failure on single node, see BUG-4383
    conf.set("dfs.client.read.shortcircuit", "false");
    HCatSchema tableSchema = HCatUtil.extractSchema(table);
    StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());
    List<String> partitionCols = new ArrayList<String>();
    for (FieldSchema schema : table.getPartitionKeys()) {
      partitionCols.add(schema.getName());
    }
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);
    // Serialize the output info into the configuration
    outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
    outputJobInfo.setOutputSchema(tableSchema);
    harRequested = getHarRequested(hiveConf);
    outputJobInfo.setHarRequested(harRequested);
    maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
    outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
    HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo);
    Path tblPath = new Path(table.getTTable().getSd().getLocation());
    /* Set the umask in conf such that files/dirs get created with table-dir
     * permissions. Following three assumptions are made:
     * 1. Actual files/dirs creation is done by RecordWriter of underlying
     *    output format. It is assumed that they use default permissions while creation.
     * 2. Default Permissions = FsPermission.getDefault() = 777.
     * 3. UMask is honored by underlying filesystem.
     */
    FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask(tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission()));
    if (Security.getInstance().isSecurityEnabled()) {
      Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested);
    }
  } catch (Exception e) {
    if (e instanceof HCatException) {
      throw (HCatException) e;
    } else {
      throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
    }
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
}
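Putting the pieces together, a MapReduce driver typically calls the Job-based setOutput() overload (seen in prepareWrite() above), which is presumed to delegate to this Configuration-based method, then publishes the table schema and registers HCatOutputFormat as the output format. A minimal sketch; the database name, table name, and partition values are placeholders:

// Hedged driver sketch: write one static partition of an HCatalog table.
Configuration conf = new Configuration();
Job job = new Job(conf, "hcat-write-example");
Map<String, String> partitionValues = new HashMap<String, String>();
partitionValues.put("ds", "2011-01-01");
OutputJobInfo info = OutputJobInfo.create("default", "mytbl", partitionValues);
HCatOutputFormat.setOutput(job, info); // runs the metastore checks shown above
HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));
job.setOutputFormatClass(HCatOutputFormat.class);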