use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class HCatPartition method toHivePartition.
/**
 * Builds the metastore {@code Partition} object that corresponds to this HCatPartition.
 * For use from HCatClient.addPartitions(), to construct from user-input.
 *
 * If the storage descriptor has no location, a default one is derived from the
 * table's location and the partition name built from the table's partition
 * columns and this partition's values. Note that the derived location is written
 * back into this object's storage descriptor ({@code sd}).
 *
 * @return a new Partition carrying this object's db/table name, values, parameters and storage descriptor
 * @throws HCatException if the default partition location cannot be constructed
 */
Partition toHivePartition() throws HCatException {
    Partition hivePtn = new Partition();
    hivePtn.setDbName(dbName);
    hivePtn.setTableName(tableName);
    hivePtn.setValues(values);
    hivePtn.setParameters(parameters);
    if (sd.getLocation() == null) {
        LOG.warn("Partition location is not set! Attempting to construct default partition location.");
        try {
            String partName = Warehouse.makePartName(HCatSchemaUtils.getFieldSchemas(hcatTable.getPartCols()), values);
            sd.setLocation(new Path(hcatTable.getSd().getLocation(), partName).toString());
        } catch (MetaException exception) {
            // Chain the MetaException so the root cause is not lost to callers.
            throw new HCatException("Could not construct default partition-path for " + hcatTable.getDbName() + "." + hcatTable.getTableName() + "[" + values + "]", exception);
        }
    }
    hivePtn.setSd(sd);
    // Creation time is recorded in whole seconds since the epoch.
    hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000));
    hivePtn.setLastAccessTimeIsSet(false);
    return hivePtn;
}
use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class HCatTable method toHiveTable.
/**
 * Builds the metastore {@code Table} object that corresponds to this HCatTable.
 *
 * Table properties, the external/managed table type, the comment (stored under the
 * "comment" parameter when non-blank), the storage descriptor, partition keys and
 * owner are copied over. When no owner was set explicitly, the user reported by
 * {@code getConf().getUser()} is used.
 *
 * @return a new Table populated from this object's state
 * @throws HCatException if the owner of the table cannot be determined
 */
Table toHiveTable() throws HCatException {
    Table newTable = new Table();
    newTable.setDbName(dbName);
    newTable.setTableName(tableName);
    if (tblProps != null) {
        newTable.setParameters(tblProps);
    }
    if (isExternal) {
        newTable.putToParameters("EXTERNAL", "TRUE");
        newTable.setTableType(TableType.EXTERNAL_TABLE.toString());
    } else {
        newTable.setTableType(TableType.MANAGED_TABLE.toString());
    }
    if (StringUtils.isNotBlank(this.comment)) {
        newTable.putToParameters("comment", comment);
    }
    newTable.setSd(sd);
    if (partCols != null) {
        ArrayList<FieldSchema> hivePtnCols = new ArrayList<FieldSchema>();
        for (HCatFieldSchema fs : partCols) {
            hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs));
        }
        newTable.setPartitionKeys(hivePtnCols);
    }
    // Creation time is recorded in whole seconds since the epoch.
    newTable.setCreateTime((int) (System.currentTimeMillis() / 1000));
    newTable.setLastAccessTimeIsSet(false);
    try {
        // TODO: Verify that this works for systems using UGI.doAs() (e.g. Oozie).
        newTable.setOwner(owner == null ? getConf().getUser() : owner);
    } catch (Exception exception) {
        // Chain the underlying failure so callers can see why the lookup failed.
        throw new HCatException("Unable to determine owner of table (" + dbName + "." + tableName + ") from HiveConf.", exception);
    }
    return newTable;
}
use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class MetadataJSONSerializer method deserializePartitionSpec.
/**
 * Rebuilds an HCatPartitionSpec from its serialized form.
 *
 * Each input string is decoded with the Thrift JSON protocol into one
 * {@code PartitionSpec}; the decoded specs are then wrapped, in input order,
 * into a single HCatPartitionSpec (constructed with a null first argument).
 *
 * @param hcatPartitionSpecStrings the JSON string representations, one per PartitionSpec
 * @return the HCatPartitionSpec composed of all decoded PartitionSpecs
 * @throws HCatException if any string fails Thrift deserialization
 */
@Override
public HCatPartitionSpec deserializePartitionSpec(List<String> hcatPartitionSpecStrings) throws HCatException {
    try {
        final List<PartitionSpec> decodedSpecs = new ArrayList<PartitionSpec>();
        final TDeserializer jsonReader = new TDeserializer(new TJSONProtocol.Factory());
        final Iterator<String> remaining = hcatPartitionSpecStrings.iterator();
        while (remaining.hasNext()) {
            final PartitionSpec decoded = new PartitionSpec();
            jsonReader.deserialize(decoded, remaining.next(), "UTF-8");
            decodedSpecs.add(decoded);
        }
        final PartitionSpecProxy proxy = PartitionSpecProxy.Factory.get(decodedSpecs);
        return new HCatPartitionSpec(null, proxy);
    } catch (TException deserializationException) {
        throw new HCatException("Failed to deserialize!", deserializationException);
    }
}
use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class FileOutputCommitterContainer method getFinalPath.
/**
 * Find the final name of a given output file, given the output directory
 * and the work directory. If NOT immutable (i.e. appending to existing data is
 * allowed) and the natural destination already exists, attempt names of the
 * form {@code <name> + APPEND_SUFFIX + N + <extension>} till we find one that
 * does not exist, giving up once the counter reaches maxAppendAttempts.
 * @param fs the file system used to probe candidate destinations
 * @param file the file to move
 * @param src the source directory
 * @param dest the target directory
 * @param immutable when true, the destination is returned as-is with no clash handling
 * @return the final path for the specific output file; {@code dest} itself when
 *         {@code file} and {@code src} are the same path
 * @throws java.io.IOException an HCatException with ERROR_MOVE_FAILED if {@code file}
 *         is not located under {@code src}, or if no unique destination could be found
 */
private Path getFinalPath(FileSystem fs, Path file, Path src, Path dest, final boolean immutable) throws IOException {
    URI taskOutputUri = file.toUri();
    URI relativePath = src.toUri().relativize(taskOutputUri);
    // URI.relativize() hands back its argument (the very same object) when the
    // URI cannot be relativized, so the identity comparison here is deliberate.
    if (taskOutputUri == relativePath) {
        throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Can not get the relative path: base = " + src + " child = " + file);
    }
    if (relativePath.getPath().length() > 0) {
        Path itemDest = new Path(dest, relativePath.getPath());
        if (!immutable) {
            String name = relativePath.getPath();
            String filetype;
            int index = name.lastIndexOf('.');
            if (index >= 0) {
                filetype = name.substring(index);
                name = name.substring(0, index);
            } else {
                filetype = "";
            }
            // Probe alternatives to the filename by appending APPEND_SUFFIX + N
            // for as long as the destination clashes and the counter is below
            // maxAppendAttempts. Failure is decided by whether the last probed
            // destination still clashes, not by the counter value: the counter also
            // reaches maxAppendAttempts when the final candidate turned out to be free.
            int counter = 1;
            boolean clash = fs.exists(itemDest);
            for (; clash && counter < maxAppendAttempts; counter++) {
                itemDest = new Path(dest, name + (APPEND_SUFFIX + counter) + filetype);
                clash = fs.exists(itemDest);
            }
            if (clash) {
                throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Could not find a unique destination path for move: file = " + file + " , src = " + src + ", dest = " + dest);
            } else if (counter > APPEND_COUNTER_WARN_THRESHOLD) {
                LOG.warn("Append job used filename clash counter [" + counter + "] which is greater than warning limit [" + APPEND_COUNTER_WARN_THRESHOLD + "]. Please compact this table so that performance is not impacted." + " Please see HIVE-9381 for details.");
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("FinalPath(file:" + file + ":" + src + "->" + dest + "=" + itemDest);
        }
        return itemDest;
    } else {
        // file and src are the same path: the item maps onto dest itself.
        return dest;
    }
}
use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.
the class FileOutputCommitterContainer method registerPartitions.
/**
 * Moves the job's output into its final location and registers the resulting
 * partition(s) with the metastore.
 *
 * Three cases are handled:
 * - Table without partition keys: output is moved into the table directory, the
 *   COLUMN_STATS_ACCURATE table parameter is removed (if present) via alter_table,
 *   and the method returns without adding partitions.
 * - Static partitioning: exactly one partition is expected; its data is moved and
 *   the partition is added unless it already exists.
 * - Dynamic partitioning: one partition per entry of partitionsDiscoveredByPath is
 *   constructed, data is moved, and all partitions are added in one call.
 *
 * If anything fails, partitions already recorded in partitionsAdded are dropped
 * from the metastore again before the failure is reported.
 *
 * @param context the job context; supplies the Configuration and the serialized OutputJobInfo
 * @throws IOException an HCatException is rethrown as-is; any other exception is
 *         wrapped in an HCatException with ERROR_PUBLISHING_PARTITION
 */
private void registerPartitions(JobContext context) throws IOException {
if (dynamicPartitioningUsed) {
// NOTE(review): presumably this populates partitionsDiscoveredByPath, which is iterated below - confirm.
discoverPartitions(context);
}
OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
Configuration conf = context.getConfiguration();
Table table = new Table(jobInfo.getTableInfo().getTable());
Path tblPath = new Path(table.getTTable().getSd().getLocation());
FileSystem fs = tblPath.getFileSystem(conf);
IMetaStoreClient client = null;
HCatTableInfo tableInfo = jobInfo.getTableInfo();
// Tracks what has actually been published, so the catch block below knows what to roll back.
List<Partition> partitionsAdded = new ArrayList<Partition>();
try {
HiveConf hiveConf = HCatUtil.getHiveConf(conf);
client = HCatUtil.getHiveMetastoreClient(hiveConf);
if (table.getPartitionKeys().size() == 0) {
// Move data from temp directory the actual table directory
// No metastore operation required.
Path src = new Path(jobInfo.getLocation());
moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
if (!src.equals(tblPath)) {
fs.delete(src, true);
}
// Drop the COLUMN_STATS_ACCURATE marker from the table parameters and persist the change.
if (table.getParameters() != null && table.getParameters().containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
table.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
client.alter_table(table.getDbName(), table.getTableName(), table.getTTable());
}
return;
}
StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());
// Group and permissions of the table directory, handed to constructPartition for each new partition.
FileStatus tblStat = fs.getFileStatus(tblPath);
String grpName = tblStat.getGroup();
FsPermission perms = tblStat.getPermission();
List<Partition> partitionsToAdd = new ArrayList<Partition>();
if (!dynamicPartitioningUsed) {
// Static partitioning: a single partition described by the job's partition values.
partitionsToAdd.add(constructPartition(context, jobInfo, tblPath.toString(), null, jobInfo.getPartitionValues(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table, fs, grpName, perms));
} else {
// Dynamic partitioning: one partition per discovered path (key) with its partition values (value).
for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) {
partitionsToAdd.add(constructPartition(context, jobInfo, getPartitionRootLocation(entry.getKey(), entry.getValue().size()), entry.getKey(), entry.getValue(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table, fs, grpName, perms));
}
}
// Key/value view of each partition; only used in the log messages below.
ArrayList<Map<String, String>> ptnInfos = new ArrayList<Map<String, String>>();
for (Partition ptn : partitionsToAdd) {
ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn));
}
//Publish the new partition(s)
if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())) {
if (!customDynamicLocationUsed) {
Path src = new Path(ptnRootLocation);
// check here for each dir we're copying out, to see if it
// already exists, error out if so.
// Also, treat dyn-writes as writes to immutable tables.
// dryRun = true, immutable = true
moveTaskOutputs(fs, src, src, tblPath, true, true);
// Second pass with dryRun = false performs the actual move.
moveTaskOutputs(fs, src, src, tblPath, false, true);
if (!src.equals(tblPath)) {
fs.delete(src, true);
}
} else {
moveCustomLocationTaskOutputs(fs, table, hiveConf);
}
try {
updateTableSchema(client, table, jobInfo.getOutputSchema());
LOG.info("HAR is being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos);
client.add_partitions(partitionsToAdd);
partitionsAdded = partitionsToAdd;
} catch (Exception e) {
// There was an error adding partitions : rollback fs copy and rethrow
for (Partition p : partitionsToAdd) {
Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation())));
if (fs.exists(ptnPath)) {
fs.delete(ptnPath, true);
}
}
throw e;
}
} else {
// no harProcessor, regular operation
updateTableSchema(client, table, jobInfo.getOutputSchema());
LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos);
if (partitionsToAdd.size() > 0) {
if (!dynamicPartitioningUsed) {
//Move data from temp directory the actual table directory
if (partitionsToAdd.size() > 1) {
throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, "More than one partition to publish in non-dynamic partitioning job");
}
Partition p = partitionsToAdd.get(0);
Path src = new Path(jobInfo.getLocation());
Path dest = new Path(p.getSd().getLocation());
// First call passes dryRun = true (validation pass), second call performs the move.
moveTaskOutputs(fs, src, src, dest, true, table.isImmutable());
moveTaskOutputs(fs, src, src, dest, false, table.isImmutable());
if (!src.equals(dest)) {
if (src.toString().matches(".*" + Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+.*")) {
// src is scratch directory, need to trim the part key value pairs from path
String diff = StringUtils.difference(src.toString(), dest.toString());
fs.delete(new Path(StringUtils.substringBefore(src.toString(), diff)), true);
} else {
fs.delete(src, true);
}
}
// Now, we check if the partition already exists. If not, we go ahead.
// If so, we error out if immutable, and if mutable, check that the partition's IF
// matches our current job's IF (table's IF) to check for compatibility. If compatible, we
// ignore and do not add. If incompatible, we error out again.
// NOTE(review): this check runs after the data has already been moved above - confirm that ordering is intended.
boolean publishRequired = false;
try {
Partition existingP = client.getPartition(p.getDbName(), p.getTableName(), p.getValues());
if (existingP != null) {
if (table.isImmutable()) {
throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION, "Attempted duplicate partition publish on to immutable table");
} else {
if (!existingP.getSd().getInputFormat().equals(table.getInputFormatClass().getName())) {
throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, "Attempted partition append, where old partition format was " + existingP.getSd().getInputFormat() + " and table format was " + table.getInputFormatClass().getName());
}
}
} else {
publishRequired = true;
}
} catch (NoSuchObjectException e) {
// All good, no such partition exists, move on.
publishRequired = true;
}
if (publishRequired) {
client.add_partitions(partitionsToAdd);
partitionsAdded = partitionsToAdd;
}
} else {
// Dynamic partitioning usecase
if (!customDynamicLocationUsed) {
Path src = new Path(ptnRootLocation);
// dryRun = true, immutable = true
moveTaskOutputs(fs, src, src, tblPath, true, true);
// dryRun = false: perform the actual move.
moveTaskOutputs(fs, src, src, tblPath, false, true);
if (!src.equals(tblPath)) {
fs.delete(src, true);
}
} else {
moveCustomLocationTaskOutputs(fs, table, hiveConf);
}
client.add_partitions(partitionsToAdd);
partitionsAdded = partitionsToAdd;
}
}
// Apply the table directory's group and permissions to each added partition's location,
// so as to have their permissions mimic the table permissions
for (Partition p : partitionsAdded) {
applyGroupAndPerms(fs, new Path(p.getSd().getLocation()), tblStat.getPermission(), tblStat.getGroup(), true);
}
}
} catch (Exception e) {
if (partitionsAdded.size() > 0) {
try {
// Roll back: drop the partitions this call added from the metastore.
// NOTE(review): the trailing 'true' is presumably deleteData - confirm against IMetaStoreClient.
for (Partition p : partitionsAdded) {
client.dropPartition(tableInfo.getDatabaseName(), tableInfo.getTableName(), p.getValues(), true);
}
} catch (Exception te) {
// Keep cause as the original exception
// (the rollback failure 'te' itself is not reported anywhere)
throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
}
}
if (e instanceof HCatException) {
throw (HCatException) e;
} else {
throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
}
} finally {
// Always release the metastore client, on success and on failure alike.
HCatUtil.closeHiveClientQuietly(client);
}
}
Aggregations