use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.
the class Hive method loadPartition.
/**
* Load a directory into a Hive Table Partition - Alters existing content of
* the partition with the contents of loadPath. - If the partition does not
* exist - one is created - files in loadPath are moved into Hive. But the
* directory itself is not removed.
*
* The files are moved (or copied) first and the partition metadata is added or
* altered in the metastore afterwards, so that readers waiting on the metadata
* do not observe partial data.
*
* @param loadPath
* Directory containing files to load into Table
* @param tbl
* table to be loaded.
* @param partSpec
* defines which partition needs to be loaded
* @param loadFileType
* if REPLACE_ALL - replace files in the table,
* otherwise add files to table (KEEP_EXISTING, OVERWRITE_EXISTING)
* @param inheritTableSpecs if true, on [re]creating the partition, take the
* location/inputformat/outputformat/serde details from table spec
* @param isSkewedStoreAsSubdir
* if true, the list bucketing location mappings are rebuilt from the partition
* path and stored in the partition's skewed info
* @param isSrcLocal
* If the source directory is LOCAL
* @param isAcidIUDoperation
* true if this is an ACID operation Insert/Update/Delete operation
* @param hasFollowingStatsTask
* true if there is a following task which updates the stats, so, this method need not update.
* @param writeId write ID allocated for the current load operation
* @param stmtId statement ID of the current load statement
* @return Partition object being loaded with data
* @throws HiveException wrapping any IOException, MetaException, InvalidOperationException
* or TException raised while moving files or updating the metastore
*/
public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec, LoadFileType loadFileType, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcidIUDoperation, boolean hasFollowingStatsTask, Long writeId, int stmtId) throws HiveException {
Path tblDataLocationPath = tbl.getDataLocation();
boolean isMmTableWrite = AcidUtils.isInsertOnlyTable(tbl.getParameters());
assert tbl.getPath() != null : "null==getPath() for " + tbl.getTableName();
boolean isFullAcidTable = AcidUtils.isFullAcidTable(tbl);
try {
// Get the partition object if it already exists
Partition oldPart = getPartition(tbl, partSpec, false);
/**
* Move files before creating the partition since down stream processes
* check for existence of partition in metadata before accessing the data.
* If partition is created before data is moved, downstream waiting
* processes might move forward with partial data
*/
Path oldPartPath = (oldPart != null) ? oldPart.getDataLocation() : null;
Path newPartPath = null;
if (inheritTableSpecs) {
// Default target: <table location>/<partition path>, using the table location's scheme and authority.
Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec));
newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath());
if (oldPart != null) {
/*
* If we are moving the partition across filesystem boundaries
* inherit from the table properties. Otherwise (same filesystem) use the
* original partition location.
*
* See: HIVE-1707 and HIVE-2117 for background
*/
FileSystem oldPartPathFS = oldPartPath.getFileSystem(getConf());
FileSystem loadPathFS = loadPath.getFileSystem(getConf());
if (FileUtils.equalsFileSystem(oldPartPathFS, loadPathFS)) {
newPartPath = oldPartPath;
}
}
} else {
// NOTE(review): when the partition does not exist yet, oldPartPath is null here, so
// newPartPath stays null and newPartPath.toString() further down would fail - confirm
// that callers never pass inheritTableSpecs=false for a new partition.
newPartPath = oldPartPath;
}
List<Path> newFiles = null;
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin("MoveTask", "FileMoves");
// If DML events are enabled, the table is not temporary and the partition already exists,
// collect the list of files added by this load. For partitions that do not exist yet (e.g.
// dynamic partition inserts), the add partition event will capture the list of files added.
if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && (null != oldPart)) {
newFiles = Collections.synchronizedList(new ArrayList<Path>());
}
// Note: this assumes both paths are qualified; which they are, currently.
if (isMmTableWrite && loadPath.equals(newPartPath)) {
// MM insert query, move itself is a no-op.
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("not moving " + loadPath + " to " + newPartPath + " (MM)");
}
assert !isAcidIUDoperation;
// Nothing was moved, so the new files have to be listed from the load path itself.
if (areEventsForDmlNeeded(tbl, oldPart)) {
newFiles = listFilesCreatedByQuery(loadPath, writeId, stmtId);
}
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("maybe deleting stuff from " + oldPartPath + " (new " + newPartPath + ") for replace");
}
} else {
// Either a non-MM query, or a load into MM table from an external source.
PathFilter filter = FileUtils.HIDDEN_FILES_PATH_FILTER;
Path destPath = newPartPath;
if (isMmTableWrite) {
// We will load into MM directory, and delete from the parent if needed.
// TODO: this looks invalid after ACID integration. What about base dirs?
destPath = new Path(destPath, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
// TODO: loadFileType for MM table will no longer be REPLACE_ALL
filter = (loadFileType == LoadFileType.REPLACE_ALL) ? new JavaUtils.IdPathFilter(writeId, stmtId, false, true) : filter;
} else if (!isAcidIUDoperation && isFullAcidTable) {
// Load into a full ACID table that is not an insert/update/delete: adjust the destination path.
destPath = fixFullAcidPathForLoadData(loadFileType, destPath, writeId, stmtId, tbl);
}
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("moving " + loadPath + " to " + destPath);
}
// todo: why is "&& !isAcidIUDoperation" needed here?
if (!isFullAcidTable && ((loadFileType == LoadFileType.REPLACE_ALL) || (oldPart == null && !isAcidIUDoperation))) {
// for fullAcid tables we don't delete files for commands with OVERWRITE - we create a new
// base_x. (there is Insert Overwrite and Load Data Overwrite)
boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge"));
// TODO: this should never run for MM tables anymore. Remove the flag, and maybe the filter?
replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), isSrcLocal, isAutoPurge, newFiles, filter, isMmTableWrite, !tbl.isTemporary());
} else {
// Otherwise add the files next to the existing content of the destination.
FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcidIUDoperation, (loadFileType == LoadFileType.OVERWRITE_EXISTING), newFiles, tbl.getNumBuckets() > 0, isFullAcidTable);
}
}
perfLogger.PerfLogEnd("MoveTask", "FileMoves");
// Data is in place; from here on only the partition metadata is built and published.
Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
validatePartition(newTPart);
// When inserting into a new partition, the add partition event takes care of insert event
if ((null != oldPart) && (null != newFiles)) {
fireInsertEvent(tbl, partSpec, (loadFileType == LoadFileType.REPLACE_ALL), newFiles);
} else {
LOG.debug("No new files were created, and is not a replace, or we're inserting into a " + "partition that does not exist yet. Skipping generating INSERT event.");
}
// No stats task follows this load, so the column stats will be inaccurate: clear their state.
if (!hasFollowingStatsTask) {
StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
}
// For skewed tables stored as sub-directories, refresh the list bucketing location mappings.
if (isSkewedStoreAsSubdir) {
org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
/* Construct list bucketing location mappings from sub-directory name. */
Map<List<String>, String> skewedColValueLocationMaps = constructListBucketingLocationMap(newPartPath, skewedInfo);
/* Add list bucketing location mappings. */
skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
}
// With automatic stats gathering disabled, mark the basic stats as not up to date.
if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
}
if (oldPart == null) {
// Brand new partition: start from an empty parameter map.
// NOTE(review): this replaces the parameter map, so the stats-state changes made above
// through newTPart.getParameters() appear to be discarded for new partitions - confirm
// that this is intended.
newTPart.getTPartition().setParameters(new HashMap<String, String>());
if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters(), MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
}
// Note: we are creating a brand new partition, so this is going to be valid for ACID.
List<FileStatus> filesForStats = null;
if (isFullAcidTable || isMmTableWrite) {
filesForStats = AcidUtils.getAcidFilesForStats(newTPart.getTable(), newPartPath, conf, null);
} else {
filesForStats = HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf));
}
if (filesForStats != null) {
MetaStoreUtils.populateQuickStats(filesForStats, newTPart.getParameters());
} else {
// The ACID state is probably absent. Warning is logged in the get method.
MetaStoreUtils.clearQuickStats(newTPart.getParameters());
}
try {
LOG.debug("Adding new partition " + newTPart.getSpec());
getSynchronizedMSC().add_partition(newTPart.getTPartition());
} catch (AlreadyExistsException aee) {
// With multiple users concurrently issuing insert statements on the same partition has
// a side effect that some queries may not see a partition at the time when they're issued,
// but will realize the partition is actually there when it is trying to add such partition
// to the metastore and thus get AlreadyExistsException, because some earlier query just created it (race condition).
// For example, imagine such a table is created:
// create table T (name char(50)) partitioned by (ds string);
// and the following two queries are launched at the same time, from different sessions:
// insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today'
// insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException
// In that case, we want to retry with alterPartition.
LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
} catch (Exception e) {
// Any other failure while adding the partition: best-effort clean-up of the partition
// directory that was just populated, then rethrow the original exception.
try {
final FileSystem newPathFileSystem = newPartPath.getFileSystem(this.getConf());
boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge"));
final FileStatus status = newPathFileSystem.getFileStatus(newPartPath);
Hive.trashFiles(newPathFileSystem, new FileStatus[] { status }, this.getConf(), isAutoPurge);
} catch (IOException io) {
// A clean-up failure is only logged so that it does not mask the original exception.
LOG.error("Could not delete partition directory contents after failed partition creation: ", io);
}
throw e;
}
} else {
// Partition already existed: update its stats properties and alter it in the metastore.
setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
}
return newTPart;
} catch (IOException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (MetaException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (InvalidOperationException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (TException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
}
}
use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.
the class TestHiveClientCache method testHMSCBreakability.
/**
 * Test that a long table name actually breaks the HMSC. Subsequently check that isOpen() reflects
 * and tells if the client is broken.
 *
 * The local metastore server is shut down in a finally block so that a failing assertion or
 * setup call does not leave the server running.
 */
@Ignore("hangs indefinitely")
@Test
public void testHMSCBreakability() throws IOException, MetaException, LoginException, TException, AlreadyExistsException, InvalidObjectException, NoSuchObjectException, InterruptedException {
  // Setup
  LocalMetaServer metaServer = new LocalMetaServer();
  metaServer.start();
  try {
    final HiveClientCache cache = new HiveClientCache(1000);
    HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(metaServer.getHiveConf());
    assertTrue(client.isOpen());
    final String DB_NAME = "test_db";
    final String LONG_TABLE_NAME = "long_table_name_" + new BigInteger(200, new Random()).toString(2);
    // Best-effort clean-up of leftovers from earlier runs; a failure here is not an error.
    try {
      client.dropTable(DB_NAME, LONG_TABLE_NAME);
    } catch (Exception ignored) {
      // the table may legitimately not exist
    }
    try {
      client.dropDatabase(DB_NAME);
    } catch (Exception ignored) {
      // the database may legitimately not exist
    }
    client.createDatabase(new Database(DB_NAME, "", null, null));
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, ""));
    Table tbl = new Table();
    tbl.setDbName(DB_NAME);
    tbl.setTableName(LONG_TABLE_NAME);
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(fields);
    tbl.setSd(sd);
    sd.setSerdeInfo(new SerDeInfo());
    // Break the client
    try {
      client.createTable(tbl);
      fail("Exception was expected while creating table with long name");
    } catch (Exception expected) {
      // expected: creating the table is what breaks the client
    }
    assertFalse(client.isOpen());
  } finally {
    metaServer.shutDown();
  }
}
use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.
the class MetaStorePartitionHelper method createPartitionIfNotExists.
/**
 * Creates the specified partition if it does not already exist. Does nothing if the table is
 * unpartitioned, i.e. when {@code newPartitionValues} is empty.
 *
 * The partition reuses a copy of the table's storage descriptor, with its location set to the
 * table location followed by the partition name derived from the table's partition keys.
 *
 * @param newPartitionValues values of the partition to create, one per partition key
 * @throws WorkerException (as a PartitionCreationException) if the table cannot be found or the
 *           metastore call fails; an already existing partition is not an error
 */
@Override
public void createPartitionIfNotExists(List<String> newPartitionValues) throws WorkerException {
  if (newPartitionValues.isEmpty()) {
    return;
  }
  try {
    LOG.debug("Attempting to create partition (if not exists) {}.{}:{}", databaseName, tableName, newPartitionValues);
    Table table = metaStoreClient.getTable(databaseName, tableName);
    Partition partition = new Partition();
    partition.setDbName(table.getDbName());
    partition.setTableName(table.getTableName());
    StorageDescriptor partitionSd = new StorageDescriptor(table.getSd());
    partitionSd.setLocation(table.getSd().getLocation() + Path.SEPARATOR + Warehouse.makePartName(table.getPartitionKeys(), newPartitionValues));
    partition.setSd(partitionSd);
    partition.setValues(newPartitionValues);
    metaStoreClient.add_partition(partition);
  } catch (AlreadyExistsException e) {
    // Another writer created the partition first; that is the desired end state.
    LOG.debug("Partition already exists: {}.{}:{}", databaseName, tableName, newPartitionValues);
  } catch (NoSuchObjectException e) {
    LOG.error("Failed to create partition : {}", newPartitionValues, e);
    throw new PartitionCreationException("Table not found '" + databaseName + "." + tableName + "'.", e);
  } catch (TException e) {
    LOG.error("Failed to create partition : {}", newPartitionValues, e);
    throw new PartitionCreationException("Failed to create partition '" + newPartitionValues + "' on table '" + databaseName + "." + tableName + "'", e);
  }
}
use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.
the class DDLTask method createDatabase.
/**
 * Creates a database from the given descriptor.
 *
 * The new database takes its name, comment, location URI and properties from the descriptor
 * and is owned by the user reported by the session authenticator.
 *
 * @param db Hive handle through which the database is created
 * @param crtDb descriptor of the database to create, including the if-not-exists flag
 * @return Always returns 0
 * @throws HiveException if the database already exists, or if thrown by the underlying calls
 */
private int createDatabase(Hive db, CreateDatabaseDesc crtDb) throws HiveException {
  final Database newDb = new Database();

  // Ownership: the current session user.
  newDb.setOwnerType(PrincipalType.USER);
  newDb.setOwnerName(SessionState.getUserFromAuthenticator());

  // Everything else comes straight from the descriptor.
  newDb.setName(crtDb.getName());
  newDb.setDescription(crtDb.getComment());
  newDb.setLocationUri(crtDb.getLocationUri());
  newDb.setParameters(crtDb.getDatabaseProperties());

  try {
    makeLocationQualified(newDb);
    db.createDatabase(newDb, crtDb.getIfNotExists());
    return 0;
  } catch (AlreadyExistsException alreadyExists) {
    // it would be better if AlreadyExistsException had an errorCode field....
    throw new HiveException(alreadyExists, ErrorMsg.DATABSAE_ALREADY_EXISTS, crtDb.getName());
  }
}
use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.
the class TestHiveMetaStoreChecker method testErrorForMissingPartitionColumn.
/**
 * Verifies that the checker throws a HiveException when a directory named
 * "dummyPart=error" is present under the data location of a partitioned table, both while
 * that directory is empty and after a file has been created inside it.
 *
 * @throws AlreadyExistsException
 * @throws IOException
 * @throws HiveException
 */
@Test
public void testErrorForMissingPartitionColumn() throws AlreadyExistsException, IOException, HiveException {
  Table partitionedTable = createPartitionedTestTable(dbName, tableName, 2, 0);
  // populate the filesystem with 10 partition directories
  createPartitionsDirectoriesOnFS(partitionedTable, 10);

  // add a directory that should make the checker fail
  final String bogusDir = partitionedTable.getDataLocation().toString() + Path.SEPARATOR + "dummyPart=error";
  createDirectory(bogusDir);

  // first pass: the bogus directory is empty
  CheckResult firstResult = new CheckResult();
  Exception firstFailure = null;
  try {
    checker.checkMetastore(dbName, tableName, null, firstResult);
  } catch (Exception e) {
    firstFailure = e;
  }
  // instanceof is false for null, so no separate null check is needed
  assertTrue("Expected HiveException", firstFailure instanceof HiveException);

  // second pass: the bogus directory now contains a file
  createFile(bogusDir, "dummyFile");
  CheckResult secondResult = new CheckResult();
  Exception secondFailure = null;
  try {
    checker.checkMetastore(dbName, tableName, null, secondResult);
  } catch (Exception e) {
    secondFailure = e;
  }
  assertTrue("Expected HiveException", secondFailure instanceof HiveException);
}
Aggregations