Example 16 with AlreadyExistsException

use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.

the class Hive method loadPartition.

/**
 * Load a directory into a Hive table partition.
 *  - Alters the existing contents of the partition with the contents of loadPath.
 *  - If the partition does not exist, it is created.
 *  - Files in loadPath are moved into Hive, but the directory itself is not removed.
 *
 * @param loadPath
 *          directory containing the files to load into the table
 * @param tbl
 *          table to be loaded
 * @param partSpec
 *          defines which partition needs to be loaded
 * @param loadFileType
 *          if REPLACE_ALL, replace the files in the partition;
 *          otherwise add files to it (KEEP_EXISTING, OVERWRITE_EXISTING)
 * @param inheritTableSpecs if true, on [re]creating the partition, take the
 *          location/inputformat/outputformat/serde details from the table spec
 * @param isSkewedStoreAsSubdir
 *          if true, the partition stores skewed values in sub-directories (list bucketing)
 * @param isSrcLocal
 *          if true, the source directory is on the local filesystem
 * @param isAcidIUDoperation
 *          true if this is an ACID Insert/Update/Delete operation
 * @param hasFollowingStatsTask
 *          true if a following task updates the stats, so this method need not do so
 * @param writeId write ID allocated for the current load operation
 * @param stmtId statement ID of the current load statement
 * @return the Partition object being loaded with data
 */
public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec, LoadFileType loadFileType, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcidIUDoperation, boolean hasFollowingStatsTask, Long writeId, int stmtId) throws HiveException {
    Path tblDataLocationPath = tbl.getDataLocation();
    boolean isMmTableWrite = AcidUtils.isInsertOnlyTable(tbl.getParameters());
    assert tbl.getPath() != null : "null==getPath() for " + tbl.getTableName();
    boolean isFullAcidTable = AcidUtils.isFullAcidTable(tbl);
    try {
        // Get the partition object if it already exists
        Partition oldPart = getPartition(tbl, partSpec, false);
        /**
         * Move files before creating the partition, since downstream processes
         * check for the existence of the partition in metadata before accessing
         * the data. If the partition were created before the data is moved,
         * downstream waiting processes might move forward with partial data.
         */
        Path oldPartPath = (oldPart != null) ? oldPart.getDataLocation() : null;
        Path newPartPath = null;
        if (inheritTableSpecs) {
            Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec));
            newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath());
            if (oldPart != null) {
                /*
                 * If we are moving the partition across filesystem boundaries,
                 * inherit from the table properties. Otherwise (same filesystem)
                 * use the original partition location.
                 *
                 * See: HIVE-1707 and HIVE-2117 for background
                 */
                FileSystem oldPartPathFS = oldPartPath.getFileSystem(getConf());
                FileSystem loadPathFS = loadPath.getFileSystem(getConf());
                if (FileUtils.equalsFileSystem(oldPartPathFS, loadPathFS)) {
                    newPartPath = oldPartPath;
                }
            }
        } else {
            newPartPath = oldPartPath;
        }
        List<Path> newFiles = null;
        PerfLogger perfLogger = SessionState.getPerfLogger();
        perfLogger.PerfLogBegin("MoveTask", "FileMoves");
        // If the config is set, the table is not temporary, and the partition being inserted
        // already exists, capture the list of files added. For new partitions (e.g. insert overwrite
        // or dynamic partition inserts), the add partition event will capture the list of files added.
        if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && (null != oldPart)) {
            newFiles = Collections.synchronizedList(new ArrayList<Path>());
        }
        // Note: this assumes both paths are qualified, which they currently are.
        if (isMmTableWrite && loadPath.equals(newPartPath)) {
            // MM insert query, move itself is a no-op.
            if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                Utilities.FILE_OP_LOGGER.trace("not moving " + loadPath + " to " + newPartPath + " (MM)");
            }
            assert !isAcidIUDoperation;
            if (areEventsForDmlNeeded(tbl, oldPart)) {
                newFiles = listFilesCreatedByQuery(loadPath, writeId, stmtId);
            }
            if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                Utilities.FILE_OP_LOGGER.trace("maybe deleting stuff from " + oldPartPath + " (new " + newPartPath + ") for replace");
            }
        } else {
            // Either a non-MM query, or a load into MM table from an external source.
            PathFilter filter = FileUtils.HIDDEN_FILES_PATH_FILTER;
            Path destPath = newPartPath;
            if (isMmTableWrite) {
                // We will load into MM directory, and delete from the parent if needed.
                // TODO: this looks invalid after ACID integration. What about base dirs?
                destPath = new Path(destPath, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
                // TODO: loadFileType for MM table will no longer be REPLACE_ALL
                filter = (loadFileType == LoadFileType.REPLACE_ALL) ? new JavaUtils.IdPathFilter(writeId, stmtId, false, true) : filter;
            } else if (!isAcidIUDoperation && isFullAcidTable) {
                destPath = fixFullAcidPathForLoadData(loadFileType, destPath, writeId, stmtId, tbl);
            }
            if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                Utilities.FILE_OP_LOGGER.trace("moving " + loadPath + " to " + destPath);
            }
            // todo: why is "&& !isAcidIUDoperation" needed here?
            if (!isFullAcidTable && ((loadFileType == LoadFileType.REPLACE_ALL) || (oldPart == null && !isAcidIUDoperation))) {
                // for fullAcid tables we don't delete files for commands with OVERWRITE - we create a new
                // base_x.  (there is Insert Overwrite and Load Data Overwrite)
                boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge"));
                // TODO: this should never run for MM tables anymore. Remove the flag, and maybe the filter?
                replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), isSrcLocal, isAutoPurge, newFiles, filter, isMmTableWrite, !tbl.isTemporary());
            } else {
                FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
                copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcidIUDoperation, (loadFileType == LoadFileType.OVERWRITE_EXISTING), newFiles, tbl.getNumBuckets() > 0, isFullAcidTable);
            }
        }
        perfLogger.PerfLogEnd("MoveTask", "FileMoves");
        Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
        alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
        validatePartition(newTPart);
        // When inserting into a new partition, the add partition event takes care of insert event
        if ((null != oldPart) && (null != newFiles)) {
            fireInsertEvent(tbl, partSpec, (loadFileType == LoadFileType.REPLACE_ALL), newFiles);
        } else {
            LOG.debug("No new files were created, and is not a replace, or we're inserting into a " + "partition that does not exist yet. Skipping generating INSERT event.");
        }
        // without a following stats task, the existing column stats would be inaccurate after this load
        if (!hasFollowingStatsTask) {
            StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
        }
        // recreate the partition if it existed before
        if (isSkewedStoreAsSubdir) {
            org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
            SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
            /* Construct list bucketing location mappings from sub-directory name. */
            Map<List<String>, String> skewedColValueLocationMaps = constructListBucketingLocationMap(newPartPath, skewedInfo);
            /* Add list bucketing location mappings. */
            skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
            newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
        }
        if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
            StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
        }
        if (oldPart == null) {
            newTPart.getTPartition().setParameters(new HashMap<String, String>());
            if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
                StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters(), MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
            }
            // Note: we are creating a brand new partition, so this is going to be valid for ACID.
            List<FileStatus> filesForStats = null;
            if (isFullAcidTable || isMmTableWrite) {
                filesForStats = AcidUtils.getAcidFilesForStats(newTPart.getTable(), newPartPath, conf, null);
            } else {
                filesForStats = HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf));
            }
            if (filesForStats != null) {
                MetaStoreUtils.populateQuickStats(filesForStats, newTPart.getParameters());
            } else {
                // The ACID state is probably absent. Warning is logged in the get method.
                MetaStoreUtils.clearQuickStats(newTPart.getParameters());
            }
            try {
                LOG.debug("Adding new partition " + newTPart.getSpec());
                getSynchronizedMSC().add_partition(newTPart.getTPartition());
            } catch (AlreadyExistsException aee) {
                // Multiple users concurrently issuing insert statements on the same partition
                // can have the side effect that some queries may not see the partition at the
                // time they are issued, but discover that it actually exists when they try to
                // add it to the metastore, getting an AlreadyExistsException because an earlier
                // query just created it (race condition).
                // For example, imagine such a table is created:
                // create table T (name char(50)) partitioned by (ds string);
                // and the following two queries are launched at the same time, from different sessions:
                // insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today'
                // insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException
                // In that case, we want to retry with alterPartition.
                LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
                setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
            } catch (Exception e) {
                try {
                    final FileSystem newPathFileSystem = newPartPath.getFileSystem(this.getConf());
                    boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge"));
                    final FileStatus status = newPathFileSystem.getFileStatus(newPartPath);
                    Hive.trashFiles(newPathFileSystem, new FileStatus[] { status }, this.getConf(), isAutoPurge);
                } catch (IOException io) {
                    LOG.error("Could not delete partition directory contents after failed partition creation: ", io);
                }
                throw e;
            }
        } else {
            setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
        }
        return newTPart;
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (MetaException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (InvalidOperationException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (TException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    }
}
Also used : TException(org.apache.thrift.TException) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) List(java.util.List) LinkedList(java.util.LinkedList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveMetaException(org.apache.hadoop.hive.metastore.HiveMetaException) Path(org.apache.hadoop.fs.Path) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) FileNotFoundException(java.io.FileNotFoundException) JDODataStoreException(javax.jdo.JDODataStoreException)
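The pattern at the heart of this example — optimistically calling add_partition and falling back to an alter when a concurrent session wins the race — can be distilled into a few lines. The following is a minimal sketch, not the method's exact control flow; it assumes an IMetaStoreClient handle named msc and an already-populated Partition object:

import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.thrift.TException;

/** Hypothetical helper: create the partition, or update it if we lost the race. */
public static void addOrAlterPartition(IMetaStoreClient msc, Partition part) throws TException {
    try {
        // Optimistic path: assume no other session has created this partition yet.
        msc.add_partition(part);
    } catch (AlreadyExistsException e) {
        // Another session created the partition between our existence check and the
        // add; retry as an alter instead of failing the whole load.
        msc.alter_partition(part.getDbName(), part.getTableName(), part);
    }
}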

Example 17 with AlreadyExistsException

use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.

the class TestHiveClientCache method testHMSCBreakability.

/**
 * Test that a long table name actually breaks the HMSC. Subsequently, check that
 * isOpen() correctly reports that the client is broken.
 */
@Ignore("hangs indefinitely")
@Test
public void testHMSCBreakability() throws IOException, MetaException, LoginException, TException, AlreadyExistsException, InvalidObjectException, NoSuchObjectException, InterruptedException {
    // Setup
    LocalMetaServer metaServer = new LocalMetaServer();
    metaServer.start();
    final HiveClientCache cache = new HiveClientCache(1000);
    HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(metaServer.getHiveConf());
    assertTrue(client.isOpen());
    final String DB_NAME = "test_db";
    final String LONG_TABLE_NAME = "long_table_name_" + new BigInteger(200, new Random()).toString(2);
    try {
        client.dropTable(DB_NAME, LONG_TABLE_NAME);
    } catch (Exception e) {
        // Ignore: the table may not exist from a previous run.
    }
    try {
        client.dropDatabase(DB_NAME);
    } catch (Exception e) {
        // Ignore: the database may not exist from a previous run.
    }
    client.createDatabase(new Database(DB_NAME, "", null, null));
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, ""));
    Table tbl = new Table();
    tbl.setDbName(DB_NAME);
    tbl.setTableName(LONG_TABLE_NAME);
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(fields);
    tbl.setSd(sd);
    sd.setSerdeInfo(new SerDeInfo());
    // Break the client
    try {
        client.createTable(tbl);
        fail("Exception was expected while creating table with long name");
    } catch (Exception e) {
        // Expected: the over-long table name makes the metastore call fail, breaking the client.
    }
    assertFalse(client.isOpen());
    metaServer.shutDown();
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) LoginException(javax.security.auth.login.LoginException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) TException(org.apache.thrift.TException) IOException(java.io.IOException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) ExecutionException(java.util.concurrent.ExecutionException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) Random(java.util.Random) Database(org.apache.hadoop.hive.metastore.api.Database) BigInteger(java.math.BigInteger) Ignore(org.junit.Ignore) Test(org.junit.Test)
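A caller that reuses cached clients can rely on the same isOpen() signal to discard a broken client before issuing calls. A minimal sketch, assuming the HiveClientCache API exercised above and a HiveConf named conf; this is hypothetical usage, not part of the test:

import org.apache.hadoop.hive.conf.HiveConf;

HiveClientCache cache = new HiveClientCache(1000);  // same timeout value as in the test
HiveClientCache.CacheableHiveMetaStoreClient client =
        (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(conf);
if (!client.isOpen()) {
    // The underlying connection is broken (e.g. by a failed call); fetch a fresh client.
    client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(conf);
}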

Example 18 with AlreadyExistsException

use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.

the class MetaStorePartitionHelper method createPartitionIfNotExists.

/**
 * Creates the specified partition if it does not already exist. Does nothing if the table is unpartitioned.
 */
@Override
public void createPartitionIfNotExists(List<String> newPartitionValues) throws WorkerException {
    if (newPartitionValues.isEmpty()) {
        return;
    }
    try {
        LOG.debug("Attempting to create partition (if not exists) {}.{}:{}", databaseName, tableName, newPartitionValues);
        Table table = metaStoreClient.getTable(databaseName, tableName);
        Partition partition = new Partition();
        partition.setDbName(table.getDbName());
        partition.setTableName(table.getTableName());
        StorageDescriptor partitionSd = new StorageDescriptor(table.getSd());
        partitionSd.setLocation(table.getSd().getLocation() + Path.SEPARATOR + Warehouse.makePartName(table.getPartitionKeys(), newPartitionValues));
        partition.setSd(partitionSd);
        partition.setValues(newPartitionValues);
        metaStoreClient.add_partition(partition);
    } catch (AlreadyExistsException e) {
        LOG.debug("Partition already exisits: {}.{}:{}", databaseName, tableName, newPartitionValues);
    } catch (NoSuchObjectException e) {
        LOG.error("Failed to create partition : " + newPartitionValues, e);
        throw new PartitionCreationException("Table not found '" + databaseName + "." + tableName + "'.", e);
    } catch (TException e) {
        LOG.error("Failed to create partition : " + newPartitionValues, e);
        throw new PartitionCreationException("Failed to create partition '" + newPartitionValues + "' on table '" + databaseName + "." + tableName + "'", e);
    }
}
Also used : TException(org.apache.thrift.TException) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
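Because the AlreadyExistsException is caught and merely logged, the helper is idempotent and safe to call from concurrent workers. A minimal usage sketch, assuming a MetaStorePartitionHelper instance named helper for a table partitioned by two columns; the values must be supplied in the order of the table's partition keys, since makePartName pairs them positionally:

import java.util.Arrays;

// Hypothetical caller: both calls succeed; the second hits the
// AlreadyExistsException branch and only emits a debug log.
helper.createPartitionIfNotExists(Arrays.asList("2018", "01"));
helper.createPartitionIfNotExists(Arrays.asList("2018", "01"));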

Example 19 with AlreadyExistsException

use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.

the class DDLTask method createDatabase.

/**
 * Create a Database
 * @param db the Hive handle used to issue the metastore call
 * @param crtDb descriptor of the database to create
 * @return Always returns 0
 * @throws HiveException
 */
private int createDatabase(Hive db, CreateDatabaseDesc crtDb) throws HiveException {
    Database database = new Database();
    database.setName(crtDb.getName());
    database.setDescription(crtDb.getComment());
    database.setLocationUri(crtDb.getLocationUri());
    database.setParameters(crtDb.getDatabaseProperties());
    database.setOwnerName(SessionState.getUserFromAuthenticator());
    database.setOwnerType(PrincipalType.USER);
    try {
        makeLocationQualified(database);
        db.createDatabase(database, crtDb.getIfNotExists());
    } catch (AlreadyExistsException ex) {
        // it would be better if AlreadyExistsException had an errorCode field....
        throw new HiveException(ex, ErrorMsg.DATABSAE_ALREADY_EXISTS, crtDb.getName());
    }
    return 0;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) Database(org.apache.hadoop.hive.metastore.api.Database)
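The ifNotExists flag determines where a duplicate is handled: when crtDb.getIfNotExists() is true the call is expected to return quietly, so the catch block above is only reached when the flag is false. A minimal sketch of that second case, assuming the same Hive handle db (note that DATABSAE_ALREADY_EXISTS is the constant's actual spelling in this version of Hive's ErrorMsg enum, not a transcription error):

import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.metadata.HiveException;

Database database = new Database();
database.setName("demo_db");  // hypothetical database name
try {
    db.createDatabase(database, false);  // ifNotExists == false
} catch (AlreadyExistsException e) {
    // Without IF NOT EXISTS the duplicate surfaces here and is wrapped, as above.
    throw new HiveException(e, ErrorMsg.DATABSAE_ALREADY_EXISTS, database.getName());
}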

Example 20 with AlreadyExistsException

use of org.apache.hadoop.hive.metastore.api.AlreadyExistsException in project hive by apache.

the class TestHiveMetaStoreChecker method testErrorForMissingPartitionColumn.

/**
 * Test that the checker throws a HiveException when there is a dummy directory present
 * at a nested level of the partition sub-directories
 * @throws AlreadyExistsException
 * @throws IOException
 * @throws HiveException
 */
@Test
public void testErrorForMissingPartitionColumn() throws AlreadyExistsException, IOException, HiveException {
    Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0);
    // add 10 partitions on the filesystem
    createPartitionsDirectoriesOnFS(testTable, 10);
    // create a fake directory to throw exception
    StringBuilder sb = new StringBuilder(testTable.getDataLocation().toString());
    sb.append(Path.SEPARATOR);
    sb.append("dummyPart=error");
    createDirectory(sb.toString());
    // check result now
    CheckResult result = new CheckResult();
    Exception exception = null;
    try {
        checker.checkMetastore(dbName, tableName, null, result);
    } catch (Exception e) {
        exception = e;
    }
    assertTrue("Expected HiveException", exception != null && exception instanceof HiveException);
    createFile(sb.toString(), "dummyFile");
    result = new CheckResult();
    exception = null;
    try {
        checker.checkMetastore(dbName, tableName, null, result);
    } catch (Exception e) {
        exception = e;
    }
    assertTrue("Expected HiveException", exception != null && exception instanceof HiveException);
}
Also used : MetaException(org.apache.hadoop.hive.metastore.api.MetaException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) Test(org.junit.Test)
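The failure is driven purely by directory naming: a child directory whose name does not parse as partitionColumn=value for a declared partition column trips the checker, regardless of whether the directory is empty or contains files (the test exercises both). A minimal sketch of the two cases; the valid partition column names are assumed for illustration, since createPartitionedTestTable generates them:

import org.apache.hadoop.fs.Path;

Path tableLocation = testTable.getDataLocation();
// Matches the declared partition columns (names assumed):
Path valid = new Path(tableLocation, "part_col_1=val1/part_col_2=val2");
// Unknown column name -> checkMetastore throws HiveException:
Path bogus = new Path(tableLocation, "dummyPart=error");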

Aggregations

AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException) 30
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 24
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException) 24
TException (org.apache.thrift.TException) 23
IOException (java.io.IOException) 16
InvalidObjectException (org.apache.hadoop.hive.metastore.api.InvalidObjectException) 16
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException) 13
Table (org.apache.hadoop.hive.metastore.api.Table) 12
ArrayList (java.util.ArrayList) 9
JDODataStoreException (javax.jdo.JDODataStoreException) 9
Partition (org.apache.hadoop.hive.metastore.api.Partition) 8
InvalidInputException (org.apache.hadoop.hive.metastore.api.InvalidInputException) 7
ExecutionException (java.util.concurrent.ExecutionException) 6
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 6
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor) 6
QualifiedName (com.netflix.metacat.common.QualifiedName) 5
ConnectorException (com.netflix.metacat.common.server.connectors.exception.ConnectorException) 5
InvalidMetaException (com.netflix.metacat.common.server.connectors.exception.InvalidMetaException) 5
List (java.util.List) 5
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo) 5