Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class StatsTask, method aggregateStats:
private int aggregateStats(Hive db) {
StatsAggregator statsAggregator = null;
int ret = 0;
StatsCollectionContext scc = null;
EnvironmentContext environmentContext = null;
try {
// Stats setup:
final Warehouse wh = new Warehouse(conf);
if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
try {
scc = getContext();
statsAggregator = createStatsAggregator(scc, conf);
} catch (HiveException e) {
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
throw e;
}
console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
}
}
List<Partition> partitions = getPartitionsList(db);
boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
String tableFullName = table.getDbName() + "." + table.getTableName();
if (partitions == null) {
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
// An ACID table will not have accurate stats unless they are set through an ANALYZE command.
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null
    || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace())
    || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
// non-partitioned tables:
if (!existStats(parameters) && atomic) {
return 0;
}
// For example, if a file is being loaded, the old number of rows is not valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(wh, parameters, tTable.getSd());
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, null);
updateStats(statsAggregator, parameters, prefix, atomic);
}
// write table stats to metastore
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
getHive().alterTable(tableFullName, new Table(tTable), environmentContext);
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
}
LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
} else {
// Partitioned table:
// Need to get the old stats of the partition
// and update the table stats based on the old and new stats.
List<Partition> updates = new ArrayList<Partition>();
// Get the file statuses up-front for all partitions. Beneficial on blob storage systems
final Map<String, FileStatus[]> fileStatusMap = new ConcurrentHashMap<String, FileStatus[]>();
int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
// If the thread count is set to 0, use a single thread.
poolSize = Math.max(poolSize, 1);
final ExecutorService pool = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
final List<Future<Void>> futures = Lists.newLinkedList();
LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize);
try {
for (final Partition partn : partitions) {
final String partitionName = partn.getName();
final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (!existStats(parameters) && atomic) {
continue;
}
futures.add(pool.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd());
fileStatusMap.put(partitionName, partfileStatus);
return null;
}
}));
}
pool.shutdown();
for (Future<Void> future : futures) {
future.get();
}
} catch (InterruptedException e) {
LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
//cancel other futures
for (Future future : futures) {
future.cancel(true);
}
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (pool != null) {
pool.shutdownNow();
}
LOG.debug("Finished getting file stats of all partitions");
}
for (Partition partn : partitions) {
//
// get the old partition stats
//
org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null
    || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace())
    || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
// Only partitions whose stats exist were added to fileStatusMap.
if (!fileStatusMap.containsKey(partn.getName())) {
continue;
}
// For example, if a file is being loaded, the old number of rows is not valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(parameters, fileStatusMap.get(partn.getName()));
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, partn);
updateStats(statsAggregator, parameters, prefix, atomic);
}
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
updates.add(new Partition(table, tPart));
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
LOG.info("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
if (!updates.isEmpty()) {
db.alterPartitions(tableFullName, updates, environmentContext);
}
}
} catch (Exception e) {
console.printInfo("[Warning] could not update stats.", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (statsAggregator != null) {
statsAggregator.closeConnection(scc);
}
}
// A return value of 0 indicates success; anything else indicates failure
return ret;
}
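In this method the Warehouse instance is used purely for the "quick" stats path: listing the files behind a table or partition StorageDescriptor and recording the file count and total size. A minimal, self-contained sketch of that idea follows; the helper name computeQuickStats and the wrapping class are illustrative, not part of StatsTask.

import java.util.Map;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

public final class QuickStatsSketch {

  // Illustrative helper (not part of StatsTask): list the files behind a
  // StorageDescriptor via Warehouse and fold the result into the basic
  // "quick" stats, mirroring what updateQuickStats does above.
  static void computeQuickStats(HiveConf conf, StorageDescriptor sd,
      Map<String, String> parameters) throws MetaException {
    Warehouse wh = new Warehouse(conf);
    FileStatus[] files = wh.getFileStatusesForSD(sd);
    long totalSize = 0L;
    for (FileStatus status : files) {
      totalSize += status.getLen();
    }
    parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(files.length));
    parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(totalSize));
  }
}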
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class ImportSemanticAnalyzer, method prepareImport:
/**
* The same code is used for both "repl load" and "import".
* Given that "repl load" now supports two modes, "repl load dbName [location]" and
* "repl load [location]", the database name has to be taken from the table metadata
* by default and then overridden if something is specified on the command line.
*
* Hence, for import to work correctly, we have to pass in the SessionState default Db
* via the parsedDbName parameter.
*/
public static boolean prepareImport(boolean isImportCmd, boolean isLocationSet, boolean isExternalSet,
    boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName,
    String overrideDBName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn,
    EximUtil.SemanticAnalyzerWrapperContext x, UpdatedMetaDataTracker updatedMetadata)
    throws IOException, MetaException, HiveException, URISyntaxException {
// initialize load path
URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
FileSystem fs = FileSystem.get(fromURI, x.getConf());
x.getInputs().add(toReadEntity(fromPath, x.getConf()));
MetaData rv;
try {
rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
} catch (IOException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
}
if (rv.getTable() == null) {
// nothing to do here, silently return.
return false;
}
ReplicationSpec replicationSpec = rv.getReplicationSpec();
if (replicationSpec.isNoop()) {
// nothing to do here, silently return.
x.getLOG().debug("Current update with ID:{} is noop", replicationSpec.getCurrentReplicationState());
return false;
}
if (isImportCmd) {
replicationSpec.setReplSpecType(ReplicationSpec.Type.IMPORT);
}
String dbname = rv.getTable().getDbName();
if ((overrideDBName != null) && (!overrideDBName.isEmpty())) {
// If the parsed statement contained a db.tablename specification, prefer that.
dbname = overrideDBName;
}
// Create table associated with the import
// Executed if relevant, and used to contain all the other details about the table if not.
ImportTableDesc tblDesc;
try {
tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
} catch (Exception e) {
throw new HiveException(e);
}
boolean isSourceMm = AcidUtils.isInsertOnlyTable(tblDesc.getTblProps());
if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
tblDesc.setReplicationSpec(replicationSpec);
StatsSetupConst.setBasicStatsState(tblDesc.getTblProps(), StatsSetupConst.FALSE);
}
if (isExternalSet) {
if (isSourceMm) {
throw new SemanticException("Cannot import an MM table as external");
}
tblDesc.setExternal(isExternalSet);
// This condition-check could have been avoided, but to honour the old
// default of not calling if it wasn't set, we retain that behaviour.
// TODO: clean up after verification that the outer if isn't really needed here
}
if (isLocationSet) {
tblDesc.setLocation(parsedLocation);
x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
}
if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
tblDesc.setTableName(parsedTableName);
}
List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
Iterable<Partition> partitions = rv.getPartitions();
for (Partition partition : partitions) {
// TODO: this should ideally not create AddPartitionDesc per partition
AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
StatsSetupConst.setBasicStatsState(partsDesc.getPartition(0).getPartParams(), StatsSetupConst.FALSE);
}
partitionDescs.add(partsDesc);
}
if (isPartSpecSet) {
// The import specification asked for only a particular partition to be loaded
// We load only that, and ignore all the others.
boolean found = false;
for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
AddPartitionDesc addPartitionDesc = partnIter.next();
if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
found = true;
} else {
partnIter.remove();
}
}
if (!found) {
throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
}
}
if (tblDesc.getTableName() == null) {
// The table name should have come either from the statement or from the export dump.
throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
} else {
x.getConf().set("import.destination.table", tblDesc.getTableName());
for (AddPartitionDesc addPartitionDesc : partitionDescs) {
addPartitionDesc.setTableName(tblDesc.getTableName());
}
}
Warehouse wh = new Warehouse(x.getConf());
Table table = tableIfExists(tblDesc, x.getHive());
boolean tableExists = false;
if (table != null) {
checkTable(table, tblDesc, replicationSpec, x.getConf());
x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
tableExists = true;
}
// Initialize with 0 for non-ACID and non-MM tables.
Long writeId = 0L;
if (((table != null) && AcidUtils.isTransactionalTable(table)) || AcidUtils.isTablePropertyTransactional(tblDesc.getTblProps())) {
// Explain plan doesn't open a txn and hence no need to allocate write id.
if (x.getCtx().getExplainConfig() == null) {
writeId = SessionState.get().getTxnMgr().getTableWriteId(tblDesc.getDatabaseName(), tblDesc.getTableName());
}
}
int stmtId = 0;
/*
if (isAcid(writeId)) {
tblDesc.setInitialMmWriteId(writeId);
}
*/
if (!replicationSpec.isInReplicationScope()) {
createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, fromURI, fs, wh, x, writeId, stmtId, isSourceMm);
} else {
createReplImportTasks(tblDesc, partitionDescs, replicationSpec, waitOnPrecursor, table, fromURI, fs, wh, x, writeId, stmtId, isSourceMm, updatedMetadata);
}
return tableExists;
}
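The Warehouse built near the end of prepareImport is only handed down to createRegularImportTasks / createReplImportTasks, where downstream code uses it when it needs to derive a filesystem location that the statement did not specify explicitly. A rough, hedged illustration of that kind of lookup; it assumes the two-argument Warehouse.getDefaultTablePath(Database, String) overload, and the helper below is not part of ImportSemanticAnalyzer.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public final class ImportLocationSketch {

  // Illustrative helper: where would an imported table land if the IMPORT
  // statement carried no explicit LOCATION? The default table path is
  // resolved through Warehouse from the target database.
  static Path defaultImportLocation(HiveConf conf, String dbName, String tableName)
      throws HiveException, MetaException {
    Database db = Hive.get(conf).getDatabase(dbName);
    Warehouse wh = new Warehouse(conf);
    // Assumption: the two-argument getDefaultTablePath overload is available.
    return wh.getDefaultTablePath(db, tableName);
  }
}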
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class TestReplicationScenariosExternalTables, method differentCatalogIncrementalReplication:
@Test
public void differentCatalogIncrementalReplication() throws Throwable {
// Create the catalog
Catalog catalog = new Catalog();
catalog.setName("spark");
Warehouse wh = new Warehouse(conf);
catalog.setLocationUri(wh.getWhRootExternal().toString() + File.separator + catalog.getName());
catalog.setDescription("Non-hive catalog");
Hive.get(primary.hiveConf).getMSC().createCatalog(catalog);
// Create database and table in spark catalog
String sparkDbName = "src_spark";
Database sparkdb = new Database();
sparkdb.setCatalogName("spark");
sparkdb.setName(sparkDbName);
Hive.get(primary.hiveConf).getMSC().createDatabase(sparkdb);
SerDeInfo serdeInfo = new SerDeInfo("LBCSerDe", LazyBinaryColumnarSerDe.class.getCanonicalName(), new HashMap<String, String>());
ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(1);
cols.add(new FieldSchema("place", serdeConstants.STRING_TYPE_NAME, ""));
StorageDescriptor sd = new StorageDescriptor(cols, null, "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", false, 0, serdeInfo, null, null, null);
Map<String, String> tableParameters = new HashMap<String, String>();
Table sparkTable = new Table("mgt1", sparkDbName, "", 0, 0, 0, sd, null, tableParameters, "", "", "");
sparkTable.setCatName("spark");
Hive.get(primary.hiveConf).getMSC().createTable(sparkTable);
// create same db in hive catalog
Map<String, String> params = new HashMap<>();
params.put(SOURCE_OF_REPLICATION, "1");
Database hiveDb = new Database();
hiveDb.setCatalogName("hive");
hiveDb.setName(sparkDbName);
hiveDb.setParameters(params);
Hive.get(primary.hiveConf).getMSC().createDatabase(hiveDb);
primary.dump(sparkDbName);
// Spark catalog tables are not replicated in bootstrap
replica.load(replicatedDbName, sparkDbName).run("use " + replicatedDbName).run("show tables like 'mgt1'").verifyResult(null);
Path externalTableLocation = new Path("/" + testName.getMethodName() + "/t1/");
DistributedFileSystem fs = primary.miniDFSCluster.getFileSystem();
fs.mkdirs(externalTableLocation, new FsPermission("777"));
// Create another table in spark
sparkTable = new Table("mgt2", sparkDbName, "", 0, 0, 0, sd, null, tableParameters, "", "", "");
sparkTable.setCatName("spark");
Hive.get(primary.hiveConf).getMSC().createTable(sparkTable);
// Incremental load shouldn't copy any events from spark catalog
primary.dump(sparkDbName);
replica.load(replicatedDbName, sparkDbName).run("use " + replicatedDbName).run("show tables like 'mgt1'").verifyResult(null).run("show tables like 'mgt2'").verifyResult(null);
primary.run("drop database if exists " + sparkDbName + " cascade");
}
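The only Warehouse call this test really needs is getWhRootExternal(), which anchors the new "spark" catalog under the external warehouse root. A small sketch of that placement step in isolation; the helper class and method name are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Catalog;
import org.apache.hadoop.hive.metastore.api.MetaException;

public final class CatalogLocationSketch {

  // Illustrative helper: build a Catalog object rooted under the external
  // warehouse directory, mirroring what the test above does for "spark".
  static Catalog externalCatalog(Configuration conf, String name) throws MetaException {
    Warehouse wh = new Warehouse(conf);
    Catalog catalog = new Catalog();
    catalog.setName(name);
    // Places the catalog at <external warehouse root>/<name>.
    catalog.setLocationUri(new Path(wh.getWhRootExternal(), name).toString());
    catalog.setDescription("Non-hive catalog " + name);
    return catalog;
  }
}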
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class HCatDriver, method setFSPermsNGrp:
private int setFSPermsNGrp(SessionState ss, HiveConf conf) {
String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, "");
if (tblName.isEmpty()) {
tblName = conf.get("import.destination.table", "");
conf.set("import.destination.table", "");
}
String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, "");
String grp = conf.get(HCatConstants.HCAT_GROUP, null);
String permsStr = conf.get(HCatConstants.HCAT_PERMS, null);
if (tblName.isEmpty() && dbName.isEmpty()) {
// It wasn't a create db/table operation.
return 0;
}
if (null == grp && null == permsStr) {
// There was neither a group nor perms to begin with.
return 0;
}
FsPermission perms = FsPermission.valueOf(permsStr);
if (!tblName.isEmpty()) {
Hive db = null;
try {
db = Hive.get();
Table tbl = db.getTable(tblName);
Path tblPath = tbl.getPath();
FileSystem fs = tblPath.getFileSystem(conf);
if (null != perms) {
fs.setPermission(tblPath, perms);
}
if (null != grp) {
fs.setOwner(tblPath, null, grp);
}
return 0;
} catch (Exception e) {
ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName, e.getMessage()));
try {
// We need to drop the table.
if (null != db) {
db.dropTable(tblName);
}
} catch (HiveException he) {
ss.err.println(String.format("Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName, e.getMessage()));
}
return 1;
}
} else {
// looks like a db operation
if (dbName.isEmpty() || dbName.equals(Warehouse.DEFAULT_DATABASE_NAME)) {
// We don't set perms or groups for the default dir.
return 0;
} else {
try {
Hive db = Hive.get();
Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName));
FileSystem fs = dbPath.getFileSystem(conf);
if (perms != null) {
fs.setPermission(dbPath, perms);
}
if (null != grp) {
fs.setOwner(dbPath, null, grp);
}
return 0;
} catch (Exception e) {
ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName, e.getMessage()));
try {
Hive.get().dropDatabase(dbName);
} catch (Exception e1) {
ss.err.println(String.format("Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName, e1.getMessage()));
}
return 1;
}
}
}
}
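For the database branch, the essential pattern is: resolve the directory through Warehouse.getDatabasePath, then apply the symbolic permission string and group to it. A condensed, hedged sketch of just that step; the helper name applyDbPerms is illustrative, and error handling is left to the caller.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.ql.metadata.Hive;

public final class DbPermsSketch {

  // Illustrative condensed form of the database branch above: resolve the
  // database directory through Warehouse and apply a symbolic permission
  // string (e.g. "drwxrwxr-x") and group to it.
  static void applyDbPerms(HiveConf conf, String dbName, String permsStr, String group)
      throws Exception {
    Path dbPath = new Warehouse(conf).getDatabasePath(Hive.get().getDatabase(dbName));
    FileSystem fs = dbPath.getFileSystem(conf);
    if (permsStr != null) {
      fs.setPermission(dbPath, FsPermission.valueOf(permsStr));
    }
    if (group != null) {
      fs.setOwner(dbPath, null, group);
    }
  }
}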
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class TestPermsGrp, method setUp:
@Before
public void setUp() throws Exception {
if (isServerRunning) {
return;
}
hcatConf = new HiveConf(this.getClass());
MetaStoreTestUtils.startMetaStoreWithRetry(hcatConf);
isServerRunning = true;
securityManager = System.getSecurityManager();
System.setSecurityManager(new NoExitSecurityManager());
Policy.setPolicy(new DerbyPolicy());
hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
hcatConf.setTimeVar(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, 60, TimeUnit.SECONDS);
hcatConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
hcatConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, MetastoreConf.getVar(hcatConf, MetastoreConf.ConfVars.WAREHOUSE));
hcatConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, MetastoreConf.getVar(hcatConf, MetastoreConf.ConfVars.CONNECT_URL_KEY));
hcatConf.set(HiveConf.ConfVars.METASTOREURIS.varname, MetastoreConf.getVar(hcatConf, MetastoreConf.ConfVars.THRIFT_URIS));
clientWH = new Warehouse(hcatConf);
msc = new HiveMetaStoreClient(hcatConf);
}
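The clientWH handle created here is what the assertions in this test class can later use to locate table directories on the filesystem and compare their owner, group, and permissions with what was requested. A hedged sketch of that kind of check; the helper below is illustrative and assumes Warehouse.getDnsPath for qualifying the stored location, which is not shown in the snippets above.

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.Warehouse;

public final class WarehousePathCheckSketch {

  // Illustrative only: read the location recorded in the metastore, let
  // Warehouse fully qualify it, then fetch the directory's owner, group,
  // and permission for an assertion.
  static FileStatus statusOfTableDir(HiveConf conf, HiveMetaStoreClient msc,
      Warehouse wh, String dbName, String tblName) throws Exception {
    Path tablePath = wh.getDnsPath(new Path(msc.getTable(dbName, tblName).getSd().getLocation()));
    return tablePath.getFileSystem(conf).getFileStatus(tablePath);
  }
}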