use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
the class TestReplicationScenariosExternalTables method differentCatalogIncrementalReplication.
@Test
public void differentCatalogIncrementalReplication() throws Throwable {
// Create the catalog
Catalog catalog = new Catalog();
catalog.setName("spark");
Warehouse wh = new Warehouse(conf);
catalog.setLocationUri(wh.getWhRootExternal().toString() + File.separator + catalog.getName());
catalog.setDescription("Non-hive catalog");
Hive.get(primary.hiveConf).getMSC().createCatalog(catalog);
// Create database and table in spark catalog
String sparkDbName = "src_spark";
Database sparkdb = new Database();
sparkdb.setCatalogName("spark");
sparkdb.setName(sparkDbName);
Hive.get(primary.hiveConf).getMSC().createDatabase(sparkdb);
SerDeInfo serdeInfo = new SerDeInfo("LBCSerDe", LazyBinaryColumnarSerDe.class.getCanonicalName(), new HashMap<String, String>());
ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(1);
cols.add(new FieldSchema("place", serdeConstants.STRING_TYPE_NAME, ""));
StorageDescriptor sd = new StorageDescriptor(cols, null, "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", false, 0, serdeInfo, null, null, null);
Map<String, String> tableParameters = new HashMap<String, String>();
Table sparkTable = new Table("mgt1", sparkDbName, "", 0, 0, 0, sd, null, tableParameters, "", "", "");
sparkTable.setCatName("spark");
Hive.get(primary.hiveConf).getMSC().createTable(sparkTable);
// create same db in hive catalog
Map<String, String> params = new HashMap<>();
params.put(SOURCE_OF_REPLICATION, "1");
Database hiveDb = new Database();
hiveDb.setCatalogName("hive");
hiveDb.setName(sparkDbName);
hiveDb.setParameters(params);
Hive.get(primary.hiveConf).getMSC().createDatabase(hiveDb);
primary.dump(sparkDbName);
// spark tables are not replicated in bootstrap
replica.load(replicatedDbName, sparkDbName)
    .run("use " + replicatedDbName)
    .run("show tables like 'mgt1'")
    .verifyResult(null);
Path externalTableLocation = new Path("/" + testName.getMethodName() + "/t1/");
DistributedFileSystem fs = primary.miniDFSCluster.getFileSystem();
fs.mkdirs(externalTableLocation, new FsPermission("777"));
// Create another table in spark
sparkTable = new Table("mgt2", sparkDbName, "", 0, 0, 0, sd, null, tableParameters, "", "", "");
sparkTable.setCatName("spark");
Hive.get(primary.hiveConf).getMSC().createTable(sparkTable);
// Incremental load shouldn't copy any events from spark catalog
primary.dump(sparkDbName);
replica.load(replicatedDbName, sparkDbName)
    .run("use " + replicatedDbName)
    .run("show tables like 'mgt1'")
    .verifyResult(null)
    .run("show tables like 'mgt2'")
    .verifyResult(null);
primary.run("drop database if exists " + sparkDbName + " cascade");
}
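The ten-argument StorageDescriptor constructor used above is the Thrift-generated positional one, so the meaning of each argument is easy to lose. As a reading aid only, here is the same descriptor with one comment per positional argument; the argument names follow the Thrift field order and are not part of the test itself.
// Sketch only: the StorageDescriptor built in the test above, annotated with
// the Thrift field each positional argument maps to.
List<FieldSchema> cols = new ArrayList<FieldSchema>(1);
cols.add(new FieldSchema("place", serdeConstants.STRING_TYPE_NAME, ""));
SerDeInfo serdeInfo = new SerDeInfo("LBCSerDe",
    LazyBinaryColumnarSerDe.class.getCanonicalName(), new HashMap<String, String>());
StorageDescriptor sd = new StorageDescriptor(
    cols,                                                // cols
    null,                                                // location: null lets the metastore derive the default path
    "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",  // inputFormat
    "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", // outputFormat
    false,                                               // compressed
    0,                                                   // numBuckets
    serdeInfo,                                           // serdeInfo
    null,                                                // bucketCols
    null,                                                // sortCols
    null);                                               // parameters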
use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
the class TestMetastoreAuthorizationProvider method testSimplePrivileges.
@Test
public void testSimplePrivileges() throws Exception {
if (!isTestEnabled()) {
  System.out.println("Skipping test " + this.getClass().getName());
  return;
}
String dbName = getTestDbName();
String tblName = getTestTableName();
String userName = setupUser();
String loc = clientHiveConf.get(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname) + "/" + dbName;
String mLoc = clientHiveConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname) + "/" + dbName;
allowCreateDatabase(userName);
driver.run("create database " + dbName + " location '" + loc + "' managedlocation '" + mLoc + "'");
Database db = msc.getDatabase(dbName);
String dbLocn = db.getManagedLocationUri();
validateCreateDb(db, dbName);
allowCreateInDb(dbName, userName, dbLocn);
disallowCreateInDb(dbName, userName, dbLocn);
disallowCreateDatabase(userName);
driver.run("use " + dbName);
try {
  driver.run(String.format("create table %s (a string) partitioned by (b string)", tblName));
  assert false;
} catch (CommandProcessorException e) {
  assertEquals(40000, e.getResponseCode());
}
// Even if a table location is specified, table creation should fail
String tblNameLoc = tblName + "_loc";
String tblLocation = new Path(dbLocn).getParent().toUri() + "/" + tblNameLoc;
if (mayTestLocation()) {
  driver.run("use " + dbName);
  try {
    driver.run(String.format("create table %s (a string) partitioned by (b string) location '" + tblLocation + "'", tblNameLoc));
  } catch (CommandProcessorException e) {
    assertEquals(40000, e.getResponseCode());
  }
}
// failure from not having permissions to create table
ArrayList<FieldSchema> fields = new ArrayList<FieldSchema>(2);
fields.add(new FieldSchema("a", serdeConstants.STRING_TYPE_NAME, ""));
Table ttbl = new Table();
ttbl.setDbName(dbName);
ttbl.setTableName(tblName);
StorageDescriptor sd = new StorageDescriptor();
ttbl.setSd(sd);
sd.setCols(fields);
sd.setParameters(new HashMap<String, String>());
sd.getParameters().put("test_param_1", "Use this for comments etc");
sd.setSerdeInfo(new SerDeInfo());
sd.getSerdeInfo().setName(ttbl.getTableName());
sd.getSerdeInfo().setParameters(new HashMap<String, String>());
sd.getSerdeInfo().getParameters().put(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
sd.getSerdeInfo().setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
sd.setInputFormat(HiveInputFormat.class.getName());
sd.setOutputFormat(HiveOutputFormat.class.getName());
ttbl.setPartitionKeys(new ArrayList<FieldSchema>());
MetaException me = null;
try {
  msc.createTable(ttbl);
} catch (MetaException e) {
  me = e;
}
assertNoPrivileges(me);
allowCreateInDb(dbName, userName, dbLocn);
driver.run("use " + dbName);
driver.run(String.format("create table %s (a string) partitioned by (b string)", tblName));
Table tbl = msc.getTable(dbName, tblName);
Assert.assertTrue(tbl.isSetId());
tbl.unsetId();
validateCreateTable(tbl, tblName, dbName);
// Table creation should succeed even if location is specified
if (mayTestLocation()) {
  driver.run("use " + dbName);
  driver.run(String.format("create table %s (a string) partitioned by (b string) location '" + tblLocation + "'", tblNameLoc));
  Table tblLoc = msc.getTable(dbName, tblNameLoc);
  validateCreateTable(tblLoc, tblNameLoc, dbName);
}
String fakeUser = "mal";
List<String> fakeGroupNames = new ArrayList<String>();
fakeGroupNames.add("groupygroup");
InjectableDummyAuthenticator.injectUserName(fakeUser);
InjectableDummyAuthenticator.injectGroupNames(fakeGroupNames);
InjectableDummyAuthenticator.injectMode(true);
try {
  driver.run(String.format("create table %s (a string) partitioned by (b string)", tblName + "mal"));
} catch (CommandProcessorException e) {
  assertEquals(40000, e.getResponseCode());
}
ttbl.setTableName(tblName + "mal");
me = null;
try {
  msc.createTable(ttbl);
} catch (MetaException e) {
  me = e;
}
assertNoPrivileges(me);
allowCreateInTbl(tbl.getTableName(), userName, tbl.getSd().getLocation());
disallowCreateInTbl(tbl.getTableName(), userName, tbl.getSd().getLocation());
try {
  driver.run("alter table " + tblName + " add partition (b='2011')");
} catch (CommandProcessorException e) {
  assertEquals(40000, e.getResponseCode());
}
List<String> ptnVals = new ArrayList<String>();
ptnVals.add("b=2011");
Partition tpart = new Partition();
tpart.setDbName(dbName);
tpart.setTableName(tblName);
tpart.setValues(ptnVals);
tpart.setParameters(new HashMap<String, String>());
tpart.setSd(tbl.getSd().deepCopy());
tpart.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo().deepCopy());
tpart.getSd().setLocation(tbl.getSd().getLocation() + "/tpart");
me = null;
try {
  msc.add_partition(tpart);
} catch (MetaException e) {
  me = e;
}
assertNoPrivileges(me);
InjectableDummyAuthenticator.injectMode(false);
allowCreateInTbl(tbl.getTableName(), userName, tbl.getSd().getLocation());
driver.run("alter table " + tblName + " add partition (b='2011')");
String proxyUserName = getProxyUserName();
if (proxyUserName != null) {
  // For storage-based authorization, a user with the proxy privilege should be allowed
  // to perform the operation even if the file permission is not there.
  InjectableDummyAuthenticator.injectUserName(proxyUserName);
  InjectableDummyAuthenticator.injectGroupNames(Collections.singletonList(proxyUserName));
  InjectableDummyAuthenticator.injectMode(true);
  disallowCreateInTbl(tbl.getTableName(), proxyUserName, tbl.getSd().getLocation());
  driver.run("alter table " + tblName + " add partition (b='2012')");
  InjectableDummyAuthenticator.injectMode(false);
}
allowDropOnTable(tblName, userName, tbl.getSd().getLocation());
allowDropOnDb(dbName, userName, db.getLocationUri());
driver.run("drop database if exists " + getTestDbName() + " cascade");
InjectableDummyAuthenticator.injectUserName(userName);
InjectableDummyAuthenticator.injectGroupNames(Arrays.asList(ugi.getGroupNames()));
InjectableDummyAuthenticator.injectMode(true);
allowCreateDatabase(userName);
driver.run("create database " + dbName);
db = msc.getDatabase(dbName);
dbLocn = db.getLocationUri();
allowCreateInDb(dbName, userName, dbLocn);
dbLocn = db.getManagedLocationUri();
if (dbLocn != null) {
  allowCreateInDb(dbName, userName, dbLocn);
}
tbl.setTableType("EXTERNAL_TABLE");
msc.createTable(tbl);
allowDropOnTable(tblName, userName, tbl.getSd().getLocation());
disallowDropOnTable(tblName, userName, tbl.getSd().getLocation());
try {
  driver.run("drop table " + tbl.getTableName());
} catch (CommandProcessorException e) {
  assertEquals(40000, e.getResponseCode());
}
}
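The assertNoPrivileges helper called repeatedly above is not part of this excerpt. A minimal sketch of what it is expected to check is shown below; the exact message inspected by the real helper may differ.
// Hypothetical sketch of the assertNoPrivileges helper referenced above; the
// real implementation in the test class may match a different error message.
private void assertNoPrivileges(MetaException me) {
  // The preceding metastore call must have failed with a MetaException...
  Assert.assertNotNull(me);
  // ...and the failure should be an authorization denial rather than some other error.
  Assert.assertTrue(me.getMessage(), me.getMessage().toLowerCase().contains("permission"));
}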
use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
the class TestReplChangeManager method testRecyclePartTable.
@Test
public void testRecyclePartTable() throws Exception {
// Create db1/t1/dt=20160101/part
//              /dt=20160102/part
//              /dt=20160103/part
// Test: recycle single file (dt=20160101/part)
//       recycle single partition (dt=20160102)
//       recycle table t1
String dbName = "db1";
client.dropDatabase(dbName, true, true);
Database db = new Database();
db.putToParameters(SOURCE_OF_REPLICATION, "1,2,3");
db.setName(dbName);
client.createDatabase(db);
String tblName = "t1";
List<FieldSchema> columns = new ArrayList<FieldSchema>();
columns.add(new FieldSchema("foo", "string", ""));
columns.add(new FieldSchema("bar", "string", ""));
List<FieldSchema> partColumns = new ArrayList<FieldSchema>();
partColumns.add(new FieldSchema("dt", "string", ""));
SerDeInfo serdeInfo = new SerDeInfo("LBCSerDe", LazyBinaryColumnarSerDe.class.getCanonicalName(), new HashMap<String, String>());
StorageDescriptor sd = new StorageDescriptor(columns, null, "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", false, 0, serdeInfo, null, null, null);
Map<String, String> tableParameters = new HashMap<String, String>();
Table tbl = new Table(tblName, dbName, "", 0, 0, 0, sd, partColumns, tableParameters, "", "", "");
client.createTable(tbl);
List<String> values = Arrays.asList("20160101");
Partition part1 = createPartition(dbName, tblName, columns, values, serdeInfo);
client.add_partition(part1);
values = Arrays.asList("20160102");
Partition part2 = createPartition(dbName, tblName, columns, values, serdeInfo);
client.add_partition(part2);
values = Arrays.asList("20160103");
Partition part3 = createPartition(dbName, tblName, columns, values, serdeInfo);
client.add_partition(part3);
Path part1Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160101")), "part");
createFile(part1Path, "p1");
String path1Chksum = ReplChangeManager.checksumFor(part1Path, fs);
Path part2Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160102")), "part");
createFile(part2Path, "p2");
String path2Chksum = ReplChangeManager.checksumFor(part2Path, fs);
Path part3Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160103")), "part");
createFile(part3Path, "p3");
String path3Chksum = ReplChangeManager.checksumFor(part3Path, fs);
assertTrue(part1Path.getFileSystem(hiveConf).exists(part1Path));
assertTrue(part2Path.getFileSystem(hiveConf).exists(part2Path));
assertTrue(part3Path.getFileSystem(hiveConf).exists(part3Path));
ReplChangeManager cm = ReplChangeManager.getInstance(hiveConf);
// Verify that cm.recycle(path, RecycleType.MOVE, false) moves the file to the cmroot dir
int ret = cm.recycle(part1Path, RecycleType.MOVE, false);
Assert.assertEquals(ret, 1);
Path cmPart1Path = ReplChangeManager.getCMPath(hiveConf, part1Path.getName(), path1Chksum, cmroot.toString());
assertTrue(cmPart1Path.getFileSystem(hiveConf).exists(cmPart1Path));
// Verify that dropPartition recycles the partition files
client.dropPartition(dbName, tblName, Arrays.asList("20160102"));
assertFalse(part2Path.getFileSystem(hiveConf).exists(part2Path));
Path cmPart2Path = ReplChangeManager.getCMPath(hiveConf, part2Path.getName(), path2Chksum, cmroot.toString());
assertTrue(cmPart2Path.getFileSystem(hiveConf).exists(cmPart2Path));
// Verify that dropTable recycles the remaining partition files
client.dropTable(dbName, tblName);
assertFalse(part3Path.getFileSystem(hiveConf).exists(part3Path));
Path cmPart3Path = ReplChangeManager.getCMPath(hiveConf, part3Path.getName(), path3Chksum, cmroot.toString());
assertTrue(cmPart3Path.getFileSystem(hiveConf).exists(cmPart3Path));
client.dropDatabase(dbName, true, true);
}
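The createPartition helper used above is likewise not shown in this excerpt. A plausible sketch, assuming it mirrors the table's ORC storage descriptor and only varies the partition values, is:
// Hypothetical sketch of the createPartition helper referenced above (the real
// helper in TestReplChangeManager may differ in detail).
private Partition createPartition(String dbName, String tblName,
    List<FieldSchema> columns, List<String> values, SerDeInfo serdeInfo) {
  StorageDescriptor sd = new StorageDescriptor(columns, null,
      "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
      "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
      false, 0, serdeInfo, null, null, null);
  Partition part = new Partition();
  part.setDbName(dbName);
  part.setTableName(tblName);
  part.setValues(values);
  part.setSd(sd);
  part.setParameters(new HashMap<String, String>());
  return part;
}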
use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
the class TestTransactionalValidationListener method createOrcTable.
private Table createOrcTable(String catalog) throws Exception {
Table table = new Table();
StorageDescriptor sd = new StorageDescriptor();
List<FieldSchema> cols = new ArrayList<>();
table.setDbName("default");
table.setTableName("test_table");
cols.add(new FieldSchema("column_name", "int", null));
sd.setCols(cols);
sd.setSerdeInfo(new SerDeInfo());
sd.setInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");
sd.setOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat");
table.setSd(sd);
table.setCatName(catalog);
table.setTableType("MANAGED_TABLE");
client.createTable(table);
Table createdTable = client.getTable(catalog, table.getDbName(), table.getTableName());
return createdTable;
}
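An illustrative call site for this helper, assuming the test's IMetaStoreClient field client and an already-created catalog, might look like:
// Illustrative only: exercise createOrcTable against a hypothetical catalog name.
Table orc = createOrcTable("hive");
Assert.assertEquals("test_table", orc.getTableName());
Assert.assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
    orc.getSd().getInputFormat());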
use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
the class AlterTableReplaceColumnsOperation method doAlteration.
@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
StorageDescriptor sd = getStorageDescriptor(table, partition);
// change SerDe to LazySimpleSerDe if it is columnsetSerDe
String serializationLib = sd.getSerdeInfo().getSerializationLib();
if ("org.apache.hadoop.hive.serde.thrift.columnsetSerDe".equals(serializationLib)) {
context.getConsole().printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
} else if (!VALID_SERIALIZATION_LIBS.contains(serializationLib)) {
throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, desc.getDbTableName());
}
// adding columns and limited integer type promotion are supported for ORC schema evolution; dropping columns is not
boolean isOrcSchemaEvolution = serializationLib.equals(OrcSerde.class.getName())
    && AlterTableUtils.isSchemaEvolutionEnabled(table, context.getConf());
if (isOrcSchemaEvolution) {
  List<FieldSchema> existingCols = sd.getCols();
  List<FieldSchema> replaceCols = desc.getNewColumns();
  if (replaceCols.size() < existingCols.size()) {
    throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, desc.getDbTableName());
  }
}
boolean droppingColumns = desc.getNewColumns().size() < sd.getCols().size();
if (ParquetHiveSerDe.isParquetTable(table) && AlterTableUtils.isSchemaEvolutionEnabled(table, context.getConf())
    && !desc.isCascade() && droppingColumns && table.isPartitioned()) {
  LOG.warn("Cannot drop columns from a partitioned parquet table without the CASCADE option");
  throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, desc.getDbTableName());
}
sd.setCols(desc.getNewColumns());
}
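getStorageDescriptor is inherited from the parent alter-table operation and is not shown here; the relevant behavior is simply that the partition's descriptor is altered when a partition is supplied, otherwise the table's. A minimal sketch under that assumption:
// Plausible sketch of the inherited getStorageDescriptor helper (not shown in
// this excerpt): pick the partition's descriptor when one is given, else the table's.
protected StorageDescriptor getStorageDescriptor(Table table, Partition partition) {
  return partition == null ? table.getTTable().getSd() : partition.getTPartition().getSd();
}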