
Example 81 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From class TestReplicationScenariosExternalTables, method differentCatalogIncrementalReplication:

@Test
public void differentCatalogIncrementalReplication() throws Throwable {
    // Create the catalog
    Catalog catalog = new Catalog();
    catalog.setName("spark");
    Warehouse wh = new Warehouse(conf);
    catalog.setLocationUri(wh.getWhRootExternal().toString() + File.separator + catalog.getName());
    catalog.setDescription("Non-hive catalog");
    Hive.get(primary.hiveConf).getMSC().createCatalog(catalog);
    // Create database and table in spark catalog
    String sparkDbName = "src_spark";
    Database sparkdb = new Database();
    sparkdb.setCatalogName("spark");
    sparkdb.setName(sparkDbName);
    Hive.get(primary.hiveConf).getMSC().createDatabase(sparkdb);
    SerDeInfo serdeInfo = new SerDeInfo("LBCSerDe", LazyBinaryColumnarSerDe.class.getCanonicalName(), new HashMap<String, String>());
    ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(1);
    cols.add(new FieldSchema("place", serdeConstants.STRING_TYPE_NAME, ""));
    StorageDescriptor sd = new StorageDescriptor(cols, null, "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", false, 0, serdeInfo, null, null, null);
    Map<String, String> tableParameters = new HashMap<String, String>();
    Table sparkTable = new Table("mgt1", sparkDbName, "", 0, 0, 0, sd, null, tableParameters, "", "", "");
    sparkTable.setCatName("spark");
    Hive.get(primary.hiveConf).getMSC().createTable(sparkTable);
    // create same db in hive catalog
    Map<String, String> params = new HashMap<>();
    params.put(SOURCE_OF_REPLICATION, "1");
    Database hiveDb = new Database();
    hiveDb.setCatalogName("hive");
    hiveDb.setName(sparkDbName);
    hiveDb.setParameters(params);
    Hive.get(primary.hiveConf).getMSC().createDatabase(hiveDb);
    primary.dump(sparkDbName);
    // spark tables are not replicated in bootstrap
    replica.load(replicatedDbName, sparkDbName).run("use " + replicatedDbName).run("show tables like 'mgt1'").verifyResult(null);
    Path externalTableLocation = new Path("/" + testName.getMethodName() + "/t1/");
    DistributedFileSystem fs = primary.miniDFSCluster.getFileSystem();
    fs.mkdirs(externalTableLocation, new FsPermission("777"));
    // Create another table in spark
    sparkTable = new Table("mgt2", sparkDbName, "", 0, 0, 0, sd, null, tableParameters, "", "", "");
    sparkTable.setCatName("spark");
    Hive.get(primary.hiveConf).getMSC().createTable(sparkTable);
    // Incremental load shouldn't copy any events from spark catalog
    primary.dump(sparkDbName);
    replica.load(replicatedDbName, sparkDbName).run("use " + replicatedDbName).run("show tables like 'mgt1'").verifyResult(null).run("show tables like 'mgt2'").verifyResult(null);
    primary.run("drop database if exists " + sparkDbName + " cascade");
}
Also used : Path(org.apache.hadoop.fs.Path) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Catalog(org.apache.hadoop.hive.metastore.api.Catalog) Database(org.apache.hadoop.hive.metastore.api.Database) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) FsPermission(org.apache.hadoop.fs.permission.FsPermission) Test(org.junit.Test)

Example 82 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From class TestMetastoreAuthorizationProvider, method testSimplePrivileges:

@Test
public void testSimplePrivileges() throws Exception {
    if (!isTestEnabled()) {
        System.out.println("Skipping test " + this.getClass().getName());
        return;
    }
    String dbName = getTestDbName();
    String tblName = getTestTableName();
    String userName = setupUser();
    String loc = clientHiveConf.get(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname) + "/" + dbName;
    String mLoc = clientHiveConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname) + "/" + dbName;
    allowCreateDatabase(userName);
    driver.run("create database " + dbName + " location '" + loc + "' managedlocation '" + mLoc + "'");
    Database db = msc.getDatabase(dbName);
    String dbLocn = db.getManagedLocationUri();
    validateCreateDb(db, dbName);
    allowCreateInDb(dbName, userName, dbLocn);
    disallowCreateInDb(dbName, userName, dbLocn);
    disallowCreateDatabase(userName);
    driver.run("use " + dbName);
    try {
        driver.run(String.format("create table %s (a string) partitioned by (b string)", tblName));
        assert false;
    } catch (CommandProcessorException e) {
        assertEquals(40000, e.getResponseCode());
    }
    // Even if table location is specified table creation should fail
    String tblNameLoc = tblName + "_loc";
    String tblLocation = new Path(dbLocn).getParent().toUri() + "/" + tblNameLoc;
    if (mayTestLocation()) {
        driver.run("use " + dbName);
        try {
            driver.run(String.format("create table %s (a string) partitioned by (b string) location '" + tblLocation + "'", tblNameLoc));
        } catch (CommandProcessorException e) {
            assertEquals(40000, e.getResponseCode());
        }
    }
    // failure from not having permissions to create table
    ArrayList<FieldSchema> fields = new ArrayList<FieldSchema>(2);
    fields.add(new FieldSchema("a", serdeConstants.STRING_TYPE_NAME, ""));
    Table ttbl = new Table();
    ttbl.setDbName(dbName);
    ttbl.setTableName(tblName);
    StorageDescriptor sd = new StorageDescriptor();
    ttbl.setSd(sd);
    sd.setCols(fields);
    sd.setParameters(new HashMap<String, String>());
    sd.getParameters().put("test_param_1", "Use this for comments etc");
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(ttbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
    sd.getSerdeInfo().setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    sd.setInputFormat(HiveInputFormat.class.getName());
    sd.setOutputFormat(HiveOutputFormat.class.getName());
    ttbl.setPartitionKeys(new ArrayList<FieldSchema>());
    MetaException me = null;
    try {
        msc.createTable(ttbl);
    } catch (MetaException e) {
        me = e;
    }
    assertNoPrivileges(me);
    allowCreateInDb(dbName, userName, dbLocn);
    driver.run("use " + dbName);
    driver.run(String.format("create table %s (a string) partitioned by (b string)", tblName));
    Table tbl = msc.getTable(dbName, tblName);
    Assert.assertTrue(tbl.isSetId());
    tbl.unsetId();
    validateCreateTable(tbl, tblName, dbName);
    // Table creation should succeed even if location is specified
    if (mayTestLocation()) {
        driver.run("use " + dbName);
        driver.run(String.format("create table %s (a string) partitioned by (b string) location '" + tblLocation + "'", tblNameLoc));
        Table tblLoc = msc.getTable(dbName, tblNameLoc);
        validateCreateTable(tblLoc, tblNameLoc, dbName);
    }
    String fakeUser = "mal";
    List<String> fakeGroupNames = new ArrayList<String>();
    fakeGroupNames.add("groupygroup");
    InjectableDummyAuthenticator.injectUserName(fakeUser);
    InjectableDummyAuthenticator.injectGroupNames(fakeGroupNames);
    InjectableDummyAuthenticator.injectMode(true);
    try {
        driver.run(String.format("create table %s (a string) partitioned by (b string)", tblName + "mal"));
    } catch (CommandProcessorException e) {
        assertEquals(40000, e.getResponseCode());
    }
    ttbl.setTableName(tblName + "mal");
    me = null;
    try {
        msc.createTable(ttbl);
    } catch (MetaException e) {
        me = e;
    }
    assertNoPrivileges(me);
    allowCreateInTbl(tbl.getTableName(), userName, tbl.getSd().getLocation());
    disallowCreateInTbl(tbl.getTableName(), userName, tbl.getSd().getLocation());
    try {
        driver.run("alter table " + tblName + " add partition (b='2011')");
    } catch (CommandProcessorException e) {
        assertEquals(40000, e.getResponseCode());
    }
    List<String> ptnVals = new ArrayList<String>();
    ptnVals.add("b=2011");
    Partition tpart = new Partition();
    tpart.setDbName(dbName);
    tpart.setTableName(tblName);
    tpart.setValues(ptnVals);
    tpart.setParameters(new HashMap<String, String>());
    tpart.setSd(tbl.getSd().deepCopy());
    tpart.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo().deepCopy());
    tpart.getSd().setLocation(tbl.getSd().getLocation() + "/tpart");
    me = null;
    try {
        msc.add_partition(tpart);
    } catch (MetaException e) {
        me = e;
    }
    assertNoPrivileges(me);
    InjectableDummyAuthenticator.injectMode(false);
    allowCreateInTbl(tbl.getTableName(), userName, tbl.getSd().getLocation());
    driver.run("alter table " + tblName + " add partition (b='2011')");
    String proxyUserName = getProxyUserName();
    if (proxyUserName != null) {
        // for storage based authorization, user having proxy privilege should be allowed to do operation
        // even if the file permission is not there.
        InjectableDummyAuthenticator.injectUserName(proxyUserName);
        InjectableDummyAuthenticator.injectGroupNames(Collections.singletonList(proxyUserName));
        InjectableDummyAuthenticator.injectMode(true);
        disallowCreateInTbl(tbl.getTableName(), proxyUserName, tbl.getSd().getLocation());
        driver.run("alter table " + tblName + " add partition (b='2012')");
        InjectableDummyAuthenticator.injectMode(false);
    }
    allowDropOnTable(tblName, userName, tbl.getSd().getLocation());
    allowDropOnDb(dbName, userName, db.getLocationUri());
    driver.run("drop database if exists " + getTestDbName() + " cascade");
    InjectableDummyAuthenticator.injectUserName(userName);
    InjectableDummyAuthenticator.injectGroupNames(Arrays.asList(ugi.getGroupNames()));
    InjectableDummyAuthenticator.injectMode(true);
    allowCreateDatabase(userName);
    driver.run("create database " + dbName);
    db = msc.getDatabase(dbName);
    dbLocn = db.getLocationUri();
    allowCreateInDb(dbName, userName, dbLocn);
    dbLocn = db.getManagedLocationUri();
    if (dbLocn != null) {
        allowCreateInDb(dbName, userName, dbLocn);
    }
    tbl.setTableType("EXTERNAL_TABLE");
    msc.createTable(tbl);
    allowDropOnTable(tblName, userName, tbl.getSd().getLocation());
    disallowDropOnTable(tblName, userName, tbl.getSd().getLocation());
    try {
        driver.run("drop table " + tbl.getTableName());
    } catch (CommandProcessorException e) {
        assertEquals(40000, e.getResponseCode());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HiveOutputFormat(org.apache.hadoop.hive.ql.io.HiveOutputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) Database(org.apache.hadoop.hive.metastore.api.Database) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Test(org.junit.Test)

Example 83 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From class TestReplChangeManager, method testRecyclePartTable:

@Test
public void testRecyclePartTable() throws Exception {
    // Create db1/t1/dt=20160101/part
    // /dt=20160102/part
    // /dt=20160103/part
    // Test: recycle single file (dt=20160101/part)
    // recycle single partition (dt=20160102)
    // recycle table t1
    String dbName = "db1";
    client.dropDatabase(dbName, true, true);
    Database db = new Database();
    db.putToParameters(SOURCE_OF_REPLICATION, "1,2,3");
    db.setName(dbName);
    client.createDatabase(db);
    String tblName = "t1";
    List<FieldSchema> columns = new ArrayList<FieldSchema>();
    columns.add(new FieldSchema("foo", "string", ""));
    columns.add(new FieldSchema("bar", "string", ""));
    List<FieldSchema> partColumns = new ArrayList<FieldSchema>();
    partColumns.add(new FieldSchema("dt", "string", ""));
    SerDeInfo serdeInfo = new SerDeInfo("LBCSerDe", LazyBinaryColumnarSerDe.class.getCanonicalName(), new HashMap<String, String>());
    StorageDescriptor sd = new StorageDescriptor(columns, null, "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", false, 0, serdeInfo, null, null, null);
    Map<String, String> tableParameters = new HashMap<String, String>();
    Table tbl = new Table(tblName, dbName, "", 0, 0, 0, sd, partColumns, tableParameters, "", "", "");
    client.createTable(tbl);
    List<String> values = Arrays.asList("20160101");
    Partition part1 = createPartition(dbName, tblName, columns, values, serdeInfo);
    client.add_partition(part1);
    values = Arrays.asList("20160102");
    Partition part2 = createPartition(dbName, tblName, columns, values, serdeInfo);
    client.add_partition(part2);
    values = Arrays.asList("20160103");
    Partition part3 = createPartition(dbName, tblName, columns, values, serdeInfo);
    client.add_partition(part3);
    Path part1Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160101")), "part");
    createFile(part1Path, "p1");
    String path1Chksum = ReplChangeManager.checksumFor(part1Path, fs);
    Path part2Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160102")), "part");
    createFile(part2Path, "p2");
    String path2Chksum = ReplChangeManager.checksumFor(part2Path, fs);
    Path part3Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160103")), "part");
    createFile(part3Path, "p3");
    String path3Chksum = ReplChangeManager.checksumFor(part3Path, fs);
    assertTrue(part1Path.getFileSystem(hiveConf).exists(part1Path));
    assertTrue(part2Path.getFileSystem(hiveConf).exists(part2Path));
    assertTrue(part3Path.getFileSystem(hiveConf).exists(part3Path));
    ReplChangeManager cm = ReplChangeManager.getInstance(hiveConf);
    // verify cm.recycle(db, table, part) api moves file to cmroot dir
    int ret = cm.recycle(part1Path, RecycleType.MOVE, false);
    Assert.assertEquals(ret, 1);
    Path cmPart1Path = ReplChangeManager.getCMPath(hiveConf, part1Path.getName(), path1Chksum, cmroot.toString());
    assertTrue(cmPart1Path.getFileSystem(hiveConf).exists(cmPart1Path));
    // Verify dropPartition recycle part files
    client.dropPartition(dbName, tblName, Arrays.asList("20160102"));
    assertFalse(part2Path.getFileSystem(hiveConf).exists(part2Path));
    Path cmPart2Path = ReplChangeManager.getCMPath(hiveConf, part2Path.getName(), path2Chksum, cmroot.toString());
    assertTrue(cmPart2Path.getFileSystem(hiveConf).exists(cmPart2Path));
    // Verify dropTable recycle partition files
    client.dropTable(dbName, tblName);
    assertFalse(part3Path.getFileSystem(hiveConf).exists(part3Path));
    Path cmPart3Path = ReplChangeManager.getCMPath(hiveConf, part3Path.getName(), path3Chksum, cmroot.toString());
    assertTrue(cmPart3Path.getFileSystem(hiveConf).exists(cmPart3Path));
    client.dropDatabase(dbName, true, true);
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Database(org.apache.hadoop.hive.metastore.api.Database) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Test(org.junit.Test)
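
The createPartition helper called above is not shown on this page. A minimal sketch of what such a helper could look like, assuming it only assembles a Partition from the given schema, values, and SerDe (the actual helper in the Hive test may also set a partition location and timestamps):

private Partition createPartition(String dbName, String tblName, List<FieldSchema> columns,
        List<String> values, SerDeInfo serdeInfo) {
    // Reuse the same ORC storage layout as the table's StorageDescriptor:
    // cols, location, inputFormat, outputFormat, compressed, numBuckets,
    // serdeInfo, bucketCols, sortCols, parameters.
    StorageDescriptor sd = new StorageDescriptor(columns, null,
        "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
        "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
        false, 0, serdeInfo, null, null, null);
    Partition part = new Partition();
    part.setDbName(dbName);
    part.setTableName(tblName);
    part.setValues(values);
    part.setParameters(new HashMap<String, String>());
    part.setSd(sd);
    return part;
}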

Example 84 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From class TestTransactionalValidationListener, method createOrcTable:

private Table createOrcTable(String catalog) throws Exception {
    Table table = new Table();
    StorageDescriptor sd = new StorageDescriptor();
    List<FieldSchema> cols = new ArrayList<>();
    table.setDbName("default");
    table.setTableName("test_table");
    cols.add(new FieldSchema("column_name", "int", null));
    sd.setCols(cols);
    sd.setSerdeInfo(new SerDeInfo());
    sd.setInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");
    sd.setOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat");
    table.setSd(sd);
    table.setCatName(catalog);
    table.setTableType("MANAGED_TABLE");
    client.createTable(table);
    Table createdTable = client.getTable(catalog, table.getDbName(), table.getTableName());
    return createdTable;
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList)

Example 85 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From class AlterTableReplaceColumnsOperation, method doAlteration:

@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
    StorageDescriptor sd = getStorageDescriptor(table, partition);
    // change SerDe to LazySimpleSerDe if it is columnsetSerDe
    String serializationLib = sd.getSerdeInfo().getSerializationLib();
    if ("org.apache.hadoop.hive.serde.thrift.columnsetSerDe".equals(serializationLib)) {
        context.getConsole().printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
        sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
    } else if (!VALID_SERIALIZATION_LIBS.contains(serializationLib)) {
        throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, desc.getDbTableName());
    }
    // adding columns and limited integer type promotion is not supported for ORC schema evolution
    boolean isOrcSchemaEvolution = serializationLib.equals(OrcSerde.class.getName()) && AlterTableUtils.isSchemaEvolutionEnabled(table, context.getConf());
    if (isOrcSchemaEvolution) {
        List<FieldSchema> existingCols = sd.getCols();
        List<FieldSchema> replaceCols = desc.getNewColumns();
        if (replaceCols.size() < existingCols.size()) {
            throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, desc.getDbTableName());
        }
    }
    boolean droppingColumns = desc.getNewColumns().size() < sd.getCols().size();
    if (ParquetHiveSerDe.isParquetTable(table) && AlterTableUtils.isSchemaEvolutionEnabled(table, context.getConf()) && !desc.isCascade() && droppingColumns && table.isPartitioned()) {
        LOG.warn("Cannot drop columns from a partitioned parquet table without the CASCADE option");
        throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, desc.getDbTableName());
    }
    sd.setCols(desc.getNewColumns());
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
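
For context, the checks in doAlteration are reached through the user-facing REPLACE COLUMNS DDL. A brief, hypothetical illustration in the same driver.run style as the tests earlier on this page (table and column names are invented):

// Without CASCADE, dropping a column from a partitioned Parquet table with
// schema evolution enabled fails with REPLACE_CANNOT_DROP_COLUMNS.
// With CASCADE, the new column list is also propagated to existing partitions.
driver.run("alter table sales replace columns (id int, amount double) cascade");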

Aggregations

StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 284
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 163
Table (org.apache.hadoop.hive.metastore.api.Table): 159
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 155
ArrayList (java.util.ArrayList): 134
Test (org.junit.Test): 131
Partition (org.apache.hadoop.hive.metastore.api.Partition): 97
HashMap (java.util.HashMap): 61
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 38
List (java.util.List): 35
Order (org.apache.hadoop.hive.metastore.api.Order): 33
Path (org.apache.hadoop.fs.Path): 30
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 30
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 30
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 29
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 29
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats): 27
Database (org.apache.hadoop.hive.metastore.api.Database): 25
SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo): 23
IOException (java.io.IOException): 15
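
The counts above show that StorageDescriptor is almost always built together with SerDeInfo, FieldSchema, and Table. The following is a minimal, self-contained sketch of that recurring construction pattern; the class, database, table, and column names are illustrative and do not come from any of the examples above:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;

public class StorageDescriptorSketch {

    // Builds an unpartitioned ORC table definition using the same Thrift
    // constructors seen in the examples above.
    static Table buildOrcTable(String dbName, String tblName) {
        // Data columns of the table.
        List<FieldSchema> cols = new ArrayList<>();
        cols.add(new FieldSchema("id", "int", ""));
        cols.add(new FieldSchema("name", "string", ""));

        // SerDe: name, serialization library, serde parameters.
        SerDeInfo serdeInfo = new SerDeInfo("LBCSerDe",
            LazyBinaryColumnarSerDe.class.getCanonicalName(), new HashMap<String, String>());

        // StorageDescriptor: cols, location, inputFormat, outputFormat, compressed,
        // numBuckets, serdeInfo, bucketCols, sortCols, parameters.
        StorageDescriptor sd = new StorageDescriptor(cols, null,
            "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
            "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
            false, 0, serdeInfo, null, null, null);

        // Table: tableName, dbName, owner, createTime, lastAccessTime, retention,
        // sd, partitionKeys, parameters, viewOriginalText, viewExpandedText, tableType.
        Map<String, String> tableParameters = new HashMap<>();
        return new Table(tblName, dbName, "", 0, 0, 0, sd,
            new ArrayList<FieldSchema>(), tableParameters, "", "", "");
    }
}

The resulting Table can then be passed to IMetaStoreClient#createTable, as the tests above do with client.createTable and msc.createTable.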