Search in sources:

Example 56 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

From the class TestObjectStoreSchemaMethods, method alterSchemaVersion.

@Test
public void alterSchemaVersion() throws TException {
    // Altering an existing schema version must persist both the new state and
    // the attached serde information.
    final String dbName = createUniqueDatabaseForTest();
    final String schemaName = "schema371234";
    final int version = 1;
    SchemaVersionDescriptor versionRef =
        new SchemaVersionDescriptor(new ISchemaName(dbName, schemaName), version);
    // Nothing should exist before the schema itself is created.
    Assert.assertNull(objectStore.getSchemaVersion(versionRef));
    ISchema schema = new ISchemaBuilder()
        .setSchemaType(SchemaType.AVRO)
        .setName(schemaName)
        .setDbName(dbName)
        .build();
    objectStore.createISchema(schema);
    objectStore.addSchemaVersion(new SchemaVersionBuilder()
        .versionOf(schema)
        .setVersion(version)
        .addCol("a", ColumnType.INT_TYPE_NAME)
        .addCol("b", ColumnType.FLOAT_TYPE_NAME)
        .setState(SchemaVersionState.INITIATED)
        .build());
    SchemaVersion stored = objectStore.getSchemaVersion(versionRef);
    Assert.assertNotNull(stored);
    Assert.assertEquals(schemaName, stored.getSchema().getSchemaName());
    Assert.assertEquals(dbName, stored.getSchema().getDbName());
    Assert.assertEquals(version, stored.getVersion());
    Assert.assertEquals(SchemaVersionState.INITIATED, stored.getState());
    // Mutate the fetched version: advance the state and attach a serde.
    stored.setState(SchemaVersionState.REVIEWED);
    String serdeName = "serde for " + schemaName;
    String serializer = "org.apache.hadoop.hive.metastore.test.Serializer";
    String deserializer = "org.apache.hadoop.hive.metastore.test.Deserializer";
    SerDeInfo serde = new SerDeInfo(serdeName, "", Collections.emptyMap());
    serde.setSerializerClass(serializer);
    serde.setDeserializerClass(deserializer);
    stored.setSerDe(serde);
    objectStore.alterSchemaVersion(versionRef, stored);
    // Re-read and confirm every altered field round-tripped.
    SchemaVersion altered = objectStore.getSchemaVersion(versionRef);
    Assert.assertNotNull(altered);
    Assert.assertEquals(schemaName, altered.getSchema().getSchemaName());
    Assert.assertEquals(dbName, altered.getSchema().getDbName());
    Assert.assertEquals(version, altered.getVersion());
    Assert.assertEquals(SchemaVersionState.REVIEWED, altered.getState());
    Assert.assertEquals(serdeName, altered.getSerDe().getName());
    Assert.assertEquals(serializer, altered.getSerDe().getSerializerClass());
    Assert.assertEquals(deserializer, altered.getSerDe().getDeserializerClass());
}
Also used : ISchemaBuilder(org.apache.hadoop.hive.metastore.client.builder.ISchemaBuilder) SchemaVersion(org.apache.hadoop.hive.metastore.api.SchemaVersion) ISchema(org.apache.hadoop.hive.metastore.api.ISchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) SchemaVersionDescriptor(org.apache.hadoop.hive.metastore.api.SchemaVersionDescriptor) ISchemaName(org.apache.hadoop.hive.metastore.api.ISchemaName) SchemaVersionBuilder(org.apache.hadoop.hive.metastore.client.builder.SchemaVersionBuilder) Test(org.junit.Test) MetastoreCheckinTest(org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)

Example 57 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

From the class TestOldSchema, method testPartitionOps.

/**
 * Tests partition operations: creates a partitioned table, adds ten partitions
 * with per-partition long-column statistics, then verifies the aggregated
 * statistics (high/low values, null count, NDV count) across all partitions.
 */
@Test
public void testPartitionOps() throws Exception {
    String dbName = "default";
    String tableName = "snp";
    Database db1 = new Database(dbName, "description", "locationurl", null);
    store.createDatabase(db1);
    long now = System.currentTimeMillis();
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema("col1", "long", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, serde, null, null, Collections.emptyMap());
    List<FieldSchema> partCols = new ArrayList<>();
    partCols.add(new FieldSchema("ds", "string", ""));
    Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, Collections.emptyMap(), null, null, null);
    store.createTable(table);
    Deadline.startTimer("getPartition");
    for (int i = 0; i < 10; i++) {
        List<String> partVal = new ArrayList<>();
        partVal.add(String.valueOf(i));
        StorageDescriptor psd = new StorageDescriptor(sd);
        // Fix: the original concatenated the List itself, yielding "ds=[0]".
        // Use the partition value so the location matches the "ds=<i>" name
        // used for the stats descriptor below.
        psd.setLocation("file:/tmp/default/hit/ds=" + i);
        Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, Collections.emptyMap());
        store.addPartition(part);
        // Per-partition stats: highValue 1000+i, lowValue -1000-i, i nulls, 10i+1 NDVs.
        ColumnStatistics cs = new ColumnStatistics();
        ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
        desc.setLastAnalyzed(now);
        desc.setPartName("ds=" + String.valueOf(i));
        cs.setStatsDesc(desc);
        ColumnStatisticsObj obj = new ColumnStatisticsObj();
        obj.setColName("col1");
        obj.setColType("bigint");
        ColumnStatisticsData data = new ColumnStatisticsData();
        LongColumnStatsData dcsd = new LongColumnStatsData();
        dcsd.setHighValue(1000 + i);
        dcsd.setLowValue(-1000 - i);
        dcsd.setNumNulls(i);
        dcsd.setNumDVs(10 * i + 1);
        dcsd.setBitVectors(bitVectors[0]);
        data.setLongStats(dcsd);
        obj.setStatsData(data);
        cs.addToStatsObj(obj);
        store.updatePartitionColumnStatistics(cs, partVal);
    }
    // Expected aggregates: max high = 1009, min low = -1009,
    // nulls = 0+1+...+9 = 45, NDVs = max(10i+1) = 91.
    Checker statChecker = new Checker() {

        @Override
        public void checkStats(AggrStats aggrStats) throws Exception {
            Assert.assertEquals(10, aggrStats.getPartsFound());
            Assert.assertEquals(1, aggrStats.getColStatsSize());
            ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
            Assert.assertEquals("col1", cso.getColName());
            Assert.assertEquals("bigint", cso.getColType());
            LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
            Assert.assertEquals(1009, lcsd.getHighValue(), 0.01);
            Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01);
            Assert.assertEquals(45, lcsd.getNumNulls());
            Assert.assertEquals(91, lcsd.getNumDVs());
        }
    };
    List<String> partNames = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, Arrays.asList("col1"));
    statChecker.checkStats(aggrStats);
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Database(org.apache.hadoop.hive.metastore.api.Database) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest) Test(org.junit.Test)

Example 58 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

From the class TestCachedStore, method testAggrStatsRepeatedRead.

@Test
public void testAggrStatsRepeatedRead() throws Exception {
    // Aggregated column stats must be identical when read twice in a row
    // (the second read exercises the cached path).
    final String dbName = "testTableColStatsOps";
    final String tblName = "tbl";
    final String colName = "f1";
    cachedStore.createDatabase(new Database(dbName, null, "some_location", null));
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema(colName, "int", null));
    List<FieldSchema> partCols = new ArrayList<>();
    partCols.add(new FieldSchema("col", "int", null));
    StorageDescriptor sd = new StorageDescriptor(cols, null, "input", "output", false, 0,
        new SerDeInfo("serde", "seriallib", new HashMap<>()), null, null, null);
    cachedStore.createTable(new Table(tblName, dbName, null, 0, 0, 0, sd, partCols,
        new HashMap<>(), null, null, TableType.MANAGED_TABLE.toString()));
    // Two partitions sharing the same storage descriptor.
    List<String> firstVals = new ArrayList<>();
    firstVals.add("1");
    List<String> secondVals = new ArrayList<>();
    secondVals.add("2");
    cachedStore.addPartition(new Partition(firstVals, dbName, tblName, 0, 0, sd, new HashMap<>()));
    cachedStore.addPartition(new Partition(secondVals, dbName, tblName, 0, 0, sd, new HashMap<>()));
    // Identical long-column stats for both partitions: 50 nulls each.
    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
    longStats.setLowValue(0);
    longStats.setHighValue(100);
    longStats.setNumNulls(50);
    longStats.setNumDVs(30);
    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setLongStats(longStats);
    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
    statsDesc.setPartName("col");
    List<ColumnStatisticsObj> colStatObjs = new ArrayList<>();
    colStatObjs.add(new ColumnStatisticsObj(colName, "int", data));
    ColumnStatistics stats = new ColumnStatistics();
    stats.setStatsDesc(statsDesc);
    stats.setStatsObj(colStatObjs);
    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), firstVals);
    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), secondVals);
    List<String> colNames = new ArrayList<>();
    colNames.add(colName);
    List<String> aggrPartVals = new ArrayList<>();
    aggrPartVals.add("1");
    aggrPartVals.add("2");
    // Read twice: both reads must report the aggregated 100 null values.
    for (int pass = 0; pass < 2; pass++) {
        AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
        Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Database(org.apache.hadoop.hive.metastore.api.Database) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test) MetastoreCheckinTest(org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)

Example 59 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

From the class TestCachedStore, method createTestTbl.

private Table createTestTbl(String dbName, String tblName, String tblOwner, List<FieldSchema> cols, List<FieldSchema> ptnCols) {
    // Builds a minimal managed table backed by a "file:/tmp" storage
    // descriptor with empty serde and table parameter maps.
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", new HashMap<>());
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
        serde, null, null, new HashMap<>());
    sd.setStoredAsSubDirectories(false);
    return new Table(tblName, dbName, tblOwner, 0, 0, 0, sd, ptnCols,
        new HashMap<>(), null, null, TableType.MANAGED_TABLE.toString());
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)

Example 60 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

From the class TestTablesCreateDropAlterTruncate, method testCreateTableDefaultValues.

@Test
public void testCreateTableDefaultValues() throws Exception {
    // Create a table supplying only the mandatory fields and verify that
    // every unspecified attribute comes back with its metastore default.
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("column_name", "int", null));
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(cols);
    sd.setSerdeInfo(new SerDeInfo());
    Table table = new Table();
    table.setDbName(DEFAULT_DATABASE);
    table.setTableName("test_table_2");
    table.setSd(sd);
    client.createTable(table);
    Table created = client.getTable(table.getDbName(), table.getTableName());
    // Table-level defaults
    Assert.assertNull("Comparing OwnerName", created.getOwner());
    Assert.assertNotEquals("Comparing CreateTime", 0, created.getCreateTime());
    Assert.assertEquals("Comparing LastAccessTime", 0, created.getLastAccessTime());
    Assert.assertEquals("Comparing Retention", 0, created.getRetention());
    Assert.assertEquals("Comparing PartitionKeys", 0, created.getPartitionKeys().size());
    // TODO: If this test method is the first to run, the parameters do not
    // contain totalSize/numFiles; after other tests (setUp/dropDatabase is
    // successful) they are set.
    Assert.assertEquals("Comparing Parameters length", 1, created.getParameters().size());
    Assert.assertNotEquals("Comparing Parameters(transient_lastDdlTime)", "0", created.getParameters().get("transient_lastDdlTime"));
    Assert.assertNull("Comparing ViewOriginalText", created.getViewOriginalText());
    Assert.assertNull("Comparing ViewExpandedText", created.getViewExpandedText());
    Assert.assertEquals("Comparing TableType", "MANAGED_TABLE", created.getTableType());
    Assert.assertNull("Creation metadata should be empty", created.getCreationMetadata());
    // Storage descriptor defaults
    StorageDescriptor createdSd = created.getSd();
    Assert.assertEquals("Storage descriptor cols", 1, createdSd.getCols().size());
    Assert.assertNull("Storage descriptor cols[0].comment", createdSd.getCols().get(0).getComment());
    Assert.assertEquals("Storage descriptor location", metaStore.getWarehouseRoot() + "/" + table.getTableName(), createdSd.getLocation());
    Assert.assertTrue("Table path should be created", metaStore.isPathExists(new Path(createdSd.getLocation())));
    // TODO: Embedded MetaStore changes the table object when client.createTable
    // is called, so the original table's location cannot be asserted null here.
    Assert.assertNull("Storage descriptor input format", createdSd.getInputFormat());
    Assert.assertNull("Storage descriptor output format", createdSd.getOutputFormat());
    Assert.assertFalse("Storage descriptor compressed", createdSd.isCompressed());
    Assert.assertEquals("Storage descriptor num buckets", 0, createdSd.getNumBuckets());
    Assert.assertEquals("Storage descriptor bucket cols", 0, createdSd.getBucketCols().size());
    Assert.assertEquals("Storage descriptor sort cols", 0, createdSd.getSortCols().size());
    Assert.assertEquals("Storage descriptor parameters", 0, createdSd.getParameters().size());
    Assert.assertFalse("Storage descriptor stored as subdir", createdSd.isStoredAsSubDirectories());
    // Serde defaults
    SerDeInfo serDeInfo = createdSd.getSerdeInfo();
    Assert.assertNull("SerDeInfo name", serDeInfo.getName());
    Assert.assertNull("SerDeInfo serialization lib", serDeInfo.getSerializationLib());
    Assert.assertEquals("SerDeInfo parameters", 0, serDeInfo.getParameters().size());
    // Skewed info defaults
    SkewedInfo skewedInfo = createdSd.getSkewedInfo();
    Assert.assertEquals("Skewed info col names", 0, skewedInfo.getSkewedColNames().size());
    Assert.assertEquals("Skewed info col values", 0, skewedInfo.getSkewedColValues().size());
    Assert.assertEquals("Skewed info col value maps", 0, skewedInfo.getSkewedColValueLocationMaps().size());
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.apache.hadoop.hive.metastore.api.Table) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) Test(org.junit.Test) MetastoreCheckinTest(org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)

Aggregations

SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)152 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)137 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)115 Table (org.apache.hadoop.hive.metastore.api.Table)114 ArrayList (java.util.ArrayList)112 Test (org.junit.Test)105 Partition (org.apache.hadoop.hive.metastore.api.Partition)65 HashMap (java.util.HashMap)44 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)31 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)31 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)31 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)31 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)30 List (java.util.List)26 Order (org.apache.hadoop.hive.metastore.api.Order)25 Database (org.apache.hadoop.hive.metastore.api.Database)22 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)14 MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)13 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)13 NotificationEvent (org.apache.hadoop.hive.metastore.api.NotificationEvent)12