
Example 76 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

The class CompactorTest, method newStorageDescriptor.

private StorageDescriptor newStorageDescriptor(String location, List<Order> sortCols) {
    StorageDescriptor sd = new StorageDescriptor();
    List<FieldSchema> cols = new ArrayList<FieldSchema>(2);
    cols.add(new FieldSchema("a", "varchar(25)", "still no comment"));
    cols.add(new FieldSchema("b", "int", "comment"));
    sd.setCols(cols);
    sd.setLocation(location);
    sd.setInputFormat(MockInputFormat.class.getName());
    sd.setOutputFormat(MockOutputFormat.class.getName());
    sd.setNumBuckets(1);
    SerDeInfo serde = new SerDeInfo();
    serde.setSerializationLib(LazySimpleSerDe.class.getName());
    sd.setSerdeInfo(serde);
    List<String> bucketCols = new ArrayList<String>(1);
    bucketCols.add("a");
    sd.setBucketCols(bucketCols);
    if (sortCols != null) {
        sd.setSortCols(sortCols);
    }
    return sd;
}
Also used: LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo), StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor), ArrayList(java.util.ArrayList)
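The helper above fills in only the fields the compactor tests exercise. As a minimal usage sketch, a descriptor built this way would typically be attached to a Thrift Table; the newTable method below is hypothetical, and only the standard Thrift-generated setters are assumed:

// Hypothetical sketch: attach the descriptor to a Table the way a test
// fixture might. newStorageDescriptor is the helper shown above; the
// setters are the standard Thrift-generated ones.
private Table newTable(String dbName, String tableName, String location) {
    Table t = new Table();
    t.setDbName(dbName);
    t.setTableName(tableName);
    t.setSd(newStorageDescriptor(location, null)); // null: no sort columns
    t.setParameters(new HashMap<String, String>());
    return t;
}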

Example 77 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

The class TestDbNotificationListener, method createIndex.

@Test
public void createIndex() throws Exception {
    String indexName = "createIndex";
    String dbName = "default";
    String tableName = "createIndexTable";
    String indexTableName = tableName + "__" + indexName + "__";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", ""));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    Map<String, String> params = new HashMap<String, String>();
    params.put("key", "value");
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17, serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params);
    Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
    // Event 1
    msClient.createTable(table);
    Index index = new Index(indexName, null, "default", tableName, startTime, startTime, indexTableName, sd, emptyParameters, false);
    Table indexTable = new Table(indexTableName, dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
    // Event 2, 3 (index table and index)
    msClient.createIndex(index, indexTable);
    // Get notifications from metastore
    NotificationEventResponse rsp = msClient.getNextNotification(firstEventId, 0, null);
    assertEquals(3, rsp.getEventsSize());
    NotificationEvent event = rsp.getEvents().get(2);
    assertEquals(firstEventId + 3, event.getEventId());
    assertTrue(event.getEventTime() >= startTime);
    assertEquals(EventType.CREATE_INDEX.toString(), event.getEventType());
    assertEquals(dbName, event.getDbName());
    // Parse the message field
    CreateIndexMessage createIdxMessage = md.getCreateIndexMessage(event.getMessage());
    assertEquals(dbName, createIdxMessage.getDB());
    Index indexObj = createIdxMessage.getIndexObj();
    assertEquals(dbName, indexObj.getDbName());
    assertEquals(indexName, indexObj.getIndexName());
    assertEquals(tableName, indexObj.getOrigTableName());
    assertEquals(indexTableName, indexObj.getIndexTableName());
    // When hive.metastore.transactional.event.listeners is set,
    // a failed event should not create a new notification
    DummyRawStoreFailEvent.setEventSucceed(false);
    index = new Index("createIndexTable2", null, "default", tableName, startTime, startTime, "createIndexTable2__createIndexTable2__", sd, emptyParameters, false);
    Table indexTable2 = new Table("createIndexTable2__createIndexTable2__", dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
    try {
        msClient.createIndex(index, indexTable2);
        fail("Error: create index should've failed");
    } catch (Exception ex) {
    // expected
    }
    rsp = msClient.getNextNotification(firstEventId, 0, null);
    assertEquals(3, rsp.getEventsSize());
}
Also used: Order(org.apache.hadoop.hive.metastore.api.Order), Table(org.apache.hadoop.hive.metastore.api.Table), HashMap(java.util.HashMap), LinkedHashMap(java.util.LinkedHashMap), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList(java.util.ArrayList), StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor), Index(org.apache.hadoop.hive.metastore.api.Index), NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent), NotificationEventResponse(org.apache.hadoop.hive.metastore.api.NotificationEventResponse), CreateIndexMessage(org.apache.hadoop.hive.metastore.messaging.CreateIndexMessage), Test(org.junit.Test)
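The test passes null as the third argument to getNextNotification, so every event since firstEventId comes back. A minimal sketch of narrowing that to CREATE_INDEX events, assuming the same msClient and firstEventId fixtures; the filter type is the IMetaStoreClient.NotificationFilter callback interface:

// Hypothetical sketch: fetch only CREATE_INDEX events rather than indexing
// into the full event list. The anonymous class implements the accept()
// callback that getNextNotification applies to each candidate event.
IMetaStoreClient.NotificationFilter indexOnly = new IMetaStoreClient.NotificationFilter() {
    @Override
    public boolean accept(NotificationEvent event) {
        return EventType.CREATE_INDEX.toString().equals(event.getEventType());
    }
};
NotificationEventResponse filtered = msClient.getNextNotification(firstEventId, 0, indexOnly);
for (NotificationEvent e : filtered.getEvents()) {
    assertEquals(EventType.CREATE_INDEX.toString(), e.getEventType());
}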

Example 78 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

The class HBaseUtils, method serializeStorageDescriptor.

/**
   * Serialize a storage descriptor.
   * @param sd storage descriptor to serialize
   * @return serialized storage descriptor.
   */
static byte[] serializeStorageDescriptor(StorageDescriptor sd) {
    HbaseMetastoreProto.StorageDescriptor.Builder builder = HbaseMetastoreProto.StorageDescriptor.newBuilder();
    builder.addAllCols(convertFieldSchemaListToProto(sd.getCols()));
    if (sd.getInputFormat() != null) {
        builder.setInputFormat(sd.getInputFormat());
    }
    if (sd.getOutputFormat() != null) {
        builder.setOutputFormat(sd.getOutputFormat());
    }
    builder.setIsCompressed(sd.isCompressed());
    builder.setNumBuckets(sd.getNumBuckets());
    if (sd.getSerdeInfo() != null) {
        HbaseMetastoreProto.StorageDescriptor.SerDeInfo.Builder serdeBuilder = HbaseMetastoreProto.StorageDescriptor.SerDeInfo.newBuilder();
        SerDeInfo serde = sd.getSerdeInfo();
        if (serde.getName() != null) {
            serdeBuilder.setName(serde.getName());
        }
        if (serde.getSerializationLib() != null) {
            serdeBuilder.setSerializationLib(serde.getSerializationLib());
        }
        if (serde.getParameters() != null) {
            serdeBuilder.setParameters(buildParameters(serde.getParameters()));
        }
        builder.setSerdeInfo(serdeBuilder);
    }
    if (sd.getBucketCols() != null) {
        builder.addAllBucketCols(sd.getBucketCols());
    }
    if (sd.getSortCols() != null) {
        List<Order> orders = sd.getSortCols();
        List<HbaseMetastoreProto.StorageDescriptor.Order> protoList = new ArrayList<>(orders.size());
        for (Order order : orders) {
            protoList.add(HbaseMetastoreProto.StorageDescriptor.Order.newBuilder().setColumnName(order.getCol()).setOrder(order.getOrder()).build());
        }
        builder.addAllSortCols(protoList);
    }
    if (sd.getSkewedInfo() != null) {
        HbaseMetastoreProto.StorageDescriptor.SkewedInfo.Builder skewBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.newBuilder();
        SkewedInfo skewed = sd.getSkewedInfo();
        if (skewed.getSkewedColNames() != null) {
            skewBuilder.addAllSkewedColNames(skewed.getSkewedColNames());
        }
        if (skewed.getSkewedColValues() != null) {
            for (List<String> innerList : skewed.getSkewedColValues()) {
                HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.Builder listBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.newBuilder();
                listBuilder.addAllSkewedColValue(innerList);
                skewBuilder.addSkewedColValues(listBuilder);
            }
        }
        if (skewed.getSkewedColValueLocationMaps() != null) {
            for (Map.Entry<List<String>, String> e : skewed.getSkewedColValueLocationMaps().entrySet()) {
                HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.Builder mapBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.newBuilder();
                mapBuilder.addAllKey(e.getKey());
                mapBuilder.setValue(e.getValue());
                skewBuilder.addSkewedColValueLocationMaps(mapBuilder);
            }
        }
        builder.setSkewedInfo(skewBuilder);
    }
    builder.setStoredAsSubDirectories(sd.isStoredAsSubDirectories());
    return builder.build().toByteArray();
}
Also used: Order(org.apache.hadoop.hive.metastore.api.Order), SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo), StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor), ArrayList(java.util.ArrayList), ByteString(com.google.protobuf.ByteString), SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo), List(java.util.List), Map(java.util.Map), SortedMap(java.util.SortedMap), HashMap(java.util.HashMap), TreeMap(java.util.TreeMap)
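Since the return value is an ordinary protobuf payload, the generated parser can decode it again. A minimal round-trip sketch, assuming only the standard protobuf-generated parseFrom entry point; the spot checks mirror the null guards in the serializer above:

// Hypothetical sketch: decode the serialized bytes and spot-check fields.
// parseFrom is the standard entry point on every generated protobuf class.
static void roundTripCheck(StorageDescriptor sd)
        throws com.google.protobuf.InvalidProtocolBufferException {
    byte[] bytes = serializeStorageDescriptor(sd);
    HbaseMetastoreProto.StorageDescriptor proto =
        HbaseMetastoreProto.StorageDescriptor.parseFrom(bytes);
    if (sd.getInputFormat() != null) {
        assert sd.getInputFormat().equals(proto.getInputFormat());
    }
    assert sd.getNumBuckets() == proto.getNumBuckets();
    assert sd.isCompressed() == proto.getIsCompressed();
}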

Example 79 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

The class TestHBaseStoreCached, method getPartitions.

@Test
public void getPartitions() throws Exception {
    String dbName = "default";
    String tableName = "manyParts";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, serde, null, null, emptyParameters);
    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
    partCols.add(new FieldSchema("pc", "string", ""));
    Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, emptyParameters, null, null, null);
    store.createTable(table);
    List<String> partVals = Arrays.asList("alan", "bob", "carl", "doug", "ethan");
    for (String val : partVals) {
        List<String> vals = new ArrayList<String>();
        vals.add(val);
        StorageDescriptor psd = new StorageDescriptor(sd);
        psd.setLocation("file:/tmp/pc=" + val);
        Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, emptyParameters);
        store.addPartition(part);
        Partition p = store.getPartition(dbName, tableName, vals);
        Assert.assertEquals("file:/tmp/pc=" + val, p.getSd().getLocation());
    }
    List<Partition> parts = store.getPartitions(dbName, tableName, -1);
    Assert.assertEquals(5, parts.size());
    String[] pv = new String[5];
    for (int i = 0; i < 5; i++) pv[i] = parts.get(i).getValues().get(0);
    Arrays.sort(pv);
    Assert.assertArrayEquals(pv, partVals.toArray(new String[5]));
}
Also used: Partition(org.apache.hadoop.hive.metastore.api.Partition), Table(org.apache.hadoop.hive.metastore.api.Table), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList(java.util.ArrayList), StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor), Test(org.junit.Test)
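The StorageDescriptor copy constructor is what lets each partition carry its own location while sharing everything else with the table-level descriptor. A sketch of the same pattern for a two-column partition key; pc2 and its value are illustrative, and the table would need a matching second partition column:

// Hypothetical sketch: per-partition descriptor for a two-column key.
// The Thrift copy constructor clones the table-level SD, so only the
// location needs to change per partition.
List<String> vals = Arrays.asList("alan", "2017");
StorageDescriptor psd = new StorageDescriptor(sd);
psd.setLocation("file:/tmp/pc=alan/pc2=2017");
Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, emptyParameters);
store.addPartition(part);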

Example 80 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

The class TestHBaseStoreCached, method booleanTableStatistics.

// Due to the way our mock stuff works, we can only insert one column at a time, so we'll test
// each stat type separately. We'll test them together in the integration tests.
@Test
public void booleanTableStatistics() throws Exception {
    long now = System.currentTimeMillis();
    String dbname = "default";
    String tableName = "statstable";
    String boolcol = "boolcol";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema(boolcol, "boolean", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, serde, null, null, emptyParameters);
    Table table = new Table(tableName, dbname, "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
    store.createTable(table);
    long trues = 37;
    long falses = 12;
    long booleanNulls = 2;
    ColumnStatistics stats = new ColumnStatistics();
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
    desc.setLastAnalyzed(now);
    desc.setDbName(dbname);
    desc.setTableName(tableName);
    desc.setIsTblLevel(true);
    stats.setStatsDesc(desc);
    ColumnStatisticsObj obj = new ColumnStatisticsObj();
    obj.setColName(boolcol);
    obj.setColType("boolean");
    ColumnStatisticsData data = new ColumnStatisticsData();
    BooleanColumnStatsData boolData = new BooleanColumnStatsData();
    boolData.setNumTrues(trues);
    boolData.setNumFalses(falses);
    boolData.setNumNulls(booleanNulls);
    data.setBooleanStats(boolData);
    obj.setStatsData(data);
    stats.addToStatsObj(obj);
    store.updateTableColumnStatistics(stats);
    stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(boolcol));
    Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(dbname, stats.getStatsDesc().getDbName());
    Assert.assertEquals(tableName, stats.getStatsDesc().getTableName());
    Assert.assertTrue(stats.getStatsDesc().isIsTblLevel());
    Assert.assertEquals(1, stats.getStatsObjSize());
    // Read the data back from the fetched statistics, not the object we wrote.
    ColumnStatisticsData colData = stats.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, colData.getSetField());
    boolData = colData.getBooleanStats();
    Assert.assertEquals(trues, boolData.getNumTrues());
    Assert.assertEquals(falses, boolData.getNumFalses());
    Assert.assertEquals(booleanNulls, boolData.getNumNulls());
}
Also used: ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics), BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData), Table(org.apache.hadoop.hive.metastore.api.Table), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList(java.util.ArrayList), StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor), ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj), ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc), ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData), Test(org.junit.Test)
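The other stat types the leading comment alludes to follow the same shape; only the branch of the ColumnStatisticsData union changes. A sketch for a bigint column, where longcol and the sample values are illustrative; LongColumnStatsData is the Thrift type that appears in the aggregations below:

// Hypothetical sketch: the long-column variant of the same stats payload.
// A fresh ColumnStatistics/ColumnStatisticsDesc pair would be built exactly
// as above; only the union branch set on ColumnStatisticsData differs.
ColumnStatisticsObj longObj = new ColumnStatisticsObj();
longObj.setColName("longcol");
longObj.setColType("bigint");
LongColumnStatsData longData = new LongColumnStatsData();
longData.setLowValue(-42);
longData.setHighValue(9001);
longData.setNumNulls(3);
longData.setNumDVs(17);
ColumnStatisticsData longWrapper = new ColumnStatisticsData();
longWrapper.setLongStats(longData);
longObj.setStatsData(longWrapper);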

Aggregations

SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 152 uses
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 137 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 115 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 114 uses
ArrayList (java.util.ArrayList): 112 uses
Test (org.junit.Test): 105 uses
Partition (org.apache.hadoop.hive.metastore.api.Partition): 65 uses
HashMap (java.util.HashMap): 44 uses
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 31 uses
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 31 uses
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 31 uses
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 31 uses
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats): 30 uses
List (java.util.List): 26 uses
Order (org.apache.hadoop.hive.metastore.api.Order): 25 uses
Database (org.apache.hadoop.hive.metastore.api.Database): 22 uses
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 14 uses
MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest): 13 uses
LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 13 uses
NotificationEvent (org.apache.hadoop.hive.metastore.api.NotificationEvent): 12 uses