Example 46 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

From the class TestHBaseStore, method skewInfo.

@Test
public void skewInfo() throws Exception {
    String tableName = "mytable";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", ""));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", true, 0, serde, null, null, emptyParameters);
    Map<List<String>, String> map = new HashMap<List<String>, String>();
    map.put(Arrays.asList("col3"), "col4");
    SkewedInfo skew = new SkewedInfo(Arrays.asList("col1"), Arrays.asList(Arrays.asList("col2")), map);
    sd.setSkewedInfo(skew);
    Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
    store.createTable(table);
    Table t = store.getTable("default", tableName);
    Assert.assertEquals(1, t.getSd().getColsSize());
    Assert.assertEquals("col1", t.getSd().getCols().get(0).getName());
    Assert.assertEquals("int", t.getSd().getCols().get(0).getType());
    Assert.assertEquals("", t.getSd().getCols().get(0).getComment());
    Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName());
    Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib());
    Assert.assertEquals("file:/tmp", t.getSd().getLocation());
    Assert.assertEquals("input", t.getSd().getInputFormat());
    Assert.assertEquals("output", t.getSd().getOutputFormat());
    Assert.assertTrue(t.getSd().isCompressed());
    Assert.assertEquals(0, t.getSd().getNumBuckets());
    Assert.assertEquals(0, t.getSd().getSortColsSize());
    Assert.assertEquals("me", t.getOwner());
    Assert.assertEquals("default", t.getDbName());
    Assert.assertEquals(tableName, t.getTableName());
    Assert.assertEquals(0, t.getParametersSize());
    skew = t.getSd().getSkewedInfo();
    Assert.assertNotNull(skew);
    Assert.assertEquals(1, skew.getSkewedColNamesSize());
    Assert.assertEquals("col1", skew.getSkewedColNames().get(0));
    Assert.assertEquals(1, skew.getSkewedColValuesSize());
    Assert.assertEquals("col2", skew.getSkewedColValues().get(0).get(0));
    Assert.assertEquals(1, skew.getSkewedColValueLocationMapsSize());
    Assert.assertEquals("col4", skew.getSkewedColValueLocationMaps().get(Arrays.asList("col3")));
}
Also used: Table (org.apache.hadoop.hive.metastore.api.Table), SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo), HashMap (java.util.HashMap), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), List (java.util.List), Test (org.junit.Test)
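
The round trip above also shows the shape of the SkewedInfo struct: its location map is keyed by a List<String> of skewed column values, not by a column name, so lookups rely on element-wise List equality. A minimal standalone sketch of that keying, with illustrative column names and values, assuming only the metastore API classes already used above:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;

public class SkewedInfoSketch {
    public static void main(String[] args) {
        // The location map is keyed by the tuple of skewed column values.
        Map<List<String>, String> valueToLocation = new HashMap<List<String>, String>();
        valueToLocation.put(Arrays.asList("1"), "file:/tmp/skew/part=1");
        SkewedInfo skew = new SkewedInfo(Arrays.asList("part"), // skewed column names
            Arrays.asList(Arrays.asList("1")), // one value tuple per skewed value
            valueToLocation); // value tuple -> directory location
        // The lookup succeeds because List equals/hashCode compare element-wise.
        System.out.println(skew.getSkewedColValueLocationMaps().get(Arrays.asList("1")));
    }
}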

Example 47 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

From the class TestHBaseStore, method createMultiColumnTable.

private Table createMultiColumnTable(String tblName, String... types) throws Exception {
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    for (int i = 0; i < types.length; i++) cols.add(new FieldSchema("col" + i, types[i], ""));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    Map<String, String> params = new HashMap<String, String>();
    params.put("key", "value");
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17, serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params);
    int currentTime = (int) (System.currentTimeMillis() / 1000);
    Table table = new Table(tblName, DB, "me", currentTime, currentTime, 0, sd, cols, emptyParameters, null, null, null);
    store.createTable(table);
    return table;
}
Also used: Order (org.apache.hadoop.hive.metastore.api.Order), Table (org.apache.hadoop.hive.metastore.api.Table), HashMap (java.util.HashMap), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)
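
As a hedged usage note, this helper is meant to be called from statistics tests with one Hive type per column; the table name and types below are illustrative, and the assertions follow directly from how the helper builds its columns and StorageDescriptor:

    // Hypothetical call inside a TestHBaseStore test method.
    Table statsTable = createMultiColumnTable("stats_table", "boolean", "long", "double");
    // Columns are generated as "col0", "col1", ... with the given types.
    Assert.assertEquals(3, statsTable.getSd().getColsSize());
    Assert.assertEquals("col1", statsTable.getSd().getCols().get(1).getName());
    Assert.assertEquals(17, statsTable.getSd().getNumBuckets());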

Example 48 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project incubator-atlas by apache.

From the class HiveMetaStoreBridge, method fillStorageDesc.

public Referenceable fillStorageDesc(StorageDescriptor storageDesc, String tableQualifiedName, String sdQualifiedName, Id tableId) throws AtlasHookException {
    LOG.debug("Filling storage descriptor information for {}", storageDesc);
    Referenceable sdReferenceable = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
    sdReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, sdQualifiedName);
    SerDeInfo serdeInfo = storageDesc.getSerdeInfo();
    LOG.debug("serdeInfo = {}", serdeInfo);
    // SkewedInfo skewedInfo = storageDesc.getSkewedInfo();
    String serdeInfoName = HiveDataTypes.HIVE_SERDE.getName();
    Struct serdeInfoStruct = new Struct(serdeInfoName);
    serdeInfoStruct.set(AtlasClient.NAME, serdeInfo.getName());
    serdeInfoStruct.set("serializationLib", serdeInfo.getSerializationLib());
    serdeInfoStruct.set(PARAMETERS, serdeInfo.getParameters());
    sdReferenceable.set("serdeInfo", serdeInfoStruct);
    sdReferenceable.set(STORAGE_NUM_BUCKETS, storageDesc.getNumBuckets());
    sdReferenceable.set(STORAGE_IS_STORED_AS_SUB_DIRS, storageDesc.isStoredAsSubDirectories());
    List<Struct> sortColsStruct = new ArrayList<>();
    for (Order sortcol : storageDesc.getSortCols()) {
        String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName();
        Struct colStruct = new Struct(hiveOrderName);
        colStruct.set("col", sortcol.getCol());
        colStruct.set("order", sortcol.getOrder());
        sortColsStruct.add(colStruct);
    }
    if (sortColsStruct.size() > 0) {
        sdReferenceable.set("sortCols", sortColsStruct);
    }
    sdReferenceable.set(LOCATION, storageDesc.getLocation());
    sdReferenceable.set("inputFormat", storageDesc.getInputFormat());
    sdReferenceable.set("outputFormat", storageDesc.getOutputFormat());
    sdReferenceable.set("compressed", storageDesc.isCompressed());
    if (storageDesc.getBucketCols().size() > 0) {
        sdReferenceable.set("bucketCols", storageDesc.getBucketCols());
    }
    sdReferenceable.set(PARAMETERS, storageDesc.getParameters());
    sdReferenceable.set("storedAsSubDirectories", storageDesc.isStoredAsSubDirectories());
    sdReferenceable.set(TABLE, tableId);
    return sdReferenceable;
}
Also used: Order (org.apache.hadoop.hive.metastore.api.Order), Referenceable (org.apache.atlas.typesystem.Referenceable), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), Struct (org.apache.atlas.typesystem.Struct)
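
A hedged sketch of a caller: the bridge instance, the Hive Table, the qualified-name convention, and the Id constructor form below are all assumptions for illustration, not taken from the source:

    // Hypothetical invocation; hiveTable is an org.apache.hadoop.hive.metastore.api.Table
    // and bridge is a HiveMetaStoreBridge instance.
    String tableQualifiedName = "mydb.mytable@primarycluster"; // naming convention assumed
    String sdQualifiedName = tableQualifiedName + "_storage"; // naming convention assumed
    Id tableId = new Id("tableGuid", 0, HiveDataTypes.HIVE_TABLE.getName()); // constructor form assumed
    Referenceable sdRef = bridge.fillStorageDesc(hiveTable.getSd(), tableQualifiedName, sdQualifiedName, tableId);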

Example 49 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project metacat by Netflix.

From the class HiveConnectorInfoConverter, method fromStorageInfo.

private StorageDescriptor fromStorageInfo(final StorageInfo storageInfo, final List<FieldSchema> cols) {
    if (storageInfo == null) {
        return new StorageDescriptor(Collections.emptyList(), "", null, null, false, 0, new SerDeInfo("", null, new HashMap<>()), Collections.emptyList(), Collections.emptyList(), new HashMap<>());
    }
    // Set all required fields to a non-null value
    final String inputFormat = storageInfo.getInputFormat();
    final String location = notNull(storageInfo.getUri()) ? storageInfo.getUri() : "";
    final String outputFormat = storageInfo.getOutputFormat();
    final Map<String, String> sdParams = notNull(storageInfo.getParameters()) ? storageInfo.getParameters() : new HashMap<>();
    final Map<String, String> serdeParams = notNull(storageInfo.getSerdeInfoParameters()) ? storageInfo.getSerdeInfoParameters() : new HashMap<>();
    final String serializationLib = storageInfo.getSerializationLib();
    return new StorageDescriptor(cols, location, inputFormat, outputFormat, false, 0, new SerDeInfo("", serializationLib, serdeParams), Collections.emptyList(), Collections.emptyList(), sdParams);
}
Also used: HashMap (java.util.HashMap), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)
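
The notNull(...) predicate used above is not part of this excerpt; a minimal sketch consistent with how it is called would be:

    // Minimal sketch of the notNull predicate assumed by fromStorageInfo;
    // the actual Metacat helper may differ.
    private boolean notNull(final Object object) {
        return object != null;
    }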

Example 50 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

From the class TestDbNotificationListener, method exchangePartition.

@Test
public void exchangePartition() throws Exception {
    String dbName = "default";
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", "nocomment"));
    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
    partCols.add(new FieldSchema("part", "int", ""));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd1 = new StorageDescriptor(cols, "file:/tmp/1", "input", "output", false, 0, serde, null, null, emptyParameters);
    Table tab1 = new Table("tab1", dbName, "me", startTime, startTime, 0, sd1, partCols, emptyParameters, null, null, null);
    msClient.createTable(tab1);
    NotificationEventResponse rsp = msClient.getNextNotification(firstEventId, 0, null);
    // add_table
    assertEquals(1, rsp.getEventsSize());
    StorageDescriptor sd2 = new StorageDescriptor(cols, "file:/tmp/2", "input", "output", false, 0, serde, null, null, emptyParameters);
    Table tab2 = new Table("tab2", dbName, "me", startTime, startTime, 0, sd2, partCols, emptyParameters, null, null, null);
    msClient.createTable(tab2);
    rsp = msClient.getNextNotification(firstEventId + 1, 0, null);
    // add_table
    assertEquals(1, rsp.getEventsSize());
    StorageDescriptor sd1part = new StorageDescriptor(cols, "file:/tmp/1/part=1", "input", "output", false, 0, serde, null, null, emptyParameters);
    StorageDescriptor sd2part = new StorageDescriptor(cols, "file:/tmp/1/part=2", "input", "output", false, 0, serde, null, null, emptyParameters);
    StorageDescriptor sd3part = new StorageDescriptor(cols, "file:/tmp/1/part=3", "input", "output", false, 0, serde, null, null, emptyParameters);
    Partition part1 = new Partition(Arrays.asList("1"), "default", tab1.getTableName(), startTime, startTime, sd1part, emptyParameters);
    Partition part2 = new Partition(Arrays.asList("2"), "default", tab1.getTableName(), startTime, startTime, sd2part, emptyParameters);
    Partition part3 = new Partition(Arrays.asList("3"), "default", tab1.getTableName(), startTime, startTime, sd3part, emptyParameters);
    msClient.add_partitions(Arrays.asList(part1, part2, part3));
    rsp = msClient.getNextNotification(firstEventId + 2, 0, null);
    // add_partition
    assertEquals(1, rsp.getEventsSize());
    msClient.exchange_partition(ImmutableMap.of("part", "1"), dbName, tab1.getTableName(), dbName, tab2.getTableName());
    rsp = msClient.getNextNotification(firstEventId + 3, 0, null);
    assertEquals(2, rsp.getEventsSize());
    NotificationEvent event = rsp.getEvents().get(0);
    assertEquals(firstEventId + 4, event.getEventId());
    assertTrue(event.getEventTime() >= startTime);
    assertEquals(EventType.ADD_PARTITION.toString(), event.getEventType());
    assertEquals(dbName, event.getDbName());
    assertEquals(tab2.getTableName(), event.getTableName());
    // Parse the message field
    AddPartitionMessage addPtnMsg = md.getAddPartitionMessage(event.getMessage());
    assertEquals(dbName, addPtnMsg.getDB());
    assertEquals(tab2.getTableName(), addPtnMsg.getTable());
    Iterator<Partition> ptnIter = addPtnMsg.getPartitionObjs().iterator();
    assertEquals(TableType.MANAGED_TABLE.toString(), addPtnMsg.getTableType());
    assertTrue(ptnIter.hasNext());
    Partition msgPart = ptnIter.next();
    assertEquals(part1.getValues(), msgPart.getValues());
    assertEquals(dbName, msgPart.getDbName());
    assertEquals(tab2.getTableName(), msgPart.getTableName());
    event = rsp.getEvents().get(1);
    assertEquals(firstEventId + 5, event.getEventId());
    assertTrue(event.getEventTime() >= startTime);
    assertEquals(EventType.DROP_PARTITION.toString(), event.getEventType());
    assertEquals(dbName, event.getDbName());
    assertEquals(tab1.getTableName(), event.getTableName());
    // Parse the message field
    DropPartitionMessage dropPtnMsg = md.getDropPartitionMessage(event.getMessage());
    assertEquals(dbName, dropPtnMsg.getDB());
    assertEquals(tab1.getTableName(), dropPtnMsg.getTable());
    assertEquals(TableType.MANAGED_TABLE.toString(), dropPtnMsg.getTableType());
    Iterator<Map<String, String>> parts = dropPtnMsg.getPartitions().iterator();
    assertTrue(parts.hasNext());
    assertEquals(part1.getValues(), Lists.newArrayList(parts.next().values()));
    // Verify the eventID was passed to the non-transactional listener
    MockMetaStoreEventListener.popAndVerifyLastEventId(EventType.DROP_PARTITION, firstEventId + 5);
    MockMetaStoreEventListener.popAndVerifyLastEventId(EventType.ADD_PARTITION, firstEventId + 4);
    MockMetaStoreEventListener.popAndVerifyLastEventId(EventType.ADD_PARTITION, firstEventId + 3);
    MockMetaStoreEventListener.popAndVerifyLastEventId(EventType.CREATE_TABLE, firstEventId + 2);
    MockMetaStoreEventListener.popAndVerifyLastEventId(EventType.CREATE_TABLE, firstEventId + 1);
}
Also used: Partition (org.apache.hadoop.hive.metastore.api.Partition), Table (org.apache.hadoop.hive.metastore.api.Table), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), NotificationEvent (org.apache.hadoop.hive.metastore.api.NotificationEvent), NotificationEventResponse (org.apache.hadoop.hive.metastore.api.NotificationEventResponse), DropPartitionMessage (org.apache.hadoop.hive.metastore.messaging.DropPartitionMessage), AddPartitionMessage (org.apache.hadoop.hive.metastore.messaging.AddPartitionMessage), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), HashMap (java.util.HashMap), Test (org.junit.Test)
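
As a usage note, the same polling API can drain every event the test produced in one call; msClient and firstEventId are assumed from the test fixture above:

    // Fetch events after firstEventId; 0 is passed for maxEvents and null for
    // the NotificationFilter, matching the calls in the test above.
    NotificationEventResponse all = msClient.getNextNotification(firstEventId, 0, null);
    for (NotificationEvent e : all.getEvents()) {
        System.out.println(e.getEventId() + " " + e.getEventType() + " " + e.getDbName() + "." + e.getTableName());
    }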

Aggregations

SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 152 usages
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 137 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 115 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 114 usages
ArrayList (java.util.ArrayList): 112 usages
Test (org.junit.Test): 105 usages
Partition (org.apache.hadoop.hive.metastore.api.Partition): 65 usages
HashMap (java.util.HashMap): 44 usages
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 31 usages
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 31 usages
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 31 usages
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 31 usages
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats): 30 usages
List (java.util.List): 26 usages
Order (org.apache.hadoop.hive.metastore.api.Order): 25 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 22 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 14 usages
MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest): 13 usages
LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 13 usages
NotificationEvent (org.apache.hadoop.hive.metastore.api.NotificationEvent): 12 usages