Search in sources :

Example 6 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

the class HBaseUtils method hashStorageDescriptor.

/**
   * Produce a hash for the storage descriptor
   * @param sd storage descriptor to hash
   * @param md message descriptor to use to generate the hash
   * @return the hash as a byte array
   */
static byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) {
    // Note all maps and lists have to be absolutely sorted.  Otherwise we'll produce different
    // results for hashes based on the OS or JVM being used.
    md.reset();
    for (FieldSchema fs : sd.getCols()) {
        md.update(fs.getName().getBytes(ENCODING));
        md.update(fs.getType().getBytes(ENCODING));
        if (fs.getComment() != null)
            md.update(fs.getComment().getBytes(ENCODING));
    }
    if (sd.getInputFormat() != null) {
        md.update(sd.getInputFormat().getBytes(ENCODING));
    }
    if (sd.getOutputFormat() != null) {
        md.update(sd.getOutputFormat().getBytes(ENCODING));
    }
    md.update(sd.isCompressed() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING));
    md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING));
    if (sd.getSerdeInfo() != null) {
        SerDeInfo serde = sd.getSerdeInfo();
        if (serde.getName() != null) {
            md.update(serde.getName().getBytes(ENCODING));
        }
        if (serde.getSerializationLib() != null) {
            md.update(serde.getSerializationLib().getBytes(ENCODING));
        }
        if (serde.getParameters() != null) {
            SortedMap<String, String> params = new TreeMap<>(serde.getParameters());
            for (Map.Entry<String, String> param : params.entrySet()) {
                md.update(param.getKey().getBytes(ENCODING));
                md.update(param.getValue().getBytes(ENCODING));
            }
        }
    }
    if (sd.getBucketCols() != null) {
        SortedSet<String> bucketCols = new TreeSet<>(sd.getBucketCols());
        for (String bucket : bucketCols) md.update(bucket.getBytes(ENCODING));
    }
    if (sd.getSortCols() != null) {
        SortedSet<Order> orders = new TreeSet<>(sd.getSortCols());
        for (Order order : orders) {
            md.update(order.getCol().getBytes(ENCODING));
            md.update(Integer.toString(order.getOrder()).getBytes(ENCODING));
        }
    }
    if (sd.getSkewedInfo() != null) {
        SkewedInfo skewed = sd.getSkewedInfo();
        if (skewed.getSkewedColNames() != null) {
            SortedSet<String> colnames = new TreeSet<>(skewed.getSkewedColNames());
            for (String colname : colnames) md.update(colname.getBytes(ENCODING));
        }
        if (skewed.getSkewedColValues() != null) {
            SortedSet<String> sortedOuterList = new TreeSet<>();
            for (List<String> innerList : skewed.getSkewedColValues()) {
                SortedSet<String> sortedInnerList = new TreeSet<>(innerList);
                sortedOuterList.add(StringUtils.join(sortedInnerList, "."));
            }
            for (String colval : sortedOuterList) md.update(colval.getBytes(ENCODING));
        }
        if (skewed.getSkewedColValueLocationMaps() != null) {
            SortedMap<String, String> sortedMap = new TreeMap<>();
            for (Map.Entry<List<String>, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) {
                SortedSet<String> sortedKey = new TreeSet<>(smap.getKey());
                sortedMap.put(StringUtils.join(sortedKey, "."), smap.getValue());
            }
            for (Map.Entry<String, String> e : sortedMap.entrySet()) {
                md.update(e.getKey().getBytes(ENCODING));
                md.update(e.getValue().getBytes(ENCODING));
            }
        }
    }
    return md.digest();
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ByteString(com.google.protobuf.ByteString) TreeMap(java.util.TreeMap) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) TreeSet(java.util.TreeSet) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Example 7 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

the class TestHBaseStoreBitVector method createMockTable.

private Table createMockTable(String name, String type) throws Exception {
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema(name, type, ""));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    Map<String, String> params = new HashMap<String, String>();
    params.put("key", "value");
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17, serde, new ArrayList<String>(), new ArrayList<Order>(), params);
    int currentTime = (int) (System.currentTimeMillis() / 1000);
    Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols, emptyParameters, null, null, null);
    store.createTable(table);
    return table;
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)

Example 8 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

the class TestHBaseStoreCached method createTable.

@Test
public void createTable() throws Exception {
    String tableName = "mytable";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, serde, null, null, emptyParameters);
    Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
    store.createTable(table);
    Table t = store.getTable("default", tableName);
    Assert.assertEquals(1, t.getSd().getColsSize());
    Assert.assertEquals("col1", t.getSd().getCols().get(0).getName());
    Assert.assertEquals("int", t.getSd().getCols().get(0).getType());
    Assert.assertEquals("nocomment", t.getSd().getCols().get(0).getComment());
    Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName());
    Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib());
    Assert.assertEquals("file:/tmp", t.getSd().getLocation());
    Assert.assertEquals("input", t.getSd().getInputFormat());
    Assert.assertEquals("output", t.getSd().getOutputFormat());
    Assert.assertEquals("me", t.getOwner());
    Assert.assertEquals("default", t.getDbName());
    Assert.assertEquals(tableName, t.getTableName());
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Test(org.junit.Test)

Example 9 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

the class TestHBaseStoreCached method createPartition.

@Test
public void createPartition() throws Exception {
    String dbName = "default";
    String tableName = "myparttable";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, serde, null, null, emptyParameters);
    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
    partCols.add(new FieldSchema("pc", "string", ""));
    Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, emptyParameters, null, null, null);
    store.createTable(table);
    List<String> vals = Arrays.asList("fred");
    StorageDescriptor psd = new StorageDescriptor(sd);
    psd.setLocation("file:/tmp/pc=fred");
    Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, emptyParameters);
    store.addPartition(part);
    Partition p = store.getPartition(dbName, tableName, vals);
    Assert.assertEquals(1, p.getSd().getColsSize());
    Assert.assertEquals("col1", p.getSd().getCols().get(0).getName());
    Assert.assertEquals("int", p.getSd().getCols().get(0).getType());
    Assert.assertEquals("nocomment", p.getSd().getCols().get(0).getComment());
    Assert.assertEquals("serde", p.getSd().getSerdeInfo().getName());
    Assert.assertEquals("seriallib", p.getSd().getSerdeInfo().getSerializationLib());
    Assert.assertEquals("file:/tmp/pc=fred", p.getSd().getLocation());
    Assert.assertEquals("input", p.getSd().getInputFormat());
    Assert.assertEquals("output", p.getSd().getOutputFormat());
    Assert.assertEquals(dbName, p.getDbName());
    Assert.assertEquals(tableName, p.getTableName());
    Assert.assertEquals(1, p.getValuesSize());
    Assert.assertEquals("fred", p.getValues().get(0));
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Test(org.junit.Test)

Example 10 with SerDeInfo

use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

the class TestHBaseStoreCached method dropPartition.

@Test
public void dropPartition() throws Exception {
    String dbName = "default";
    String tableName = "myparttable2";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, serde, null, null, emptyParameters);
    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
    partCols.add(new FieldSchema("pc", "string", ""));
    Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, emptyParameters, null, null, null);
    store.createTable(table);
    List<String> vals = Arrays.asList("fred");
    StorageDescriptor psd = new StorageDescriptor(sd);
    psd.setLocation("file:/tmp/pc=fred");
    Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, emptyParameters);
    store.addPartition(part);
    Assert.assertNotNull(store.getPartition(dbName, tableName, vals));
    store.dropPartition(dbName, tableName, vals);
    thrown.expect(NoSuchObjectException.class);
    store.getPartition(dbName, tableName, vals);
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Test(org.junit.Test)

Aggregations

SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)185 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)162 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)126 Table (org.apache.hadoop.hive.metastore.api.Table)124 ArrayList (java.util.ArrayList)121 Test (org.junit.Test)110 Partition (org.apache.hadoop.hive.metastore.api.Partition)65 HashMap (java.util.HashMap)51 Order (org.apache.hadoop.hive.metastore.api.Order)32 List (java.util.List)30 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)28 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)28 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)28 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)28 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)27 Database (org.apache.hadoop.hive.metastore.api.Database)22 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)22 SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo)17 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)13 Path (org.apache.hadoop.fs.Path)12