Example 61 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

In the class TestTablesCreateDropAlterTruncate, method testCreateTableDefaultLocationInSpecificDatabase.

@Test
public void testCreateTableDefaultLocationInSpecificDatabase() throws Exception {
    Table table = new Table();
    StorageDescriptor sd = new StorageDescriptor();
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    table.setDbName(OTHER_DATABASE);
    table.setTableName("test_table_2");
    cols.add(new FieldSchema("column_name", "int", null));
    sd.setCols(cols);
    sd.setSerdeInfo(new SerDeInfo());
    table.setSd(sd);
    client.createTable(table);
    Table createdTable = client.getTable(table.getDbName(), table.getTableName());
    Assert.assertEquals("Storage descriptor location", metaStore.getWarehouseRoot() + "/" + table.getDbName() + ".db/" + table.getTableName(), createdTable.getSd().getLocation());
}
Also used: Table (org.apache.hadoop.hive.metastore.api.Table), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), ArrayList (java.util.ArrayList), Test (org.junit.Test), MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)
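What the test demonstrates: the metastore accepts a table once the database name, table name, and a StorageDescriptor with non-null columns and SerDeInfo are set, and it derives the location server-side as <warehouse-root>/<db>.db/<table>. A minimal sketch of that setup as a reusable helper (the method name minimalTable is ours, not Hive's):

import java.util.Collections;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;

// Hypothetical helper: the smallest Table the metastore accepts. The
// location is deliberately left unset so the server fills in the
// warehouse default, as asserted in the test above.
static Table minimalTable(String db, String name) {
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(Collections.singletonList(new FieldSchema("column_name", "int", null)));
    sd.setSerdeInfo(new SerDeInfo()); // an empty SerDeInfo is enough for creation
    Table table = new Table();
    table.setDbName(db);
    table.setTableName(name);
    table.setSd(sd);
    return table;
}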

Example 62 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

In the class TestHCatUtil, method testGetTableSchemaWithPtnColsSerDeReportedFields.

/**
 * Hive represents tables in two ways:
 * <ul>
 *   <li>org.apache.hadoop.hive.metastore.api.Table - exactly what is stored in the metastore</li>
 *   <li>org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table</li>
 * </ul>
 * Here we check that SerDe-reported fields are included in the table schema.
 */
@Test
public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException {
    Map<String, String> parameters = Maps.newHashMap();
    parameters.put(serdeConstants.SERIALIZATION_CLASS, "org.apache.hadoop.hive.serde2.thrift.test.IntString");
    parameters.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol");
    SerDeInfo serDeInfo = new SerDeInfo(null, "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", parameters);
    // StorageDescriptor has an empty list of fields - SerDe will report them.
    StorageDescriptor sd = new StorageDescriptor(new ArrayList<FieldSchema>(), "location", "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", false, -1, serDeInfo, new ArrayList<String>(), new ArrayList<Order>(), new HashMap<String, String>());
    org.apache.hadoop.hive.metastore.api.Table apiTable = new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", 0, 0, 0, sd, new ArrayList<FieldSchema>(), new HashMap<String, String>(), "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name());
    Table table = new Table(apiTable);
    List<HCatFieldSchema> expectedHCatSchema = Lists.newArrayList(new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null), new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null), new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null));
    Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
}
Also used: Order (org.apache.hadoop.hive.metastore.api.Order), Table (org.apache.hadoop.hive.ql.metadata.Table), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), Test (org.junit.Test)
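Note that the expected columns (myint, mystring, underscore_int) are stored nowhere in the metastore; they are the fields of the generated IntString Thrift class, which ThriftDeserializer reports at read time. A condensed sketch of that self-describing SerDe setup (the helper name buildThriftSerDeInfo is ours; the constants come from org.apache.hadoop.hive.serde.serdeConstants, as in the test):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.serde.serdeConstants;

// Sketch: a SerDeInfo whose schema is self-describing. The deserializer
// derives the columns from the Thrift class named by SERIALIZATION_CLASS,
// so the StorageDescriptor can carry an empty column list.
static SerDeInfo buildThriftSerDeInfo() {
    Map<String, String> params = new HashMap<>();
    params.put(serdeConstants.SERIALIZATION_CLASS,
        "org.apache.hadoop.hive.serde2.thrift.test.IntString");
    params.put(serdeConstants.SERIALIZATION_FORMAT,
        "org.apache.thrift.protocol.TBinaryProtocol");
    // Constructor arguments: name (optional), serialization library, parameters.
    return new SerDeInfo(null,
        "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", params);
}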

Example 63 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

In the class TestHCatOutputFormat, method initTable.

private void initTable() throws Exception {
    try {
        client.dropTable(dbName, tblName);
    } catch (Exception e) {
        // ignore: the table may not exist yet
    }
    try {
        client.dropDatabase(dbName);
    } catch (Exception e) {
        // ignore: the database may not exist yet
    }
    client.createDatabase(new Database(dbName, "", null, null));
    assertNotNull(client.getDatabase(dbName).getLocationUri());
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, ""));
    Table tbl = new Table();
    tbl.setDbName(dbName);
    tbl.setTableName(tblName);
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(Lists.newArrayList(new FieldSchema("data_column", serdeConstants.STRING_TYPE_NAME, "")));
    tbl.setSd(sd);
    sd.setInputFormat(RCFileInputFormat.class.getName());
    sd.setOutputFormat(RCFileOutputFormat.class.getName());
    sd.setParameters(new HashMap<String, String>());
    sd.getParameters().put("test_param_1", "Use this for comments etc");
    // sd.setBucketCols(new ArrayList<String>(2));
    // sd.getBucketCols().add("name");
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    sd.getSerdeInfo().setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    tbl.setPartitionKeys(fields);
    Map<String, String> tableParams = new HashMap<String, String>();
    tableParams.put("hcat.testarg", "testArgValue");
    tbl.setParameters(tableParams);
    client.createTable(tbl);
    Path tblPath = new Path(client.getTable(dbName, tblName).getSd().getLocation());
    assertTrue(tblPath.getFileSystem(hiveConf).mkdirs(new Path(tblPath, "colname=p1")));
}
Also used: Path (org.apache.hadoop.fs.Path), RCFileOutputFormat (org.apache.hadoop.hive.ql.io.RCFileOutputFormat), Table (org.apache.hadoop.hive.metastore.api.Table), HashMap (java.util.HashMap), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), RCFileInputFormat (org.apache.hadoop.hive.ql.io.RCFileInputFormat), Database (org.apache.hadoop.hive.metastore.api.Database)
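The SerDeInfo wiring in the middle of initTable is the stock LazySimpleSerDe setup that recurs across Hive tests; a condensed sketch with the same values (the helper name lazySimpleSerDeInfo is ours; to our understanding, serialization.format "1" selects the default ctrl-A field delimiter):

import java.util.HashMap;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.serde.serdeConstants;

// Sketch: the conventional LazySimpleSerDe wiring used by initTable above.
static SerDeInfo lazySimpleSerDeInfo(String tableName) {
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName); // commonly set to the table name
    serdeInfo.setParameters(new HashMap<String, String>());
    // "1" is the numeric code for the default field delimiter (ctrl-A).
    serdeInfo.getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    serdeInfo.setSerializationLib(
        org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    return serdeInfo;
}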

Example 64 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

In the class ObjectStore, method getSerDeInfo.

@Override
public SerDeInfo getSerDeInfo(String serDeName) throws NoSuchObjectException, MetaException {
    boolean committed = false;
    try {
        openTransaction();
        MSerDeInfo mSerDeInfo = getMSerDeInfo(serDeName);
        if (mSerDeInfo == null) {
            throw new NoSuchObjectException("No SerDe named " + serDeName);
        }
        SerDeInfo serde = convertToSerDeInfo(mSerDeInfo);
        committed = commitTransaction();
        return serde;
    } finally {
        if (!committed) {
            rollbackTransaction();
        }
    }
}
Also used: MSerDeInfo (org.apache.hadoop.hive.metastore.model.MSerDeInfo), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
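The try/finally shape is the standard ObjectStore transaction idiom: committed is only set true when commitTransaction() is reached, so any exception in between forces the rollback in the finally block. A generic sketch of the pattern (the TxStore interface and inTransaction helper are ours, standing in for ObjectStore's own methods):

import java.util.function.Supplier;

// Hypothetical stand-in for ObjectStore's transaction methods, so the
// idiom compiles in isolation.
interface TxStore {
    void openTransaction();
    boolean commitTransaction();
    void rollbackTransaction();
}

static <T> T inTransaction(TxStore store, Supplier<T> body) {
    boolean committed = false;
    try {
        store.openTransaction();
        T result = body.get();
        committed = store.commitTransaction();
        return result;
    } finally {
        // Any exception before commitTransaction() leaves committed false.
        if (!committed) {
            store.rollbackTransaction();
        }
    }
}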

Example 65 with SerDeInfo

Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache.

In the class MetaStoreUtils, method hashStorageDescriptor.

/**
 * Produce a hash for the storage descriptor.
 * @param sd storage descriptor to hash
 * @param md message digest to use to generate the hash
 * @return the hash as a byte array
 */
public static synchronized byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) {
    // Note all maps and lists have to be absolutely sorted.  Otherwise we'll produce different
    // results for hashes based on the OS or JVM being used.
    md.reset();
    // In case cols are null
    if (sd.getCols() != null) {
        for (FieldSchema fs : sd.getCols()) {
            md.update(fs.getName().getBytes(ENCODING));
            md.update(fs.getType().getBytes(ENCODING));
            if (fs.getComment() != null) {
                md.update(fs.getComment().getBytes(ENCODING));
            }
        }
    }
    if (sd.getInputFormat() != null) {
        md.update(sd.getInputFormat().getBytes(ENCODING));
    }
    if (sd.getOutputFormat() != null) {
        md.update(sd.getOutputFormat().getBytes(ENCODING));
    }
    md.update(sd.isCompressed() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING));
    md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING));
    if (sd.getSerdeInfo() != null) {
        SerDeInfo serde = sd.getSerdeInfo();
        if (serde.getName() != null) {
            md.update(serde.getName().getBytes(ENCODING));
        }
        if (serde.getSerializationLib() != null) {
            md.update(serde.getSerializationLib().getBytes(ENCODING));
        }
        if (serde.getParameters() != null) {
            SortedMap<String, String> params = new TreeMap<>(serde.getParameters());
            for (Map.Entry<String, String> param : params.entrySet()) {
                md.update(param.getKey().getBytes(ENCODING));
                md.update(param.getValue().getBytes(ENCODING));
            }
        }
    }
    if (sd.getBucketCols() != null) {
        List<String> bucketCols = new ArrayList<>(sd.getBucketCols());
        for (String bucket : bucketCols) {
            md.update(bucket.getBytes(ENCODING));
        }
    }
    if (sd.getSortCols() != null) {
        SortedSet<Order> orders = new TreeSet<>(sd.getSortCols());
        for (Order order : orders) {
            md.update(order.getCol().getBytes(ENCODING));
            md.update(Integer.toString(order.getOrder()).getBytes(ENCODING));
        }
    }
    if (sd.getSkewedInfo() != null) {
        SkewedInfo skewed = sd.getSkewedInfo();
        if (skewed.getSkewedColNames() != null) {
            SortedSet<String> colnames = new TreeSet<>(skewed.getSkewedColNames());
            for (String colname : colnames) {
                md.update(colname.getBytes(ENCODING));
            }
        }
        if (skewed.getSkewedColValues() != null) {
            SortedSet<String> sortedOuterList = new TreeSet<>();
            for (List<String> innerList : skewed.getSkewedColValues()) {
                SortedSet<String> sortedInnerList = new TreeSet<>(innerList);
                sortedOuterList.add(org.apache.commons.lang.StringUtils.join(sortedInnerList, "."));
            }
            for (String colval : sortedOuterList) {
                md.update(colval.getBytes(ENCODING));
            }
        }
        if (skewed.getSkewedColValueLocationMaps() != null) {
            SortedMap<String, String> sortedMap = new TreeMap<>();
            for (Map.Entry<List<String>, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) {
                SortedSet<String> sortedKey = new TreeSet<>(smap.getKey());
                sortedMap.put(org.apache.commons.lang.StringUtils.join(sortedKey, "."), smap.getValue());
            }
            for (Map.Entry<String, String> e : sortedMap.entrySet()) {
                md.update(e.getKey().getBytes(ENCODING));
                md.update(e.getValue().getBytes(ENCODING));
            }
        }
        md.update(sd.isStoredAsSubDirectories() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING));
    }
    return md.digest();
}
Also used: Order (org.apache.hadoop.hive.metastore.api.Order), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), TreeMap (java.util.TreeMap), SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo), TreeSet (java.util.TreeSet), MachineList (org.apache.hadoop.util.MachineList), List (java.util.List), Map (java.util.Map), SortedMap (java.util.SortedMap), HashMap (java.util.HashMap)
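A hedged usage sketch: since every layout-relevant field is folded into the digest, two storage descriptors can be compared by hash, for example to decide whether they can share a metastore row (the method sameStorageLayout and the MD5 choice are ours; real callers supply their own MessageDigest):

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

// Sketch: equal layouts produce equal digests; differing digests imply
// differing layouts. MetaStoreUtils is the class shown above; its package
// varies across Hive versions.
static boolean sameStorageLayout(StorageDescriptor a, StorageDescriptor b)
        throws NoSuchAlgorithmException {
    MessageDigest md = MessageDigest.getInstance("MD5");
    byte[] hashA = MetaStoreUtils.hashStorageDescriptor(a, md);
    byte[] hashB = MetaStoreUtils.hashStorageDescriptor(b, md); // md.reset() runs internally
    return MessageDigest.isEqual(hashA, hashB);
}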

Aggregations

SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 152 usages
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 137 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 115 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 114 usages
ArrayList (java.util.ArrayList): 112 usages
Test (org.junit.Test): 105 usages
Partition (org.apache.hadoop.hive.metastore.api.Partition): 65 usages
HashMap (java.util.HashMap): 44 usages
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 31 usages
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 31 usages
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 31 usages
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 31 usages
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats): 30 usages
List (java.util.List): 26 usages
Order (org.apache.hadoop.hive.metastore.api.Order): 25 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 22 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 14 usages
MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest): 13 usages
LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 13 usages
NotificationEvent (org.apache.hadoop.hive.metastore.api.NotificationEvent): 12 usages