
Example 91 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From the class TestHCatUtil, method testGetTableSchemaWithPtnColsSerDeReportedFields.

/**
 * Hive represents tables in two ways:
 * <ul>
 *   <li>org.apache.hadoop.hive.metastore.api.Table - exactly what's stored in the metastore</li>
 *   <li>org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table</li>
 * </ul>
 * Here we check that SerDe-reported fields are included in the table schema.
 */
@Test
public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException {
    Map<String, String> parameters = Maps.newHashMap();
    parameters.put(serdeConstants.SERIALIZATION_CLASS, "org.apache.hadoop.hive.serde2.thrift.test.IntString");
    parameters.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol");
    SerDeInfo serDeInfo = new SerDeInfo(null, "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", parameters);
    // StorageDescriptor has an empty list of fields - SerDe will report them.
    StorageDescriptor sd = new StorageDescriptor(
        new ArrayList<FieldSchema>(), "location",
        "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat",
        false, -1, serDeInfo,
        new ArrayList<String>(), new ArrayList<Order>(), new HashMap<String, String>());
    org.apache.hadoop.hive.metastore.api.Table apiTable = new org.apache.hadoop.hive.metastore.api.Table(
        "test_tblname", "test_dbname", "test_owner", 0, 0, 0, sd,
        new ArrayList<FieldSchema>(), new HashMap<String, String>(),
        "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name());
    Table table = new Table(apiTable);
    List<HCatFieldSchema> expectedHCatSchema = Lists.newArrayList(
        new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null),
        new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null),
        new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null));
    Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) Table(org.apache.hadoop.hive.ql.metadata.Table) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) Test(org.junit.Test)
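
As the method name suggests, HCatUtil.getTableSchemaWithPtnCols appends any partition columns after the SerDe-reported data columns (the table above simply has none). A minimal sketch of consuming the computed schema, not part of the original test:

// Sketch only: iterate the schema returned by getTableSchemaWithPtnCols.
// For a partitioned table, the partition keys appear at the end of the list.
HCatSchema schema = HCatUtil.getTableSchemaWithPtnCols(table);
for (HCatFieldSchema field : schema.getFields()) {
    System.out.println(field.getName() + ": " + field.getTypeString());
}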

Example 92 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From the class AlterTableSetSerdePropsOperation, method doAlteration.

@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
    StorageDescriptor sd = getStorageDescriptor(table, partition);
    sd.getSerdeInfo().getParameters().putAll(desc.getProps());
}
Also used : StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
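
For context, here is a hedged sketch of what this alteration amounts to at the raw metastore-API level, assuming an IMetaStoreClient handle named client and using an illustrative database, table, and SerDe property (none of which come from the operation above):

// Sketch only: set a SerDe property directly through the metastore client.
// "test_dbname", "test_tblname" and the property value are placeholders.
org.apache.hadoop.hive.metastore.api.Table t = client.getTable("test_dbname", "test_tblname");
t.getSd().getSerdeInfo().getParameters().put(serdeConstants.FIELD_DELIM, ",");
client.alter_table("test_dbname", "test_tblname", t);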

Example 93 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From the class AlterTableNotClusteredOperation, method doAlteration.

@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
    StorageDescriptor sd = getStorageDescriptor(table, partition);
    sd.setBucketCols(new ArrayList<String>());
    // -1 buckets means to turn off bucketing
    sd.setNumBuckets(-1);
    sd.setSortCols(new ArrayList<Order>());
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
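
A short, hedged sketch of checking the resulting state from a client, again assuming an IMetaStoreClient handle named client; a numBuckets value of -1 together with an empty bucket-column list is how an unclustered table is represented in the metastore:

// Sketch only: verify that bucketing is switched off on the altered table.
org.apache.hadoop.hive.metastore.api.Table t = client.getTable("test_dbname", "test_tblname");
StorageDescriptor sd = t.getSd();
boolean clustered = sd.getNumBuckets() > 0 && sd.getBucketCols() != null && !sd.getBucketCols().isEmpty();
System.out.println("clustered: " + clustered);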

Example 94 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From the class AlterTableNotSortedOperation, method doAlteration.

@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
    StorageDescriptor sd = getStorageDescriptor(table, partition);
    sd.setSortCols(new ArrayList<Order>());
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
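
Examples 92 through 94 all resolve the StorageDescriptor through the same getStorageDescriptor(table, partition) helper, which targets the partition-level descriptor when a concrete partition is being altered and the table-level one otherwise. A rough sketch of that selection follows; the actual helper in Hive's alter-table operation hierarchy may differ in signature and detail:

// Sketch only: pick the partition SD when a partition is given, else the table SD.
private StorageDescriptor getStorageDescriptor(Table table, Partition partition) {
    return partition == null ? table.getTTable().getSd() : partition.getTPartition().getSd();
}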

Example 95 with StorageDescriptor

Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

From the class HCatOutputFormat, method setOutput.

/**
 * Set the information about the output to write for the job. This queries the metadata server
 * to find the StorageHandler to use for the table.  It throws an error if the
 * partition is already published.
 * @param conf the Configuration object
 * @param credentials the Credentials object
 * @param outputJobInfo the table output information for the job
 * @throws IOException the exception in communicating with the metadata server
 */
@SuppressWarnings("unchecked")
public static void setOutput(Configuration conf, Credentials credentials, OutputJobInfo outputJobInfo) throws IOException {
    IMetaStoreClient client = null;
    try {
        HiveConf hiveConf = HCatUtil.getHiveConf(conf);
        client = HCatUtil.getHiveMetastoreClient(hiveConf);
        Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());
        StorageDescriptor sd = table.getTTable().getSd();
        if (sd.isCompressed()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported");
        }
        if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported");
        }
        if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported");
        }
        if (AcidUtils.isTransactionalTable(table)) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a transactional table " + table.getFullyQualifiedName() + " from Pig/Mapreduce is not supported");
        }
        // Set up a common id hash for this job, so that when we create any temporary directory
        // later on, it is guaranteed to be unique.
        String idHash;
        DecimalFormat df = new DecimalFormat("#.####################");
        if ((idHash = conf.get(HCatConstants.HCAT_OUTPUT_ID_HASH)) == null) {
            idHash = String.valueOf(df.format(Math.random()));
        }
        conf.set(HCatConstants.HCAT_OUTPUT_ID_HASH, idHash);
        if (table.getTTable().getPartitionKeysSize() == 0) {
            if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) {
                // attempt made to save partition values in non-partitioned table - throw error.
                throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Partition values specified for non-partitioned table");
            }
            // non-partitioned table
            outputJobInfo.setPartitionValues(new HashMap<String, String>());
        } else {
            // partitioned table, we expect partition values
            // convert user specified map to have lower case key names
            Map<String, String> valueMap = new HashMap<String, String>();
            if (outputJobInfo.getPartitionValues() != null) {
                for (Map.Entry<String, String> entry : outputJobInfo.getPartitionValues().entrySet()) {
                    valueMap.put(entry.getKey().toLowerCase(), entry.getValue());
                }
            }
            if ((outputJobInfo.getPartitionValues() == null) || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) {
                // dynamic partition usecase - partition values were null, or not all were specified
                // need to figure out which keys are not specified.
                List<String> dynamicPartitioningKeys = new ArrayList<String>();
                for (FieldSchema fs : table.getPartitionKeys()) {
                    if (!valueMap.containsKey(fs.getName().toLowerCase())) {
                        dynamicPartitioningKeys.add(fs.getName().toLowerCase());
                    }
                }
                if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) {
                    // If this isn't equal, then bogus key values have been inserted, error out.
                    throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified");
                }
                outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys);
                String dynHash;
                if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) {
                    dynHash = String.valueOf(Math.random());
                }
                conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);
                // if custom pattern is set in case of dynamic partitioning, configure custom path
                String customPattern = conf.get(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN);
                if (customPattern != null) {
                    HCatFileUtil.setCustomPath(customPattern, outputJobInfo);
                }
            }
            outputJobInfo.setPartitionValues(valueMap);
        }
        // To get around hbase failure on single node, see BUG-4383
        conf.set("dfs.client.read.shortcircuit", "false");
        HCatSchema tableSchema = HCatUtil.extractSchema(table);
        StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());
        List<String> partitionCols = new ArrayList<String>();
        for (FieldSchema schema : table.getPartitionKeys()) {
            partitionCols.add(schema.getName());
        }
        HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);
        // Serialize the output info into the configuration
        outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
        outputJobInfo.setOutputSchema(tableSchema);
        harRequested = getHarRequested(hiveConf);
        outputJobInfo.setHarRequested(harRequested);
        maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
        outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
        HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo);
        Path tblPath = new Path(table.getTTable().getSd().getLocation());
        /* Set the umask in conf such that files/dirs get created with table-dir
         * permissions. The following three assumptions are made:
         * 1. Actual file/dir creation is done by the RecordWriter of the underlying
         *    output format. It is assumed that it uses default permissions during creation.
         * 2. Default permissions = FsPermission.getDefault() = 777.
         * 3. The umask is honored by the underlying filesystem.
         */
        FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask(tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission()));
        if (Security.getInstance().isSecurityEnabled()) {
            Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested);
        }
    } catch (Exception e) {
        if (e instanceof HCatException) {
            throw (HCatException) e;
        } else {
            throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
        }
    } finally {
        HCatUtil.closeHiveClientQuietly(client);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Table(org.apache.hadoop.hive.ql.metadata.Table) HashMap(java.util.HashMap) DecimalFormat(java.text.DecimalFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HCatException(org.apache.hive.hcatalog.common.HCatException) ArrayList(java.util.ArrayList) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) IOException(java.io.IOException) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Map(java.util.Map)
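
From the client side, the usual MapReduce wiring around setOutput looks roughly like the sketch below. The database and table names reuse the placeholders from earlier examples, the partition values are null because the target table is assumed to be non-partitioned, and the classes involved are Configuration, Job, OutputJobInfo, HCatSchema and HCatOutputFormat:

// Hedged usage sketch: configure a MapReduce job to write through HCatOutputFormat.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "hcat-write-example");
// null partition values: the target table is non-partitioned.
HCatOutputFormat.setOutput(job, OutputJobInfo.create("test_dbname", "test_tblname", null));
HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
HCatOutputFormat.setSchema(job, schema);
job.setOutputFormatClass(HCatOutputFormat.class);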

Aggregations

StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 284
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 163
Table (org.apache.hadoop.hive.metastore.api.Table): 159
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 155
ArrayList (java.util.ArrayList): 134
Test (org.junit.Test): 131
Partition (org.apache.hadoop.hive.metastore.api.Partition): 97
HashMap (java.util.HashMap): 61
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 38
List (java.util.List): 35
Order (org.apache.hadoop.hive.metastore.api.Order): 33
Path (org.apache.hadoop.fs.Path): 30
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 30
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 30
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 29
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 29
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats): 27
Database (org.apache.hadoop.hive.metastore.api.Database): 25
SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo): 23
IOException (java.io.IOException): 15