
Example 56 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

the class TestHiveMetaTool method setUp.

@Override
protected void setUp() throws Exception {
    super.setUp();
    try {
        HiveConf hiveConf = new HiveConf(HiveMetaTool.class);
        client = new HiveMetaStoreClient(hiveConf);
        // Set up an output stream to capture the redirected output
        os = new ByteArrayOutputStream();
        ps = new PrintStream(os);
        // create a dummy database and a couple of dummy tables
        Database db = new Database();
        db.setName(dbName);
        client.dropTable(dbName, tblName);
        client.dropTable(dbName, badTblName);
        dropDatabase(dbName);
        client.createDatabase(db);
        locationUri = db.getLocationUri();
        String avroUri = "hdfs://nn.example.com/warehouse/hive/ab.avsc";
        // A deliberately bad schema URI for the second table
        String badAvroUri = "hdfs:/hive";
        client.dropType(typeName);
        Type typ1 = new Type();
        typ1.setName(typeName);
        typ1.setFields(new ArrayList<FieldSchema>(2));
        typ1.getFields().add(new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, ""));
        typ1.getFields().add(new FieldSchema("income", serdeConstants.INT_TYPE_NAME, ""));
        client.createType(typ1);
        Table tbl = new Table();
        tbl.setDbName(dbName);
        tbl.setTableName(tblName);
        Map<String, String> parameters = new HashMap<>();
        parameters.put(AvroSerdeUtils.SCHEMA_URL, avroUri);
        tbl.setParameters(parameters);
        StorageDescriptor sd = new StorageDescriptor();
        tbl.setSd(sd);
        sd.setCols(typ1.getFields());
        sd.setCompressed(false);
        sd.setNumBuckets(1);
        sd.setParameters(new HashMap<String, String>());
        sd.getParameters().put("test_param_1", "Use this for comments etc");
        sd.setBucketCols(new ArrayList<String>(2));
        sd.getBucketCols().add("name");
        sd.setSerdeInfo(new SerDeInfo());
        sd.getSerdeInfo().setName(tbl.getTableName());
        sd.getSerdeInfo().setParameters(new HashMap<String, String>());
        sd.getSerdeInfo().getParameters().put(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
        sd.getParameters().put(AvroSerdeUtils.SCHEMA_URL, avroUri);
        sd.getSerdeInfo().setSerializationLib(org.apache.hadoop.hive.serde2.avro.AvroSerDe.class.getName());
        sd.setInputFormat(AvroContainerInputFormat.class.getName());
        sd.setOutputFormat(AvroContainerOutputFormat.class.getName());
        tbl.setPartitionKeys(new ArrayList<FieldSchema>());
        client.createTable(tbl);
        // create a table with bad avro uri
        tbl = new Table();
        tbl.setDbName(dbName);
        tbl.setTableName(badTblName);
        sd = new StorageDescriptor();
        tbl.setSd(sd);
        sd.setCols(typ1.getFields());
        sd.setCompressed(false);
        sd.setNumBuckets(1);
        sd.setParameters(new HashMap<String, String>());
        sd.getParameters().put("test_param_1", "Use this for comments etc");
        sd.setBucketCols(new ArrayList<String>(2));
        sd.getBucketCols().add("name");
        sd.setSerdeInfo(new SerDeInfo());
        sd.getSerdeInfo().setName(tbl.getTableName());
        sd.getSerdeInfo().setParameters(new HashMap<String, String>());
        sd.getSerdeInfo().getParameters().put(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
        sd.getParameters().put(AvroSerdeUtils.SCHEMA_URL, badAvroUri);
        sd.getSerdeInfo().setSerializationLib(org.apache.hadoop.hive.serde2.avro.AvroSerDe.class.getName());
        sd.setInputFormat(AvroContainerInputFormat.class.getName());
        sd.setOutputFormat(AvroContainerOutputFormat.class.getName());
        tbl.setPartitionKeys(new ArrayList<FieldSchema>());
        client.createTable(tbl);
        client.close();
    } catch (Exception e) {
        System.err.println("Unable to setup the hive metatool test");
        System.err.println(StringUtils.stringifyException(e));
        throw new Exception(e);
    }
}
Also used : PrintStream(java.io.PrintStream) Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ByteArrayOutputStream(java.io.ByteArrayOutputStream) AvroContainerInputFormat(org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) Type(org.apache.hadoop.hive.metastore.api.Type) AvroContainerOutputFormat(org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat) Database(org.apache.hadoop.hive.metastore.api.Database) HiveConf(org.apache.hadoop.hive.conf.HiveConf)
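
The two table definitions in setUp() differ only in the table name and the schema URL, so the duplicated StorageDescriptor construction could be factored into a small helper. A minimal sketch under that assumption: the createAvroTable helper is hypothetical (not part of the Hive test), reuses only API calls already shown above, and additionally assumes a java.util.List import.

private Table createAvroTable(String dbName, String tblName, List<FieldSchema> cols, String schemaUrl) {
    Table tbl = new Table();
    tbl.setDbName(dbName);
    tbl.setTableName(tblName);
    Map<String, String> parameters = new HashMap<>();
    parameters.put(AvroSerdeUtils.SCHEMA_URL, schemaUrl);
    tbl.setParameters(parameters);
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(cols);
    sd.setCompressed(false);
    sd.setNumBuckets(1);
    sd.setParameters(new HashMap<String, String>());
    sd.getParameters().put("test_param_1", "Use this for comments etc");
    sd.getParameters().put(AvroSerdeUtils.SCHEMA_URL, schemaUrl);
    sd.setBucketCols(new ArrayList<String>(2));
    sd.getBucketCols().add("name");
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tblName);
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    sd.getSerdeInfo().setSerializationLib(org.apache.hadoop.hive.serde2.avro.AvroSerDe.class.getName());
    sd.setInputFormat(AvroContainerInputFormat.class.getName());
    sd.setOutputFormat(AvroContainerOutputFormat.class.getName());
    tbl.setSd(sd);
    tbl.setPartitionKeys(new ArrayList<FieldSchema>());
    return tbl;
}
// Hypothetical usage inside setUp():
// client.createTable(createAvroTable(dbName, tblName, typ1.getFields(), avroUri));
// client.createTable(createAvroTable(dbName, badTblName, typ1.getFields(), badAvroUri));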

Example 57 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

the class TestCachedStore method createTestTbl.

private Table createTestTbl(String dbName, String tblName, String tblOwner, List<FieldSchema> cols, List<FieldSchema> ptnCols) {
    String serdeLocation = "file:/tmp";
    Map<String, String> serdeParams = new HashMap<>();
    Map<String, String> tblParams = new HashMap<>();
    SerDeInfo serdeInfo = new SerDeInfo("serde", "seriallib", new HashMap<>());
    StorageDescriptor sd = new StorageDescriptor(cols, serdeLocation, "input", "output", false, 0, serdeInfo, null, null, serdeParams);
    sd.setStoredAsSubDirectories(false);
    Table tbl = new Table(tblName, dbName, tblOwner, 0, 0, 0, sd, ptnCols, tblParams, null, null, TableType.MANAGED_TABLE.toString());
    return tbl;
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
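
The all-arguments Thrift constructors above pack every field into positional parameters, which is compact but easy to misread. For reference, a sketch of the same objects built with the generated setters (as in Example 56); the setter names are assumed from the Thrift-generated metastore API, and the argument-to-field mapping is inferred from this snippet:

StorageDescriptor sd = new StorageDescriptor();
sd.setCols(cols);
sd.setLocation(serdeLocation);
sd.setInputFormat("input");
sd.setOutputFormat("output");
sd.setCompressed(false);
sd.setNumBuckets(0);
sd.setSerdeInfo(serdeInfo);
// bucketCols and sortCols were passed as null in the constructor call above
sd.setParameters(serdeParams);
sd.setStoredAsSubDirectories(false);
Table tbl = new Table();
tbl.setTableName(tblName);
tbl.setDbName(dbName);
tbl.setOwner(tblOwner);
tbl.setCreateTime(0);
tbl.setLastAccessTime(0);
tbl.setRetention(0);
tbl.setSd(sd);
tbl.setPartitionKeys(ptnCols);
tbl.setParameters(tblParams);
tbl.setTableType(TableType.MANAGED_TABLE.toString());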

Example 58 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.

the class FileOutputCommitterContainer method constructPartition.

/**
 * Generate partition metadata object to be used to add to metadata.
 * @param context The job context.
 * @param jobInfo The OutputJobInfo.
 * @param partLocnRoot The table-equivalent location root of the partition
 *                       (temporary dir if dynamic partition, table dir if static)
 * @param dynPartPath The path of the dynamic partition being created
 * @param partKVs The key-value pairs that form the partition
 * @param outputSchema The output schema for the partition
 * @param params The parameters to store inside the partition
 * @param table The Table metadata object under which this Partition will reside
 * @param fs FileSystem object to operate on the underlying filesystem
 * @param grpName Group name that owns the table dir
 * @param perms FsPermission that's the default permission of the table dir.
 * @return Constructed Partition metadata object
 * @throws java.io.IOException
 */
private Partition constructPartition(JobContext context, OutputJobInfo jobInfo, String partLocnRoot, String dynPartPath, Map<String, String> partKVs, HCatSchema outputSchema, Map<String, String> params, Table table, FileSystem fs, String grpName, FsPermission perms) throws IOException {
    Partition partition = new Partition();
    partition.setDbName(table.getDbName());
    partition.setTableName(table.getTableName());
    partition.setSd(new StorageDescriptor(table.getTTable().getSd()));
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
        fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
    }
    partition.getSd().setCols(fields);
    partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));
    partition.setParameters(params);
    // Sets permissions and group name on partition dirs and files.
    Path partPath;
    if (customDynamicLocationUsed) {
        partPath = new Path(dynPartPath);
    } else if (!dynamicPartitioningUsed && Boolean.parseBoolean((String) table.getProperty("EXTERNAL")) && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
        // Now, we need to de-scratchify this location - i.e., get rid of any
        // _SCRATCH[\d].?[\d]+ from the location.
        String jobLocation = jobInfo.getLocation();
        String finalLocn = jobLocation.replaceAll(Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+", "");
        partPath = new Path(finalLocn);
    } else {
        partPath = new Path(partLocnRoot);
        int i = 0;
        for (FieldSchema partKey : table.getPartitionKeys()) {
            if (i++ != 0) {
                // Attempt to make the path in case it does not exist before we check
                fs.mkdirs(partPath);
                applyGroupAndPerms(fs, partPath, perms, grpName, false);
            }
            partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
        }
    }
    // Apply the group and permissions to the leaf partition and files.
    // No need to bother for HDFS, where permissions are handled by setting the UMask
    // Attempt to make the path in case it does not exist before we check
    fs.mkdirs(partPath);
    if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
        applyGroupAndPerms(fs, partPath, perms, grpName, true);
    }
    // Set the location in the StorageDescriptor
    if (dynamicPartitioningUsed) {
        String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo);
        if (harProcessor.isEnabled()) {
            harProcessor.exec(context, partition, partPath);
            partition.getSd().setLocation(harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
        } else {
            partition.getSd().setLocation(dynamicPartitionDestination);
        }
    } else {
        partition.getSd().setLocation(partPath.toString());
    }
    return partition;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
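
The de-scratchify branch above is easiest to follow on a concrete path. A minimal illustration, assuming Path.SEPARATOR is "/" and that SCRATCH_DIR_NAME expands to a marker such as "_SCRATCH" (the actual constant is defined elsewhere in FileOutputCommitterContainer):

// Hypothetical job location; "_SCRATCH" stands in for SCRATCH_DIR_NAME.
String jobLocation = "hdfs://nn.example.com/warehouse/db/tbl/_SCRATCH0.123456/dt=2018-01-01";
String finalLocn = jobLocation.replaceAll("/" + "_SCRATCH" + "\\d\\.?\\d+", "");
// finalLocn is now "hdfs://nn.example.com/warehouse/db/tbl/dt=2018-01-01"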

Example 59 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project incubator-gobblin by apache.

the class HiveMetaStoreUtils method getStorageDescriptor.

private static StorageDescriptor getStorageDescriptor(HiveRegistrationUnit unit) {
    State props = unit.getStorageProps();
    StorageDescriptor sd = new StorageDescriptor();
    sd.setParameters(getParameters(props));
    sd.setCols(getFieldSchemas(unit));
    if (unit.getLocation().isPresent()) {
        sd.setLocation(unit.getLocation().get());
    }
    if (unit.getInputFormat().isPresent()) {
        sd.setInputFormat(unit.getInputFormat().get());
    }
    if (unit.getOutputFormat().isPresent()) {
        sd.setOutputFormat(unit.getOutputFormat().get());
    }
    if (unit.getIsCompressed().isPresent()) {
        sd.setCompressed(unit.getIsCompressed().get());
    }
    if (unit.getNumBuckets().isPresent()) {
        sd.setNumBuckets(unit.getNumBuckets().get());
    }
    if (unit.getBucketColumns().isPresent()) {
        sd.setBucketCols(unit.getBucketColumns().get());
    }
    if (unit.getIsStoredAsSubDirs().isPresent()) {
        sd.setStoredAsSubDirectories(unit.getIsStoredAsSubDirs().get());
    }
    sd.setSerdeInfo(getSerDeInfo(unit));
    return sd;
}
Also used : State(org.apache.gobblin.configuration.State) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
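
Because every optional field is guarded by an isPresent() check, a unit that carries only columns and parameters yields a StorageDescriptor whose remaining Thrift fields are simply never set. A minimal sketch (not Gobblin code) of how that could be observed, assuming the standard Thrift-generated isSet* accessors:

StorageDescriptor sd = new StorageDescriptor();
sd.setParameters(new java.util.HashMap<String, String>());
sd.setCols(new java.util.ArrayList<org.apache.hadoop.hive.metastore.api.FieldSchema>());
boolean hasLocation = sd.isSetLocation();        // false: setLocation was never called
boolean hasInputFormat = sd.isSetInputFormat();  // false: setInputFormat was never called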

Example 60 with StorageDescriptor

use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project incubator-gobblin by apache.

the class HiveSchemaEvolutionTest method createEvolvedDestinationTable.

private Optional<Table> createEvolvedDestinationTable(String tableName, String dbName, String location, boolean withComment) {
    List<FieldSchema> cols = new ArrayList<>();
    // Existing columns that match avroToOrcSchemaEvolutionTest/source_schema_evolution_enabled.ddl
    cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldString", "string", withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldString" : ""));
    cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldInt", "int", withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldInt" : ""));
    cols.add(new FieldSchema("parentFieldRecord__nestedFieldString", "string", withComment ? "from flatten_source parentFieldRecord.nestedFieldString" : ""));
    // The following column is skipped (simulating un-evolved schema):
    // Column name   : parentFieldRecord__nestedFieldInt
    // Column type   : int
    // Column comment: from flatten_source parentFieldRecord.nestedFieldInt
    cols.add(new FieldSchema("parentFieldInt", "int", withComment ? "from flatten_source parentFieldInt" : ""));
    // Extra schema
    cols.add(new FieldSchema("parentFieldRecord__nestedFieldString2", "string", withComment ? "from flatten_source parentFieldRecord.nestedFieldString2" : ""));
    String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
    String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
    StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, outputFormat, false, 0, new SerDeInfo(), null, Lists.<Order>newArrayList(), null);
    Table table = new Table(tableName, dbName, "ketl_dev", 0, 0, 0, storageDescriptor, Lists.<FieldSchema>newArrayList(), Maps.<String, String>newHashMap(), "", "", "");
    return Optional.of(table);
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
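
The comment block in createEvolvedDestinationTable spells out the one column the test deliberately omits to simulate an un-evolved destination schema. For contrast, a sketch of the extra FieldSchema a fully evolved column list would also contain (hypothetical, reconstructed from that comment):

FieldSchema skipped = new FieldSchema("parentFieldRecord__nestedFieldInt", "int",
    "from flatten_source parentFieldRecord.nestedFieldInt");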

Aggregations

StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor) 284
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo) 163
Table (org.apache.hadoop.hive.metastore.api.Table) 159
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 155
ArrayList (java.util.ArrayList) 134
Test (org.junit.Test) 131
Partition (org.apache.hadoop.hive.metastore.api.Partition) 97
HashMap (java.util.HashMap) 61
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 38
List (java.util.List) 35
Order (org.apache.hadoop.hive.metastore.api.Order) 33
Path (org.apache.hadoop.fs.Path) 30
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics) 30
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) 30
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) 29
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) 29
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats) 27
Database (org.apache.hadoop.hive.metastore.api.Database) 25
SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo) 23
IOException (java.io.IOException) 15