Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
The class TestHiveMetaTool, method setUp.
@Override
protected void setUp() throws Exception {
  super.setUp();
  try {
    HiveConf hiveConf = new HiveConf(HiveMetaTool.class);
    client = new HiveMetaStoreClient(hiveConf);
    // Set up an output stream to redirect the tool's output to
    os = new ByteArrayOutputStream();
    ps = new PrintStream(os);
    // Create a dummy database and a couple of dummy tables
    Database db = new Database();
    db.setName(dbName);
    client.dropTable(dbName, tblName);
    client.dropTable(dbName, badTblName);
    dropDatabase(dbName);
    client.createDatabase(db);
    locationUri = db.getLocationUri();
    String avroUri = "hdfs://nn.example.com/warehouse/hive/ab.avsc";
    String badAvroUri = "hdfs:/hive";
    client.dropType(typeName);
    Type typ1 = new Type();
    typ1.setName(typeName);
    typ1.setFields(new ArrayList<FieldSchema>(2));
    typ1.getFields().add(new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, ""));
    typ1.getFields().add(new FieldSchema("income", serdeConstants.INT_TYPE_NAME, ""));
    client.createType(typ1);
    Table tbl = new Table();
    tbl.setDbName(dbName);
    tbl.setTableName(tblName);
    Map<String, String> parameters = new HashMap<>();
    parameters.put(AvroSerdeUtils.SCHEMA_URL, avroUri);
    tbl.setParameters(parameters);
    StorageDescriptor sd = new StorageDescriptor();
    tbl.setSd(sd);
    sd.setCols(typ1.getFields());
    sd.setCompressed(false);
    sd.setNumBuckets(1);
    sd.setParameters(new HashMap<String, String>());
    sd.getParameters().put("test_param_1", "Use this for comments etc");
    sd.setBucketCols(new ArrayList<String>(2));
    sd.getBucketCols().add("name");
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
    sd.getParameters().put(AvroSerdeUtils.SCHEMA_URL, avroUri);
    sd.getSerdeInfo().setSerializationLib(org.apache.hadoop.hive.serde2.avro.AvroSerDe.class.getName());
    sd.setInputFormat(AvroContainerInputFormat.class.getName());
    sd.setOutputFormat(AvroContainerOutputFormat.class.getName());
    tbl.setPartitionKeys(new ArrayList<FieldSchema>());
    client.createTable(tbl);
    // Create a second table with a bad Avro URI
    tbl = new Table();
    tbl.setDbName(dbName);
    tbl.setTableName(badTblName);
    sd = new StorageDescriptor();
    tbl.setSd(sd);
    sd.setCols(typ1.getFields());
    sd.setCompressed(false);
    sd.setNumBuckets(1);
    sd.setParameters(new HashMap<String, String>());
    sd.getParameters().put("test_param_1", "Use this for comments etc");
    sd.setBucketCols(new ArrayList<String>(2));
    sd.getBucketCols().add("name");
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
    sd.getParameters().put(AvroSerdeUtils.SCHEMA_URL, badAvroUri);
    sd.getSerdeInfo().setSerializationLib(org.apache.hadoop.hive.serde2.avro.AvroSerDe.class.getName());
    sd.setInputFormat(AvroContainerInputFormat.class.getName());
    sd.setOutputFormat(AvroContainerOutputFormat.class.getName());
    tbl.setPartitionKeys(new ArrayList<FieldSchema>());
    client.createTable(tbl);
    client.close();
  } catch (Exception e) {
    System.err.println("Unable to set up the hive metatool test");
    System.err.println(StringUtils.stringifyException(e));
    throw new Exception(e);
  }
}
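A test built on this fixture would typically redirect System.out to ps, run the tool, and assert on the captured output. The following is a minimal sketch of such a test method, assuming HiveMetaTool.main and its -listFSRoot option are available as in the metastore tool; the method name and body are illustrative, not the exact code of TestHiveMetaTool:

public void testListFSRoot() throws Exception {
  PrintStream originalOut = System.out;
  // Capture the tool's console output in the ByteArrayOutputStream created in setUp()
  System.setOut(ps);
  try {
    HiveMetaTool.main(new String[] { "-listFSRoot" });
    String output = os.toString();
    // The dummy database's location should appear among the listed FS roots
    assertTrue(output.contains(locationUri));
  } finally {
    // Restore stdout so later tests are unaffected
    System.setOut(originalOut);
  }
}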
Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
The class TestCachedStore, method createTestTbl.
private Table createTestTbl(String dbName, String tblName, String tblOwner,
    List<FieldSchema> cols, List<FieldSchema> ptnCols) {
  String serdeLocation = "file:/tmp";
  Map<String, String> serdeParams = new HashMap<>();
  Map<String, String> tblParams = new HashMap<>();
  SerDeInfo serdeInfo = new SerDeInfo("serde", "seriallib", new HashMap<>());
  StorageDescriptor sd = new StorageDescriptor(cols, serdeLocation, "input", "output", false, 0,
      serdeInfo, null, null, serdeParams);
  sd.setStoredAsSubDirectories(false);
  Table tbl = new Table(tblName, dbName, tblOwner, 0, 0, 0, sd, ptnCols, tblParams, null, null,
      TableType.MANAGED_TABLE.toString());
  return tbl;
}
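A hypothetical call to this helper might look as follows; the column, table, and owner names are illustrative only:

List<FieldSchema> cols = Arrays.asList(
    new FieldSchema("col1", "string", ""),
    new FieldSchema("col2", "int", ""));
List<FieldSchema> ptnCols = Collections.singletonList(new FieldSchema("part_col", "string", ""));
// Builds a managed table with two regular columns and one partition column,
// located under file:/tmp with placeholder input/output formats.
Table tbl = createTestTbl("test_db", "test_tbl", "test_owner", cols, ptnCols);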
Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project hive by apache.
The class FileOutputCommitterContainer, method constructPartition.
/**
 * Generate the partition metadata object to be added to the metastore.
 * @param context The job context.
 * @param jobInfo The OutputJobInfo.
 * @param partLocnRoot The table-equivalent location root of the partition
 *                     (temporary dir if dynamic partition, table dir if static)
 * @param dynPartPath The path of the dynamic partition which is created
 * @param partKVs The key-value pairs that form the partition
 * @param outputSchema The output schema for the partition
 * @param params The parameters to store inside the partition
 * @param table The Table metadata object under which this Partition will reside
 * @param fs FileSystem object to operate on the underlying filesystem
 * @param grpName Group name that owns the table dir
 * @param perms FsPermission that is the default permission of the table dir.
 * @return Constructed Partition metadata object
 * @throws java.io.IOException
 */
private Partition constructPartition(JobContext context, OutputJobInfo jobInfo,
    String partLocnRoot, String dynPartPath, Map<String, String> partKVs,
    HCatSchema outputSchema, Map<String, String> params, Table table, FileSystem fs,
    String grpName, FsPermission perms) throws IOException {
  Partition partition = new Partition();
  partition.setDbName(table.getDbName());
  partition.setTableName(table.getTableName());
  partition.setSd(new StorageDescriptor(table.getTTable().getSd()));
  List<FieldSchema> fields = new ArrayList<FieldSchema>();
  for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
    fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
  }
  partition.getSd().setCols(fields);
  partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));
  partition.setParameters(params);
  // Set permissions and group name on partition dirs and files.
  Path partPath;
  if (customDynamicLocationUsed) {
    partPath = new Path(dynPartPath);
  } else if (!dynamicPartitioningUsed
      && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
      && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
    // Now, we need to de-scratchify this location - i.e., get rid of any
    // _SCRATCH[\d].?[\d]+ from the location.
    String jobLocation = jobInfo.getLocation();
    String finalLocn = jobLocation.replaceAll(Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+", "");
    partPath = new Path(finalLocn);
  } else {
    partPath = new Path(partLocnRoot);
    int i = 0;
    for (FieldSchema partKey : table.getPartitionKeys()) {
      if (i++ != 0) {
        // Attempt to make the path in case it does not exist before we check
        fs.mkdirs(partPath);
        applyGroupAndPerms(fs, partPath, perms, grpName, false);
      }
      partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
    }
  }
  // Apply the group and permissions to the leaf partition and files.
  // No need to bother in the HDFS case, as permissions are taken care of by setting the UMask.
  // Attempt to make the path in case it does not exist before we check
  fs.mkdirs(partPath);
  if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
    applyGroupAndPerms(fs, partPath, perms, grpName, true);
  }
  // Set the location in the StorageDescriptor
  if (dynamicPartitioningUsed) {
    String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo);
    if (harProcessor.isEnabled()) {
      harProcessor.exec(context, partition, partPath);
      partition.getSd().setLocation(harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
    } else {
      partition.getSd().setLocation(dynamicPartitionDestination);
    }
  } else {
    partition.getSd().setLocation(partPath.toString());
  }
  return partition;
}
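The key detail above is that the partition starts from a copy of the table's StorageDescriptor (via the Thrift copy constructor) and then overrides only the columns and the location. A minimal sketch of that copy-then-override pattern, with a hypothetical location value:

// The Thrift-generated copy constructor deep-copies the table's descriptor,
// so mutating the partition's SD leaves the table's SD untouched.
StorageDescriptor tableSd = table.getTTable().getSd();
StorageDescriptor partSd = new StorageDescriptor(tableSd);
partSd.setLocation("/warehouse/mydb/mytable/dt=2021-01-01"); // illustrative partition location
// tableSd.getLocation() still points at the table directory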
Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project incubator-gobblin by apache.
The class HiveMetaStoreUtils, method getStorageDescriptor.
private static StorageDescriptor getStorageDescriptor(HiveRegistrationUnit unit) {
  State props = unit.getStorageProps();
  StorageDescriptor sd = new StorageDescriptor();
  sd.setParameters(getParameters(props));
  sd.setCols(getFieldSchemas(unit));
  if (unit.getLocation().isPresent()) {
    sd.setLocation(unit.getLocation().get());
  }
  if (unit.getInputFormat().isPresent()) {
    sd.setInputFormat(unit.getInputFormat().get());
  }
  if (unit.getOutputFormat().isPresent()) {
    sd.setOutputFormat(unit.getOutputFormat().get());
  }
  if (unit.getIsCompressed().isPresent()) {
    sd.setCompressed(unit.getIsCompressed().get());
  }
  if (unit.getNumBuckets().isPresent()) {
    sd.setNumBuckets(unit.getNumBuckets().get());
  }
  if (unit.getBucketColumns().isPresent()) {
    sd.setBucketCols(unit.getBucketColumns().get());
  }
  if (unit.getIsStoredAsSubDirs().isPresent()) {
    sd.setStoredAsSubDirectories(unit.getIsStoredAsSubDirs().get());
  }
  sd.setSerdeInfo(getSerDeInfo(unit));
  return sd;
}
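Each setter is wrapped in an Optional.isPresent() check so that only properties the registration unit actually provides end up in the Thrift struct; on a Thrift-generated class like StorageDescriptor, calling a setter also flips the field's "isSet" flag. A small illustrative sketch of that behavior, assuming the standard Thrift-generated isSet accessor:

StorageDescriptor sd = new StorageDescriptor();
boolean before = sd.isSetNumBuckets(); // false: the field was never assigned
sd.setNumBuckets(1);
boolean after = sd.isSetNumBuckets();  // true: the field is now explicitly set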
Use of org.apache.hadoop.hive.metastore.api.StorageDescriptor in project incubator-gobblin by apache.
The class HiveSchemaEvolutionTest, method createEvolvedDestinationTable.
private Optional<Table> createEvolvedDestinationTable(String tableName, String dbName, String location,
    boolean withComment) {
  List<FieldSchema> cols = new ArrayList<>();
  // Existing columns that match avroToOrcSchemaEvolutionTest/source_schema_evolution_enabled.ddl
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldString", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldString" : ""));
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldInt", "int",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldInt" : ""));
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldString", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldString" : ""));
  // The following column is skipped (simulating an un-evolved schema):
  //   Column name   : parentFieldRecord__nestedFieldInt
  //   Column type   : int
  //   Column comment: from flatten_source parentFieldRecord.nestedFieldInt
  cols.add(new FieldSchema("parentFieldInt", "int",
      withComment ? "from flatten_source parentFieldInt" : ""));
  // Extra (evolved) column
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldString2", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldString2" : ""));
  String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
  String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
  StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, outputFormat,
      false, 0, new SerDeInfo(), null, Lists.<Order>newArrayList(), null);
  Table table = new Table(tableName, dbName, "ketl_dev", 0, 0, 0, storageDescriptor,
      Lists.<FieldSchema>newArrayList(), Maps.<String, String>newHashMap(), "", "", "");
  return Optional.of(table);
}
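A hypothetical call to this helper; the table name, database name, and location below are made up for illustration:

// Builds an ORC-backed, unpartitioned destination table whose schema lags the source
// by one column (parentFieldRecord__nestedFieldInt) and carries one extra evolved column.
Optional<Table> destination =
    createEvolvedDestinationTable("evolved_orc_tbl", "test_db", "file:/tmp/evolved_orc_tbl", true);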